]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/in_tclass.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / netinet / in_tclass.c
index 02d9ccc86bf13b7bbcd999f613a4871825224756..d14323a4cac8f3cd1817efac6af1b0a4f125ec17 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2020 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <sys/domain.h>
 #include <sys/mbuf.h>
 #include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_cc.h>
+#include <netinet/in_tclass.h>
 
-extern char *proc_name_address(void *p);
+struct net_qos_dscp_map {
+       uint8_t        sotc_to_dscp[SO_TC_MAX];
+       uint8_t        netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];
+};
+
+struct dcsp_msc_map {
+       uint8_t                 dscp;
+       mbuf_svc_class_t        msc;
+};
+static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
+static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
+static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
+    struct dcsp_msc_map *);
+
+static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
+static lck_grp_t *tclass_lck_grp = NULL;        /* mutex group definition */
+static lck_attr_t *tclass_lck_attr = NULL;      /* mutex attributes */
+decl_lck_mtx_data(static, tclass_lock_data);
+static lck_mtx_t *tclass_lock = &tclass_lock_data;
+
+SYSCTL_NODE(_net, OID_AUTO, qos,
+    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");
+
+static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
+SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
+    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
+    0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");
+
+static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
+SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
+    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
+    0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");
+
+static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
+SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
+    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
+    0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");
+
+int net_qos_verbose = 0;
+SYSCTL_INT(_net_qos, OID_AUTO, verbose,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
+
+/*
+ * Fastlane QoS policy:
+ * By default, allow all apps to get traffic class to DSCP mapping
+ */
+SYSCTL_NODE(_net_qos, OID_AUTO, policy,
+    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
+
+int net_qos_policy_restricted = 0;
+SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");
+
+int net_qos_policy_restrict_avapps = 0;
+SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");
+
+int net_qos_policy_wifi_enabled = 0;
+SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");
+
+int net_qos_policy_capable_enabled = 0;
+SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
+
+/*
+ * Socket traffic class from network service type
+ */
+const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
+       SO_TC_BE,       /* NET_SERVICE_TYPE_BE */
+       SO_TC_BK_SYS,   /* NET_SERVICE_TYPE_BK */
+       SO_TC_VI,       /* NET_SERVICE_TYPE_SIG */
+       SO_TC_VI,       /* NET_SERVICE_TYPE_VI */
+       SO_TC_VO,       /* NET_SERVICE_TYPE_VO */
+       SO_TC_RV,       /* NET_SERVICE_TYPE_RV */
+       SO_TC_AV,       /* NET_SERVICE_TYPE_AV */
+       SO_TC_OAM,      /* NET_SERVICE_TYPE_OAM */
+       SO_TC_RD        /* NET_SERVICE_TYPE_RD */
+};
+
+/*
+ * DSCP mappings for QoS Fastlane as based on network service types
+ */
+static const
+struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
+       { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
+       { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
+       { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
+       { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
+       { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
+       { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
+       { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
+       { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
+       { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
+};
+
+
+/*
+ * DSCP mappings for QoS RFC4594 as based on network service types
+ */
+static const
+struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
+       { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
+       { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
+       { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
+       { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
+       { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
+       { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
+       { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
+       { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
+       { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
+};
+
+static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
+static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
+
+/*
+ * The size is one more than the max because DSCP values start at zero
+ */
+#define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
+
+/*
+ * The DSCP to UP mapping (via mbuf service class) for WiFi that follows is the
+ * mapping that is implemented at the 802.11 driver level when the mbuf service
+ * class is MBUF_SC_BE.
+ *
+ * This clashes with the recommended mapping documented by the IETF document
+ * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
+ * binary compatibility. Applications should use the network service type socket
+ * option to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
+ */
+static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
+       { .dscp = _DSCP_DF, .msc = MBUF_SC_BE },        /* RFC 2474 Standard */
+       { .dscp = 1, .msc = MBUF_SC_BE },               /*  */
+       { .dscp = 2, .msc = MBUF_SC_BE },               /*  */
+       { .dscp = 3, .msc = MBUF_SC_BE },               /*  */
+       { .dscp = 4, .msc = MBUF_SC_BE },               /*  */
+       { .dscp = 5, .msc = MBUF_SC_BE },               /*  */
+       { .dscp = 6, .msc = MBUF_SC_BE },               /*  */
+       { .dscp = 7, .msc = MBUF_SC_BE },               /*  */
+
+       { .dscp = _DSCP_CS1, .msc = MBUF_SC_BK },       /* RFC 3662 Low-Priority Data */
+       { .dscp = 9, .msc = MBUF_SC_BK },               /*  */
+       { .dscp = _DSCP_AF11, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
+       { .dscp = 11, .msc = MBUF_SC_BK },              /*  */
+       { .dscp = _DSCP_AF12, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
+       { .dscp = 13, .msc = MBUF_SC_BK },              /*  */
+       { .dscp = _DSCP_AF13, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
+       { .dscp = 15, .msc = MBUF_SC_BK },              /*  */
+
+       { .dscp = _DSCP_CS2, .msc = MBUF_SC_BK },       /* RFC 4594 OAM */
+       { .dscp = 17, .msc = MBUF_SC_BK },              /*  */
+       { .dscp = _DSCP_AF21, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
+       { .dscp = 19, .msc = MBUF_SC_BK },              /*  */
+       { .dscp = _DSCP_AF22, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
+       { .dscp = 21, .msc = MBUF_SC_BK },              /*  */
+       { .dscp = _DSCP_AF23, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
+       { .dscp = 23, .msc = MBUF_SC_BK },              /*  */
+
+       { .dscp = _DSCP_CS3, .msc = MBUF_SC_BE },       /* RFC 2474 Broadcast Video */
+       { .dscp = 25, .msc = MBUF_SC_BE },              /*  */
+       { .dscp = _DSCP_AF31, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
+       { .dscp = 27, .msc = MBUF_SC_BE },              /*  */
+       { .dscp = _DSCP_AF32, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
+       { .dscp = 29, .msc = MBUF_SC_BE },              /*  */
+       { .dscp = _DSCP_AF33, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
+       { .dscp = 31, .msc = MBUF_SC_BE },              /*  */
+
+       { .dscp = _DSCP_CS4, .msc = MBUF_SC_VI },       /* RFC 2474 Real-Time Interactive */
+       { .dscp = 33, .msc = MBUF_SC_VI },              /*  */
+       { .dscp = _DSCP_AF41, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
+       { .dscp = 35, .msc = MBUF_SC_VI },              /*  */
+       { .dscp = _DSCP_AF42, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
+       { .dscp = 37, .msc = MBUF_SC_VI },              /*  */
+       { .dscp = _DSCP_AF43, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
+       { .dscp = 39, .msc = MBUF_SC_VI },              /*  */
+
+       { .dscp = _DSCP_CS5, .msc = MBUF_SC_VI },       /* RFC 2474 Signaling */
+       { .dscp = 41, .msc = MBUF_SC_VI },              /*  */
+       { .dscp = 42, .msc = MBUF_SC_VI },              /*  */
+       { .dscp = 43, .msc = MBUF_SC_VI },              /*  */
+       { .dscp = _DSCP_VA, .msc = MBUF_SC_VI },        /* RFC 5865 VOICE-ADMIT */
+       { .dscp = 45, .msc = MBUF_SC_VI },              /*  */
+       { .dscp = _DSCP_EF, .msc = MBUF_SC_VI },        /* RFC 3246 Telephony */
+       { .dscp = 47, .msc = MBUF_SC_VI },              /*  */
+
+       { .dscp = _DSCP_CS6, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
+       { .dscp = 49, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 50, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 51, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 52, .msc = MBUF_SC_VO },              /* Wi-Fi WMM Certification: Sigma */
+       { .dscp = 53, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 54, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 55, .msc = MBUF_SC_VO },              /*  */
+
+       { .dscp = _DSCP_CS7, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
+       { .dscp = 57, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 58, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 59, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 60, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 61, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 62, .msc = MBUF_SC_VO },              /*  */
+       { .dscp = 63, .msc = MBUF_SC_VO },              /*  */
+
+       { .dscp = 255, .msc = MBUF_SC_UNSPEC }          /* invalid DSCP to mark last entry */
+};
+
+mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
+
+/*
+ * If there is no foreground activity on the interface for bg_switch_time
+ * seconds, the background connections can switch to foreground TCP
+ * congestion control.
+ */
+#define TCP_BG_SWITCH_TIME 2 /* seconds */
+
+#if (DEVELOPMENT || DEBUG)
 
 static int tfp_count = 0;
 
@@ -64,13 +284,13 @@ static TAILQ_HEAD(, tclass_for_proc) tfp_head =
     TAILQ_HEAD_INITIALIZER(tfp_head);
 
 struct tclass_for_proc {
-       TAILQ_ENTRY(tclass_for_proc)    tfp_link;
-       int     tfp_class;
-       pid_t   tfp_pid;
-       char    tfp_pname[MAXCOMLEN + 1];
+       TAILQ_ENTRY(tclass_for_proc)    tfp_link;
+       int             tfp_class;
+       pid_t           tfp_pid;
+       char            tfp_pname[(2 * MAXCOMLEN) + 1];
+       uint32_t        tfp_qos_mode;
 };
 
-static int dscp_code_from_mbuf_tclass(mbuf_traffic_class_t);
 static int get_pid_tclass(struct so_tcdbg *);
 static int get_pname_tclass(struct so_tcdbg *);
 static int set_pid_tclass(struct so_tcdbg *);
@@ -78,14 +298,7 @@ static int set_pname_tclass(struct so_tcdbg *);
 static int flush_pid_tclass(struct so_tcdbg *);
 static int purge_tclass_for_proc(void);
 static int flush_tclass_for_proc(void);
-static void so_set_lro(struct socket*, int);
-int get_tclass_for_curr_proc(int *);
-
-static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
-static lck_grp_t *tclass_lck_grp = NULL;       /* mutex group definition */
-static lck_attr_t *tclass_lck_attr = NULL;     /* mutex attributes */
-decl_lck_mtx_data(static, tclass_lock_data);
-static lck_mtx_t *tclass_lock = &tclass_lock_data;
+static void set_tclass_for_curr_proc(struct socket *);
 
 /*
  * Must be called with tclass_lock held
@@ -96,10 +309,11 @@ find_tfp_by_pid(pid_t pid)
        struct tclass_for_proc *tfp;
 
        TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
-               if (tfp->tfp_pid == pid)
+               if (tfp->tfp_pid == pid) {
                        break;
+               }
        }
-       return (tfp);
+       return tfp;
 }
 
 /*
@@ -112,36 +326,41 @@ find_tfp_by_pname(const char *pname)
 
        TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
                if (strncmp(pname, tfp->tfp_pname,
-                   sizeof (tfp->tfp_pname)) == 0)
+                   sizeof(tfp->tfp_pname)) == 0) {
                        break;
+               }
        }
-       return (tfp);
+       return tfp;
 }
 
-__private_extern__ int
-get_tclass_for_curr_proc(int *sotc)
+__private_extern__ void
+set_tclass_for_curr_proc(struct socket *so)
 {
        struct tclass_for_proc *tfp = NULL;
-       proc_t p = current_proc();      /* Not ref counted */
+       proc_t p = current_proc();      /* Not ref counted */
        pid_t pid = proc_pid(p);
-       char *pname = proc_name_address(p);
-
-       *sotc = -1;
+       char *pname = proc_best_name(p);
 
        lck_mtx_lock(tclass_lock);
 
        TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
                if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
                    strncmp(pname, tfp->tfp_pname,
-                   sizeof (tfp->tfp_pname)) == 0)) {
-                       *sotc = tfp->tfp_class;
+                   sizeof(tfp->tfp_pname)) == 0)) {
+                       if (tfp->tfp_class != SO_TC_UNSPEC) {
+                               so->so_traffic_class = (uint16_t)tfp->tfp_class;
+                       }
+
+                       if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
+                               so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
+                       } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
+                               so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
+                       }
                        break;
                }
        }
 
        lck_mtx_unlock(tclass_lock);
-
-       return ((tfp == NULL) ? 0 : 1);
 }
 
 /*
@@ -158,8 +377,9 @@ purge_tclass_for_proc(void)
        TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
                proc_t p;
 
-               if (tfp->tfp_pid == -1)
+               if (tfp->tfp_pid == -1) {
                        continue;
+               }
                if ((p = proc_find(tfp->tfp_pid)) == NULL) {
                        tfp_count--;
                        TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
@@ -172,7 +392,7 @@ purge_tclass_for_proc(void)
 
        lck_mtx_unlock(tclass_lock);
 
-       return (error);
+       return error;
 }
 
 /*
@@ -182,8 +402,9 @@ purge_tclass_for_proc(void)
 static void
 free_tclass_for_proc(struct tclass_for_proc *tfp)
 {
-       if (tfp == NULL)
+       if (tfp == NULL) {
                return;
+       }
        tfp_count--;
        TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
        _FREE(tfp, M_TEMP);
@@ -206,8 +427,7 @@ flush_tclass_for_proc(void)
 
        lck_mtx_unlock(tclass_lock);
 
-       return (error);
-
+       return error;
 }
 
 /*
@@ -218,12 +438,14 @@ alloc_tclass_for_proc(pid_t pid, const char *pname)
 {
        struct tclass_for_proc *tfp;
 
-       if (pid == -1 && pname == NULL)
-               return (NULL);
+       if (pid == -1 && pname == NULL) {
+               return NULL;
+       }
 
-       tfp = _MALLOC(sizeof (struct tclass_for_proc), M_TEMP, M_NOWAIT|M_ZERO);
-       if (tfp == NULL)
-               return (NULL);
+       tfp = _MALLOC(sizeof(struct tclass_for_proc), M_TEMP, M_NOWAIT | M_ZERO);
+       if (tfp == NULL) {
+               return NULL;
+       }
 
        tfp->tfp_pid = pid;
        /*
@@ -233,29 +455,27 @@ alloc_tclass_for_proc(pid_t pid, const char *pname)
        if (pid != -1) {
                TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
        } else {
-               strlcpy(tfp->tfp_pname, pname, sizeof (tfp->tfp_pname));
+               strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
                TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
        }
 
        tfp_count++;
 
-       return (tfp);
+       return tfp;
 }
 
 /*
- * -1 for tclass means to remove the entry
+ * SO_TC_UNSPEC for tclass means to remove the entry
  */
 int
 set_pid_tclass(struct so_tcdbg *so_tcdbg)
 {
        int error = EINVAL;
        proc_t p = NULL;
-       struct filedesc *fdp;
-       struct fileproc *fp;
        struct tclass_for_proc *tfp;
-       int i;
        pid_t pid = so_tcdbg->so_tcdbg_pid;
        int tclass = so_tcdbg->so_tcdbg_tclass;
+       int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
 
        p = proc_find(pid);
        if (p == NULL) {
@@ -276,38 +496,41 @@ set_pid_tclass(struct so_tcdbg *so_tcdbg)
                }
        }
        tfp->tfp_class = tclass;
+       tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
 
        lck_mtx_unlock(tclass_lock);
 
        if (tfp != NULL) {
-               proc_fdlock(p);
+               struct fileproc *fp;
 
-               fdp = p->p_fd;
-               for (i = 0; i < fdp->fd_nfiles; i++) {
+               fdt_foreach(fp, p) {
                        struct socket *so;
 
-                       fp = fdp->fd_ofiles[i];
-                       if (fp == NULL ||
-                           (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
-                           fp->f_fglob->fg_type != DTYPE_SOCKET)
+                       if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
                                continue;
+                       }
 
-                       so = (struct socket *)fp->f_fglob->fg_data;
-                       if (so->so_proto->pr_domain->dom_family != AF_INET &&
-                           so->so_proto->pr_domain->dom_family != AF_INET6)
+                       so = (struct socket *)fp->fp_glob->fg_data;
+                       if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
                                continue;
+                       }
+
                        socket_lock(so, 1);
-                       if (tclass != -1) {
-                               error = so_set_traffic_class(so, tclass);
-                               if (error != 0) {
-                                       printf("%s: so_set_traffic_class"
-                                           "(so=%p, fd=%d, tclass=%d) "
-                                           "failed %d\n", __func__,
-                                           so, i, tclass, error);
-                                       error = 0;
-                               }
+                       if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
+                               so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
+                       } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
+                               so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
                        }
                        socket_unlock(so, 1);
+
+                       if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
+                               error = sock_setsockopt(so, SOL_SOCKET,
+                                   SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
+                       }
+                       if (tclass != SO_TC_UNSPEC) {
+                               error = sock_setsockopt(so, SOL_SOCKET,
+                                   SO_TRAFFIC_CLASS, &tclass, sizeof(int));
+                       }
                }
 
                proc_fdunlock(p);
@@ -315,10 +538,11 @@ set_pid_tclass(struct so_tcdbg *so_tcdbg)
 
        error = 0;
 done:
-       if (p != NULL)
+       if (p != NULL) {
                proc_rele(p);
+       }
 
-       return (error);
+       return error;
 }
 
 int
@@ -339,13 +563,14 @@ set_pname_tclass(struct so_tcdbg *so_tcdbg)
                }
        }
        tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
+       tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
 
        lck_mtx_unlock(tclass_lock);
 
        error = 0;
 done:
 
-       return (error);
+       return error;
 }
 
 static int
@@ -353,47 +578,40 @@ flush_pid_tclass(struct so_tcdbg *so_tcdbg)
 {
        pid_t pid = so_tcdbg->so_tcdbg_pid;
        int tclass = so_tcdbg->so_tcdbg_tclass;
-       struct filedesc *fdp;
-       int error = EINVAL;
+       struct fileproc *fp;
        proc_t p;
-       int i;
+       int error;
 
        p = proc_find(pid);
        if (p == PROC_NULL) {
                printf("%s proc_find(%d) failed\n", __func__, pid);
-               goto done;
+               return EINVAL;
        }
 
        proc_fdlock(p);
-       fdp = p->p_fd;
-       for (i = 0; i < fdp->fd_nfiles; i++) {
+
+       fdt_foreach(fp, p) {
                struct socket *so;
-               struct fileproc *fp;
 
-               fp = fdp->fd_ofiles[i];
-               if (fp == NULL ||
-                   (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
-                   fp->f_fglob->fg_type != DTYPE_SOCKET)
+               if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
                        continue;
+               }
 
-               so = (struct socket *)fp->f_fglob->fg_data;
+               so = (struct socket *)fp->fp_glob->fg_data;
                error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
-                   sizeof (tclass));
+                   sizeof(tclass));
                if (error != 0) {
-                       printf("%s: setsockopt(SO_FLUSH) (so=%p, fd=%d, "
-                           "tclass=%d) failed %d\n", __func__, so, i, tclass,
+                       printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
+                           "tclass=%d) failed %d\n", __func__,
+                           (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass,
                            error);
-                       error = 0;
                }
        }
-       proc_fdunlock(p);
 
-       error = 0;
-done:
-       if (p != PROC_NULL)
-               proc_rele(p);
+       proc_fdunlock(p);
 
-       return (error);
+       proc_rele(p);
+       return 0;
 }
 
 int
@@ -404,8 +622,7 @@ get_pid_tclass(struct so_tcdbg *so_tcdbg)
        struct tclass_for_proc *tfp;
        pid_t pid = so_tcdbg->so_tcdbg_pid;
 
-       so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */
-       so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */
+       so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
 
        p = proc_find(pid);
        if (p == NULL) {
@@ -419,14 +636,16 @@ get_pid_tclass(struct so_tcdbg *so_tcdbg)
        tfp = find_tfp_by_pid(pid);
        if (tfp != NULL) {
                so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
+               so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
                error = 0;
        }
        lck_mtx_unlock(tclass_lock);
 done:
-       if (p != NULL)
+       if (p != NULL) {
                proc_rele(p);
+       }
 
-       return (error);
+       return error;
 }
 
 int
@@ -435,8 +654,7 @@ get_pname_tclass(struct so_tcdbg *so_tcdbg)
        int error = EINVAL;
        struct tclass_for_proc *tfp;
 
-       so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */
-       so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */
+       so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
 
        /* Need a tfp */
        lck_mtx_lock(tclass_lock);
@@ -444,11 +662,12 @@ get_pname_tclass(struct so_tcdbg *so_tcdbg)
        tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
        if (tfp != NULL) {
                so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
+               so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
                error = 0;
        }
        lck_mtx_unlock(tclass_lock);
 
-       return (error);
+       return error;
 }
 
 static int
@@ -460,10 +679,11 @@ delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
 
        lck_mtx_lock(tclass_lock);
 
-       if (pid != -1)
+       if (pid != -1) {
                tfp = find_tfp_by_pid(pid);
-       else
+       } else {
                tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
+       }
 
        if (tfp != NULL) {
                free_tclass_for_proc(tfp);
@@ -472,7 +692,7 @@ delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
 
        lck_mtx_unlock(tclass_lock);
 
-       return (error);
+       return error;
 }
 
 /*
@@ -483,44 +703,45 @@ so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
 {
        int error = 0;
 
-       if ((so->so_state & SS_PRIV) == 0)
-               return (EPERM);
+       if ((so->so_state & SS_PRIV) == 0) {
+               return EPERM;
+       }
 
        socket_unlock(so, 0);
 
        switch (so_tcdbg->so_tcdbg_cmd) {
-               case SO_TCDBG_PID:
-                       error = set_pid_tclass(so_tcdbg);
-                       break;
+       case SO_TCDBG_PID:
+               error = set_pid_tclass(so_tcdbg);
+               break;
 
-               case SO_TCDBG_PNAME:
-                       error = set_pname_tclass(so_tcdbg);
-                       break;
+       case SO_TCDBG_PNAME:
+               error = set_pname_tclass(so_tcdbg);
+               break;
 
-               case SO_TCDBG_PURGE:
-                       error = purge_tclass_for_proc();
-                       break;
+       case SO_TCDBG_PURGE:
+               error = purge_tclass_for_proc();
+               break;
 
-               case SO_TCDBG_FLUSH:
-                       error = flush_tclass_for_proc();
-                       break;
+       case SO_TCDBG_FLUSH:
+               error = flush_tclass_for_proc();
+               break;
 
-               case SO_TCDBG_DELETE:
-                       error = delete_tclass_for_pid_pname(so_tcdbg);
-                       break;
+       case SO_TCDBG_DELETE:
+               error = delete_tclass_for_pid_pname(so_tcdbg);
+               break;
 
-               case SO_TCDBG_TCFLUSH_PID:
-                       error = flush_pid_tclass(so_tcdbg);
-                       break;
+       case SO_TCDBG_TCFLUSH_PID:
+               error = flush_pid_tclass(so_tcdbg);
+               break;
 
-               default:
-                       error = EINVAL;
-                       break;
+       default:
+               error = EINVAL;
+               break;
        }
 
        socket_lock(so, 0);
 
-       return (error);
+       return error;
 }
 
 /*
@@ -534,77 +755,80 @@ sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
        void *buf = NULL;
        size_t len = sopt->sopt_valsize;
 
-       error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg),
-           sizeof (struct so_tcdbg));
-       if (error != 0)
-               return (error);
+       error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
+           sizeof(struct so_tcdbg));
+       if (error != 0) {
+               return error;
+       }
 
        sopt->sopt_valsize = len;
 
        socket_unlock(so, 0);
 
        switch (so_tcdbg.so_tcdbg_cmd) {
-               case SO_TCDBG_PID:
-                       error = get_pid_tclass(&so_tcdbg);
-                       break;
+       case SO_TCDBG_PID:
+               error = get_pid_tclass(&so_tcdbg);
+               break;
 
-               case SO_TCDBG_PNAME:
-                       error = get_pname_tclass(&so_tcdbg);
-                       break;
+       case SO_TCDBG_PNAME:
+               error = get_pname_tclass(&so_tcdbg);
+               break;
 
-               case SO_TCDBG_COUNT:
-                       lck_mtx_lock(tclass_lock);
-                       so_tcdbg.so_tcdbg_count = tfp_count;
-                       lck_mtx_unlock(tclass_lock);
-                       break;
+       case SO_TCDBG_COUNT:
+               lck_mtx_lock(tclass_lock);
+               so_tcdbg.so_tcdbg_count = tfp_count;
+               lck_mtx_unlock(tclass_lock);
+               break;
 
-               case SO_TCDBG_LIST: {
-                       struct tclass_for_proc *tfp;
-                       int n, alloc_count;
-                       struct so_tcdbg *ptr;
+       case SO_TCDBG_LIST: {
+               struct tclass_for_proc *tfp;
+               int n, alloc_count;
+               struct so_tcdbg *ptr;
 
-                       lck_mtx_lock(tclass_lock);
-                       if ((alloc_count = tfp_count) == 0) {
-                               lck_mtx_unlock(tclass_lock);
-                               error = EINVAL;
-                               break;
-                       }
-                       len = alloc_count * sizeof (struct so_tcdbg);
+               lck_mtx_lock(tclass_lock);
+               if ((alloc_count = tfp_count) == 0) {
                        lck_mtx_unlock(tclass_lock);
+                       error = EINVAL;
+                       break;
+               }
+               len = alloc_count * sizeof(struct so_tcdbg);
+               lck_mtx_unlock(tclass_lock);
+
+               buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
+               if (buf == NULL) {
+                       error = ENOBUFS;
+                       break;
+               }
 
-                       buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
-                       if (buf == NULL) {
-                               error = ENOBUFS;
+               lck_mtx_lock(tclass_lock);
+               n = 0;
+               ptr = (struct so_tcdbg *)buf;
+               TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
+                       if (++n > alloc_count) {
                                break;
                        }
-
-                       lck_mtx_lock(tclass_lock);
-                       n = 0;
-                       ptr = (struct so_tcdbg *)buf;
-                       TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
-                               if (++n > alloc_count)
-                                       break;
-                               if (tfp->tfp_pid != -1) {
-                                       ptr->so_tcdbg_cmd = SO_TCDBG_PID;
-                                       ptr->so_tcdbg_pid = tfp->tfp_pid;
-                               } else {
-                                       ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
-                                       ptr->so_tcdbg_pid = -1;
-                                       strlcpy(ptr->so_tcdbg_pname,
-                                           tfp->tfp_pname,
-                                           sizeof (ptr->so_tcdbg_pname));
-                               }
-                               ptr->so_tcdbg_tclass = tfp->tfp_class;
-                               ptr++;
+                       if (tfp->tfp_pid != -1) {
+                               ptr->so_tcdbg_cmd = SO_TCDBG_PID;
+                               ptr->so_tcdbg_pid = tfp->tfp_pid;
+                       } else {
+                               ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
+                               ptr->so_tcdbg_pid = -1;
+                               strlcpy(ptr->so_tcdbg_pname,
+                                   tfp->tfp_pname,
+                                   sizeof(ptr->so_tcdbg_pname));
                        }
+                       ptr->so_tcdbg_tclass = tfp->tfp_class;
+                       ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
+                       ptr++;
+               }
 
-                       lck_mtx_unlock(tclass_lock);
-                       }
-                       break;
+               lck_mtx_unlock(tclass_lock);
+       }
+       break;
 
-               default:
-                       error = EINVAL;
-                       break;
+       default:
+               error = EINVAL;
+               break;
        }
 
        socket_lock(so, 0);
@@ -612,15 +836,56 @@ sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
        if (error == 0) {
                if (buf == NULL) {
                        error = sooptcopyout(sopt, &so_tcdbg,
-                           sizeof (struct so_tcdbg));
+                           sizeof(struct so_tcdbg));
                } else {
                        error = sooptcopyout(sopt, buf, len);
                        _FREE(buf, M_TEMP);
                }
        }
-       return (error);
+       return error;
 }
 
+#endif /* (DEVELOPMENT || DEBUG) */
+
+int
+so_get_netsvc_marking_level(struct socket *so)
+{
+       int marking_level = NETSVC_MRKNG_UNKNOWN;
+       struct ifnet *ifp = NULL;
+
+       switch (SOCK_DOM(so)) {
+       case PF_INET: {
+               struct inpcb *inp = sotoinpcb(so);
+
+               if (inp != NULL) {
+                       ifp = inp->inp_last_outifp;
+               }
+               break;
+       }
+       case PF_INET6: {
+               struct in6pcb *in6p = sotoin6pcb(so);
+
+               if (in6p != NULL) {
+                       ifp = in6p->in6p_last_outifp;
+               }
+               break;
+       }
+       default:
+               break;
+       }
+       if (ifp != NULL) {
+               if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
+                       if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
+                               marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
+                       } else {
+                               marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
+                       }
+               } else {
+                       marking_level = NETSVC_MRKNG_LVL_L2;
+               }
+       }
+       return marking_level;
+}
 
 __private_extern__ int
 so_set_traffic_class(struct socket *so, int optval)
@@ -641,8 +906,9 @@ so_set_traffic_class(struct socket *so, int optval)
                        optval = SO_TC_VO;
                        break;
                default:
-                       if (!SO_VALID_TC(optval))
+                       if (!SO_VALID_TC(optval)) {
                                error = EINVAL;
+                       }
                        break;
                }
 
@@ -650,135 +916,225 @@ so_set_traffic_class(struct socket *so, int optval)
                        int oldval = so->so_traffic_class;
 
                        VERIFY(SO_VALID_TC(optval));
-                       so->so_traffic_class = optval;
+                       so->so_traffic_class = (uint16_t)optval;
 
-                       if ((INP_SOCKAF(so) == AF_INET ||
-                           INP_SOCKAF(so) == AF_INET6) &&
-                           INP_SOCKTYPE(so) == SOCK_STREAM) {
+                       if ((SOCK_DOM(so) == PF_INET ||
+                           SOCK_DOM(so) == PF_INET6) &&
+                           SOCK_TYPE(so) == SOCK_STREAM) {
                                set_tcp_stream_priority(so);
-
-                               /* Set/unset use of Large Receive Offload */
-                               so_set_lro(so, optval);
                        }
 
-                       if ((INP_SOCKAF(so) == AF_INET ||
-                           INP_SOCKAF(so) == AF_INET6) &&
+                       if ((SOCK_DOM(so) == PF_INET ||
+                           SOCK_DOM(so) == PF_INET6) &&
                            optval != oldval && (optval == SO_TC_BK_SYS ||
                            oldval == SO_TC_BK_SYS)) {
                                /*
                                 * If the app switches from BK_SYS to something
                                 * else, resume the socket if it was suspended.
                                 */
-                               if (oldval == SO_TC_BK_SYS)
+                               if (oldval == SO_TC_BK_SYS) {
                                        inp_reset_fc_state(so->so_pcb);
+                               }
 
-                               SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] "
-                                   "opportunistic %s\n", so->last_pid,
-                                   so, INP_SOCKAF(so), INP_SOCKTYPE(so),
-                                   (optval == SO_TC_BK_SYS) ? "ON" : "OFF"));
+                               SOTHROTTLELOG("throttle[%d]: so 0x%llx "
+                                   "[%d,%d] opportunistic %s\n", so->last_pid,
+                                   (uint64_t)VM_KERNEL_ADDRPERM(so),
+                                   SOCK_DOM(so), SOCK_TYPE(so),
+                                   (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
                        }
                }
        }
-       return (error);
+       return error;
+}
+
+__private_extern__ int
+so_set_net_service_type(struct socket *so, int netsvctype)
+{
+       int sotc;
+       int error;
+
+       if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
+               return EINVAL;
+       }
+
+       sotc = sotc_by_netservicetype[netsvctype];
+       error = so_set_traffic_class(so, sotc);
+       if (error != 0) {
+               return error;
+       }
+       so->so_netsvctype = (int8_t)netsvctype;
+       so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
+
+       return 0;
 }
 
 __private_extern__ void
 so_set_default_traffic_class(struct socket *so)
 {
-       int sotc = -1;
+       so->so_traffic_class = SO_TC_BE;
 
-       if (tfp_count > 0 &&
-           (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) {
-               get_tclass_for_curr_proc(&sotc);
+       if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
+               if (net_qos_policy_restricted == 0) {
+                       so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
+               }
+#if (DEVELOPMENT || DEBUG)
+               if (tfp_count > 0) {
+                       set_tclass_for_curr_proc(so);
+               }
+#endif /* (DEVELOPMENT || DEBUG) */
        }
-
-       so->so_traffic_class = (sotc != -1) ? sotc : SO_TC_BE;
 }
 
 __private_extern__ int
 so_set_opportunistic(struct socket *so, int optval)
 {
-       return (so_set_traffic_class(so, (optval == 0) ?
-           SO_TC_BE : SO_TC_BK_SYS));
+       return so_set_traffic_class(so, (optval == 0) ?
+                  SO_TC_BE : SO_TC_BK_SYS);
 }
 
 __private_extern__ int
 so_get_opportunistic(struct socket *so)
 {
-       return (so->so_traffic_class == SO_TC_BK_SYS);
+       return so->so_traffic_class == SO_TC_BK_SYS;
 }
 
-__private_extern__ mbuf_svc_class_t
-mbuf_service_class_from_control(struct mbuf *control)
+__private_extern__ int
+so_tc_from_control(struct mbuf *control, int *out_netsvctype)
 {
        struct cmsghdr *cm;
-       mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
+       int sotc = SO_TC_UNSPEC;
 
-       for (cm = M_FIRST_CMSGHDR(control); cm != NULL;
-           cm = M_NXT_CMSGHDR(control, cm)) {
-               int tc;
+       *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
 
-               if (cm->cmsg_len < sizeof (struct cmsghdr))
-                       break;
+       for (cm = M_FIRST_CMSGHDR(control);
+           is_cmsg_valid(control, cm);
+           cm = M_NXT_CMSGHDR(control, cm)) {
+               int val;
 
                if (cm->cmsg_level != SOL_SOCKET ||
-                   cm->cmsg_type != SO_TRAFFIC_CLASS)
+                   cm->cmsg_len != CMSG_LEN(sizeof(int))) {
                        continue;
-               if (cm->cmsg_len != CMSG_LEN(sizeof (int)))
-                       continue;
-
-               tc = *(int *)(void *)CMSG_DATA(cm);
-               msc = so_tc2msc(tc);
-               if (MBUF_VALID_SC(msc))
+               }
+               val = *(int *)(void *)CMSG_DATA(cm);
+               /*
+                * The first valid option wins
+                */
+               switch (cm->cmsg_type) {
+               case SO_TRAFFIC_CLASS:
+                       if (SO_VALID_TC(val)) {
+                               sotc = val;
+                               return sotc;
+                               /* NOT REACHED */
+                       } else if (val < SO_TC_NET_SERVICE_OFFSET) {
+                               break;
+                       }
+                       /*
+                        * Handle the case where SO_NET_SERVICE_TYPE values are
+                        * passed using SO_TRAFFIC_CLASS
+                        */
+                       val = val - SO_TC_NET_SERVICE_OFFSET;
+                       OS_FALLTHROUGH;
+               case SO_NET_SERVICE_TYPE:
+                       if (!IS_VALID_NET_SERVICE_TYPE(val)) {
+                               break;
+                       }
+                       *out_netsvctype = val;
+                       sotc = sotc_by_netservicetype[val];
+                       return sotc;
+               /* NOT REACHED */
+               default:
                        break;
+               }
        }
 
-       return (msc);
+       return sotc;
 }
 
-__private_extern__  int
-dscp_code_from_mbuf_tclass(mbuf_traffic_class_t mtc)
+__private_extern__ int
+so_tos_from_control(struct mbuf *control)
 {
-       int dscp_code;
+       struct cmsghdr *cm;
+       int tos = IPTOS_UNSPEC;
 
-       switch (mtc) {
-               default:
-               case MBUF_TC_BE:
-                       dscp_code = 0;
-                       break;
-               case MBUF_TC_BK:
-                       dscp_code = 0x08;
-                       break;
-               case MBUF_TC_VI:
-                       dscp_code = 0x20;
-                       break;
-               case MBUF_TC_VO:
-                       dscp_code = 0x30;
+       for (cm = M_FIRST_CMSGHDR(control);
+           is_cmsg_valid(control, cm);
+           cm = M_NXT_CMSGHDR(control, cm)) {
+               if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
+                       continue;
+               }
+
+               if ((cm->cmsg_level == IPPROTO_IP &&
+                   cm->cmsg_type == IP_TOS) ||
+                   (cm->cmsg_level == IPPROTO_IPV6 &&
+                   cm->cmsg_type == IPV6_TCLASS)) {
+                       tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
+                       /* The first valid option wins */
                        break;
+               }
        }
 
-       return (dscp_code);
+       return tos;
 }
 
 __private_extern__ void
 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
 {
-       uint32_t sotc = m_get_traffic_class(m);
+       uint32_t mtc = m_get_traffic_class(m);
 
-       if (sotc >= SO_TC_STATS_MAX)
-               sotc = SO_TC_BE;
+       if (mtc >= SO_TC_STATS_MAX) {
+               mtc = MBUF_TC_BE;
+       }
 
-       so->so_tc_stats[sotc].rxpackets += 1;
-       so->so_tc_stats[sotc].rxbytes +=
+       so->so_tc_stats[mtc].rxpackets += 1;
+       so->so_tc_stats[mtc].rxbytes +=
            ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
 }
 
+__private_extern__ void
+so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
+    uint32_t mtc)
+{
+       if (mtc >= SO_TC_STATS_MAX) {
+               mtc = MBUF_TC_BE;
+       }
+
+       so->so_tc_stats[mtc].rxpackets += pkts;
+       so->so_tc_stats[mtc].rxbytes += bytes;
+}
+
+static inline int
+so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
+{
+       uint32_t uptime = (uint32_t)net_uptime();
+       return soissrcbesteffort(so) &&
+              net_io_policy_throttle_best_effort == 1 &&
+              ifp->if_rt_sendts > 0 &&
+              (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
+}
+
 __private_extern__ void
 set_tcp_stream_priority(struct socket *so)
 {
-       struct tcpcb *tp = intotcpcb(sotoinpcb(so));
-       int old_cc = tp->tcp_cc_index;
+       struct inpcb *inp = sotoinpcb(so);
+       struct tcpcb *tp = intotcpcb(inp);
+       struct ifnet *outifp;
+       u_char old_cc = tp->tcp_cc_index;
        int recvbg = IS_TCP_RECV_BG(so);
+       bool is_local = false, fg_active = false;
+       uint32_t uptime;
+
+       VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
+           SOCK_CHECK_DOM(so, PF_INET6)) &&
+           SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
+           SOCK_CHECK_PROTO(so, IPPROTO_TCP));
+
+       /* Return if the socket is in a terminal state */
+       if (inp->inp_state == INPCB_STATE_DEAD) {
+               return;
+       }
+
+       outifp = inp->inp_last_outifp;
+       uptime = (uint32_t)net_uptime();
 
        /*
         * If the socket was marked as a background socket or if the
@@ -787,33 +1143,88 @@ set_tcp_stream_priority(struct socket *so)
         * background. The variable sotcdb which can be set with sysctl
         * is used to disable these settings for testing.
         */
-       if (soisthrottled(so) || IS_SO_TC_BACKGROUND(so->so_traffic_class)) {
-               if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0) {
-                       if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
+       if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
+               is_local = true;
+       }
+
+       /* Check if there has been recent foreground activity */
+       if (outifp != NULL) {
+               /*
+                * If the traffic source is background, check if
+                * it can be switched to foreground. This can
+                * happen when there is no indication of foreground
+                * activity.
+                */
+               if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
+                   (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
+                       fg_active = true;
+               }
+
+               /*
+                * The traffic source is best-effort -- check if
+                * the policy to throttle best effort is enabled
+                * and there was realtime activity on this
+                * interface recently. If this is true, enable
+                * algorithms that respond to increased latency
+                * on best-effort traffic.
+                */
+               if (so_throttle_best_effort(so, outifp)) {
+                       fg_active = true;
+               }
+       }
+
+       /*
+        * System initiated background traffic like cloud uploads should
+        * always use background delay sensitive algorithms. This will
+        * make the stream more responsive to other streams on the user's
+        * network and it will minimize latency induced.
+        */
+       if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
+               /*
+                * If the interface that the connection is using is
+                * loopback, do not use background congestion
+                * control algorithm.
+                *
+                * If there has been recent foreground activity or if
+                * there was an indication that a foreground application
+                * is going to use networking (net_io_policy_throttled),
+                * switch the background streams to use background
+                * congestion control algorithm. Otherwise, even background
+                * flows can move into foreground.
+                */
+               if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local ||
+                   !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
+                       if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
                                tcp_set_foreground_cc(so);
+                       }
                } else {
-                       if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX)
+                       if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
                                tcp_set_background_cc(so);
+                       }
                }
 
                /* Set receive side background flags */
-               if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0)
+               if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local ||
+                   !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
                        tcp_clear_recv_bg(so);
-               else
+               } else {
                        tcp_set_recv_bg(so);
+               }
        } else {
                tcp_clear_recv_bg(so);
-               if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
+               if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
                        tcp_set_foreground_cc(so);
+               }
        }
 
        if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
-               SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] TCP %s send; "
-                  "%s recv\n", so->last_pid, so, INP_SOCKAF(so),
-                  INP_SOCKTYPE(so),
-                  (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
-                  "background" : "foreground",
-                  IS_TCP_RECV_BG(so) ? "background" : "foreground"));
+               SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
+                   "%s recv\n", so->last_pid,
+                   (uint64_t)VM_KERNEL_ADDRPERM(so),
+                   SOCK_DOM(so), SOCK_TYPE(so),
+                   (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
+                   "background" : "foreground",
+                   IS_TCP_RECV_BG(so) ? "background" : "foreground");
        }
 }
 
@@ -824,18 +1235,14 @@ set_tcp_stream_priority(struct socket *so)
  */
 __private_extern__ void
 set_packet_service_class(struct mbuf *m, struct socket *so,
-    mbuf_svc_class_t in_msc, u_int32_t flags)
+    int sotc, uint32_t flags)
 {
-       mbuf_svc_class_t msc = MBUF_SC_BE;         /* Best effort by default */
+       mbuf_svc_class_t msc = MBUF_SC_BE;         /* Best effort by default */
        struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
-       struct ip *ip = mtod(m, struct ip *);
-#if INET6
-       struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
-#endif /* INET6 */
-       int isipv6 = ((flags & PKT_SCF_IPV6) != 0) ? 1 : 0; 
 
-       if (!(m->m_flags & M_PKTHDR))
+       if (!(m->m_flags & M_PKTHDR)) {
                return;
+       }
 
        /*
         * Here is the precedence:
@@ -843,127 +1250,72 @@ set_packet_service_class(struct mbuf *m, struct socket *so,
         * 2) Traffic class passed via ancillary data to sendmsdg(2)
         * 3) Traffic class socket option last
         */
-       if (in_msc != MBUF_SC_UNSPEC) {
-               if (in_msc >= MBUF_SC_BE && in_msc <= MBUF_SC_CTL)
-                       msc = in_msc;
-       } else {
-               VERIFY(SO_VALID_TC(so->so_traffic_class));
-               msc = so_tc2msc(so->so_traffic_class);
+       if (sotc != SO_TC_UNSPEC) {
+               VERIFY(SO_VALID_TC(sotc));
+               msc = so_tc2msc(sotc);
                /* Assert because tc must have been valid */
                VERIFY(MBUF_VALID_SC(msc));
        }
 
        /*
-        * If TRAFFIC_MGT_SO_BACKGROUND is set, depress the priority.
+        * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
+        * best effort is set, depress the priority.
         */
-       if (soisthrottled(so) && !IS_MBUF_SC_BACKGROUND(msc))
+       if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
+               msc = MBUF_SC_BK;
+       }
+
+       if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
+           so_throttle_best_effort(so, inp->inp_last_outifp)) {
                msc = MBUF_SC_BK;
+       }
+
+       if (soissrcbackground(so)) {
+               m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
+       }
 
+       if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
+               m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
+       }
        /*
         * Set the traffic class in the mbuf packet header svc field
         */
-       if (sotcdb & SOTCDB_NO_MTC)
+       if (sotcdb & SOTCDB_NO_MTC) {
                goto no_mbtc;
+       }
 
-       /* Elevate service class if the packet is a pure TCP ACK.
+       /*
+        * Elevate service class if the packet is a pure TCP ACK.
         * We can do this only when the flow is not a background
-        * flow and the outgoing interface supports 
+        * flow and the outgoing interface supports
         * transmit-start model.
         */
-       if (!IS_MBUF_SC_BACKGROUND(msc) && (flags & PKT_SCF_TCP_ACK))
+       if (!IS_MBUF_SC_BACKGROUND(msc) &&
+           (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
                msc = MBUF_SC_CTL;
+       }
 
        (void) m_set_service_class(m, msc);
 
        /*
-        * Set the privileged traffic auxiliary flag if applicable, or clear it.
+        * Set the privileged traffic auxiliary flag if applicable,
+        * or clear it.
         */
        if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
-           msc != MBUF_SC_UNSPEC)
-               m->m_pkthdr.aux_flags |= MAUXF_PRIO_PRIVILEGED;
-       else
-               m->m_pkthdr.aux_flags &= ~MAUXF_PRIO_PRIVILEGED;
+           msc != MBUF_SC_UNSPEC) {
+               m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
+       } else {
+               m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
+       }
 
 no_mbtc:
        /*
-        * Quick exit when best effort
-        */
-       if (msc == MBUF_SC_BE)
-               goto no_dscp;
-
-       /*
-        * The default behavior is for the networking stack to not set the
-        * DSCP code, based on SOTCDB_NO_DSCP being set.  If the flag is
-        * cleared, set the DSCP code in IPv4 or IPv6 header only for local
-        * traffic, if it is not already set.  <rdar://problem/11277343>
-        */
-       if (sotcdb & SOTCDB_NO_DSCP)
-               goto no_dscp;
-
-       /*
-        * Test if a IP TOS or IPV6 TCLASS has already been set
-        * on the socket or the raw packet.
+        * For TCP with background traffic class switch CC algo based on sysctl
         */
-       if (!(sotcdb & SOTCDB_NO_DSCPTST)) {
-#if INET6
-               if (isipv6) {
-                       if ((so->so_type == SOCK_RAW &&
-                           (ip6->ip6_flow & htonl(0xff << 20)) != 0) ||
-                           (inp->in6p_outputopts &&
-                           inp->in6p_outputopts->ip6po_tclass != -1))
-                               goto no_dscp;
-               } else
-#endif /* INET6 */
-               if ((so->so_type == SOCK_RAW &&
-                   (inp->inp_flags & INP_HDRINCL)) ||
-                   inp->inp_ip_tos != 0)
-                       goto no_dscp;
+       if (so->so_type == SOCK_STREAM) {
+               set_tcp_stream_priority(so);
        }
 
-       /*
-        * Test if destination is local
-        */
-       if (!(sotcdb & SOTCDB_NO_LCLTST)) {
-               int islocal = 0;
-               struct rtentry *rt = inp->inp_route.ro_rt;
-
-               if (so->so_type == SOCK_STREAM) {
-                       if (intotcpcb(inp)->t_flags & TF_LOCAL)
-                               islocal = 1;
-               } else if (rt != NULL &&
-                   (rt->rt_gateway->sa_family == AF_LINK ||
-                   (rt->rt_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))) {
-                       if (!(rt->rt_ifp->if_flags & IFF_POINTOPOINT))
-                               islocal = 1;
-               } else
-#if INET6
-               if (isipv6 && in6addr_local(&ip6->ip6_dst)) {
-                       islocal = 1;
-               } else
-#endif /* INET6 */
-               if (inaddr_local(ip->ip_dst)) {
-                       islocal = 1;
-               }
-               if (islocal == 0)
-                       goto no_dscp;
-       }
-
-#if INET6
-       if (isipv6)
-               ip6->ip6_flow |= htonl(dscp_code_from_mbuf_tclass(
-                   m_get_traffic_class(m)) << 20);
-       else
-#endif /* INET6 */
-               ip->ip_tos |= dscp_code_from_mbuf_tclass(
-                   m_get_traffic_class(m)) << 2;
-
-no_dscp:
-       /*
-        * For TCP with background traffic class switch CC algo based on sysctl
-        */
-       if (so->so_type == SOCK_STREAM)
-               set_tcp_stream_priority(so);
-
        so_tc_update_stats(m, so, msc);
 }
 
@@ -987,6 +1339,8 @@ so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
 __private_extern__ void
 socket_tclass_init(void)
 {
+       _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);
+
        tclass_lck_grp_attr = lck_grp_attr_alloc_init();
        tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
        tclass_lck_attr = lck_attr_alloc_init();
@@ -1025,6 +1379,9 @@ so_tc2msc(int tc)
        case _SO_TC_VI:
                msc = MBUF_SC_VI;
                break;
+       case SO_TC_NETSVC_SIG:
+               msc = MBUF_SC_SIG;
+               break;
        case SO_TC_VO:
        case _SO_TC_VO:
                msc = MBUF_SC_VO;
@@ -1038,15 +1395,13 @@ so_tc2msc(int tc)
                break;
        }
 
-       return (msc);
+       return msc;
 }
 
 __private_extern__ int
 so_svc2tc(mbuf_svc_class_t svc)
 {
        switch (svc) {
-       case MBUF_SC_UNSPEC:
-               return SO_TC_BE;
        case MBUF_SC_BK_SYS:
                return SO_TC_BK_SYS;
        case MBUF_SC_BK:
@@ -1063,27 +1418,667 @@ so_svc2tc(mbuf_svc_class_t svc)
                return SO_TC_RV;
        case MBUF_SC_VI:
                return SO_TC_VI;
+       case MBUF_SC_SIG:
+               return SO_TC_NETSVC_SIG;
        case MBUF_SC_VO:
                return SO_TC_VO;
        case MBUF_SC_CTL:
                return SO_TC_CTL;
+       case MBUF_SC_UNSPEC:
        default:
                return SO_TC_BE;
        }
 }
 
+static size_t
+sotc_index(int sotc)
+{
+       switch (sotc) {
+       case SO_TC_BK_SYS:
+               return SOTCIX_BK_SYS;
+       case _SO_TC_BK:
+       case SO_TC_BK:
+               return SOTCIX_BK;
+
+       case SO_TC_BE:
+               return SOTCIX_BE;
+       case SO_TC_RD:
+               return SOTCIX_RD;
+       case SO_TC_OAM:
+               return SOTCIX_OAM;
+
+       case SO_TC_AV:
+               return SOTCIX_AV;
+       case SO_TC_RV:
+               return SOTCIX_RV;
+       case _SO_TC_VI:
+       case SO_TC_VI:
+               return SOTCIX_VI;
+
+       case _SO_TC_VO:
+       case SO_TC_VO:
+               return SOTCIX_VO;
+       case SO_TC_CTL:
+               return SOTCIX_CTL;
+
+       default:
+               break;
+       }
+       /*
+        * Unknown traffic class value
+        */
+       return SIZE_T_MAX;
+}
+
+uint8_t
+fastlane_sc_to_dscp(uint32_t svc_class)
+{
+       uint8_t dscp = _DSCP_DF;
+
+       switch (svc_class) {
+       case MBUF_SC_BK_SYS:
+       case MBUF_SC_BK:
+               dscp = _DSCP_AF11;
+               break;
+
+       case MBUF_SC_BE:
+               dscp = _DSCP_DF;
+               break;
+       case MBUF_SC_RD:
+               dscp = _DSCP_AF21;
+               break;
+       case MBUF_SC_OAM:
+               dscp = _DSCP_CS2;
+               break;
+
+       case MBUF_SC_AV:
+               dscp = _DSCP_AF31;
+               break;
+       case MBUF_SC_RV:
+               dscp = _DSCP_CS4;
+               break;
+       case MBUF_SC_VI:
+               dscp = _DSCP_AF41;
+               break;
+       case MBUF_SC_SIG:
+               dscp = _DSCP_CS3;
+               break;
+
+       case MBUF_SC_VO:
+               dscp = _DSCP_EF;
+               break;
+       case MBUF_SC_CTL:
+               dscp = _DSCP_DF;
+               break;
+       default:
+               dscp = _DSCP_DF;
+               break;
+       }
+
+       return dscp;
+}
+
+uint8_t
+rfc4594_sc_to_dscp(uint32_t svc_class)
+{
+       uint8_t dscp = _DSCP_DF;
+
+       switch (svc_class) {
+       case MBUF_SC_BK_SYS:            /* Low-Priority Data */
+       case MBUF_SC_BK:
+               dscp = _DSCP_CS1;
+               break;
+
+       case MBUF_SC_BE:                        /* Standard */
+               dscp = _DSCP_DF;
+               break;
+       case MBUF_SC_RD:                        /* Low-Latency Data */
+               dscp = _DSCP_AF21;
+               break;
+
+       /* SVC_CLASS Not Defined:  High-Throughput Data */
+
+       case MBUF_SC_OAM:               /* OAM */
+               dscp = _DSCP_CS2;
+               break;
+
+       /* SVC_CLASS Not Defined:  Broadcast Video */
+
+       case MBUF_SC_AV:                        /* Multimedia Streaming */
+               dscp = _DSCP_AF31;
+               break;
+       case MBUF_SC_RV:                        /* Real-Time Interactive */
+               dscp = _DSCP_CS4;
+               break;
+       case MBUF_SC_VI:                        /* Multimedia Conferencing */
+               dscp = _DSCP_AF41;
+               break;
+       case MBUF_SC_SIG:               /* Signaling */
+               dscp = _DSCP_CS5;
+               break;
+
+       case MBUF_SC_VO:                        /* Telephony */
+               dscp = _DSCP_EF;
+               break;
+       case MBUF_SC_CTL:               /* Network Control*/
+               dscp = _DSCP_CS6;
+               break;
+       default:
+               dscp = _DSCP_DF;
+               break;
+       }
+
+       return dscp;
+}
+
+mbuf_traffic_class_t
+rfc4594_dscp_to_tc(uint8_t dscp)
+{
+       mbuf_traffic_class_t tc = MBUF_TC_BE;
+
+       switch (dscp) {
+       case _DSCP_CS1:
+               tc = MBUF_TC_BK;
+               break;
+       case _DSCP_DF:
+       case _DSCP_AF21:
+       case _DSCP_CS2:
+               tc = MBUF_TC_BE;
+               break;
+       case _DSCP_AF31:
+       case _DSCP_CS4:
+       case _DSCP_AF41:
+       case _DSCP_CS5:
+               tc = MBUF_TC_VI;
+               break;
+       case _DSCP_EF:
+       case _DSCP_CS6:
+               tc = MBUF_TC_VO;
+               break;
+       default:
+               tc = MBUF_TC_BE;
+               break;
+       }
+
+       return tc;
+}
+
 /*
- * LRO is turned on for AV streaming and background classes.
+ * Fill a net_qos_dscp_map from a netsvctype-to-DSCP table (takes no ifp)
  */
-static void
-so_set_lro(struct socket *so, int optval)
+static errno_t
+set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
+    const struct netsvctype_dscp_map *netsvctype_dscp_map)
 {
-       if ((optval == SO_TC_BK) ||
-            (optval == SO_TC_BK_SYS) ||
-           (optval == SO_TC_AV)) {
-               so->so_flags |= SOF_USELRO;
+       size_t i;
+       int netsvctype;
+
+       /*
+        * Do not accept more than the max number of distinct DSCPs
+        */
+       if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
+               return EINVAL;
+       }
+
+       /*
+        * Validate input parameters
+        */
+       for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
+               if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
+                       return EINVAL;
+               }
+               if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
+                       return EINVAL;
+               }
+       }
+
+       for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
+               netsvctype = netsvctype_dscp_map[i].netsvctype;
+
+               net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
+                   netsvctype_dscp_map[i].dscp;
+       }
+       for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
+               switch (netsvctype) {
+               case NET_SERVICE_TYPE_BE:
+               case NET_SERVICE_TYPE_BK:
+               case NET_SERVICE_TYPE_VI:
+               case NET_SERVICE_TYPE_VO:
+               case NET_SERVICE_TYPE_RV:
+               case NET_SERVICE_TYPE_AV:
+               case NET_SERVICE_TYPE_OAM:
+               case NET_SERVICE_TYPE_RD: {
+                       size_t sotcix;
+
+                       sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
+                       if (sotcix != SIZE_T_MAX) {
+                               net_qos_dscp_map->sotc_to_dscp[sotcix]  =
+                                   netsvctype_dscp_map[netsvctype].dscp;
+                       }
+                       break;
+               }
+               case  NET_SERVICE_TYPE_SIG:
+                       /* Signaling does not have its own traffic class */
+                       break;
+               default:
+                       /* We should not be here */
+                       ASSERT(0);
+               }
+       }
+       if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) {
+               /* Network control socket traffic class is always best effort for fastlane*/
+               net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
        } else {
-               so->so_flags &= ~SOF_USELRO;
+               net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6;
+       }
+
+       /* Background socket traffic class uses the same DSCP as background system */
+       net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK] =
+           net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS];
+
+       return 0;
+}
+
+/*
+ * Export the Fastlane network service type to DSCP table.
+ *
+ * Fills netsvctype_dscp_map[0 .. _NET_SERVICE_TYPE_COUNT - 1] so that entry i
+ * carries service type i together with the DSCP currently stored for it in
+ * fastlane_net_qos_dscp_map. The caller must provide room for
+ * _NET_SERVICE_TYPE_COUNT entries.
+ *
+ * Returns the number of bytes written to netsvctype_dscp_map.
+ */
+static size_t
+get_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map)
+{
+       struct net_qos_dscp_map *net_qos_dscp_map;
+       int i;
+
+       /* Only the Fastlane map is exported; the RFC 4594 map is not read here */
+       net_qos_dscp_map = &fastlane_net_qos_dscp_map;
+
+       for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
+               netsvctype_dscp_map[i].netsvctype = i;
+               netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
        }
+
+       /* i equals _NET_SERVICE_TYPE_COUNT here: size in bytes of the filled table */
+       return i * sizeof(struct netsvctype_dscp_map);
 }
 
+/*
+ * Build the Fastlane and RFC 4594 QoS maps from their service type tables,
+ * then install the default DSCP to WiFi access category map.
+ */
+void
+net_qos_map_init()
+{
+       const struct {
+               struct net_qos_dscp_map *dst;
+               const struct netsvctype_dscp_map *src;
+       } maps[] = {
+               { &fastlane_net_qos_dscp_map, fastlane_netsvctype_dscp_map },
+               { &rfc4594_net_qos_dscp_map, rfc4594_netsvctype_dscp_map },
+       };
+       errno_t error;
+       size_t n;
+
+       /* The built-in tables are expected to always pass validation */
+       for (n = 0; n < sizeof(maps) / sizeof(maps[0]); n++) {
+               error = set_netsvctype_dscp_map(maps[n].dst, maps[n].src);
+               ASSERT(error == 0);
+       }
+
+       /* Non-zero second argument: wipe the array before loading the defaults */
+       set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
+}
+
+/*
+ * Handler for the net.qos.default_netsvctype_to_dscp_map sysctl.
+ *
+ * A request without an output buffer only reports the size of the table.
+ * Otherwise the table returned by get_netsvctype_dscp_map() is copied out,
+ * truncated to the size of the caller's buffer.
+ *
+ * Any attempt to supply a new value fails with EPERM. Returns 0 on success or
+ * the error reported while copying the table out.
+ */
+int
+sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       if (req->oldptr == USER_ADDR_NULL) {
+               /* Size probe: report how much space the full table needs */
+               req->oldidx =
+                   _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
+       } else if (req->oldlen > 0) {
+               struct netsvctype_dscp_map table[_NET_SERVICE_TYPE_COUNT] = {};
+               size_t table_len = get_netsvctype_dscp_map(table);
+               int out_error = SYSCTL_OUT(req, table,
+                   MIN(table_len, req->oldlen));
+
+               if (out_error != 0) {
+                       return out_error;
+               }
+       }
+
+       /* The map cannot be modified through this node */
+       return (req->newptr != USER_ADDR_NULL) ? EPERM : 0;
+}
+
+/*
+ * Choose the DSCP, and possibly the mbuf service class, of an outgoing packet.
+ *
+ *   m            packet whose mbuf service class may be read and adjusted
+ *   ifp          outgoing interface; must not be NULL
+ *   qos_allowed  when FALSE, only best effort, background and (for socket
+ *                traffic classes) control keep their DSCP; everything else
+ *                is sent with the default DSCP
+ *   sotc         socket traffic class, used when netsvctype is not usable
+ *   netsvctype   network service type; used when valid and not best effort
+ *   dscp_inout   in: DSCP currently selected for the packet
+ *                out: DSCP to use; must not be NULL
+ *
+ * When QoS marking is enabled on the interface the incoming DSCP is ignored
+ * and replaced by the value from the Fastlane or RFC 4594 map. Otherwise, on
+ * a WiFi infrastructure interface, a non-default DSCP is used to derive the
+ * mbuf service class of best effort packets.
+ *
+ * Returns 0, or EINVAL when ifp or dscp_inout is NULL.
+ */
+__private_extern__ errno_t
+set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
+    int sotc, int netsvctype, uint8_t *dscp_inout)
+{
+       if (ifp == NULL || dscp_inout == NULL) {
+               return EINVAL;
+       }
+
+       if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
+           ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
+               uint8_t dscp;
+               const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
+
+               switch (ifp->if_qosmarking_mode) {
+               case IFRTYPE_QOSMARKING_FASTLANE:
+                       net_qos_dscp_map = &fastlane_net_qos_dscp_map;
+                       break;
+               case IFRTYPE_QOSMARKING_RFC4594:
+                       net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
+                       break;
+               default:
+                       panic("invalid QoS marking type");
+                       /* NOTREACHED */
+               }
+
+               /*
+                * With QoS marking enabled, the DSCP supplied by the caller
+                * (IP_TOS/IPV6_TCLASS) is a no-op: start from the default
+                */
+               dscp = _DSCP_DF;
+
+               /*
+                * For DSCP use the network service type if it is specified,
+                * otherwise use the socket traffic class
+                *
+                * When not whitelisted by the policy, set DSCP only for best
+                * effort and background, and set the mbuf service class to
+                * best effort as well so the packet will be queued and
+                * scheduled at a lower priority.
+                * We still want to prioritize control traffic on the interface
+                * so we do not change the mbuf service class for SO_TC_CTL
+                */
+               if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
+                   netsvctype != NET_SERVICE_TYPE_BE) {
+                       dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
+
+                       /* Best effort was already excluded by the outer test */
+                       if (qos_allowed == FALSE &&
+                           netsvctype != NET_SERVICE_TYPE_BK) {
+                               dscp = _DSCP_DF;
+                               if (sotc != SO_TC_CTL) {
+                                       m_set_service_class(m, MBUF_SC_BE);
+                               }
+                       }
+               } else if (sotc != SO_TC_UNSPEC) {
+                       size_t sotcix = sotc_index(sotc);
+                       if (sotcix != SIZE_T_MAX) {
+                               dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
+
+                               /*
+                                * SO_TC_CTL is excluded by the condition, so
+                                * the service class is always downgraded here
+                                */
+                               if (qos_allowed == FALSE && sotc != SO_TC_BE &&
+                                   sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
+                                   sotc != SO_TC_CTL) {
+                                       dscp = _DSCP_DF;
+                                       m_set_service_class(m, MBUF_SC_BE);
+                               }
+                       }
+               }
+               if (net_qos_verbose != 0) {
+                       printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
+                           __func__, qos_allowed, sotc, netsvctype, dscp);
+               }
+
+               if (*dscp_inout != dscp) {
+                       *dscp_inout = dscp;
+               }
+       } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
+               mbuf_svc_class_t msc = m_get_service_class(m);
+
+               /*
+                * For WiFi infra, when the mbuf service class is best effort
+                * and the DSCP is not default, set the service class based
+                * on DSCP
+                *
+                * The DSCP arrives as a full byte: never index the lookup
+                * array with a value above _MAX_DSCP
+                */
+               if (msc == MBUF_SC_BE && *dscp_inout <= _MAX_DSCP) {
+                       msc = wifi_dscp_to_msc_array[*dscp_inout];
+
+                       if (msc != MBUF_SC_BE) {
+                               m_set_service_class(m, msc);
+
+                               if (net_qos_verbose != 0) {
+                                       printf("%s set msc %u for dscp %u\n",
+                                           __func__, msc, *dscp_inout);
+                               }
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Load DSCP to WiFi access category assignments into wifi_dscp_to_msc_array.
+ *
+ *   map    table of (dscp, msc) pairs; at most DSCP_ARRAY_SIZE entries are
+ *          read, and reading stops early at the first entry whose dscp is
+ *          above _MAX_DSCP or whose msc is MBUF_SC_UNSPEC
+ *   clear  when non-zero the whole array is zeroed before loading
+ *
+ * Each service class is reduced to one of four values before being stored:
+ * MBUF_SC_BK, MBUF_SC_BE, MBUF_SC_VI or MBUF_SC_VO.
+ */
+static void
+set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
+{
+       int idx;
+
+       if (clear) {
+               bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
+       }
+
+       for (idx = 0; idx < DSCP_ARRAY_SIZE; idx++) {
+               const uint8_t dscp = map[idx].dscp;
+               const mbuf_svc_class_t in_msc = map[idx].msc;
+               mbuf_svc_class_t ac;
+
+               /* An out of range DSCP or an unspecified class ends the table */
+               if (dscp > _MAX_DSCP || in_msc == MBUF_SC_UNSPEC) {
+                       break;
+               }
+
+               switch (in_msc) {
+               case MBUF_SC_BK_SYS:
+               case MBUF_SC_BK:
+                       ac = MBUF_SC_BK;
+                       break;
+               case MBUF_SC_AV:
+               case MBUF_SC_RV:
+               case MBUF_SC_VI:
+                       ac = MBUF_SC_VI;
+                       break;
+               case MBUF_SC_VO:
+               case MBUF_SC_CTL:
+                       ac = MBUF_SC_VO;
+                       break;
+               case MBUF_SC_BE:
+               case MBUF_SC_RD:
+               case MBUF_SC_OAM:
+               default:
+                       /* Anything not listed above is treated as best effort */
+                       ac = MBUF_SC_BE;
+                       break;
+               }
+
+               wifi_dscp_to_msc_array[dscp] = ac;
+       }
+}
+
+/*
+ * Convert a table of (traffic class, dscp) pairs into (dscp, msc) pairs.
+ *
+ *   netsvctype_dscp_map  input entries; the netsvctype field of each entry
+ *                        is checked with SO_VALID_TC() and converted with
+ *                        so_tc2msc()
+ *   count                number of input entries, at most DSCP_ARRAY_SIZE
+ *   dcsp_msc_map         output array with room for DSCP_ARRAY_SIZE entries;
+ *                        it is zeroed and its first count entries are filled
+ *
+ * All input is validated before the output is touched, so dcsp_msc_map is
+ * left unmodified on failure.
+ *
+ * Returns 0, or EINVAL when an argument is NULL, count exceeds
+ * DSCP_ARRAY_SIZE, or an entry has an invalid traffic class or a DSCP above
+ * _MAX_DSCP.
+ */
+static errno_t
+dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
+    size_t count, struct dcsp_msc_map *dcsp_msc_map)
+{
+       size_t i;
+
+       /*
+        * The output holds DSCP_ARRAY_SIZE entries: refuse anything that
+        * would be written past its end
+        */
+       if (netsvctype_dscp_map == NULL || dcsp_msc_map == NULL ||
+           count > DSCP_ARRAY_SIZE) {
+               return EINVAL;
+       }
+
+       /*
+        * Validate input parameters
+        */
+       for (i = 0; i < count; i++) {
+               if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
+                       return EINVAL;
+               }
+               if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
+                       return EINVAL;
+               }
+       }
+
+       bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
+
+       for (i = 0; i < count; i++) {
+               dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
+               dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
+       }
+
+       return 0;
+}
+
+/*
+ * Sysctl handler to read and update the DSCP to WiFi access category map.
+ *
+ * Read: a request without an output buffer only reports the size of the full
+ * table. Otherwise one entry per DSCP value is copied out, truncated to the
+ * caller's buffer; the netsvctype field of each entry holds the result of
+ * so_svc2tc() applied to the service class stored for that DSCP.
+ *
+ * Write: requires proc_suser() to succeed. The new value may be shorter than
+ * the full table but not longer (EINVAL); trailing bytes that do not make up
+ * a whole entry are not counted. The entries are validated and then applied
+ * without clearing the current array first.
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       const size_t max_len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
+       struct netsvctype_dscp_map user_map[DSCP_ARRAY_SIZE] = {};
+       struct dcsp_msc_map new_map[DSCP_ARRAY_SIZE];
+       size_t in_len;
+       int error;
+
+       if (req->oldptr == USER_ADDR_NULL) {
+               /* Size probe only */
+               req->oldidx = max_len;
+       } else if (req->oldlen > 0) {
+               uint8_t dscp;
+
+               for (dscp = 0; dscp < DSCP_ARRAY_SIZE; dscp++) {
+                       user_map[dscp].dscp = dscp;
+                       user_map[dscp].netsvctype =
+                           so_svc2tc(wifi_dscp_to_msc_array[dscp]);
+               }
+               error = SYSCTL_OUT(req, user_map, MIN(max_len, req->oldlen));
+               if (error != 0) {
+                       return error;
+               }
+       }
+
+       /* Nothing more to do for a read-only request */
+       if (req->newptr == USER_ADDR_NULL) {
+               return 0;
+       }
+
+       error = proc_suser(current_proc());
+       if (error != 0) {
+               return error;
+       }
+
+       /*
+        * The input may be shorter than the full table, never longer
+        */
+       in_len = req->newlen;
+       if (in_len > max_len) {
+               return EINVAL;
+       }
+
+       error = SYSCTL_IN(req, user_map, in_len);
+       if (error != 0) {
+               return error;
+       }
+
+       bzero(new_map, sizeof(new_map));
+       error = dscp_msc_map_from_netsvctype_dscp_map(user_map,
+           in_len / sizeof(struct netsvctype_dscp_map), new_map);
+       if (error != 0) {
+               return error;
+       }
+
+       /* Zero second argument: entries not supplied keep their current value */
+       set_dscp_to_wifi_ac_map(new_map, 0);
+
+       return 0;
+}
+
+/*
+ * Sysctl handler that restores the default DSCP to WiFi access category map.
+ *
+ * Reading always yields 0. Writing any integer clears the current array and
+ * reloads default_dscp_to_wifi_ac_map; the value written is not examined.
+ *
+ * Returns 0 on success or the error from sysctl_handle_int().
+ */
+int
+sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int ignored = 0;
+       int error = sysctl_handle_int(oidp, &ignored, 0, req);
+
+       /* Stop here on failure, or when no new value was supplied */
+       if (error != 0 || req->newptr == USER_ADDR_NULL) {
+               return error;
+       }
+
+       set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
+
+       return 0;
+}
+
+/*
+ * Tell the caller whether a large upload or download should be marked with
+ * the BK (background) service type. This is a system level hint based on the
+ * state of the current primary IPv4 and IPv6 interfaces.
+ *
+ * The parameters are copied in from arg->param, whose length must be exactly
+ * sizeof(struct net_qos_param).
+ *
+ * On success 0 is returned and *retval is 1 for BK or 0 for default.
+ * EINVAL is returned for a missing or wrongly sized parameter block, and a
+ * copyin() failure is returned as is.
+ */
+int
+net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
+    int *retval)
+{
+#pragma unused(p)
+#define RETURN_USE_BK   1
+#define RETURN_USE_DEFAULT      0
+       struct net_qos_param qos_arg;
+       struct ifnet *ifp4, *ifp6;
+       boolean_t low_internet, both_expensive, both_constrained;
+       int err;
+
+       if (arg->param == USER_ADDR_NULL || retval == NULL ||
+           arg->param_len != sizeof(qos_arg)) {
+               return EINVAL;
+       }
+       err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
+       if (err != 0) {
+               return err;
+       }
+
+       *retval = RETURN_USE_DEFAULT;
+       ifp4 = ifindex2ifnet[get_primary_ifscope(AF_INET)];
+       ifp6 = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
+
+       /*
+        * If either primary interface is in Low Internet mode for the
+        * direction of the transfer, use background
+        */
+       if (qos_arg.nq_uplink) {
+               low_internet =
+                   (ifp4 != NULL && (ifp4->if_xflags & IFXF_LOW_INTERNET_UL)) ||
+                   (ifp6 != NULL && (ifp6->if_xflags & IFXF_LOW_INTERNET_UL));
+       } else {
+               low_internet =
+                   (ifp4 != NULL && (ifp4->if_xflags & IFXF_LOW_INTERNET_DL)) ||
+                   (ifp6 != NULL && (ifp6->if_xflags & IFXF_LOW_INTERNET_DL));
+       }
+       if (low_internet) {
+               *retval = RETURN_USE_BK;
+               return 0;
+       }
+
+       /*
+        * The IPv4 and IPv6 primary interfaces can differ. Only when both
+        * exist and both are expensive does the caller's nq_use_expensive
+        * decide: default when set, background otherwise
+        */
+       both_expensive = ifp4 != NULL && IFNET_IS_EXPENSIVE(ifp4) &&
+           ifp6 != NULL && IFNET_IS_EXPENSIVE(ifp6);
+       if (both_expensive) {
+               if (!qos_arg.nq_use_expensive) {
+                       *retval = RETURN_USE_BK;
+               }
+               return 0;
+       }
+
+       /* Same rule for constrained interfaces and nq_use_constrained */
+       both_constrained = ifp4 != NULL && IFNET_IS_CONSTRAINED(ifp4) &&
+           ifp6 != NULL && IFNET_IS_CONSTRAINED(ifp6);
+       if (both_constrained) {
+               if (!qos_arg.nq_use_constrained) {
+                       *retval = RETURN_USE_BK;
+               }
+               return 0;
+       }
+
+       /* Otherwise only transfers of 5 MB or more are sent as background */
+       if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
+               *retval = RETURN_USE_BK;
+       }
+
+#undef  RETURN_USE_BK
+#undef  RETURN_USE_DEFAULT
+       return 0;
+}