2  * Copyright (c) 2009-2019 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  29 #include <sys/systm.h> 
  30 #include <sys/kernel.h> 
  31 #include <sys/types.h> 
  32 #include <sys/filedesc.h> 
  33 #include <sys/file_internal.h> 
  35 #include <sys/socket.h> 
  36 #include <sys/socketvar.h> 
  37 #include <sys/errno.h> 
  38 #include <sys/protosw.h> 
  39 #include <sys/domain.h> 
  41 #include <sys/queue.h> 
  42 #include <sys/sysctl.h> 
  43 #include <sys/sysproto.h> 
  46 #include <net/if_var.h> 
  47 #include <net/route.h> 
  49 #include <netinet/in.h> 
  50 #include <netinet/in_var.h> 
  51 #include <netinet/in_pcb.h> 
  52 #include <netinet/ip.h> 
  53 #include <netinet/ip_var.h> 
  54 #include <netinet/ip6.h> 
  55 #include <netinet6/ip6_var.h> 
  56 #include <netinet/udp.h> 
  57 #include <netinet/udp_var.h> 
  58 #include <netinet/tcp.h> 
  59 #include <netinet/tcp_var.h> 
  60 #include <netinet/tcp_cc.h> 
  61 #include <netinet/lro_ext.h> 
  62 #include <netinet/in_tclass.h> 
  64 struct net_qos_dscp_map 
{ 
  65         uint8_t        sotc_to_dscp
[SO_TC_MAX
]; 
  66         uint8_t        netsvctype_to_dscp
[_NET_SERVICE_TYPE_COUNT
]; 
  73 static inline int so_throttle_best_effort(struct socket 
*, struct ifnet 
*); 
  74 static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map 
*, int); 
  75 static errno_t 
dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map 
*, size_t, 
  76     struct dcsp_msc_map 
*); 
  78 static lck_grp_attr_t 
*tclass_lck_grp_attr 
= NULL
; /* mutex group attributes */ 
  79 static lck_grp_t 
*tclass_lck_grp 
= NULL
;        /* mutex group definition */ 
  80 static lck_attr_t 
*tclass_lck_attr 
= NULL
;      /* mutex attributes */ 
  81 decl_lck_mtx_data(static, tclass_lock_data
); 
  82 static lck_mtx_t 
*tclass_lock 
= &tclass_lock_data
; 
  84 SYSCTL_NODE(_net
, OID_AUTO
, qos
, 
  85     CTLFLAG_RW 
| CTLFLAG_LOCKED
, 0, "QoS"); 
  87 static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
; 
  88 SYSCTL_PROC(_net_qos
, OID_AUTO
, default_netsvctype_to_dscp_map
, 
  89     CTLTYPE_STRUCT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
  90     0, 0, sysctl_default_netsvctype_to_dscp_map
, "S", ""); 
  92 static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
; 
  93 SYSCTL_PROC(_net_qos
, OID_AUTO
, dscp_to_wifi_ac_map
, 
  94     CTLTYPE_STRUCT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
  95     0, 0, sysctl_dscp_to_wifi_ac_map
, "S", ""); 
  97 static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
; 
  98 SYSCTL_PROC(_net_qos
, OID_AUTO
, reset_dscp_to_wifi_ac_map
, 
  99     CTLTYPE_INT 
| CTLFLAG_WR 
| CTLFLAG_LOCKED
, 
 100     0, 0, sysctl_reset_dscp_to_wifi_ac_map
, "I", ""); 
 102 int net_qos_verbose 
= 0; 
 103 SYSCTL_INT(_net_qos
, OID_AUTO
, verbose
, 
 104     CTLFLAG_RW 
| CTLFLAG_LOCKED
, &net_qos_verbose
, 0, ""); 
 107  * Fastlane QoS policy: 
 108  * By Default allow all apps to get traffic class to DSCP mapping 
 110 SYSCTL_NODE(_net_qos
, OID_AUTO
, policy
, 
 111     CTLFLAG_RW 
| CTLFLAG_LOCKED
, 0, ""); 
 113 int net_qos_policy_restricted 
= 0; 
 114 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, restricted
, 
 115     CTLFLAG_RW 
| CTLFLAG_LOCKED
, &net_qos_policy_restricted
, 0, ""); 
 117 int net_qos_policy_restrict_avapps 
= 0; 
 118 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, restrict_avapps
, 
 119     CTLFLAG_RW 
| CTLFLAG_LOCKED
, &net_qos_policy_restrict_avapps
, 0, ""); 
 121 int net_qos_policy_wifi_enabled 
= 0; 
 122 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, wifi_enabled
, 
 123     CTLFLAG_RW 
| CTLFLAG_LOCKED
, &net_qos_policy_wifi_enabled
, 0, ""); 
 125 int net_qos_policy_capable_enabled 
= 0; 
 126 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, capable_enabled
, 
 127     CTLFLAG_RW 
| CTLFLAG_LOCKED
, &net_qos_policy_capable_enabled
, 0, ""); 
 130  * Socket traffic class from network service type 
 132 const int sotc_by_netservicetype
[_NET_SERVICE_TYPE_COUNT
] = { 
 133         SO_TC_BE
,       /* NET_SERVICE_TYPE_BE */ 
 134         SO_TC_BK_SYS
,   /* NET_SERVICE_TYPE_BK */ 
 135         SO_TC_VI
,       /* NET_SERVICE_TYPE_SIG */ 
 136         SO_TC_VI
,       /* NET_SERVICE_TYPE_VI */ 
 137         SO_TC_VO
,       /* NET_SERVICE_TYPE_VO */ 
 138         SO_TC_RV
,       /* NET_SERVICE_TYPE_RV */ 
 139         SO_TC_AV
,       /* NET_SERVICE_TYPE_AV */ 
 140         SO_TC_OAM
,      /* NET_SERVICE_TYPE_OAM */ 
 141         SO_TC_RD        
/* NET_SERVICE_TYPE_RD */ 
 145  * DSCP mappings for QoS Fastlane as based on network service types 
 148 struct netsvctype_dscp_map fastlane_netsvctype_dscp_map
[_NET_SERVICE_TYPE_COUNT
] = { 
 149         { .netsvctype 
= NET_SERVICE_TYPE_BE
, .dscp 
= _DSCP_DF 
}, 
 150         { .netsvctype 
= NET_SERVICE_TYPE_BK
, .dscp 
= _DSCP_AF11 
}, 
 151         { .netsvctype 
= NET_SERVICE_TYPE_SIG
, .dscp 
= _DSCP_CS3 
}, 
 152         { .netsvctype 
= NET_SERVICE_TYPE_VI
, .dscp 
= _DSCP_AF41 
}, 
 153         { .netsvctype 
= NET_SERVICE_TYPE_VO
, .dscp 
= _DSCP_EF 
}, 
 154         { .netsvctype 
= NET_SERVICE_TYPE_RV
, .dscp 
= _DSCP_CS4 
}, 
 155         { .netsvctype 
= NET_SERVICE_TYPE_AV
, .dscp 
= _DSCP_AF31 
}, 
 156         { .netsvctype 
= NET_SERVICE_TYPE_OAM
, .dscp 
= _DSCP_CS2 
}, 
 157         { .netsvctype 
= NET_SERVICE_TYPE_RD
, .dscp 
= _DSCP_AF21 
}, 
 162  * DSCP mappings for QoS RFC4594 as based on network service types 
 165 struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map
[_NET_SERVICE_TYPE_COUNT
] = { 
 166         { .netsvctype 
= NET_SERVICE_TYPE_BE
, .dscp 
= _DSCP_DF 
}, 
 167         { .netsvctype 
= NET_SERVICE_TYPE_BK
, .dscp 
= _DSCP_CS1 
}, 
 168         { .netsvctype 
= NET_SERVICE_TYPE_SIG
, .dscp 
= _DSCP_CS5 
}, 
 169         { .netsvctype 
= NET_SERVICE_TYPE_VI
, .dscp 
= _DSCP_AF41 
}, 
 170         { .netsvctype 
= NET_SERVICE_TYPE_VO
, .dscp 
= _DSCP_EF 
}, 
 171         { .netsvctype 
= NET_SERVICE_TYPE_RV
, .dscp 
= _DSCP_CS4 
}, 
 172         { .netsvctype 
= NET_SERVICE_TYPE_AV
, .dscp 
= _DSCP_AF31 
}, 
 173         { .netsvctype 
= NET_SERVICE_TYPE_OAM
, .dscp 
= _DSCP_CS2 
}, 
 174         { .netsvctype 
= NET_SERVICE_TYPE_RD
, .dscp 
= _DSCP_AF21 
}, 
 177 static struct net_qos_dscp_map fastlane_net_qos_dscp_map
; 
 178 static struct net_qos_dscp_map rfc4594_net_qos_dscp_map
; 
 181  * The size is one more than the max because DSCP start at zero 
 183 #define DSCP_ARRAY_SIZE (_MAX_DSCP + 1) 
 186  * The DSCP to UP mapping (via mbuf service class) for WiFi follows is the mapping 
 187  * that implemented at the 802.11 driver level when the mbuf service class is 
 190  * This clashes with the recommended mapping documented by the IETF document 
 191  * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain 
 192  * binary compatibility. Applications should use the network service type socket 
 193  * option instead to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS. 
 195 static const struct dcsp_msc_map default_dscp_to_wifi_ac_map
[] = { 
 196         { .dscp 
= _DSCP_DF
, .msc 
= MBUF_SC_BE 
},        /* RFC 2474 Standard */ 
 197         { .dscp 
= 1, .msc 
= MBUF_SC_BE 
},               /*  */ 
 198         { .dscp 
= 2, .msc 
= MBUF_SC_BE 
},               /*  */ 
 199         { .dscp 
= 3, .msc 
= MBUF_SC_BE 
},               /*  */ 
 200         { .dscp 
= 4, .msc 
= MBUF_SC_BE 
},               /*  */ 
 201         { .dscp 
= 5, .msc 
= MBUF_SC_BE 
},               /*  */ 
 202         { .dscp 
= 6, .msc 
= MBUF_SC_BE 
},               /*  */ 
 203         { .dscp 
= 7, .msc 
= MBUF_SC_BE 
},               /*  */ 
 205         { .dscp 
= _DSCP_CS1
, .msc 
= MBUF_SC_BK 
},       /* RFC 3662 Low-Priority Data */ 
 206         { .dscp 
= 9, .msc 
= MBUF_SC_BK 
},               /*  */ 
 207         { .dscp 
= _DSCP_AF11
, .msc 
= MBUF_SC_BK 
},      /* RFC 2597 High-Throughput Data */ 
 208         { .dscp 
= 11, .msc 
= MBUF_SC_BK 
},              /*  */ 
 209         { .dscp 
= _DSCP_AF12
, .msc 
= MBUF_SC_BK 
},      /* RFC 2597 High-Throughput Data */ 
 210         { .dscp 
= 13, .msc 
= MBUF_SC_BK 
},              /*  */ 
 211         { .dscp 
= _DSCP_AF13
, .msc 
= MBUF_SC_BK 
},      /* RFC 2597 High-Throughput Data */ 
 212         { .dscp 
= 15, .msc 
= MBUF_SC_BK 
},              /*  */ 
 214         { .dscp 
= _DSCP_CS2
, .msc 
= MBUF_SC_BK 
},       /* RFC 4594 OAM */ 
 215         { .dscp 
= 17, .msc 
= MBUF_SC_BK 
},              /*  */ 
 216         { .dscp 
= _DSCP_AF21
, .msc 
= MBUF_SC_BK 
},      /* RFC 2597 Low-Latency Data */ 
 217         { .dscp 
= 19, .msc 
= MBUF_SC_BK 
},              /*  */ 
 218         { .dscp 
= _DSCP_AF22
, .msc 
= MBUF_SC_BK 
},      /* RFC 2597 Low-Latency Data */ 
 219         { .dscp 
= 21, .msc 
= MBUF_SC_BK 
},              /*  */ 
 220         { .dscp 
= _DSCP_AF23
, .msc 
= MBUF_SC_BK 
},      /* RFC 2597 Low-Latency Data */ 
 221         { .dscp 
= 23, .msc 
= MBUF_SC_BK 
},              /*  */ 
 223         { .dscp 
= _DSCP_CS3
, .msc 
= MBUF_SC_BE 
},       /* RFC 2474 Broadcast Video */ 
 224         { .dscp 
= 25, .msc 
= MBUF_SC_BE 
},              /*  */ 
 225         { .dscp 
= _DSCP_AF31
, .msc 
= MBUF_SC_BE 
},      /* RFC 2597 Multimedia Streaming */ 
 226         { .dscp 
= 27, .msc 
= MBUF_SC_BE 
},              /*  */ 
 227         { .dscp 
= _DSCP_AF32
, .msc 
= MBUF_SC_BE 
},      /* RFC 2597 Multimedia Streaming */ 
 228         { .dscp 
= 29, .msc 
= MBUF_SC_BE 
},              /*  */ 
 229         { .dscp 
= _DSCP_AF33
, .msc 
= MBUF_SC_BE 
},      /* RFC 2597 Multimedia Streaming */ 
 230         { .dscp 
= 31, .msc 
= MBUF_SC_BE 
},              /*  */ 
 232         { .dscp 
= _DSCP_CS4
, .msc 
= MBUF_SC_VI 
},       /* RFC 2474 Real-Time Interactive */ 
 233         { .dscp 
= 33, .msc 
= MBUF_SC_VI 
},              /*  */ 
 234         { .dscp 
= _DSCP_AF41
, .msc 
= MBUF_SC_VI 
},      /* RFC 2597 Multimedia Conferencing */ 
 235         { .dscp 
= 35, .msc 
= MBUF_SC_VI 
},              /*  */ 
 236         { .dscp 
= _DSCP_AF42
, .msc 
= MBUF_SC_VI 
},      /* RFC 2597 Multimedia Conferencing */ 
 237         { .dscp 
= 37, .msc 
= MBUF_SC_VI 
},              /*  */ 
 238         { .dscp 
= _DSCP_AF43
, .msc 
= MBUF_SC_VI 
},      /* RFC 2597 Multimedia Conferencing */ 
 239         { .dscp 
= 39, .msc 
= MBUF_SC_VI 
},              /*  */ 
 241         { .dscp 
= _DSCP_CS5
, .msc 
= MBUF_SC_VI 
},       /* RFC 2474 Signaling */ 
 242         { .dscp 
= 41, .msc 
= MBUF_SC_VI 
},              /*  */ 
 243         { .dscp 
= 42, .msc 
= MBUF_SC_VI 
},              /*  */ 
 244         { .dscp 
= 43, .msc 
= MBUF_SC_VI 
},              /*  */ 
 245         { .dscp 
= _DSCP_VA
, .msc 
= MBUF_SC_VI 
},        /* RFC 5865 VOICE-ADMIT */ 
 246         { .dscp 
= 45, .msc 
= MBUF_SC_VI 
},              /*  */ 
 247         { .dscp 
= _DSCP_EF
, .msc 
= MBUF_SC_VI 
},        /* RFC 3246 Telephony */ 
 248         { .dscp 
= 47, .msc 
= MBUF_SC_VI 
},              /*  */ 
 250         { .dscp 
= _DSCP_CS6
, .msc 
= MBUF_SC_VO 
},       /* Wi-Fi WMM Certification: Chariot */ 
 251         { .dscp 
= 49, .msc 
= MBUF_SC_VO 
},              /*  */ 
 252         { .dscp 
= 50, .msc 
= MBUF_SC_VO 
},              /*  */ 
 253         { .dscp 
= 51, .msc 
= MBUF_SC_VO 
},              /*  */ 
 254         { .dscp 
= 52, .msc 
= MBUF_SC_VO 
},              /* Wi-Fi WMM Certification: Sigma */ 
 255         { .dscp 
= 53, .msc 
= MBUF_SC_VO 
},              /*  */ 
 256         { .dscp 
= 54, .msc 
= MBUF_SC_VO 
},              /*  */ 
 257         { .dscp 
= 55, .msc 
= MBUF_SC_VO 
},              /*  */ 
 259         { .dscp 
= _DSCP_CS7
, .msc 
= MBUF_SC_VO 
},       /* Wi-Fi WMM Certification: Chariot */ 
 260         { .dscp 
= 57, .msc 
= MBUF_SC_VO 
},              /*  */ 
 261         { .dscp 
= 58, .msc 
= MBUF_SC_VO 
},              /*  */ 
 262         { .dscp 
= 59, .msc 
= MBUF_SC_VO 
},              /*  */ 
 263         { .dscp 
= 60, .msc 
= MBUF_SC_VO 
},              /*  */ 
 264         { .dscp 
= 61, .msc 
= MBUF_SC_VO 
},              /*  */ 
 265         { .dscp 
= 62, .msc 
= MBUF_SC_VO 
},              /*  */ 
 266         { .dscp 
= 63, .msc 
= MBUF_SC_VO 
},              /*  */ 
 268         { .dscp 
= 255, .msc 
= MBUF_SC_UNSPEC 
}          /* invalid DSCP to mark last entry */ 
 271 mbuf_svc_class_t wifi_dscp_to_msc_array
[DSCP_ARRAY_SIZE
]; 
 274  * If there is no foreground activity on the interface for bg_switch_time 
 275  * seconds, the background connections can switch to foreground TCP 
 276  * congestion control. 
 278 #define TCP_BG_SWITCH_TIME 2 /* seconds */ 
 280 #if (DEVELOPMENT || DEBUG) 
 282 static int tfp_count 
= 0; 
 284 static TAILQ_HEAD(, tclass_for_proc
) tfp_head 
= 
 285     TAILQ_HEAD_INITIALIZER(tfp_head
); 
 287 struct tclass_for_proc 
{ 
 288         TAILQ_ENTRY(tclass_for_proc
)    tfp_link
; 
 291         char            tfp_pname
[(2 * MAXCOMLEN
) + 1]; 
 292         uint32_t        tfp_qos_mode
; 
 295 static int get_pid_tclass(struct so_tcdbg 
*); 
 296 static int get_pname_tclass(struct so_tcdbg 
*); 
 297 static int set_pid_tclass(struct so_tcdbg 
*); 
 298 static int set_pname_tclass(struct so_tcdbg 
*); 
 299 static int flush_pid_tclass(struct so_tcdbg 
*); 
 300 static int purge_tclass_for_proc(void); 
 301 static int flush_tclass_for_proc(void); 
 302 static void set_tclass_for_curr_proc(struct socket 
*); 
 305  * Must be called with tclass_lock held 
 307 static struct tclass_for_proc 
* 
 308 find_tfp_by_pid(pid_t pid
) 
 310         struct tclass_for_proc 
*tfp
; 
 312         TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) { 
 313                 if (tfp
->tfp_pid 
== pid
) { 
 321  * Must be called with tclass_lock held 
 323 static struct tclass_for_proc 
* 
 324 find_tfp_by_pname(const char *pname
) 
 326         struct tclass_for_proc 
*tfp
; 
 328         TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) { 
 329                 if (strncmp(pname
, tfp
->tfp_pname
, 
 330                     sizeof(tfp
->tfp_pname
)) == 0) { 
 337 __private_extern__ 
void 
 338 set_tclass_for_curr_proc(struct socket 
*so
) 
 340         struct tclass_for_proc 
*tfp 
= NULL
; 
 341         proc_t p 
= current_proc();      /* Not ref counted */ 
 342         pid_t pid 
= proc_pid(p
); 
 343         char *pname 
= proc_best_name(p
); 
 345         lck_mtx_lock(tclass_lock
); 
 347         TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) { 
 348                 if ((tfp
->tfp_pid 
== pid
) || (tfp
->tfp_pid 
== -1 && 
 349                     strncmp(pname
, tfp
->tfp_pname
, 
 350                     sizeof(tfp
->tfp_pname
)) == 0)) { 
 351                         if (tfp
->tfp_class 
!= SO_TC_UNSPEC
) { 
 352                                 so
->so_traffic_class 
= tfp
->tfp_class
; 
 355                         if (tfp
->tfp_qos_mode 
== QOS_MODE_MARKING_POLICY_ENABLE
) { 
 356                                 so
->so_flags1 
|= SOF1_QOSMARKING_ALLOWED
; 
 357                         } else if (tfp
->tfp_qos_mode 
== QOS_MODE_MARKING_POLICY_DISABLE
) { 
 358                                 so
->so_flags1 
&= ~SOF1_QOSMARKING_ALLOWED
; 
 364         lck_mtx_unlock(tclass_lock
); 
 368  * Purge entries with PIDs of exited processes 
 371 purge_tclass_for_proc(void) 
 374         struct tclass_for_proc 
*tfp
, *tvar
; 
 376         lck_mtx_lock(tclass_lock
); 
 378         TAILQ_FOREACH_SAFE(tfp
, &tfp_head
, tfp_link
, tvar
) { 
 381                 if (tfp
->tfp_pid 
== -1) { 
 384                 if ((p 
= proc_find(tfp
->tfp_pid
)) == NULL
) { 
 386                         TAILQ_REMOVE(&tfp_head
, tfp
, tfp_link
); 
 394         lck_mtx_unlock(tclass_lock
); 
 401  * Must be called with tclass_lock held 
 404 free_tclass_for_proc(struct tclass_for_proc 
*tfp
) 
 410         TAILQ_REMOVE(&tfp_head
, tfp
, tfp_link
); 
 418 flush_tclass_for_proc(void) 
 421         struct tclass_for_proc 
*tfp
, *tvar
; 
 423         lck_mtx_lock(tclass_lock
); 
 425         TAILQ_FOREACH_SAFE(tfp
, &tfp_head
, tfp_link
, tvar
) { 
 426                 free_tclass_for_proc(tfp
); 
 429         lck_mtx_unlock(tclass_lock
); 
 435  * Must be called with tclass_lock held 
 437 static struct tclass_for_proc 
* 
 438 alloc_tclass_for_proc(pid_t pid
, const char *pname
) 
 440         struct tclass_for_proc 
*tfp
; 
 442         if (pid 
== -1 && pname 
== NULL
) { 
 446         tfp 
= _MALLOC(sizeof(struct tclass_for_proc
), M_TEMP
, M_NOWAIT 
| M_ZERO
); 
 453          * Add per pid entries before per proc name so we can find 
 454          * a specific instance of a process before the general name base entry. 
 457                 TAILQ_INSERT_HEAD(&tfp_head
, tfp
, tfp_link
); 
 459                 strlcpy(tfp
->tfp_pname
, pname
, sizeof(tfp
->tfp_pname
)); 
 460                 TAILQ_INSERT_TAIL(&tfp_head
, tfp
, tfp_link
); 
 469  * SO_TC_UNSPEC for tclass means to remove the entry 
 472 set_pid_tclass(struct so_tcdbg 
*so_tcdbg
) 
 476         struct filedesc 
*fdp
; 
 478         struct tclass_for_proc 
*tfp
; 
 480         pid_t pid 
= so_tcdbg
->so_tcdbg_pid
; 
 481         int tclass 
= so_tcdbg
->so_tcdbg_tclass
; 
 482         int netsvctype 
= so_tcdbg
->so_tcdbg_netsvctype
; 
 486                 printf("%s proc_find(%d) failed\n", __func__
, pid
); 
 491         lck_mtx_lock(tclass_lock
); 
 493         tfp 
= find_tfp_by_pid(pid
); 
 495                 tfp 
= alloc_tclass_for_proc(pid
, NULL
); 
 497                         lck_mtx_unlock(tclass_lock
); 
 502         tfp
->tfp_class 
= tclass
; 
 503         tfp
->tfp_qos_mode 
= so_tcdbg
->so_tcbbg_qos_mode
; 
 505         lck_mtx_unlock(tclass_lock
); 
 511                 for (i 
= 0; i 
< fdp
->fd_nfiles
; i
++) { 
 514                         fp 
= fdp
->fd_ofiles
[i
]; 
 516                             (fdp
->fd_ofileflags
[i
] & UF_RESERVED
) != 0 || 
 517                             FILEGLOB_DTYPE(fp
->f_fglob
) != DTYPE_SOCKET
) { 
 521                         so 
= (struct socket 
*)fp
->f_fglob
->fg_data
; 
 522                         if (SOCK_DOM(so
) != PF_INET 
&& SOCK_DOM(so
) != PF_INET6
) { 
 527                         if (tfp
->tfp_qos_mode 
== QOS_MODE_MARKING_POLICY_ENABLE
) { 
 528                                 so
->so_flags1 
|= SOF1_QOSMARKING_ALLOWED
; 
 529                         } else if (tfp
->tfp_qos_mode 
== QOS_MODE_MARKING_POLICY_DISABLE
) { 
 530                                 so
->so_flags1 
&= ~SOF1_QOSMARKING_ALLOWED
; 
 532                         socket_unlock(so
, 1); 
 534                         if (netsvctype 
!= _NET_SERVICE_TYPE_UNSPEC
) { 
 535                                 error 
= sock_setsockopt(so
, SOL_SOCKET
, 
 536                                     SO_NET_SERVICE_TYPE
, &netsvctype
, sizeof(int)); 
 538                         if (tclass 
!= SO_TC_UNSPEC
) { 
 539                                 error 
= sock_setsockopt(so
, SOL_SOCKET
, 
 540                                     SO_TRAFFIC_CLASS
, &tclass
, sizeof(int)); 
 557 set_pname_tclass(struct so_tcdbg 
*so_tcdbg
) 
 560         struct tclass_for_proc 
*tfp
; 
 562         lck_mtx_lock(tclass_lock
); 
 564         tfp 
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
); 
 566                 tfp 
= alloc_tclass_for_proc(-1, so_tcdbg
->so_tcdbg_pname
); 
 568                         lck_mtx_unlock(tclass_lock
); 
 573         tfp
->tfp_class 
= so_tcdbg
->so_tcdbg_tclass
; 
 574         tfp
->tfp_qos_mode 
= so_tcdbg
->so_tcbbg_qos_mode
; 
 576         lck_mtx_unlock(tclass_lock
); 
 585 flush_pid_tclass(struct so_tcdbg 
*so_tcdbg
) 
 587         pid_t pid 
= so_tcdbg
->so_tcdbg_pid
; 
 588         int tclass 
= so_tcdbg
->so_tcdbg_tclass
; 
 589         struct filedesc 
*fdp
; 
 595         if (p 
== PROC_NULL
) { 
 596                 printf("%s proc_find(%d) failed\n", __func__
, pid
); 
 602         for (i 
= 0; i 
< fdp
->fd_nfiles
; i
++) { 
 606                 fp 
= fdp
->fd_ofiles
[i
]; 
 608                     (fdp
->fd_ofileflags
[i
] & UF_RESERVED
) != 0 || 
 609                     FILEGLOB_DTYPE(fp
->f_fglob
) != DTYPE_SOCKET
) { 
 613                 so 
= (struct socket 
*)fp
->f_fglob
->fg_data
; 
 614                 error 
= sock_setsockopt(so
, SOL_SOCKET
, SO_FLUSH
, &tclass
, 
 617                         printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, " 
 618                             "tclass=%d) failed %d\n", __func__
, 
 619                             (uint64_t)VM_KERNEL_ADDRPERM(so
), i
, tclass
, 
 628         if (p 
!= PROC_NULL
) { 
 636 get_pid_tclass(struct so_tcdbg 
*so_tcdbg
) 
 640         struct tclass_for_proc 
*tfp
; 
 641         pid_t pid 
= so_tcdbg
->so_tcdbg_pid
; 
 643         so_tcdbg
->so_tcdbg_tclass 
= SO_TC_UNSPEC
; /* Means not set */ 
 647                 printf("%s proc_find(%d) failed\n", __func__
, pid
); 
 652         lck_mtx_lock(tclass_lock
); 
 654         tfp 
= find_tfp_by_pid(pid
); 
 656                 so_tcdbg
->so_tcdbg_tclass 
= tfp
->tfp_class
; 
 657                 so_tcdbg
->so_tcbbg_qos_mode 
= tfp
->tfp_qos_mode
; 
 660         lck_mtx_unlock(tclass_lock
); 
 670 get_pname_tclass(struct so_tcdbg 
*so_tcdbg
) 
 673         struct tclass_for_proc 
*tfp
; 
 675         so_tcdbg
->so_tcdbg_tclass 
= SO_TC_UNSPEC
; /* Means not set */ 
 678         lck_mtx_lock(tclass_lock
); 
 680         tfp 
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
); 
 682                 so_tcdbg
->so_tcdbg_tclass 
= tfp
->tfp_class
; 
 683                 so_tcdbg
->so_tcbbg_qos_mode 
= tfp
->tfp_qos_mode
; 
 686         lck_mtx_unlock(tclass_lock
); 
 692 delete_tclass_for_pid_pname(struct so_tcdbg 
*so_tcdbg
) 
 695         pid_t pid 
= so_tcdbg
->so_tcdbg_pid
; 
 696         struct tclass_for_proc 
*tfp 
= NULL
; 
 698         lck_mtx_lock(tclass_lock
); 
 701                 tfp 
= find_tfp_by_pid(pid
); 
 703                 tfp 
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
); 
 707                 free_tclass_for_proc(tfp
); 
 711         lck_mtx_unlock(tclass_lock
); 
 717  * Setting options requires privileges 
 719 __private_extern__ 
int 
 720 so_set_tcdbg(struct socket 
*so
, struct so_tcdbg 
*so_tcdbg
) 
 724         if ((so
->so_state 
& SS_PRIV
) == 0) { 
 728         socket_unlock(so
, 0); 
 730         switch (so_tcdbg
->so_tcdbg_cmd
) { 
 732                 error 
= set_pid_tclass(so_tcdbg
); 
 736                 error 
= set_pname_tclass(so_tcdbg
); 
 740                 error 
= purge_tclass_for_proc(); 
 744                 error 
= flush_tclass_for_proc(); 
 747         case SO_TCDBG_DELETE
: 
 748                 error 
= delete_tclass_for_pid_pname(so_tcdbg
); 
 751         case SO_TCDBG_TCFLUSH_PID
: 
 752                 error 
= flush_pid_tclass(so_tcdbg
); 
 766  * Not required to be privileged to get 
 768 __private_extern__ 
int 
 769 sogetopt_tcdbg(struct socket 
*so
, struct sockopt 
*sopt
) 
 772         struct so_tcdbg so_tcdbg
; 
 774         size_t len 
= sopt
->sopt_valsize
; 
 776         error 
= sooptcopyin(sopt
, &so_tcdbg
, sizeof(struct so_tcdbg
), 
 777             sizeof(struct so_tcdbg
)); 
 782         sopt
->sopt_valsize 
= len
; 
 784         socket_unlock(so
, 0); 
 786         switch (so_tcdbg
.so_tcdbg_cmd
) { 
 788                 error 
= get_pid_tclass(&so_tcdbg
); 
 792                 error 
= get_pname_tclass(&so_tcdbg
); 
 796                 lck_mtx_lock(tclass_lock
); 
 797                 so_tcdbg
.so_tcdbg_count 
= tfp_count
; 
 798                 lck_mtx_unlock(tclass_lock
); 
 801         case SO_TCDBG_LIST
: { 
 802                 struct tclass_for_proc 
*tfp
; 
 804                 struct so_tcdbg 
*ptr
; 
 806                 lck_mtx_lock(tclass_lock
); 
 807                 if ((alloc_count 
= tfp_count
) == 0) { 
 808                         lck_mtx_unlock(tclass_lock
); 
 812                 len 
= alloc_count 
* sizeof(struct so_tcdbg
); 
 813                 lck_mtx_unlock(tclass_lock
); 
 815                 buf 
= _MALLOC(len
, M_TEMP
, M_WAITOK 
| M_ZERO
); 
 821                 lck_mtx_lock(tclass_lock
); 
 823                 ptr 
= (struct so_tcdbg 
*)buf
; 
 824                 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) { 
 825                         if (++n 
> alloc_count
) { 
 828                         if (tfp
->tfp_pid 
!= -1) { 
 829                                 ptr
->so_tcdbg_cmd 
= SO_TCDBG_PID
; 
 830                                 ptr
->so_tcdbg_pid 
= tfp
->tfp_pid
; 
 832                                 ptr
->so_tcdbg_cmd 
= SO_TCDBG_PNAME
; 
 833                                 ptr
->so_tcdbg_pid 
= -1; 
 834                                 strlcpy(ptr
->so_tcdbg_pname
, 
 836                                     sizeof(ptr
->so_tcdbg_pname
)); 
 838                         ptr
->so_tcdbg_tclass 
= tfp
->tfp_class
; 
 839                         ptr
->so_tcbbg_qos_mode 
= tfp
->tfp_qos_mode
; 
 843                 lck_mtx_unlock(tclass_lock
); 
 856                         error 
= sooptcopyout(sopt
, &so_tcdbg
, 
 857                             sizeof(struct so_tcdbg
)); 
 859                         error 
= sooptcopyout(sopt
, buf
, len
); 
 866 #endif /* (DEVELOPMENT || DEBUG) */ 
 869 so_get_netsvc_marking_level(struct socket 
*so
) 
 871         int marking_level 
= NETSVC_MRKNG_UNKNOWN
; 
 872         struct ifnet 
*ifp 
= NULL
; 
 874         switch (SOCK_DOM(so
)) { 
 876                 struct inpcb 
*inp 
= sotoinpcb(so
); 
 879                         ifp 
= inp
->inp_last_outifp
; 
 884                 struct in6pcb 
*in6p 
= sotoin6pcb(so
); 
 887                         ifp 
= in6p
->in6p_last_outifp
; 
 895                 if ((ifp
->if_eflags 
& IFEF_QOSMARKING_ENABLED
) != 0) { 
 896                         if ((so
->so_flags1 
& SOF1_QOSMARKING_ALLOWED
)) { 
 897                                 marking_level 
= NETSVC_MRKNG_LVL_L3L2_ALL
; 
 899                                 marking_level 
= NETSVC_MRKNG_LVL_L3L2_BK
; 
 902                         marking_level 
= NETSVC_MRKNG_LVL_L2
; 
 905         return marking_level
; 
 908 __private_extern__ 
int 
 909 so_set_traffic_class(struct socket 
*so
, int optval
) 
 913         if (optval 
< SO_TC_BE 
|| optval 
> SO_TC_CTL
) { 
 927                         if (!SO_VALID_TC(optval
)) { 
 934                         int oldval 
= so
->so_traffic_class
; 
 936                         VERIFY(SO_VALID_TC(optval
)); 
 937                         so
->so_traffic_class 
= optval
; 
 939                         if ((SOCK_DOM(so
) == PF_INET 
|| 
 940                             SOCK_DOM(so
) == PF_INET6
) && 
 941                             SOCK_TYPE(so
) == SOCK_STREAM
) { 
 942                                 set_tcp_stream_priority(so
); 
 945                         if ((SOCK_DOM(so
) == PF_INET 
|| 
 946                             SOCK_DOM(so
) == PF_INET6
) && 
 947                             optval 
!= oldval 
&& (optval 
== SO_TC_BK_SYS 
|| 
 948                             oldval 
== SO_TC_BK_SYS
)) { 
 950                                  * If the app switches from BK_SYS to something 
 951                                  * else, resume the socket if it was suspended. 
 953                                 if (oldval 
== SO_TC_BK_SYS
) { 
 954                                         inp_reset_fc_state(so
->so_pcb
); 
 957                                 SOTHROTTLELOG("throttle[%d]: so 0x%llx " 
 958                                     "[%d,%d] opportunistic %s\n", so
->last_pid
, 
 959                                     (uint64_t)VM_KERNEL_ADDRPERM(so
), 
 960                                     SOCK_DOM(so
), SOCK_TYPE(so
), 
 961                                     (optval 
== SO_TC_BK_SYS
) ? "ON" : "OFF"); 
 968 __private_extern__ 
int 
 969 so_set_net_service_type(struct socket 
*so
, int netsvctype
) 
 974         if (!IS_VALID_NET_SERVICE_TYPE(netsvctype
)) { 
 978         sotc 
= sotc_by_netservicetype
[netsvctype
]; 
 979         error 
= so_set_traffic_class(so
, sotc
); 
 983         so
->so_netsvctype 
= netsvctype
; 
 984         so
->so_flags1 
|= SOF1_TC_NET_SERV_TYPE
; 
 989 __private_extern__ 
void 
 990 so_set_default_traffic_class(struct socket 
*so
) 
 992         so
->so_traffic_class 
= SO_TC_BE
; 
 994         if ((SOCK_DOM(so
) == PF_INET 
|| SOCK_DOM(so
) == PF_INET6
)) { 
 995                 if (net_qos_policy_restricted 
== 0) { 
 996                         so
->so_flags1 
|= SOF1_QOSMARKING_ALLOWED
; 
 998 #if (DEVELOPMENT || DEBUG) 
1000                         set_tclass_for_curr_proc(so
); 
1002 #endif /* (DEVELOPMENT || DEBUG) */ 
1006 __private_extern__ 
int 
1007 so_set_opportunistic(struct socket 
*so
, int optval
) 
1009         return so_set_traffic_class(so
, (optval 
== 0) ? 
1010                    SO_TC_BE 
: SO_TC_BK_SYS
); 
1013 __private_extern__ 
int 
1014 so_get_opportunistic(struct socket 
*so
) 
1016         return so
->so_traffic_class 
== SO_TC_BK_SYS
; 
1019 __private_extern__ 
int 
1020 so_tc_from_control(struct mbuf 
*control
, int *out_netsvctype
) 
1023         int sotc 
= SO_TC_UNSPEC
; 
1025         *out_netsvctype 
= _NET_SERVICE_TYPE_UNSPEC
; 
1027         for (cm 
= M_FIRST_CMSGHDR(control
); 
1028             is_cmsg_valid(control
, cm
); 
1029             cm 
= M_NXT_CMSGHDR(control
, cm
)) { 
1032                 if (cm
->cmsg_level 
!= SOL_SOCKET 
|| 
1033                     cm
->cmsg_len 
!= CMSG_LEN(sizeof(int))) { 
1036                 val 
= *(int *)(void *)CMSG_DATA(cm
); 
1038                  * The first valid option wins 
1040                 switch (cm
->cmsg_type
) { 
1041                 case SO_TRAFFIC_CLASS
: 
1042                         if (SO_VALID_TC(val
)) { 
1046                         } else if (val 
< SO_TC_NET_SERVICE_OFFSET
) { 
1050                          * Handle the case SO_NET_SERVICE_TYPE values are 
1051                          * passed using SO_TRAFFIC_CLASS 
1053                         val 
= val 
- SO_TC_NET_SERVICE_OFFSET
; 
1055                 case SO_NET_SERVICE_TYPE
: 
1056                         if (!IS_VALID_NET_SERVICE_TYPE(val
)) { 
1059                         *out_netsvctype 
= val
; 
1060                         sotc 
= sotc_by_netservicetype
[val
]; 
1071 __private_extern__ 
int 
1072 so_tos_from_control(struct mbuf 
*control
) 
1075         int tos 
= IPTOS_UNSPEC
; 
1077         for (cm 
= M_FIRST_CMSGHDR(control
); 
1078             is_cmsg_valid(control
, cm
); 
1079             cm 
= M_NXT_CMSGHDR(control
, cm
)) { 
1080                 if (cm
->cmsg_len 
!= CMSG_LEN(sizeof(int))) { 
1084                 if ((cm
->cmsg_level 
== IPPROTO_IP 
&& 
1085                     cm
->cmsg_type 
== IP_TOS
) || 
1086                     (cm
->cmsg_level 
== IPPROTO_IPV6 
&& 
1087                     cm
->cmsg_type 
== IPV6_TCLASS
)) { 
1088                         tos 
= *(int *)(void *)CMSG_DATA(cm
) & IPTOS_MASK
; 
1089                         /* The first valid option wins */ 
1097 __private_extern__ 
void 
1098 so_recv_data_stat(struct socket 
*so
, struct mbuf 
*m
, size_t off
) 
1100         uint32_t mtc 
= m_get_traffic_class(m
); 
1102         if (mtc 
>= SO_TC_STATS_MAX
) { 
1106         so
->so_tc_stats
[mtc
].rxpackets 
+= 1; 
1107         so
->so_tc_stats
[mtc
].rxbytes 
+= 
1108             ((m
->m_flags 
& M_PKTHDR
) ? m
->m_pkthdr
.len 
: 0) + off
; 
1111 __private_extern__ 
void 
1112 so_inc_recv_data_stat(struct socket 
*so
, size_t pkts
, size_t bytes
, 
1115         if (mtc 
>= SO_TC_STATS_MAX
) { 
1119         so
->so_tc_stats
[mtc
].rxpackets 
+= pkts
; 
1120         so
->so_tc_stats
[mtc
].rxbytes 
+= bytes
; 
1124 so_throttle_best_effort(struct socket 
*so
, struct ifnet 
*ifp
) 
1126         uint32_t uptime 
= net_uptime(); 
1127         return soissrcbesteffort(so
) && 
1128                net_io_policy_throttle_best_effort 
== 1 && 
1129                ifp
->if_rt_sendts 
> 0 && 
1130                (int)(uptime 
- ifp
->if_rt_sendts
) <= TCP_BG_SWITCH_TIME
; 
1133 __private_extern__ 
void 
1134 set_tcp_stream_priority(struct socket 
*so
) 
1136         struct inpcb 
*inp 
= sotoinpcb(so
); 
1137         struct tcpcb 
*tp 
= intotcpcb(inp
); 
1138         struct ifnet 
*outifp
; 
1139         u_char old_cc 
= tp
->tcp_cc_index
; 
1140         int recvbg 
= IS_TCP_RECV_BG(so
); 
1141         bool is_local 
= false, fg_active 
= false; 
1144         VERIFY((SOCK_CHECK_DOM(so
, PF_INET
) || 
1145             SOCK_CHECK_DOM(so
, PF_INET6
)) && 
1146             SOCK_CHECK_TYPE(so
, SOCK_STREAM
) && 
1147             SOCK_CHECK_PROTO(so
, IPPROTO_TCP
)); 
1149         /* Return if the socket is in a terminal state */ 
1150         if (inp
->inp_state 
== INPCB_STATE_DEAD
) { 
1154         outifp 
= inp
->inp_last_outifp
; 
1155         uptime 
= net_uptime(); 
1158          * If the socket was marked as a background socket or if the 
1159          * traffic class is set to background with traffic class socket 
1160          * option then make both send and recv side of the stream to be 
1161          * background. The variable sotcdb which can be set with sysctl 
1162          * is used to disable these settings for testing. 
1164         if (outifp 
== NULL 
|| (outifp
->if_flags 
& IFF_LOOPBACK
)) { 
1168         /* Check if there has been recent foreground activity */ 
1169         if (outifp 
!= NULL
) { 
1171                  * If the traffic source is background, check if 
1172                  * if it can be switched to foreground. This can 
1173                  * happen when there is no indication of foreground 
1176                 if (soissrcbackground(so
) && outifp
->if_fg_sendts 
> 0 && 
1177                     (int)(uptime 
- outifp
->if_fg_sendts
) <= TCP_BG_SWITCH_TIME
) { 
1182                  * The traffic source is best-effort -- check if 
1183                  * the policy to throttle best effort is enabled 
1184                  * and there was realtime activity on this 
1185                  * interface recently. If this is true, enable 
1186                  * algorithms that respond to increased latency 
1187                  * on best-effort traffic. 
1189                 if (so_throttle_best_effort(so
, outifp
)) { 
1195          * System initiated background traffic like cloud uploads should 
1196          * always use background delay sensitive algorithms. This will 
1197          * make the stream more responsive to other streams on the user's 
1198          * network and it will minimize latency induced. 
1200         if (fg_active 
|| IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) { 
1202                  * If the interface that the connection is using is 
1203                  * loopback, do not use background congestion 
1204                  * control algorithm. 
1206                  * If there has been recent foreground activity or if 
1207                  * there was an indication that a foreground application 
1208                  * is going to use networking (net_io_policy_throttled), 
1209                  * switch the backgroung streams to use background 
1210                  * congestion control algorithm. Otherwise, even background 
1211                  * flows can move into foreground. 
1213                 if ((sotcdb 
& SOTCDB_NO_SENDTCPBG
) != 0 || is_local 
|| 
1214                     !IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) { 
1215                         if (old_cc 
== TCP_CC_ALGO_BACKGROUND_INDEX
) { 
1216                                 tcp_set_foreground_cc(so
); 
1219                         if (old_cc 
!= TCP_CC_ALGO_BACKGROUND_INDEX
) { 
1220                                 tcp_set_background_cc(so
); 
1224                 /* Set receive side background flags */ 
1225                 if ((sotcdb 
& SOTCDB_NO_RECVTCPBG
) != 0 || is_local 
|| 
1226                     !IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) { 
1227                         tcp_clear_recv_bg(so
); 
1229                         tcp_set_recv_bg(so
); 
1232                 tcp_clear_recv_bg(so
); 
1233                 if (old_cc 
== TCP_CC_ALGO_BACKGROUND_INDEX
) { 
1234                         tcp_set_foreground_cc(so
); 
1238         if (old_cc 
!= tp
->tcp_cc_index 
|| recvbg 
!= IS_TCP_RECV_BG(so
)) { 
1239                 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; " 
1240                     "%s recv\n", so
->last_pid
, 
1241                     (uint64_t)VM_KERNEL_ADDRPERM(so
), 
1242                     SOCK_DOM(so
), SOCK_TYPE(so
), 
1243                     (tp
->tcp_cc_index 
== TCP_CC_ALGO_BACKGROUND_INDEX
) ? 
1244                     "background" : "foreground", 
1245                     IS_TCP_RECV_BG(so
) ? "background" : "foreground"); 
1250  * Set traffic class to an IPv4 or IPv6 packet 
1252  * - set the DSCP code following the WMM mapping 
1254 __private_extern__ 
void 
1255 set_packet_service_class(struct mbuf 
*m
, struct socket 
*so
, 
1256     int sotc
, uint32_t flags
) 
1258         mbuf_svc_class_t msc 
= MBUF_SC_BE
;         /* Best effort by default */ 
1259         struct inpcb 
*inp 
= sotoinpcb(so
); /* in6pcb and inpcb are the same */ 
1261         if (!(m
->m_flags 
& M_PKTHDR
)) { 
1266          * Here is the precedence: 
1267          * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all 
1268          * 2) Traffic class passed via ancillary data to sendmsdg(2) 
1269          * 3) Traffic class socket option last 
1271         if (sotc 
!= SO_TC_UNSPEC
) { 
1272                 VERIFY(SO_VALID_TC(sotc
)); 
1273                 msc 
= so_tc2msc(sotc
); 
1274                 /* Assert because tc must have been valid */ 
1275                 VERIFY(MBUF_VALID_SC(msc
)); 
1279          * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle 
1280          * best effort is set, depress the priority. 
1282         if (!IS_MBUF_SC_BACKGROUND(msc
) && soisthrottled(so
)) { 
1286         if (IS_MBUF_SC_BESTEFFORT(msc
) && inp
->inp_last_outifp 
!= NULL 
&& 
1287             so_throttle_best_effort(so
, inp
->inp_last_outifp
)) { 
1291         if (soissrcbackground(so
)) { 
1292                 m
->m_pkthdr
.pkt_flags 
|= PKTF_SO_BACKGROUND
; 
1295         if (soissrcrealtime(so
) || IS_MBUF_SC_REALTIME(msc
)) { 
1296                 m
->m_pkthdr
.pkt_flags 
|= PKTF_SO_REALTIME
; 
1299          * Set the traffic class in the mbuf packet header svc field 
1301         if (sotcdb 
& SOTCDB_NO_MTC
) { 
1306          * Elevate service class if the packet is a pure TCP ACK. 
1307          * We can do this only when the flow is not a background 
1308          * flow and the outgoing interface supports 
1309          * transmit-start model. 
1311         if (!IS_MBUF_SC_BACKGROUND(msc
) && 
1312             (flags 
& (PKT_SCF_TCP_ACK 
| PKT_SCF_TCP_SYN
)) != 0) { 
1316         (void) m_set_service_class(m
, msc
); 
1319          * Set the privileged traffic auxiliary flag if applicable, 
1322         if (!(sotcdb 
& SOTCDB_NO_PRIVILEGED
) && soisprivilegedtraffic(so
) && 
1323             msc 
!= MBUF_SC_UNSPEC
) { 
1324                 m
->m_pkthdr
.pkt_flags 
|= PKTF_PRIO_PRIVILEGED
; 
1326                 m
->m_pkthdr
.pkt_flags 
&= ~PKTF_PRIO_PRIVILEGED
; 
1331          * For TCP with background traffic class switch CC algo based on sysctl 
1333         if (so
->so_type 
== SOCK_STREAM
) { 
1334                 set_tcp_stream_priority(so
); 
1337         so_tc_update_stats(m
, so
, msc
); 
1340 __private_extern__ 
void 
1341 so_tc_update_stats(struct mbuf 
*m
, struct socket 
*so
, mbuf_svc_class_t msc
) 
1343         mbuf_traffic_class_t mtc
; 
1346          * Assume socket and mbuf traffic class values are the same 
1347          * Also assume the socket lock is held.  Note that the stats 
1348          * at the socket layer are reduced down to the legacy traffic 
1349          * classes; we could/should potentially expand so_tc_stats[]. 
1351         mtc 
= MBUF_SC2TC(msc
); 
1352         VERIFY(mtc 
< SO_TC_STATS_MAX
); 
1353         so
->so_tc_stats
[mtc
].txpackets 
+= 1; 
1354         so
->so_tc_stats
[mtc
].txbytes 
+= m
->m_pkthdr
.len
; 
1357 __private_extern__ 
void 
1358 socket_tclass_init(void) 
1360         _CASSERT(_SO_TC_MAX 
== SO_TC_STATS_MAX
); 
1362         tclass_lck_grp_attr 
= lck_grp_attr_alloc_init(); 
1363         tclass_lck_grp 
= lck_grp_alloc_init("tclass", tclass_lck_grp_attr
); 
1364         tclass_lck_attr 
= lck_attr_alloc_init(); 
1365         lck_mtx_init(tclass_lock
, tclass_lck_grp
, tclass_lck_attr
); 
1368 __private_extern__ mbuf_svc_class_t
 
1371         mbuf_svc_class_t msc
; 
1375                 msc 
= MBUF_SC_BK_SYS
; 
1400         case SO_TC_NETSVC_SIG
: 
1412                 msc 
= MBUF_SC_UNSPEC
; 
1419 __private_extern__ 
int 
1420 so_svc2tc(mbuf_svc_class_t svc
) 
1423         case MBUF_SC_BK_SYS
: 
1424                 return SO_TC_BK_SYS
; 
1440                 return SO_TC_NETSVC_SIG
; 
1445         case MBUF_SC_UNSPEC
: 
1452  * LRO is turned on for AV streaming class. 
1455 so_set_lro(struct socket 
*so
, int optval
) 
1457         if (optval 
== SO_TC_AV
) { 
1458                 so
->so_flags 
|= SOF_USELRO
; 
1460                 if (so
->so_flags 
& SOF_USELRO
) { 
1461                         /* transition to non LRO class */ 
1462                         so
->so_flags 
&= ~SOF_USELRO
; 
1463                         struct inpcb 
*inp 
= sotoinpcb(so
); 
1464                         struct tcpcb 
*tp 
= NULL
; 
1466                                 tp 
= intotcpcb(inp
); 
1467                                 if (tp 
&& (tp
->t_flagsext 
& TF_LRO_OFFLOADED
)) { 
1468                                         tcp_lro_remove_state(inp
->inp_laddr
, 
1472                                         tp
->t_flagsext 
&= ~TF_LRO_OFFLOADED
; 
1480 sotc_index(int sotc
) 
1484                 return SOTCIX_BK_SYS
; 
1514          * Unknown traffic class value 
1520 fastlane_sc_to_dscp(uint32_t svc_class
) 
1522         uint8_t dscp 
= _DSCP_DF
; 
1524         switch (svc_class
) { 
1525         case MBUF_SC_BK_SYS
: 
1568 rfc4594_sc_to_dscp(uint32_t svc_class
) 
1570         uint8_t dscp 
= _DSCP_DF
; 
1572         switch (svc_class
) { 
1573         case MBUF_SC_BK_SYS
:            /* Low-Priority Data */ 
1578         case MBUF_SC_BE
:                        /* Standard */ 
1581         case MBUF_SC_RD
:                        /* Low-Latency Data */ 
1585         /* SVC_CLASS Not Defined:  High-Throughput Data */ 
1587         case MBUF_SC_OAM
:               /* OAM */ 
1591         /* SVC_CLASS Not Defined:  Broadcast Video */ 
1593         case MBUF_SC_AV
:                        /* Multimedia Streaming */ 
1596         case MBUF_SC_RV
:                        /* Real-Time Interactive */ 
1599         case MBUF_SC_VI
:                        /* Multimedia Conferencing */ 
1602         case MBUF_SC_SIG
:               /* Signaling */ 
1606         case MBUF_SC_VO
:                        /* Telephony */ 
1609         case MBUF_SC_CTL
:               /* Network Control*/ 
1620 mbuf_traffic_class_t
 
1621 rfc4594_dscp_to_tc(uint8_t dscp
) 
1623         mbuf_traffic_class_t tc 
= MBUF_TC_BE
; 
1653  * Pass NULL ifp for default map 
1656 set_netsvctype_dscp_map(struct net_qos_dscp_map 
*net_qos_dscp_map
, 
1657     const struct netsvctype_dscp_map 
*netsvctype_dscp_map
) 
1663          * Do not accept more that max number of distinct DSCPs 
1665         if (net_qos_dscp_map 
== NULL 
|| netsvctype_dscp_map 
== NULL
) { 
1670          * Validate input parameters 
1672         for (i 
= 0; i 
< _NET_SERVICE_TYPE_COUNT
; i
++) { 
1673                 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map
[i
].netsvctype
)) { 
1676                 if (netsvctype_dscp_map
[i
].dscp 
> _MAX_DSCP
) { 
1681         for (i 
= 0; i 
< _NET_SERVICE_TYPE_COUNT
; i
++) { 
1682                 netsvctype 
= netsvctype_dscp_map
[i
].netsvctype
; 
1684                 net_qos_dscp_map
->netsvctype_to_dscp
[netsvctype
] = 
1685                     netsvctype_dscp_map
[i
].dscp
; 
1687         for (netsvctype 
= 0; netsvctype 
< _NET_SERVICE_TYPE_COUNT
; netsvctype
++) { 
1688                 switch (netsvctype
) { 
1689                 case NET_SERVICE_TYPE_BE
: 
1690                 case NET_SERVICE_TYPE_BK
: 
1691                 case NET_SERVICE_TYPE_VI
: 
1692                 case NET_SERVICE_TYPE_VO
: 
1693                 case NET_SERVICE_TYPE_RV
: 
1694                 case NET_SERVICE_TYPE_AV
: 
1695                 case NET_SERVICE_TYPE_OAM
: 
1696                 case NET_SERVICE_TYPE_RD
: { 
1699                         sotcix 
= sotc_index(sotc_by_netservicetype
[netsvctype
]); 
1700                         if (sotcix 
!= SIZE_T_MAX
) { 
1701                                 net_qos_dscp_map
->sotc_to_dscp
[sotcix
]  = 
1702                                     netsvctype_dscp_map
[netsvctype
].dscp
; 
1706                 case  NET_SERVICE_TYPE_SIG
: 
1707                         /* Signaling does not have its own traffic class */ 
1710                         /* We should not be here */ 
1714         /* Network control socket traffic class is always best effort */ 
1715         net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_CTL
] = _DSCP_DF
; 
1717         /* Backround socket traffic class DSCP same as backround system */ 
1718         net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_BK
] = 
1719             net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_BK_SYS
]; 
1725  * out_count is an input/ouput parameter 
1728 get_netsvctype_dscp_map(size_t *out_count
, 
1729     struct netsvctype_dscp_map 
*netsvctype_dscp_map
) 
1732         struct net_qos_dscp_map 
*net_qos_dscp_map 
= NULL
; 
1735          * Do not accept more that max number of distinct DSCPs 
1737         if (out_count 
== NULL 
|| netsvctype_dscp_map 
== NULL
) { 
1740         if (*out_count 
> _MAX_DSCP
) { 
1744         net_qos_dscp_map 
= &fastlane_net_qos_dscp_map
; 
1746         for (i 
= 0; i 
< MIN(_NET_SERVICE_TYPE_COUNT
, *out_count
); i
++) { 
1747                 netsvctype_dscp_map
[i
].netsvctype 
= i
; 
1748                 netsvctype_dscp_map
[i
].dscp 
= net_qos_dscp_map
->netsvctype_to_dscp
[i
]; 
1760         error 
= set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map
, 
1761             fastlane_netsvctype_dscp_map
); 
1764         error 
= set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map
, 
1765             rfc4594_netsvctype_dscp_map
); 
1768         set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map
, 1); 
1772 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
 
1774 #pragma unused(oidp, arg1, arg2) 
1777         struct netsvctype_dscp_map netsvctype_dscp_map
[_NET_SERVICE_TYPE_COUNT
] = {}; 
1780         if (req
->oldptr 
== USER_ADDR_NULL
) { 
1782                     _NET_SERVICE_TYPE_COUNT 
* sizeof(struct netsvctype_dscp_map
); 
1783         } else if (req
->oldlen 
> 0) { 
1784                 count 
= _NET_SERVICE_TYPE_COUNT
; 
1785                 error 
= get_netsvctype_dscp_map(&count
, netsvctype_dscp_map
); 
1789                 len 
= count 
* sizeof(struct netsvctype_dscp_map
); 
1790                 error 
= SYSCTL_OUT(req
, netsvctype_dscp_map
, 
1791                     MIN(len
, req
->oldlen
)); 
1797         if (req
->newptr 
!= USER_ADDR_NULL
) { 
1804 __private_extern__ errno_t
 
1805 set_packet_qos(struct mbuf 
*m
, struct ifnet 
*ifp
, boolean_t qos_allowed
, 
1806     int sotc
, int netsvctype
, uint8_t *dscp_inout
) 
1808         if (ifp 
== NULL 
|| dscp_inout 
== NULL
) { 
1812         if ((ifp
->if_eflags 
& IFEF_QOSMARKING_ENABLED
) != 0 && 
1813             ifp
->if_qosmarking_mode 
!= IFRTYPE_QOSMARKING_MODE_NONE
) { 
1815                 const struct net_qos_dscp_map 
*net_qos_dscp_map 
= NULL
; 
1817                 switch (ifp
->if_qosmarking_mode
) { 
1818                 case IFRTYPE_QOSMARKING_FASTLANE
: 
1819                         net_qos_dscp_map 
= &fastlane_net_qos_dscp_map
; 
1821                 case IFRTYPE_QOSMARKING_RFC4594
: 
1822                         net_qos_dscp_map 
= &rfc4594_net_qos_dscp_map
; 
1825                         panic("invalid QoS marking type"); 
1830                  * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops 
1835                  * For DSCP use the network service type is specified, otherwise 
1836                  * use the socket traffic class 
1838                  * When not whitelisted by the policy, set DSCP only for best 
1839                  * effort and background, and set the mbuf service class to 
1840                  * best effort as well so the packet will be queued and 
1841                  * scheduled at a lower priority. 
1842                  * We still want to prioritize control traffic on the interface 
1843                  * so we do not change the mbuf service class for SO_TC_CTL 
1845                 if (IS_VALID_NET_SERVICE_TYPE(netsvctype
) && 
1846                     netsvctype 
!= NET_SERVICE_TYPE_BE
) { 
1847                         dscp 
= net_qos_dscp_map
->netsvctype_to_dscp
[netsvctype
]; 
1849                         if (qos_allowed 
== FALSE 
&& 
1850                             netsvctype 
!= NET_SERVICE_TYPE_BE 
&& 
1851                             netsvctype 
!= NET_SERVICE_TYPE_BK
) { 
1853                                 if (sotc 
!= SO_TC_CTL
) { 
1854                                         m_set_service_class(m
, MBUF_SC_BE
); 
1857                 } else if (sotc 
!= SO_TC_UNSPEC
) { 
1858                         size_t sotcix 
= sotc_index(sotc
); 
1859                         if (sotcix 
!= SIZE_T_MAX
) { 
1860                                 dscp 
= net_qos_dscp_map
->sotc_to_dscp
[sotcix
]; 
1862                                 if (qos_allowed 
== FALSE 
&& sotc 
!= SO_TC_BE 
&& 
1863                                     sotc 
!= SO_TC_BK 
&& sotc 
!= SO_TC_BK_SYS 
&& 
1864                                     sotc 
!= SO_TC_CTL
) { 
1866                                         if (sotc 
!= SO_TC_CTL
) { 
1867                                                 m_set_service_class(m
, MBUF_SC_BE
); 
1872                 if (net_qos_verbose 
!= 0) { 
1873                         printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n", 
1874                             __func__
, qos_allowed
, sotc
, netsvctype
, dscp
); 
1877                 if (*dscp_inout 
!= dscp
) { 
1880         } else if (*dscp_inout 
!= _DSCP_DF 
&& IFNET_IS_WIFI_INFRA(ifp
)) { 
1881                 mbuf_svc_class_t msc 
= m_get_service_class(m
); 
1884                  * For WiFi infra, when the mbuf service class is best effort 
1885                  * and the DSCP is not default, set the service class based 
1888                 if (msc 
== MBUF_SC_BE
) { 
1889                         msc 
= wifi_dscp_to_msc_array
[*dscp_inout
]; 
1891                         if (msc 
!= MBUF_SC_BE
) { 
1892                                 m_set_service_class(m
, msc
); 
1894                                 if (net_qos_verbose 
!= 0) { 
1895                                         printf("%s set msc %u for dscp %u\n", 
1896                                             __func__
, msc
, *dscp_inout
); 
1906 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map 
*map
, int clear
) 
1911                 bzero(wifi_dscp_to_msc_array
, sizeof(wifi_dscp_to_msc_array
)); 
1914         for (i 
= 0; i 
< DSCP_ARRAY_SIZE
; i
++) { 
1915                 const struct dcsp_msc_map 
*elem 
= map 
+ i
; 
1917                 if (elem
->dscp 
> _MAX_DSCP 
|| elem
->msc 
== MBUF_SC_UNSPEC
) { 
1920                 switch (elem
->msc
) { 
1921                 case MBUF_SC_BK_SYS
: 
1923                         wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_BK
; 
1929                         wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_BE
; 
1934                         wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_VI
; 
1938                         wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_VO
; 
1945 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map 
*netsvctype_dscp_map
, 
1946     size_t count
, struct dcsp_msc_map 
*dcsp_msc_map
) 
1952          * Validate input parameters 
1954         for (i 
= 0; i 
< count
; i
++) { 
1955                 if (!SO_VALID_TC(netsvctype_dscp_map
[i
].netsvctype
)) { 
1959                 if (netsvctype_dscp_map
[i
].dscp 
> _MAX_DSCP
) { 
1965         bzero(dcsp_msc_map
, DSCP_ARRAY_SIZE 
* sizeof(struct dcsp_msc_map
)); 
1967         for (i 
= 0; i 
< count
; i
++) { 
1968                 dcsp_msc_map
[i
].dscp 
= netsvctype_dscp_map
[i
].dscp
; 
1969                 dcsp_msc_map
[i
].msc 
= so_tc2msc(netsvctype_dscp_map
[i
].netsvctype
); 
1976 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
 
1978 #pragma unused(oidp, arg1, arg2) 
1980         size_t len 
= DSCP_ARRAY_SIZE 
* sizeof(struct netsvctype_dscp_map
); 
1981         struct netsvctype_dscp_map netsvctype_dscp_map
[DSCP_ARRAY_SIZE
] = {}; 
1982         struct dcsp_msc_map dcsp_msc_map
[DSCP_ARRAY_SIZE
]; 
1986         if (req
->oldptr 
== USER_ADDR_NULL
) { 
1988         } else if (req
->oldlen 
> 0) { 
1989                 for (i 
= 0; i 
< DSCP_ARRAY_SIZE
; i
++) { 
1990                         netsvctype_dscp_map
[i
].dscp 
= i
; 
1991                         netsvctype_dscp_map
[i
].netsvctype 
= 
1992                             so_svc2tc(wifi_dscp_to_msc_array
[i
]); 
1994                 error 
= SYSCTL_OUT(req
, netsvctype_dscp_map
, 
1995                     MIN(len
, req
->oldlen
)); 
2001         if (req
->newptr 
== USER_ADDR_NULL
) { 
2005         error 
= proc_suser(current_proc()); 
2011          * Check input length 
2013         if (req
->newlen 
> len
) { 
2018          * Cap the number of entries to copy from input buffer 
2020         if (len 
> req
->newlen
) { 
2023         error 
= SYSCTL_IN(req
, netsvctype_dscp_map
, len
); 
2027         count 
= len 
/ sizeof(struct netsvctype_dscp_map
); 
2028         bzero(dcsp_msc_map
, sizeof(dcsp_msc_map
)); 
2029         error 
= dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map
, count
, 
2034         set_dscp_to_wifi_ac_map(dcsp_msc_map
, 0); 
2040 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
 
2042 #pragma unused(oidp, arg1, arg2) 
2046         error 
= sysctl_handle_int(oidp
, &val
, 0, req
); 
2047         if (error 
|| !req
->newptr
) { 
2051         set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map
, 1); 
2057  * Returns whether a large upload or download transfer should be marked as 
2058  * BK service type for network activity. This is a system level 
2059  * hint/suggestion to classify application traffic based on statistics 
2060  * collected from the current network attachment 
2062  * Returns 1 for BK and 0 for default 
2066 net_qos_guideline(struct proc 
*p
, struct net_qos_guideline_args 
*arg
, 
2070 #define RETURN_USE_BK   1 
2071 #define RETURN_USE_DEFAULT      0 
2072         struct net_qos_param qos_arg
; 
2073         struct ifnet 
*ipv4_primary
, *ipv6_primary
; 
2076         if (arg
->param 
== USER_ADDR_NULL 
|| retval 
== NULL 
|| 
2077             arg
->param_len 
!= sizeof(qos_arg
)) { 
2080         err 
= copyin(arg
->param
, (caddr_t
) &qos_arg
, sizeof(qos_arg
)); 
2085         *retval 
= RETURN_USE_DEFAULT
; 
2086         ipv4_primary 
= ifindex2ifnet
[get_primary_ifscope(AF_INET
)]; 
2087         ipv6_primary 
= ifindex2ifnet
[get_primary_ifscope(AF_INET6
)]; 
2090          * If either of the interfaces is in Low Internet mode, enable 
2091          * background delay based algorithms on this transfer 
2093         if (qos_arg
.nq_uplink
) { 
2094                 if ((ipv4_primary 
!= NULL 
&& 
2095                     (ipv4_primary
->if_xflags 
& IFXF_LOW_INTERNET_UL
)) || 
2096                     (ipv6_primary 
!= NULL 
&& 
2097                     (ipv6_primary
->if_xflags 
& IFXF_LOW_INTERNET_UL
))) { 
2098                         *retval 
= RETURN_USE_BK
; 
2102                 if ((ipv4_primary 
!= NULL 
&& 
2103                     (ipv4_primary
->if_xflags 
& IFXF_LOW_INTERNET_DL
)) || 
2104                     (ipv6_primary 
!= NULL 
&& 
2105                     (ipv6_primary
->if_xflags 
& IFXF_LOW_INTERNET_DL
))) { 
2106                         *retval 
= RETURN_USE_BK
; 
2112          * Some times IPv4 and IPv6 primary interfaces can be different. 
2113          * In this case, if either of them is non-cellular, we should mark 
2114          * the transfer as BK as it can potentially get used based on 
2115          * the host name resolution 
2117         if (ipv4_primary 
!= NULL 
&& IFNET_IS_EXPENSIVE(ipv4_primary
) && 
2118             ipv6_primary 
!= NULL 
&& IFNET_IS_EXPENSIVE(ipv6_primary
)) { 
2119                 if (qos_arg
.nq_use_expensive
) { 
2122                         *retval 
= RETURN_USE_BK
; 
2126         if (ipv4_primary 
!= NULL 
&& IFNET_IS_CONSTRAINED(ipv4_primary
) && 
2127             ipv6_primary 
!= NULL 
&& IFNET_IS_CONSTRAINED(ipv6_primary
)) { 
2128                 if (qos_arg
.nq_use_constrained
) { 
2131                         *retval 
= RETURN_USE_BK
; 
2135         if (qos_arg
.nq_transfer_size 
>= 5 * 1024 * 1024) { 
2136                 *retval 
= RETURN_USE_BK
; 
2141 #undef  RETURN_USE_BK 
2142 #undef  RETURN_USE_DEFAULT