2 * Copyright (c) 2009-2018 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/lro_ext.h>
62 #include <netinet/in_tclass.h>
68 static inline int so_throttle_best_effort(struct socket
*, struct ifnet
*);
69 static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map
*, int);
70 static errno_t
dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map
*, size_t,
71 struct dcsp_msc_map
*);
73 static lck_grp_attr_t
*tclass_lck_grp_attr
= NULL
; /* mutex group attributes */
74 static lck_grp_t
*tclass_lck_grp
= NULL
; /* mutex group definition */
75 static lck_attr_t
*tclass_lck_attr
= NULL
; /* mutex attributes */
76 decl_lck_mtx_data(static, tclass_lock_data
);
77 static lck_mtx_t
*tclass_lock
= &tclass_lock_data
;
79 SYSCTL_NODE(_net
, OID_AUTO
, qos
,
80 CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "QoS");
82 static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
;
83 SYSCTL_PROC(_net_qos
, OID_AUTO
, default_netsvctype_to_dscp_map
,
84 CTLTYPE_STRUCT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
85 0, 0, sysctl_default_netsvctype_to_dscp_map
, "S", "");
87 static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
;
88 SYSCTL_PROC(_net_qos
, OID_AUTO
, dscp_to_wifi_ac_map
,
89 CTLTYPE_STRUCT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
90 0, 0, sysctl_dscp_to_wifi_ac_map
, "S", "");
92 static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
;
93 SYSCTL_PROC(_net_qos
, OID_AUTO
, reset_dscp_to_wifi_ac_map
,
94 CTLTYPE_INT
| CTLFLAG_WR
| CTLFLAG_LOCKED
,
95 0, 0, sysctl_reset_dscp_to_wifi_ac_map
, "I", "");
97 int net_qos_verbose
= 0;
98 SYSCTL_INT(_net_qos
, OID_AUTO
, verbose
,
99 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_verbose
, 0, "");
102 * Fastlane QoS policy:
103 * By default, allow all apps to get traffic class to DSCP mapping
105 SYSCTL_NODE(_net_qos
, OID_AUTO
, policy
,
106 CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "");
108 int net_qos_policy_restricted
= 0;
109 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, restricted
,
110 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_restricted
, 0, "");
112 int net_qos_policy_restrict_avapps
= 0;
113 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, restrict_avapps
,
114 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_restrict_avapps
, 0, "");
116 int net_qos_policy_wifi_enabled
= 0;
117 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, wifi_enabled
,
118 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_wifi_enabled
, 0, "");
120 int net_qos_policy_none_wifi_enabled
= 0;
121 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, none_wifi_enabled
,
122 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_none_wifi_enabled
, 0, "");
124 int net_qos_policy_capable_enabled
= 0;
125 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, capable_enabled
,
126 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_capable_enabled
, 0, "");
129 * Socket traffic class from network service type
131 const int sotc_by_netservicetype
[_NET_SERVICE_TYPE_COUNT
] = {
132 SO_TC_BE
, /* NET_SERVICE_TYPE_BE */
133 SO_TC_BK_SYS
, /* NET_SERVICE_TYPE_BK */
134 SO_TC_VI
, /* NET_SERVICE_TYPE_SIG */
135 SO_TC_VI
, /* NET_SERVICE_TYPE_VI */
136 SO_TC_VO
, /* NET_SERVICE_TYPE_VO */
137 SO_TC_RV
, /* NET_SERVICE_TYPE_RV */
138 SO_TC_AV
, /* NET_SERVICE_TYPE_AV */
139 SO_TC_OAM
, /* NET_SERVICE_TYPE_OAM */
140 SO_TC_RD
/* NET_SERVICE_TYPE_RD */
144 * DSCP mappings for QoS Fastlane as based on network service types
147 struct netsvctype_dscp_map fastlane_netsvctype_dscp_map
[_NET_SERVICE_TYPE_COUNT
] = {
148 { NET_SERVICE_TYPE_BE
, _DSCP_DF
},
149 { NET_SERVICE_TYPE_BK
, _DSCP_AF11
},
150 { NET_SERVICE_TYPE_SIG
, _DSCP_CS3
},
151 { NET_SERVICE_TYPE_VI
, _DSCP_AF41
},
152 { NET_SERVICE_TYPE_VO
, _DSCP_EF
},
153 { NET_SERVICE_TYPE_RV
, _DSCP_CS4
},
154 { NET_SERVICE_TYPE_AV
, _DSCP_AF31
},
155 { NET_SERVICE_TYPE_OAM
, _DSCP_CS2
},
156 { NET_SERVICE_TYPE_RD
, _DSCP_AF21
},
159 static struct net_qos_dscp_map default_net_qos_dscp_map
;
162 * The size is one more than the max because DSCP start at zero
164 #define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
167 * The DSCP to UP mapping (via mbuf service class) for WiFi that follows is the
168 * mapping implemented at the 802.11 driver level when the mbuf service class is
171 * This clashes with the recommended mapping documented by the IETF document
172 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
173 * binary compatibility. Applications should use the network service type socket
174 * option to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
176 static const struct dcsp_msc_map default_dscp_to_wifi_ac_map
[] = {
177 { _DSCP_DF
, MBUF_SC_BE
}, /* RFC 2474 Standard */
178 { 1, MBUF_SC_BE
}, /* */
179 { 2, MBUF_SC_BE
}, /* */
180 { 3, MBUF_SC_BE
}, /* */
181 { 4, MBUF_SC_BE
}, /* */
182 { 5, MBUF_SC_BE
}, /* */
183 { 6, MBUF_SC_BE
}, /* */
184 { 7, MBUF_SC_BE
}, /* */
186 { _DSCP_CS1
, MBUF_SC_BK
}, /* RFC 3662 Low-Priority Data */
187 { 9, MBUF_SC_BK
}, /* */
188 { _DSCP_AF11
, MBUF_SC_BK
}, /* RFC 2597 High-Throughput Data */
189 { 11, MBUF_SC_BK
}, /* */
190 { _DSCP_AF12
, MBUF_SC_BK
}, /* RFC 2597 High-Throughput Data */
191 { 13, MBUF_SC_BK
}, /* */
192 { _DSCP_AF13
, MBUF_SC_BK
}, /* RFC 2597 High-Throughput Data */
193 { 15, MBUF_SC_BK
}, /* */
195 { _DSCP_CS2
, MBUF_SC_BK
}, /* RFC 4594 OAM */
196 { 17, MBUF_SC_BK
}, /* */
197 { _DSCP_AF21
, MBUF_SC_BK
}, /* RFC 2597 Low-Latency Data */
198 { 19, MBUF_SC_BK
}, /* */
199 { _DSCP_AF22
, MBUF_SC_BK
}, /* RFC 2597 Low-Latency Data */
200 { 21, MBUF_SC_BK
}, /* */
201 { _DSCP_AF23
, MBUF_SC_BK
}, /* RFC 2597 Low-Latency Data */
202 { 23, MBUF_SC_BK
}, /* */
204 { _DSCP_CS3
, MBUF_SC_BE
}, /* RFC 2474 Broadcast Video */
205 { 25, MBUF_SC_BE
}, /* */
206 { _DSCP_AF31
, MBUF_SC_BE
}, /* RFC 2597 Multimedia Streaming */
207 { 27, MBUF_SC_BE
}, /* */
208 { _DSCP_AF32
, MBUF_SC_BE
}, /* RFC 2597 Multimedia Streaming */
209 { 29, MBUF_SC_BE
}, /* */
210 { _DSCP_AF33
, MBUF_SC_BE
}, /* RFC 2597 Multimedia Streaming */
211 { 31, MBUF_SC_BE
}, /* */
213 { _DSCP_CS4
, MBUF_SC_VI
}, /* RFC 2474 Real-Time Interactive */
214 { 33, MBUF_SC_VI
}, /* */
215 { _DSCP_AF41
, MBUF_SC_VI
}, /* RFC 2597 Multimedia Conferencing */
216 { 35, MBUF_SC_VI
}, /* */
217 { _DSCP_AF42
, MBUF_SC_VI
}, /* RFC 2597 Multimedia Conferencing */
218 { 37, MBUF_SC_VI
}, /* */
219 { _DSCP_AF43
, MBUF_SC_VI
}, /* RFC 2597 Multimedia Conferencing */
220 { 39, MBUF_SC_VI
}, /* */
222 { _DSCP_CS5
, MBUF_SC_VI
}, /* RFC 2474 Signaling */
223 { 41, MBUF_SC_VI
}, /* */
224 { 42, MBUF_SC_VI
}, /* */
225 { 43, MBUF_SC_VI
}, /* */
226 { _DSCP_VA
, MBUF_SC_VI
}, /* RFC 5865 VOICE-ADMIT */
227 { 45, MBUF_SC_VI
}, /* */
228 { _DSCP_EF
, MBUF_SC_VI
}, /* RFC 3246 Telephony */
229 { 47, MBUF_SC_VI
}, /* */
231 { _DSCP_CS6
, MBUF_SC_VO
}, /* Wi-Fi WMM Certification: Chariot */
232 { 49, MBUF_SC_VO
}, /* */
233 { 50, MBUF_SC_VO
}, /* */
234 { 51, MBUF_SC_VO
}, /* */
235 { 52, MBUF_SC_VO
}, /* Wi-Fi WMM Certification: Sigma */
236 { 53, MBUF_SC_VO
}, /* */
237 { 54, MBUF_SC_VO
}, /* */
238 { 55, MBUF_SC_VO
}, /* */
240 { _DSCP_CS7
, MBUF_SC_VO
}, /* Wi-Fi WMM Certification: Chariot */
241 { 57, MBUF_SC_VO
}, /* */
242 { 58, MBUF_SC_VO
}, /* */
243 { 59, MBUF_SC_VO
}, /* */
244 { 60, MBUF_SC_VO
}, /* */
245 { 61, MBUF_SC_VO
}, /* */
246 { 62, MBUF_SC_VO
}, /* */
247 { 63, MBUF_SC_VO
}, /* */
249 { 255, MBUF_SC_UNSPEC
} /* invalid DSCP to mark last entry */
252 mbuf_svc_class_t wifi_dscp_to_msc_array
[DSCP_ARRAY_SIZE
];
255 * If there is no foreground activity on the interface for bg_switch_time
256 * seconds, the background connections can switch to foreground TCP
257 * congestion control.
259 #define TCP_BG_SWITCH_TIME 2 /* seconds */
261 #if (DEVELOPMENT || DEBUG)
263 static int tfp_count
= 0;
265 static TAILQ_HEAD(, tclass_for_proc
) tfp_head
=
266 TAILQ_HEAD_INITIALIZER(tfp_head
);
268 struct tclass_for_proc
{
269 TAILQ_ENTRY(tclass_for_proc
) tfp_link
;
272 char tfp_pname
[(2 * MAXCOMLEN
) + 1];
273 u_int32_t tfp_qos_mode
;
276 static int get_pid_tclass(struct so_tcdbg
*);
277 static int get_pname_tclass(struct so_tcdbg
*);
278 static int set_pid_tclass(struct so_tcdbg
*);
279 static int set_pname_tclass(struct so_tcdbg
*);
280 static int flush_pid_tclass(struct so_tcdbg
*);
281 static int purge_tclass_for_proc(void);
282 static int flush_tclass_for_proc(void);
283 static void set_tclass_for_curr_proc(struct socket
*);
286 * Must be called with tclass_lock held
288 static struct tclass_for_proc
*
289 find_tfp_by_pid(pid_t pid
)
291 struct tclass_for_proc
*tfp
;
293 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
294 if (tfp
->tfp_pid
== pid
)
301 * Must be called with tclass_lock held
303 static struct tclass_for_proc
*
304 find_tfp_by_pname(const char *pname
)
306 struct tclass_for_proc
*tfp
;
308 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
309 if (strncmp(pname
, tfp
->tfp_pname
,
310 sizeof (tfp
->tfp_pname
)) == 0)
316 __private_extern__
void
317 set_tclass_for_curr_proc(struct socket
*so
)
319 struct tclass_for_proc
*tfp
= NULL
;
320 proc_t p
= current_proc(); /* Not ref counted */
321 pid_t pid
= proc_pid(p
);
322 char *pname
= proc_best_name(p
);
324 lck_mtx_lock(tclass_lock
);
326 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
327 if ((tfp
->tfp_pid
== pid
) || (tfp
->tfp_pid
== -1 &&
328 strncmp(pname
, tfp
->tfp_pname
,
329 sizeof (tfp
->tfp_pname
)) == 0)) {
330 if (tfp
->tfp_class
!= SO_TC_UNSPEC
)
331 so
->so_traffic_class
= tfp
->tfp_class
;
333 if (tfp
->tfp_qos_mode
== QOS_MODE_MARKING_POLICY_ENABLE
)
334 so
->so_flags1
|= SOF1_QOSMARKING_ALLOWED
;
335 else if (tfp
->tfp_qos_mode
== QOS_MODE_MARKING_POLICY_DISABLE
)
336 so
->so_flags1
&= ~SOF1_QOSMARKING_ALLOWED
;
341 lck_mtx_unlock(tclass_lock
);
345 * Purge entries with PIDs of exited processes
348 purge_tclass_for_proc(void)
351 struct tclass_for_proc
*tfp
, *tvar
;
353 lck_mtx_lock(tclass_lock
);
355 TAILQ_FOREACH_SAFE(tfp
, &tfp_head
, tfp_link
, tvar
) {
358 if (tfp
->tfp_pid
== -1)
360 if ((p
= proc_find(tfp
->tfp_pid
)) == NULL
) {
362 TAILQ_REMOVE(&tfp_head
, tfp
, tfp_link
);
370 lck_mtx_unlock(tclass_lock
);
377 * Must be called with tclass_lock held
380 free_tclass_for_proc(struct tclass_for_proc
*tfp
)
385 TAILQ_REMOVE(&tfp_head
, tfp
, tfp_link
);
393 flush_tclass_for_proc(void)
396 struct tclass_for_proc
*tfp
, *tvar
;
398 lck_mtx_lock(tclass_lock
);
400 TAILQ_FOREACH_SAFE(tfp
, &tfp_head
, tfp_link
, tvar
) {
401 free_tclass_for_proc(tfp
);
404 lck_mtx_unlock(tclass_lock
);
411 * Must be called with tclass_lock held
413 static struct tclass_for_proc
*
414 alloc_tclass_for_proc(pid_t pid
, const char *pname
)
416 struct tclass_for_proc
*tfp
;
418 if (pid
== -1 && pname
== NULL
)
421 tfp
= _MALLOC(sizeof (struct tclass_for_proc
), M_TEMP
, M_NOWAIT
|M_ZERO
);
427 * Add per pid entries before per proc name so we can find
428 * a specific instance of a process before the general name base entry.
431 TAILQ_INSERT_HEAD(&tfp_head
, tfp
, tfp_link
);
433 strlcpy(tfp
->tfp_pname
, pname
, sizeof (tfp
->tfp_pname
));
434 TAILQ_INSERT_TAIL(&tfp_head
, tfp
, tfp_link
);
443 * SO_TC_UNSPEC for tclass means to remove the entry
446 set_pid_tclass(struct so_tcdbg
*so_tcdbg
)
450 struct filedesc
*fdp
;
452 struct tclass_for_proc
*tfp
;
454 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
455 int tclass
= so_tcdbg
->so_tcdbg_tclass
;
456 int netsvctype
= so_tcdbg
->so_tcdbg_netsvctype
;
460 printf("%s proc_find(%d) failed\n", __func__
, pid
);
465 lck_mtx_lock(tclass_lock
);
467 tfp
= find_tfp_by_pid(pid
);
469 tfp
= alloc_tclass_for_proc(pid
, NULL
);
471 lck_mtx_unlock(tclass_lock
);
476 tfp
->tfp_class
= tclass
;
477 tfp
->tfp_qos_mode
= so_tcdbg
->so_tcbbg_qos_mode
;
479 lck_mtx_unlock(tclass_lock
);
485 for (i
= 0; i
< fdp
->fd_nfiles
; i
++) {
488 fp
= fdp
->fd_ofiles
[i
];
490 (fdp
->fd_ofileflags
[i
] & UF_RESERVED
) != 0 ||
491 FILEGLOB_DTYPE(fp
->f_fglob
) != DTYPE_SOCKET
)
494 so
= (struct socket
*)fp
->f_fglob
->fg_data
;
495 if (SOCK_DOM(so
) != PF_INET
&& SOCK_DOM(so
) != PF_INET6
)
499 if (tfp
->tfp_qos_mode
== QOS_MODE_MARKING_POLICY_ENABLE
)
500 so
->so_flags1
|= SOF1_QOSMARKING_ALLOWED
;
501 else if (tfp
->tfp_qos_mode
== QOS_MODE_MARKING_POLICY_DISABLE
)
502 so
->so_flags1
&= ~SOF1_QOSMARKING_ALLOWED
;
503 socket_unlock(so
, 1);
505 if (netsvctype
!= _NET_SERVICE_TYPE_UNSPEC
)
506 error
= sock_setsockopt(so
, SOL_SOCKET
,
507 SO_NET_SERVICE_TYPE
, &netsvctype
, sizeof(int));
508 if (tclass
!= SO_TC_UNSPEC
)
509 error
= sock_setsockopt(so
, SOL_SOCKET
,
510 SO_TRAFFIC_CLASS
, &tclass
, sizeof(int));
526 set_pname_tclass(struct so_tcdbg
*so_tcdbg
)
529 struct tclass_for_proc
*tfp
;
531 lck_mtx_lock(tclass_lock
);
533 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
535 tfp
= alloc_tclass_for_proc(-1, so_tcdbg
->so_tcdbg_pname
);
537 lck_mtx_unlock(tclass_lock
);
542 tfp
->tfp_class
= so_tcdbg
->so_tcdbg_tclass
;
543 tfp
->tfp_qos_mode
= so_tcdbg
->so_tcbbg_qos_mode
;
545 lck_mtx_unlock(tclass_lock
);
554 flush_pid_tclass(struct so_tcdbg
*so_tcdbg
)
556 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
557 int tclass
= so_tcdbg
->so_tcdbg_tclass
;
558 struct filedesc
*fdp
;
564 if (p
== PROC_NULL
) {
565 printf("%s proc_find(%d) failed\n", __func__
, pid
);
571 for (i
= 0; i
< fdp
->fd_nfiles
; i
++) {
575 fp
= fdp
->fd_ofiles
[i
];
577 (fdp
->fd_ofileflags
[i
] & UF_RESERVED
) != 0 ||
578 FILEGLOB_DTYPE(fp
->f_fglob
) != DTYPE_SOCKET
)
581 so
= (struct socket
*)fp
->f_fglob
->fg_data
;
582 error
= sock_setsockopt(so
, SOL_SOCKET
, SO_FLUSH
, &tclass
,
585 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
586 "tclass=%d) failed %d\n", __func__
,
587 (uint64_t)VM_KERNEL_ADDRPERM(so
), i
, tclass
,
603 get_pid_tclass(struct so_tcdbg
*so_tcdbg
)
607 struct tclass_for_proc
*tfp
;
608 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
610 so_tcdbg
->so_tcdbg_tclass
= SO_TC_UNSPEC
; /* Means not set */
614 printf("%s proc_find(%d) failed\n", __func__
, pid
);
619 lck_mtx_lock(tclass_lock
);
621 tfp
= find_tfp_by_pid(pid
);
623 so_tcdbg
->so_tcdbg_tclass
= tfp
->tfp_class
;
624 so_tcdbg
->so_tcbbg_qos_mode
= tfp
->tfp_qos_mode
;
627 lck_mtx_unlock(tclass_lock
);
636 get_pname_tclass(struct so_tcdbg
*so_tcdbg
)
639 struct tclass_for_proc
*tfp
;
641 so_tcdbg
->so_tcdbg_tclass
= SO_TC_UNSPEC
; /* Means not set */
644 lck_mtx_lock(tclass_lock
);
646 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
648 so_tcdbg
->so_tcdbg_tclass
= tfp
->tfp_class
;
649 so_tcdbg
->so_tcbbg_qos_mode
= tfp
->tfp_qos_mode
;
652 lck_mtx_unlock(tclass_lock
);
658 delete_tclass_for_pid_pname(struct so_tcdbg
*so_tcdbg
)
661 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
662 struct tclass_for_proc
*tfp
= NULL
;
664 lck_mtx_lock(tclass_lock
);
667 tfp
= find_tfp_by_pid(pid
);
669 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
672 free_tclass_for_proc(tfp
);
676 lck_mtx_unlock(tclass_lock
);
682 * Setting options requires privileges
684 __private_extern__
int
685 so_set_tcdbg(struct socket
*so
, struct so_tcdbg
*so_tcdbg
)
689 if ((so
->so_state
& SS_PRIV
) == 0)
692 socket_unlock(so
, 0);
694 switch (so_tcdbg
->so_tcdbg_cmd
) {
696 error
= set_pid_tclass(so_tcdbg
);
700 error
= set_pname_tclass(so_tcdbg
);
704 error
= purge_tclass_for_proc();
708 error
= flush_tclass_for_proc();
711 case SO_TCDBG_DELETE
:
712 error
= delete_tclass_for_pid_pname(so_tcdbg
);
715 case SO_TCDBG_TCFLUSH_PID
:
716 error
= flush_pid_tclass(so_tcdbg
);
730 * Not required to be privileged to get
732 __private_extern__
int
733 sogetopt_tcdbg(struct socket
*so
, struct sockopt
*sopt
)
736 struct so_tcdbg so_tcdbg
;
738 size_t len
= sopt
->sopt_valsize
;
740 error
= sooptcopyin(sopt
, &so_tcdbg
, sizeof (struct so_tcdbg
),
741 sizeof (struct so_tcdbg
));
745 sopt
->sopt_valsize
= len
;
747 socket_unlock(so
, 0);
749 switch (so_tcdbg
.so_tcdbg_cmd
) {
751 error
= get_pid_tclass(&so_tcdbg
);
755 error
= get_pname_tclass(&so_tcdbg
);
759 lck_mtx_lock(tclass_lock
);
760 so_tcdbg
.so_tcdbg_count
= tfp_count
;
761 lck_mtx_unlock(tclass_lock
);
764 case SO_TCDBG_LIST
: {
765 struct tclass_for_proc
*tfp
;
767 struct so_tcdbg
*ptr
;
769 lck_mtx_lock(tclass_lock
);
770 if ((alloc_count
= tfp_count
) == 0) {
771 lck_mtx_unlock(tclass_lock
);
775 len
= alloc_count
* sizeof (struct so_tcdbg
);
776 lck_mtx_unlock(tclass_lock
);
778 buf
= _MALLOC(len
, M_TEMP
, M_WAITOK
| M_ZERO
);
784 lck_mtx_lock(tclass_lock
);
786 ptr
= (struct so_tcdbg
*)buf
;
787 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
788 if (++n
> alloc_count
)
790 if (tfp
->tfp_pid
!= -1) {
791 ptr
->so_tcdbg_cmd
= SO_TCDBG_PID
;
792 ptr
->so_tcdbg_pid
= tfp
->tfp_pid
;
794 ptr
->so_tcdbg_cmd
= SO_TCDBG_PNAME
;
795 ptr
->so_tcdbg_pid
= -1;
796 strlcpy(ptr
->so_tcdbg_pname
,
798 sizeof (ptr
->so_tcdbg_pname
));
800 ptr
->so_tcdbg_tclass
= tfp
->tfp_class
;
801 ptr
->so_tcbbg_qos_mode
= tfp
->tfp_qos_mode
;
805 lck_mtx_unlock(tclass_lock
);
818 error
= sooptcopyout(sopt
, &so_tcdbg
,
819 sizeof (struct so_tcdbg
));
821 error
= sooptcopyout(sopt
, buf
, len
);
828 #endif /* (DEVELOPMENT || DEBUG) */
831 so_get_netsvc_marking_level(struct socket
*so
)
833 int marking_level
= NETSVC_MRKNG_UNKNOWN
;
834 struct ifnet
*ifp
= NULL
;
836 switch (SOCK_DOM(so
)) {
838 struct inpcb
*inp
= sotoinpcb(so
);
841 ifp
= inp
->inp_last_outifp
;
845 struct in6pcb
*in6p
= sotoin6pcb(so
);
848 ifp
= in6p
->in6p_last_outifp
;
855 if ((ifp
->if_eflags
&
856 (IFEF_QOSMARKING_ENABLED
| IFEF_QOSMARKING_CAPABLE
)) ==
857 (IFEF_QOSMARKING_ENABLED
| IFEF_QOSMARKING_CAPABLE
)) {
858 if ((so
->so_flags1
& SOF1_QOSMARKING_ALLOWED
))
859 marking_level
= NETSVC_MRKNG_LVL_L3L2_ALL
;
861 marking_level
= NETSVC_MRKNG_LVL_L3L2_BK
;
863 marking_level
= NETSVC_MRKNG_LVL_L2
;
866 return (marking_level
);
869 __private_extern__
int
870 so_set_traffic_class(struct socket
*so
, int optval
)
874 if (optval
< SO_TC_BE
|| optval
> SO_TC_CTL
) {
888 if (!SO_VALID_TC(optval
))
894 int oldval
= so
->so_traffic_class
;
896 VERIFY(SO_VALID_TC(optval
));
897 so
->so_traffic_class
= optval
;
899 if ((SOCK_DOM(so
) == PF_INET
||
900 SOCK_DOM(so
) == PF_INET6
) &&
901 SOCK_TYPE(so
) == SOCK_STREAM
)
902 set_tcp_stream_priority(so
);
904 if ((SOCK_DOM(so
) == PF_INET
||
905 SOCK_DOM(so
) == PF_INET6
) &&
906 optval
!= oldval
&& (optval
== SO_TC_BK_SYS
||
907 oldval
== SO_TC_BK_SYS
)) {
909 * If the app switches from BK_SYS to something
910 * else, resume the socket if it was suspended.
912 if (oldval
== SO_TC_BK_SYS
)
913 inp_reset_fc_state(so
->so_pcb
);
915 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
916 "[%d,%d] opportunistic %s\n", so
->last_pid
,
917 (uint64_t)VM_KERNEL_ADDRPERM(so
),
918 SOCK_DOM(so
), SOCK_TYPE(so
),
919 (optval
== SO_TC_BK_SYS
) ? "ON" : "OFF");
926 __private_extern__
int
927 so_set_net_service_type(struct socket
*so
, int netsvctype
)
932 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype
))
935 sotc
= sotc_by_netservicetype
[netsvctype
];
936 error
= so_set_traffic_class(so
, sotc
);
939 so
->so_netsvctype
= netsvctype
;
940 so
->so_flags1
|= SOF1_TC_NET_SERV_TYPE
;
945 __private_extern__
void
946 so_set_default_traffic_class(struct socket
*so
)
948 so
->so_traffic_class
= SO_TC_BE
;
950 if ((SOCK_DOM(so
) == PF_INET
|| SOCK_DOM(so
) == PF_INET6
)) {
951 if (net_qos_policy_restricted
== 0)
952 so
->so_flags1
|= SOF1_QOSMARKING_ALLOWED
;
953 #if (DEVELOPMENT || DEBUG)
955 set_tclass_for_curr_proc(so
);
956 #endif /* (DEVELOPMENT || DEBUG) */
960 __private_extern__
int
961 so_set_opportunistic(struct socket
*so
, int optval
)
963 return (so_set_traffic_class(so
, (optval
== 0) ?
964 SO_TC_BE
: SO_TC_BK_SYS
));
967 __private_extern__
int
968 so_get_opportunistic(struct socket
*so
)
970 return (so
->so_traffic_class
== SO_TC_BK_SYS
);
973 __private_extern__
int
974 so_tc_from_control(struct mbuf
*control
, int *out_netsvctype
)
977 int sotc
= SO_TC_UNSPEC
;
979 *out_netsvctype
= _NET_SERVICE_TYPE_UNSPEC
;
981 for (cm
= M_FIRST_CMSGHDR(control
); cm
!= NULL
;
982 cm
= M_NXT_CMSGHDR(control
, cm
)) {
985 if (cm
->cmsg_len
< sizeof (struct cmsghdr
))
987 if (cm
->cmsg_level
!= SOL_SOCKET
||
988 cm
->cmsg_len
!= CMSG_LEN(sizeof(int)))
990 val
= *(int *)(void *)CMSG_DATA(cm
);
992 * The first valid option wins
994 switch (cm
->cmsg_type
) {
995 case SO_TRAFFIC_CLASS
:
996 if (SO_VALID_TC(val
)) {
1000 } else if (val
< SO_TC_NET_SERVICE_OFFSET
) {
1004 * Handle the case SO_NET_SERVICE_TYPE values are
1005 * passed using SO_TRAFFIC_CLASS
1007 val
= val
- SO_TC_NET_SERVICE_OFFSET
;
1009 case SO_NET_SERVICE_TYPE
:
1010 if (!IS_VALID_NET_SERVICE_TYPE(val
))
1012 *out_netsvctype
= val
;
1013 sotc
= sotc_by_netservicetype
[val
];
1024 __private_extern__
void
1025 so_recv_data_stat(struct socket
*so
, struct mbuf
*m
, size_t off
)
1027 uint32_t mtc
= m_get_traffic_class(m
);
1029 if (mtc
>= SO_TC_STATS_MAX
)
1032 so
->so_tc_stats
[mtc
].rxpackets
+= 1;
1033 so
->so_tc_stats
[mtc
].rxbytes
+=
1034 ((m
->m_flags
& M_PKTHDR
) ? m
->m_pkthdr
.len
: 0) + off
;
1037 __private_extern__
void
1038 so_inc_recv_data_stat(struct socket
*so
, size_t pkts
, size_t bytes
,
1041 if (mtc
>= SO_TC_STATS_MAX
)
1044 so
->so_tc_stats
[mtc
].rxpackets
+= pkts
;
1045 so
->so_tc_stats
[mtc
].rxbytes
+= bytes
;
1049 so_throttle_best_effort(struct socket
*so
, struct ifnet
*ifp
)
1051 u_int32_t uptime
= net_uptime();
1052 return (soissrcbesteffort(so
) &&
1053 net_io_policy_throttle_best_effort
== 1 &&
1054 ifp
->if_rt_sendts
> 0 &&
1055 (int)(uptime
- ifp
->if_rt_sendts
) <= TCP_BG_SWITCH_TIME
);
1058 __private_extern__
void
1059 set_tcp_stream_priority(struct socket
*so
)
1061 struct inpcb
*inp
= sotoinpcb(so
);
1062 struct tcpcb
*tp
= intotcpcb(inp
);
1063 struct ifnet
*outifp
;
1064 u_char old_cc
= tp
->tcp_cc_index
;
1065 int recvbg
= IS_TCP_RECV_BG(so
);
1066 bool is_local
= false, fg_active
= false;
1069 VERIFY((SOCK_CHECK_DOM(so
, PF_INET
) ||
1070 SOCK_CHECK_DOM(so
, PF_INET6
)) &&
1071 SOCK_CHECK_TYPE(so
, SOCK_STREAM
) &&
1072 SOCK_CHECK_PROTO(so
, IPPROTO_TCP
));
1074 /* Return if the socket is in a terminal state */
1075 if (inp
->inp_state
== INPCB_STATE_DEAD
)
1078 outifp
= inp
->inp_last_outifp
;
1079 uptime
= net_uptime();
1082 * If the socket was marked as a background socket or if the
1083 * traffic class is set to background with traffic class socket
1084 * option then make both send and recv side of the stream to be
1085 * background. The variable sotcdb which can be set with sysctl
1086 * is used to disable these settings for testing.
1088 if (outifp
== NULL
|| (outifp
->if_flags
& IFF_LOOPBACK
))
1091 /* Check if there has been recent foreground activity */
1092 if (outifp
!= NULL
) {
1094 * If the traffic source is background, check if
1095 * it can be switched to foreground. This can
1096 * happen when there is no indication of foreground
1099 if (soissrcbackground(so
) && outifp
->if_fg_sendts
> 0 &&
1100 (int)(uptime
- outifp
->if_fg_sendts
) <= TCP_BG_SWITCH_TIME
)
1104 * The traffic source is best-effort -- check if
1105 * the policy to throttle best effort is enabled
1106 * and there was realtime activity on this
1107 * interface recently. If this is true, enable
1108 * algorithms that respond to increased latency
1109 * on best-effort traffic.
1111 if (so_throttle_best_effort(so
, outifp
))
1116 * System initiated background traffic like cloud uploads should
1117 * always use background delay sensitive algorithms. This will
1118 * make the stream more responsive to other streams on the user's
1119 * network and it will minimize latency induced.
1121 if (fg_active
|| IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) {
1123 * If the interface that the connection is using is
1124 * loopback, do not use background congestion
1125 * control algorithm.
1127 * If there has been recent foreground activity or if
1128 * there was an indication that a foreground application
1129 * is going to use networking (net_io_policy_throttled),
1130 * switch the background streams to use background
1131 * congestion control algorithm. Otherwise, even background
1132 * flows can move into foreground.
1134 if ((sotcdb
& SOTCDB_NO_SENDTCPBG
) != 0 || is_local
||
1135 !IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) {
1136 if (old_cc
== TCP_CC_ALGO_BACKGROUND_INDEX
)
1137 tcp_set_foreground_cc(so
);
1139 if (old_cc
!= TCP_CC_ALGO_BACKGROUND_INDEX
)
1140 tcp_set_background_cc(so
);
1143 /* Set receive side background flags */
1144 if ((sotcdb
& SOTCDB_NO_RECVTCPBG
) != 0 || is_local
||
1145 !IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) {
1146 tcp_clear_recv_bg(so
);
1148 tcp_set_recv_bg(so
);
1151 tcp_clear_recv_bg(so
);
1152 if (old_cc
== TCP_CC_ALGO_BACKGROUND_INDEX
)
1153 tcp_set_foreground_cc(so
);
1156 if (old_cc
!= tp
->tcp_cc_index
|| recvbg
!= IS_TCP_RECV_BG(so
)) {
1157 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1158 "%s recv\n", so
->last_pid
,
1159 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1160 SOCK_DOM(so
), SOCK_TYPE(so
),
1161 (tp
->tcp_cc_index
== TCP_CC_ALGO_BACKGROUND_INDEX
) ?
1162 "background" : "foreground",
1163 IS_TCP_RECV_BG(so
) ? "background" : "foreground");
1168 * Set traffic class to an IPv4 or IPv6 packet
1170 * - set the DSCP code following the WMM mapping
1172 __private_extern__
void
1173 set_packet_service_class(struct mbuf
*m
, struct socket
*so
,
1174 int sotc
, u_int32_t flags
)
1176 mbuf_svc_class_t msc
= MBUF_SC_BE
; /* Best effort by default */
1177 struct inpcb
*inp
= sotoinpcb(so
); /* in6pcb and inpcb are the same */
1179 if (!(m
->m_flags
& M_PKTHDR
))
1183 * Here is the precedence:
1184 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1185 * 2) Traffic class passed via ancillary data to sendmsg(2)
1186 * 3) Traffic class socket option last
1188 if (sotc
!= SO_TC_UNSPEC
) {
1189 VERIFY(SO_VALID_TC(sotc
));
1190 msc
= so_tc2msc(sotc
);
1191 /* Assert because tc must have been valid */
1192 VERIFY(MBUF_VALID_SC(msc
));
1196 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1197 * best effort is set, depress the priority.
1199 if (!IS_MBUF_SC_BACKGROUND(msc
) && soisthrottled(so
))
1202 if (IS_MBUF_SC_BESTEFFORT(msc
) && inp
->inp_last_outifp
!= NULL
&&
1203 so_throttle_best_effort(so
, inp
->inp_last_outifp
))
1206 if (soissrcbackground(so
))
1207 m
->m_pkthdr
.pkt_flags
|= PKTF_SO_BACKGROUND
;
1209 if (soissrcrealtime(so
) || IS_MBUF_SC_REALTIME(msc
))
1210 m
->m_pkthdr
.pkt_flags
|= PKTF_SO_REALTIME
;
1212 * Set the traffic class in the mbuf packet header svc field
1214 if (sotcdb
& SOTCDB_NO_MTC
)
1218 * Elevate service class if the packet is a pure TCP ACK.
1219 * We can do this only when the flow is not a background
1220 * flow and the outgoing interface supports
1221 * transmit-start model.
1223 if (!IS_MBUF_SC_BACKGROUND(msc
) &&
1224 (flags
& (PKT_SCF_TCP_ACK
| PKT_SCF_TCP_SYN
)) != 0)
1227 (void) m_set_service_class(m
, msc
);
1230 * Set the privileged traffic auxiliary flag if applicable,
1233 if (!(sotcdb
& SOTCDB_NO_PRIVILEGED
) && soisprivilegedtraffic(so
) &&
1234 msc
!= MBUF_SC_UNSPEC
)
1235 m
->m_pkthdr
.pkt_flags
|= PKTF_PRIO_PRIVILEGED
;
1237 m
->m_pkthdr
.pkt_flags
&= ~PKTF_PRIO_PRIVILEGED
;
1241 * For TCP with background traffic class switch CC algo based on sysctl
1243 if (so
->so_type
== SOCK_STREAM
)
1244 set_tcp_stream_priority(so
);
1246 so_tc_update_stats(m
, so
, msc
);
1249 __private_extern__
void
1250 so_tc_update_stats(struct mbuf
*m
, struct socket
*so
, mbuf_svc_class_t msc
)
1252 mbuf_traffic_class_t mtc
;
1255 * Assume socket and mbuf traffic class values are the same
1256 * Also assume the socket lock is held. Note that the stats
1257 * at the socket layer are reduced down to the legacy traffic
1258 * classes; we could/should potentially expand so_tc_stats[].
1260 mtc
= MBUF_SC2TC(msc
);
1261 VERIFY(mtc
< SO_TC_STATS_MAX
);
1262 so
->so_tc_stats
[mtc
].txpackets
+= 1;
1263 so
->so_tc_stats
[mtc
].txbytes
+= m
->m_pkthdr
.len
;
1266 __private_extern__
void
1267 socket_tclass_init(void)
1269 _CASSERT(_SO_TC_MAX
== SO_TC_STATS_MAX
);
1271 tclass_lck_grp_attr
= lck_grp_attr_alloc_init();
1272 tclass_lck_grp
= lck_grp_alloc_init("tclass", tclass_lck_grp_attr
);
1273 tclass_lck_attr
= lck_attr_alloc_init();
1274 lck_mtx_init(tclass_lock
, tclass_lck_grp
, tclass_lck_attr
);
1277 __private_extern__ mbuf_svc_class_t
1280 mbuf_svc_class_t msc
;
1284 msc
= MBUF_SC_BK_SYS
;
1309 case SO_TC_NETSVC_SIG
:
1321 msc
= MBUF_SC_UNSPEC
;
1328 __private_extern__
int
1329 so_svc2tc(mbuf_svc_class_t svc
)
1332 case MBUF_SC_BK_SYS
:
1333 return (SO_TC_BK_SYS
);
1349 return (SO_TC_NETSVC_SIG
);
1354 case MBUF_SC_UNSPEC
:
1361 * LRO is turned on for AV streaming class.
1364 so_set_lro(struct socket
*so
, int optval
)
1366 if (optval
== SO_TC_AV
) {
1367 so
->so_flags
|= SOF_USELRO
;
1369 if (so
->so_flags
& SOF_USELRO
) {
1370 /* transition to non LRO class */
1371 so
->so_flags
&= ~SOF_USELRO
;
1372 struct inpcb
*inp
= sotoinpcb(so
);
1373 struct tcpcb
*tp
= NULL
;
1375 tp
= intotcpcb(inp
);
1376 if (tp
&& (tp
->t_flagsext
& TF_LRO_OFFLOADED
)) {
1377 tcp_lro_remove_state(inp
->inp_laddr
,
1381 tp
->t_flagsext
&= ~TF_LRO_OFFLOADED
;
1389 sotc_index(int sotc
)
1393 return (SOTCIX_BK_SYS
);
1403 return (SOTCIX_OAM
);
1417 return (SOTCIX_CTL
);
1423 * Unknown traffic class value
1425 return (SIZE_T_MAX
);
1429 * Pass NULL ifp for default map
1432 set_netsvctype_dscp_map(size_t in_count
,
1433 const struct netsvctype_dscp_map
*netsvctype_dscp_map
)
1436 struct net_qos_dscp_map
*net_qos_dscp_map
= NULL
;
1440 * Do not accept more than the max number of distinct DSCPs
1442 if (in_count
> _MAX_DSCP
|| netsvctype_dscp_map
== NULL
)
1446 * Validate input parameters
1448 for (i
= 0; i
< in_count
; i
++) {
1449 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map
[i
].netsvctype
))
1451 if (netsvctype_dscp_map
[i
].dscp
> _MAX_DSCP
)
1455 net_qos_dscp_map
= &default_net_qos_dscp_map
;
1457 for (i
= 0; i
< in_count
; i
++) {
1458 netsvctype
= netsvctype_dscp_map
[i
].netsvctype
;
1460 net_qos_dscp_map
->netsvctype_to_dscp
[netsvctype
] =
1461 netsvctype_dscp_map
[i
].dscp
;
1463 for (netsvctype
= 0; netsvctype
< _NET_SERVICE_TYPE_COUNT
; netsvctype
++) {
1464 switch (netsvctype
) {
1465 case NET_SERVICE_TYPE_BE
:
1466 case NET_SERVICE_TYPE_BK
:
1467 case NET_SERVICE_TYPE_VI
:
1468 case NET_SERVICE_TYPE_VO
:
1469 case NET_SERVICE_TYPE_RV
:
1470 case NET_SERVICE_TYPE_AV
:
1471 case NET_SERVICE_TYPE_OAM
:
1472 case NET_SERVICE_TYPE_RD
: {
1475 sotcix
= sotc_index(sotc_by_netservicetype
[netsvctype
]);
1476 if (sotcix
!= SIZE_T_MAX
) {
1477 net_qos_dscp_map
->sotc_to_dscp
[sotcix
] =
1478 netsvctype_dscp_map
[netsvctype
].dscp
;
1482 case NET_SERVICE_TYPE_SIG
:
1483 /* Signaling does not have its own traffic class */
1486 /* We should not be here */
1490 /* Network control socket traffic class is always best effort */
1491 net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_CTL
] = _DSCP_DF
;
1493 /* Background socket traffic class DSCP same as background system */
1494 net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_BK
] =
1495 net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_BK_SYS
];
1501 * out_count is an input/output parameter
1504 get_netsvctype_dscp_map(size_t *out_count
,
1505 struct netsvctype_dscp_map
*netsvctype_dscp_map
)
1508 struct net_qos_dscp_map
*net_qos_dscp_map
= NULL
;
1511 * Do not accept more than the max number of distinct DSCPs
1513 if (out_count
== NULL
|| netsvctype_dscp_map
== NULL
)
1515 if (*out_count
> _MAX_DSCP
)
1518 net_qos_dscp_map
= &default_net_qos_dscp_map
;
1520 for (i
= 0; i
< MIN(_NET_SERVICE_TYPE_COUNT
, *out_count
); i
++) {
1521 netsvctype_dscp_map
[i
].netsvctype
= i
;
1522 netsvctype_dscp_map
[i
].dscp
= net_qos_dscp_map
->netsvctype_to_dscp
[i
];
1536 * By default use the Fastlane DSCP mappings
1538 error
= set_netsvctype_dscp_map(_NET_SERVICE_TYPE_COUNT
,
1539 fastlane_netsvctype_dscp_map
);
1543 * No DSCP mapping for network control
1545 default_net_qos_dscp_map
.sotc_to_dscp
[SOTCIX_CTL
] = _DSCP_DF
;
1547 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map
, 1);
1551 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1553 #pragma unused(oidp, arg1, arg2)
1555 const size_t max_netsvctype_to_dscp_map_len
=
1556 _NET_SERVICE_TYPE_COUNT
* sizeof(struct netsvctype_dscp_map
);
1558 struct netsvctype_dscp_map netsvctype_dscp_map
[_NET_SERVICE_TYPE_COUNT
] = {};
1561 if (req
->oldptr
== USER_ADDR_NULL
) {
1563 _NET_SERVICE_TYPE_COUNT
* sizeof(struct netsvctype_dscp_map
);
1564 } else if (req
->oldlen
> 0) {
1565 count
= _NET_SERVICE_TYPE_COUNT
;
1566 error
= get_netsvctype_dscp_map(&count
, netsvctype_dscp_map
);
1569 len
= count
* sizeof(struct netsvctype_dscp_map
);
1570 error
= SYSCTL_OUT(req
, netsvctype_dscp_map
,
1571 MIN(len
, req
->oldlen
));
1576 if (req
->newptr
== USER_ADDR_NULL
)
1579 error
= proc_suser(current_proc());
1584 * Check input length
1586 if (req
->newlen
> max_netsvctype_to_dscp_map_len
) {
1591 * Cap the number of entries to copy from input buffer
1593 error
= SYSCTL_IN(req
, netsvctype_dscp_map
, req
->newlen
);
1597 count
= req
->newlen
/ sizeof(struct netsvctype_dscp_map
);
1598 error
= set_netsvctype_dscp_map(count
, netsvctype_dscp_map
);
1603 __private_extern__ errno_t
1604 set_packet_qos(struct mbuf
*m
, struct ifnet
*ifp
, boolean_t qos_allowed
,
1605 int sotc
, int netsvctype
, u_int8_t
*dscp_inout
)
1607 if (ifp
== NULL
|| dscp_inout
== NULL
)
1610 if ((ifp
->if_eflags
&
1611 (IFEF_QOSMARKING_ENABLED
| IFEF_QOSMARKING_CAPABLE
)) ==
1612 (IFEF_QOSMARKING_ENABLED
| IFEF_QOSMARKING_CAPABLE
)) {
1616 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1621 * For DSCP use the network service type if specified, otherwise
1622 * use the socket traffic class
1624 * When not whitelisted by the policy, set DSCP only for best
1625 * effort and background, and set the mbuf service class to
1626 * best effort as well so the packet will be queued and
1627 * scheduled at a lower priority.
1628 * We still want to prioritize control traffic on the interface
1629 * so we do not change the mbuf service class for SO_TC_CTL
1631 if (IS_VALID_NET_SERVICE_TYPE(netsvctype
) &&
1632 netsvctype
!= NET_SERVICE_TYPE_BE
) {
1633 dscp
= default_net_qos_dscp_map
.netsvctype_to_dscp
[netsvctype
];
1635 if (qos_allowed
== FALSE
&&
1636 netsvctype
!= NET_SERVICE_TYPE_BE
&&
1637 netsvctype
!= NET_SERVICE_TYPE_BK
) {
1639 if (sotc
!= SO_TC_CTL
)
1640 m_set_service_class(m
, MBUF_SC_BE
);
1642 } else if (sotc
!= SO_TC_UNSPEC
) {
1643 size_t sotcix
= sotc_index(sotc
);
1644 if (sotcix
!= SIZE_T_MAX
) {
1645 dscp
= default_net_qos_dscp_map
.sotc_to_dscp
[sotcix
];
1647 if (qos_allowed
== FALSE
&& sotc
!= SO_TC_BE
&&
1648 sotc
!= SO_TC_BK
&& sotc
!= SO_TC_BK_SYS
&&
1649 sotc
!= SO_TC_CTL
) {
1651 if (sotc
!= SO_TC_CTL
)
1652 m_set_service_class(m
, MBUF_SC_BE
);
1656 if (net_qos_verbose
!= 0)
1657 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1658 __func__
, qos_allowed
, sotc
, netsvctype
, dscp
);
1660 if (*dscp_inout
!= dscp
) {
1663 } else if (*dscp_inout
!= _DSCP_DF
&& IFNET_IS_WIFI_INFRA(ifp
)) {
1664 mbuf_svc_class_t msc
= m_get_service_class(m
);
1667 * For WiFi infra, when the mbuf service class is best effort
1668 * and the DSCP is not default, set the service class based
1671 if (msc
== MBUF_SC_BE
) {
1672 msc
= wifi_dscp_to_msc_array
[*dscp_inout
];
1674 if (msc
!= MBUF_SC_BE
) {
1675 m_set_service_class(m
, msc
);
1677 if (net_qos_verbose
!= 0)
1678 printf("%s set msc %u for dscp %u\n",
1679 __func__
, msc
, *dscp_inout
);
1688 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map
*map
, int clear
)
1693 bzero(wifi_dscp_to_msc_array
, sizeof(wifi_dscp_to_msc_array
));
1695 for (i
= 0; i
< DSCP_ARRAY_SIZE
; i
++) {
1696 const struct dcsp_msc_map
*elem
= map
+ i
;
1698 if (elem
->dscp
> _MAX_DSCP
|| elem
->msc
== MBUF_SC_UNSPEC
)
1700 switch (elem
->msc
) {
1701 case MBUF_SC_BK_SYS
:
1703 wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_BK
;
1709 wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_BE
;
1714 wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_VI
;
1718 wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_VO
;
1725 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map
*netsvctype_dscp_map
,
1726 size_t count
, struct dcsp_msc_map
*dcsp_msc_map
)
1732 * Validate input parameters
1734 for (i
= 0; i
< count
; i
++) {
1735 if (!SO_VALID_TC(netsvctype_dscp_map
[i
].netsvctype
)) {
1739 if (netsvctype_dscp_map
[i
].dscp
> _MAX_DSCP
) {
1745 bzero(dcsp_msc_map
, DSCP_ARRAY_SIZE
* sizeof(struct dcsp_msc_map
));
1747 for (i
= 0; i
< count
; i
++) {
1748 dcsp_msc_map
[i
].dscp
= netsvctype_dscp_map
[i
].dscp
;
1749 dcsp_msc_map
[i
].msc
= so_tc2msc(netsvctype_dscp_map
[i
].netsvctype
);
1756 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1758 #pragma unused(oidp, arg1, arg2)
1760 size_t len
= DSCP_ARRAY_SIZE
* sizeof(struct netsvctype_dscp_map
);
1761 struct netsvctype_dscp_map netsvctype_dscp_map
[DSCP_ARRAY_SIZE
] = {};
1762 struct dcsp_msc_map dcsp_msc_map
[DSCP_ARRAY_SIZE
];
1766 if (req
->oldptr
== USER_ADDR_NULL
) {
1768 } else if (req
->oldlen
> 0) {
1769 for (i
= 0; i
< DSCP_ARRAY_SIZE
; i
++) {
1770 netsvctype_dscp_map
[i
].dscp
= i
;
1771 netsvctype_dscp_map
[i
].netsvctype
=
1772 so_svc2tc(wifi_dscp_to_msc_array
[i
]);
1774 error
= SYSCTL_OUT(req
, netsvctype_dscp_map
,
1775 MIN(len
, req
->oldlen
));
1780 if (req
->newptr
== USER_ADDR_NULL
)
1783 error
= proc_suser(current_proc());
1788 * Check input length
1790 if (req
->newlen
> len
) {
1795 * Cap the number of entries to copy from input buffer
1797 if (len
> req
->newlen
)
1799 error
= SYSCTL_IN(req
, netsvctype_dscp_map
, len
);
1803 count
= len
/ sizeof(struct netsvctype_dscp_map
);
1804 bzero(dcsp_msc_map
, sizeof(dcsp_msc_map
));
1805 error
= dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map
, count
,
1810 set_dscp_to_wifi_ac_map(dcsp_msc_map
, 0);
1816 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1818 #pragma unused(oidp, arg1, arg2)
1822 error
= sysctl_handle_int(oidp
, &val
, 0, req
);
1823 if (error
|| !req
->newptr
)
1826 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map
, 1);
1832 * Returns whether a large upload or download transfer should be marked as
1833 * BK service type for network activity. This is a system level
1834 * hint/suggestion to classify application traffic based on statistics
1835 * collected from the current network attachment
1837 * Returns 1 for BK and 0 for default
1841 net_qos_guideline(struct proc
*p
, struct net_qos_guideline_args
*arg
,
1845 #define RETURN_USE_BK 1
1846 #define RETURN_USE_DEFAULT 0
1847 struct net_qos_param qos_arg
;
1848 struct ifnet
*ipv4_primary
, *ipv6_primary
;
1851 if (arg
->param
== USER_ADDR_NULL
|| retval
== NULL
||
1852 arg
->param_len
!= sizeof (qos_arg
)) {
1855 err
= copyin(arg
->param
, (caddr_t
) &qos_arg
, sizeof (qos_arg
));
1859 *retval
= RETURN_USE_DEFAULT
;
1860 ipv4_primary
= ifindex2ifnet
[get_primary_ifscope(AF_INET
)];
1861 ipv6_primary
= ifindex2ifnet
[get_primary_ifscope(AF_INET6
)];
1864 * If either of the interfaces is in Low Internet mode, enable
1865 * background delay based algorithms on this transfer
1867 if (qos_arg
.nq_uplink
) {
1868 if ((ipv4_primary
!= NULL
&&
1869 (ipv4_primary
->if_xflags
& IFXF_LOW_INTERNET_UL
)) ||
1870 (ipv6_primary
!= NULL
&&
1871 (ipv6_primary
->if_xflags
& IFXF_LOW_INTERNET_UL
))) {
1872 *retval
= RETURN_USE_BK
;
1876 if ((ipv4_primary
!= NULL
&&
1877 (ipv4_primary
->if_xflags
& IFXF_LOW_INTERNET_DL
)) ||
1878 (ipv6_primary
!= NULL
&&
1879 (ipv6_primary
->if_xflags
& IFXF_LOW_INTERNET_DL
))) {
1880 *retval
= RETURN_USE_BK
;
1886 * Sometimes IPv4 and IPv6 primary interfaces can be different.
1887 * In this case, if either of them is non-cellular, we should mark
1888 * the transfer as BK as it can potentially get used based on
1889 * the host name resolution
1891 if (ipv4_primary
!= NULL
&& IFNET_IS_EXPENSIVE(ipv4_primary
) &&
1892 ipv6_primary
!= NULL
&& IFNET_IS_EXPENSIVE(ipv6_primary
)) {
1893 if (qos_arg
.nq_use_expensive
) {
1896 *retval
= RETURN_USE_BK
;
1900 if (qos_arg
.nq_transfer_size
>= 5 * 1024 * 1024) {
1901 *retval
= RETURN_USE_BK
;
1906 #undef RETURN_USE_BK
1907 #undef RETURN_USE_DEFAULT