2 * Copyright (c) 2009-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/lro_ext.h>
62 #include <netinet/in_tclass.h>
64 struct net_qos_dscp_map
{
65 uint8_t sotc_to_dscp
[SO_TC_MAX
];
66 uint8_t netsvctype_to_dscp
[_NET_SERVICE_TYPE_COUNT
];
73 static inline int so_throttle_best_effort(struct socket
*, struct ifnet
*);
74 static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map
*, int);
75 static errno_t
dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map
*, size_t,
76 struct dcsp_msc_map
*);
78 static lck_grp_attr_t
*tclass_lck_grp_attr
= NULL
; /* mutex group attributes */
79 static lck_grp_t
*tclass_lck_grp
= NULL
; /* mutex group definition */
80 static lck_attr_t
*tclass_lck_attr
= NULL
; /* mutex attributes */
81 decl_lck_mtx_data(static, tclass_lock_data
);
82 static lck_mtx_t
*tclass_lock
= &tclass_lock_data
;
84 SYSCTL_NODE(_net
, OID_AUTO
, qos
,
85 CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "QoS");
87 static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
;
88 SYSCTL_PROC(_net_qos
, OID_AUTO
, default_netsvctype_to_dscp_map
,
89 CTLTYPE_STRUCT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
90 0, 0, sysctl_default_netsvctype_to_dscp_map
, "S", "");
92 static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
;
93 SYSCTL_PROC(_net_qos
, OID_AUTO
, dscp_to_wifi_ac_map
,
94 CTLTYPE_STRUCT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
95 0, 0, sysctl_dscp_to_wifi_ac_map
, "S", "");
97 static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
;
98 SYSCTL_PROC(_net_qos
, OID_AUTO
, reset_dscp_to_wifi_ac_map
,
99 CTLTYPE_INT
| CTLFLAG_WR
| CTLFLAG_LOCKED
,
100 0, 0, sysctl_reset_dscp_to_wifi_ac_map
, "I", "");
102 int net_qos_verbose
= 0;
103 SYSCTL_INT(_net_qos
, OID_AUTO
, verbose
,
104 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_verbose
, 0, "");
107 * Fastlane QoS policy:
108 * By default, allow all apps to get traffic class to DSCP mapping
110 SYSCTL_NODE(_net_qos
, OID_AUTO
, policy
,
111 CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "");
113 int net_qos_policy_restricted
= 0;
114 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, restricted
,
115 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_restricted
, 0, "");
117 int net_qos_policy_restrict_avapps
= 0;
118 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, restrict_avapps
,
119 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_restrict_avapps
, 0, "");
121 int net_qos_policy_wifi_enabled
= 0;
122 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, wifi_enabled
,
123 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_wifi_enabled
, 0, "");
125 int net_qos_policy_capable_enabled
= 0;
126 SYSCTL_INT(_net_qos_policy
, OID_AUTO
, capable_enabled
,
127 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_qos_policy_capable_enabled
, 0, "");
130 * Socket traffic class from network service type
132 const int sotc_by_netservicetype
[_NET_SERVICE_TYPE_COUNT
] = {
133 SO_TC_BE
, /* NET_SERVICE_TYPE_BE */
134 SO_TC_BK_SYS
, /* NET_SERVICE_TYPE_BK */
135 SO_TC_VI
, /* NET_SERVICE_TYPE_SIG */
136 SO_TC_VI
, /* NET_SERVICE_TYPE_VI */
137 SO_TC_VO
, /* NET_SERVICE_TYPE_VO */
138 SO_TC_RV
, /* NET_SERVICE_TYPE_RV */
139 SO_TC_AV
, /* NET_SERVICE_TYPE_AV */
140 SO_TC_OAM
, /* NET_SERVICE_TYPE_OAM */
141 SO_TC_RD
/* NET_SERVICE_TYPE_RD */
145 * DSCP mappings for QoS Fastlane as based on network service types
148 struct netsvctype_dscp_map fastlane_netsvctype_dscp_map
[_NET_SERVICE_TYPE_COUNT
] = {
149 { .netsvctype
= NET_SERVICE_TYPE_BE
, .dscp
= _DSCP_DF
},
150 { .netsvctype
= NET_SERVICE_TYPE_BK
, .dscp
= _DSCP_AF11
},
151 { .netsvctype
= NET_SERVICE_TYPE_SIG
, .dscp
= _DSCP_CS3
},
152 { .netsvctype
= NET_SERVICE_TYPE_VI
, .dscp
= _DSCP_AF41
},
153 { .netsvctype
= NET_SERVICE_TYPE_VO
, .dscp
= _DSCP_EF
},
154 { .netsvctype
= NET_SERVICE_TYPE_RV
, .dscp
= _DSCP_CS4
},
155 { .netsvctype
= NET_SERVICE_TYPE_AV
, .dscp
= _DSCP_AF31
},
156 { .netsvctype
= NET_SERVICE_TYPE_OAM
, .dscp
= _DSCP_CS2
},
157 { .netsvctype
= NET_SERVICE_TYPE_RD
, .dscp
= _DSCP_AF21
},
162 * DSCP mappings for QoS RFC4594 as based on network service types
165 struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map
[_NET_SERVICE_TYPE_COUNT
] = {
166 { .netsvctype
= NET_SERVICE_TYPE_BE
, .dscp
= _DSCP_DF
},
167 { .netsvctype
= NET_SERVICE_TYPE_BK
, .dscp
= _DSCP_CS1
},
168 { .netsvctype
= NET_SERVICE_TYPE_SIG
, .dscp
= _DSCP_CS5
},
169 { .netsvctype
= NET_SERVICE_TYPE_VI
, .dscp
= _DSCP_AF41
},
170 { .netsvctype
= NET_SERVICE_TYPE_VO
, .dscp
= _DSCP_EF
},
171 { .netsvctype
= NET_SERVICE_TYPE_RV
, .dscp
= _DSCP_CS4
},
172 { .netsvctype
= NET_SERVICE_TYPE_AV
, .dscp
= _DSCP_AF31
},
173 { .netsvctype
= NET_SERVICE_TYPE_OAM
, .dscp
= _DSCP_CS2
},
174 { .netsvctype
= NET_SERVICE_TYPE_RD
, .dscp
= _DSCP_AF21
},
177 static struct net_qos_dscp_map fastlane_net_qos_dscp_map
;
178 static struct net_qos_dscp_map rfc4594_net_qos_dscp_map
;
181 * The size is one more than the max because DSCP values start at zero
183 #define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
186 * The DSCP to UP mapping (via mbuf service class) for WiFi that follows is the mapping
187 * that is implemented at the 802.11 driver level when the mbuf service class is
190 * This clashes with the recommended mapping documented by the IETF document
191 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
192 * binary compatibility. Applications should use the network service type socket
193 * option to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
195 static const struct dcsp_msc_map default_dscp_to_wifi_ac_map
[] = {
196 { .dscp
= _DSCP_DF
, .msc
= MBUF_SC_BE
}, /* RFC 2474 Standard */
197 { .dscp
= 1, .msc
= MBUF_SC_BE
}, /* */
198 { .dscp
= 2, .msc
= MBUF_SC_BE
}, /* */
199 { .dscp
= 3, .msc
= MBUF_SC_BE
}, /* */
200 { .dscp
= 4, .msc
= MBUF_SC_BE
}, /* */
201 { .dscp
= 5, .msc
= MBUF_SC_BE
}, /* */
202 { .dscp
= 6, .msc
= MBUF_SC_BE
}, /* */
203 { .dscp
= 7, .msc
= MBUF_SC_BE
}, /* */
205 { .dscp
= _DSCP_CS1
, .msc
= MBUF_SC_BK
}, /* RFC 3662 Low-Priority Data */
206 { .dscp
= 9, .msc
= MBUF_SC_BK
}, /* */
207 { .dscp
= _DSCP_AF11
, .msc
= MBUF_SC_BK
}, /* RFC 2597 High-Throughput Data */
208 { .dscp
= 11, .msc
= MBUF_SC_BK
}, /* */
209 { .dscp
= _DSCP_AF12
, .msc
= MBUF_SC_BK
}, /* RFC 2597 High-Throughput Data */
210 { .dscp
= 13, .msc
= MBUF_SC_BK
}, /* */
211 { .dscp
= _DSCP_AF13
, .msc
= MBUF_SC_BK
}, /* RFC 2597 High-Throughput Data */
212 { .dscp
= 15, .msc
= MBUF_SC_BK
}, /* */
214 { .dscp
= _DSCP_CS2
, .msc
= MBUF_SC_BK
}, /* RFC 4594 OAM */
215 { .dscp
= 17, .msc
= MBUF_SC_BK
}, /* */
216 { .dscp
= _DSCP_AF21
, .msc
= MBUF_SC_BK
}, /* RFC 2597 Low-Latency Data */
217 { .dscp
= 19, .msc
= MBUF_SC_BK
}, /* */
218 { .dscp
= _DSCP_AF22
, .msc
= MBUF_SC_BK
}, /* RFC 2597 Low-Latency Data */
219 { .dscp
= 21, .msc
= MBUF_SC_BK
}, /* */
220 { .dscp
= _DSCP_AF23
, .msc
= MBUF_SC_BK
}, /* RFC 2597 Low-Latency Data */
221 { .dscp
= 23, .msc
= MBUF_SC_BK
}, /* */
223 { .dscp
= _DSCP_CS3
, .msc
= MBUF_SC_BE
}, /* RFC 2474 Broadcast Video */
224 { .dscp
= 25, .msc
= MBUF_SC_BE
}, /* */
225 { .dscp
= _DSCP_AF31
, .msc
= MBUF_SC_BE
}, /* RFC 2597 Multimedia Streaming */
226 { .dscp
= 27, .msc
= MBUF_SC_BE
}, /* */
227 { .dscp
= _DSCP_AF32
, .msc
= MBUF_SC_BE
}, /* RFC 2597 Multimedia Streaming */
228 { .dscp
= 29, .msc
= MBUF_SC_BE
}, /* */
229 { .dscp
= _DSCP_AF33
, .msc
= MBUF_SC_BE
}, /* RFC 2597 Multimedia Streaming */
230 { .dscp
= 31, .msc
= MBUF_SC_BE
}, /* */
232 { .dscp
= _DSCP_CS4
, .msc
= MBUF_SC_VI
}, /* RFC 2474 Real-Time Interactive */
233 { .dscp
= 33, .msc
= MBUF_SC_VI
}, /* */
234 { .dscp
= _DSCP_AF41
, .msc
= MBUF_SC_VI
}, /* RFC 2597 Multimedia Conferencing */
235 { .dscp
= 35, .msc
= MBUF_SC_VI
}, /* */
236 { .dscp
= _DSCP_AF42
, .msc
= MBUF_SC_VI
}, /* RFC 2597 Multimedia Conferencing */
237 { .dscp
= 37, .msc
= MBUF_SC_VI
}, /* */
238 { .dscp
= _DSCP_AF43
, .msc
= MBUF_SC_VI
}, /* RFC 2597 Multimedia Conferencing */
239 { .dscp
= 39, .msc
= MBUF_SC_VI
}, /* */
241 { .dscp
= _DSCP_CS5
, .msc
= MBUF_SC_VI
}, /* RFC 2474 Signaling */
242 { .dscp
= 41, .msc
= MBUF_SC_VI
}, /* */
243 { .dscp
= 42, .msc
= MBUF_SC_VI
}, /* */
244 { .dscp
= 43, .msc
= MBUF_SC_VI
}, /* */
245 { .dscp
= _DSCP_VA
, .msc
= MBUF_SC_VI
}, /* RFC 5865 VOICE-ADMIT */
246 { .dscp
= 45, .msc
= MBUF_SC_VI
}, /* */
247 { .dscp
= _DSCP_EF
, .msc
= MBUF_SC_VI
}, /* RFC 3246 Telephony */
248 { .dscp
= 47, .msc
= MBUF_SC_VI
}, /* */
250 { .dscp
= _DSCP_CS6
, .msc
= MBUF_SC_VO
}, /* Wi-Fi WMM Certification: Chariot */
251 { .dscp
= 49, .msc
= MBUF_SC_VO
}, /* */
252 { .dscp
= 50, .msc
= MBUF_SC_VO
}, /* */
253 { .dscp
= 51, .msc
= MBUF_SC_VO
}, /* */
254 { .dscp
= 52, .msc
= MBUF_SC_VO
}, /* Wi-Fi WMM Certification: Sigma */
255 { .dscp
= 53, .msc
= MBUF_SC_VO
}, /* */
256 { .dscp
= 54, .msc
= MBUF_SC_VO
}, /* */
257 { .dscp
= 55, .msc
= MBUF_SC_VO
}, /* */
259 { .dscp
= _DSCP_CS7
, .msc
= MBUF_SC_VO
}, /* Wi-Fi WMM Certification: Chariot */
260 { .dscp
= 57, .msc
= MBUF_SC_VO
}, /* */
261 { .dscp
= 58, .msc
= MBUF_SC_VO
}, /* */
262 { .dscp
= 59, .msc
= MBUF_SC_VO
}, /* */
263 { .dscp
= 60, .msc
= MBUF_SC_VO
}, /* */
264 { .dscp
= 61, .msc
= MBUF_SC_VO
}, /* */
265 { .dscp
= 62, .msc
= MBUF_SC_VO
}, /* */
266 { .dscp
= 63, .msc
= MBUF_SC_VO
}, /* */
268 { .dscp
= 255, .msc
= MBUF_SC_UNSPEC
} /* invalid DSCP to mark last entry */
271 mbuf_svc_class_t wifi_dscp_to_msc_array
[DSCP_ARRAY_SIZE
];
274 * If there is no foreground activity on the interface for bg_switch_time
275 * seconds, the background connections can switch to foreground TCP
276 * congestion control.
278 #define TCP_BG_SWITCH_TIME 2 /* seconds */
280 #if (DEVELOPMENT || DEBUG)
282 static int tfp_count
= 0;
284 static TAILQ_HEAD(, tclass_for_proc
) tfp_head
=
285 TAILQ_HEAD_INITIALIZER(tfp_head
);
287 struct tclass_for_proc
{
288 TAILQ_ENTRY(tclass_for_proc
) tfp_link
;
291 char tfp_pname
[(2 * MAXCOMLEN
) + 1];
292 uint32_t tfp_qos_mode
;
295 static int get_pid_tclass(struct so_tcdbg
*);
296 static int get_pname_tclass(struct so_tcdbg
*);
297 static int set_pid_tclass(struct so_tcdbg
*);
298 static int set_pname_tclass(struct so_tcdbg
*);
299 static int flush_pid_tclass(struct so_tcdbg
*);
300 static int purge_tclass_for_proc(void);
301 static int flush_tclass_for_proc(void);
302 static void set_tclass_for_curr_proc(struct socket
*);
305 * Must be called with tclass_lock held
307 static struct tclass_for_proc
*
308 find_tfp_by_pid(pid_t pid
)
310 struct tclass_for_proc
*tfp
;
312 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
313 if (tfp
->tfp_pid
== pid
) {
321 * Must be called with tclass_lock held
323 static struct tclass_for_proc
*
324 find_tfp_by_pname(const char *pname
)
326 struct tclass_for_proc
*tfp
;
328 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
329 if (strncmp(pname
, tfp
->tfp_pname
,
330 sizeof(tfp
->tfp_pname
)) == 0) {
337 __private_extern__
void
338 set_tclass_for_curr_proc(struct socket
*so
)
340 struct tclass_for_proc
*tfp
= NULL
;
341 proc_t p
= current_proc(); /* Not ref counted */
342 pid_t pid
= proc_pid(p
);
343 char *pname
= proc_best_name(p
);
345 lck_mtx_lock(tclass_lock
);
347 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
348 if ((tfp
->tfp_pid
== pid
) || (tfp
->tfp_pid
== -1 &&
349 strncmp(pname
, tfp
->tfp_pname
,
350 sizeof(tfp
->tfp_pname
)) == 0)) {
351 if (tfp
->tfp_class
!= SO_TC_UNSPEC
) {
352 so
->so_traffic_class
= tfp
->tfp_class
;
355 if (tfp
->tfp_qos_mode
== QOS_MODE_MARKING_POLICY_ENABLE
) {
356 so
->so_flags1
|= SOF1_QOSMARKING_ALLOWED
;
357 } else if (tfp
->tfp_qos_mode
== QOS_MODE_MARKING_POLICY_DISABLE
) {
358 so
->so_flags1
&= ~SOF1_QOSMARKING_ALLOWED
;
364 lck_mtx_unlock(tclass_lock
);
368 * Purge entries with PIDs of exited processes
371 purge_tclass_for_proc(void)
374 struct tclass_for_proc
*tfp
, *tvar
;
376 lck_mtx_lock(tclass_lock
);
378 TAILQ_FOREACH_SAFE(tfp
, &tfp_head
, tfp_link
, tvar
) {
381 if (tfp
->tfp_pid
== -1) {
384 if ((p
= proc_find(tfp
->tfp_pid
)) == NULL
) {
386 TAILQ_REMOVE(&tfp_head
, tfp
, tfp_link
);
394 lck_mtx_unlock(tclass_lock
);
401 * Must be called with tclass_lock held
404 free_tclass_for_proc(struct tclass_for_proc
*tfp
)
410 TAILQ_REMOVE(&tfp_head
, tfp
, tfp_link
);
418 flush_tclass_for_proc(void)
421 struct tclass_for_proc
*tfp
, *tvar
;
423 lck_mtx_lock(tclass_lock
);
425 TAILQ_FOREACH_SAFE(tfp
, &tfp_head
, tfp_link
, tvar
) {
426 free_tclass_for_proc(tfp
);
429 lck_mtx_unlock(tclass_lock
);
435 * Must be called with tclass_lock held
437 static struct tclass_for_proc
*
438 alloc_tclass_for_proc(pid_t pid
, const char *pname
)
440 struct tclass_for_proc
*tfp
;
442 if (pid
== -1 && pname
== NULL
) {
446 tfp
= _MALLOC(sizeof(struct tclass_for_proc
), M_TEMP
, M_NOWAIT
| M_ZERO
);
453 * Add per pid entries before per proc name so we can find
454 * a specific instance of a process before the general name-based entry.
457 TAILQ_INSERT_HEAD(&tfp_head
, tfp
, tfp_link
);
459 strlcpy(tfp
->tfp_pname
, pname
, sizeof(tfp
->tfp_pname
));
460 TAILQ_INSERT_TAIL(&tfp_head
, tfp
, tfp_link
);
469 * SO_TC_UNSPEC for tclass means to remove the entry
472 set_pid_tclass(struct so_tcdbg
*so_tcdbg
)
476 struct filedesc
*fdp
;
478 struct tclass_for_proc
*tfp
;
480 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
481 int tclass
= so_tcdbg
->so_tcdbg_tclass
;
482 int netsvctype
= so_tcdbg
->so_tcdbg_netsvctype
;
486 printf("%s proc_find(%d) failed\n", __func__
, pid
);
491 lck_mtx_lock(tclass_lock
);
493 tfp
= find_tfp_by_pid(pid
);
495 tfp
= alloc_tclass_for_proc(pid
, NULL
);
497 lck_mtx_unlock(tclass_lock
);
502 tfp
->tfp_class
= tclass
;
503 tfp
->tfp_qos_mode
= so_tcdbg
->so_tcbbg_qos_mode
;
505 lck_mtx_unlock(tclass_lock
);
511 for (i
= 0; i
< fdp
->fd_nfiles
; i
++) {
514 fp
= fdp
->fd_ofiles
[i
];
516 (fdp
->fd_ofileflags
[i
] & UF_RESERVED
) != 0 ||
517 FILEGLOB_DTYPE(fp
->f_fglob
) != DTYPE_SOCKET
) {
521 so
= (struct socket
*)fp
->f_fglob
->fg_data
;
522 if (SOCK_DOM(so
) != PF_INET
&& SOCK_DOM(so
) != PF_INET6
) {
527 if (tfp
->tfp_qos_mode
== QOS_MODE_MARKING_POLICY_ENABLE
) {
528 so
->so_flags1
|= SOF1_QOSMARKING_ALLOWED
;
529 } else if (tfp
->tfp_qos_mode
== QOS_MODE_MARKING_POLICY_DISABLE
) {
530 so
->so_flags1
&= ~SOF1_QOSMARKING_ALLOWED
;
532 socket_unlock(so
, 1);
534 if (netsvctype
!= _NET_SERVICE_TYPE_UNSPEC
) {
535 error
= sock_setsockopt(so
, SOL_SOCKET
,
536 SO_NET_SERVICE_TYPE
, &netsvctype
, sizeof(int));
538 if (tclass
!= SO_TC_UNSPEC
) {
539 error
= sock_setsockopt(so
, SOL_SOCKET
,
540 SO_TRAFFIC_CLASS
, &tclass
, sizeof(int));
557 set_pname_tclass(struct so_tcdbg
*so_tcdbg
)
560 struct tclass_for_proc
*tfp
;
562 lck_mtx_lock(tclass_lock
);
564 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
566 tfp
= alloc_tclass_for_proc(-1, so_tcdbg
->so_tcdbg_pname
);
568 lck_mtx_unlock(tclass_lock
);
573 tfp
->tfp_class
= so_tcdbg
->so_tcdbg_tclass
;
574 tfp
->tfp_qos_mode
= so_tcdbg
->so_tcbbg_qos_mode
;
576 lck_mtx_unlock(tclass_lock
);
585 flush_pid_tclass(struct so_tcdbg
*so_tcdbg
)
587 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
588 int tclass
= so_tcdbg
->so_tcdbg_tclass
;
589 struct filedesc
*fdp
;
595 if (p
== PROC_NULL
) {
596 printf("%s proc_find(%d) failed\n", __func__
, pid
);
602 for (i
= 0; i
< fdp
->fd_nfiles
; i
++) {
606 fp
= fdp
->fd_ofiles
[i
];
608 (fdp
->fd_ofileflags
[i
] & UF_RESERVED
) != 0 ||
609 FILEGLOB_DTYPE(fp
->f_fglob
) != DTYPE_SOCKET
) {
613 so
= (struct socket
*)fp
->f_fglob
->fg_data
;
614 error
= sock_setsockopt(so
, SOL_SOCKET
, SO_FLUSH
, &tclass
,
617 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
618 "tclass=%d) failed %d\n", __func__
,
619 (uint64_t)VM_KERNEL_ADDRPERM(so
), i
, tclass
,
628 if (p
!= PROC_NULL
) {
636 get_pid_tclass(struct so_tcdbg
*so_tcdbg
)
640 struct tclass_for_proc
*tfp
;
641 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
643 so_tcdbg
->so_tcdbg_tclass
= SO_TC_UNSPEC
; /* Means not set */
647 printf("%s proc_find(%d) failed\n", __func__
, pid
);
652 lck_mtx_lock(tclass_lock
);
654 tfp
= find_tfp_by_pid(pid
);
656 so_tcdbg
->so_tcdbg_tclass
= tfp
->tfp_class
;
657 so_tcdbg
->so_tcbbg_qos_mode
= tfp
->tfp_qos_mode
;
660 lck_mtx_unlock(tclass_lock
);
670 get_pname_tclass(struct so_tcdbg
*so_tcdbg
)
673 struct tclass_for_proc
*tfp
;
675 so_tcdbg
->so_tcdbg_tclass
= SO_TC_UNSPEC
; /* Means not set */
678 lck_mtx_lock(tclass_lock
);
680 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
682 so_tcdbg
->so_tcdbg_tclass
= tfp
->tfp_class
;
683 so_tcdbg
->so_tcbbg_qos_mode
= tfp
->tfp_qos_mode
;
686 lck_mtx_unlock(tclass_lock
);
692 delete_tclass_for_pid_pname(struct so_tcdbg
*so_tcdbg
)
695 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
696 struct tclass_for_proc
*tfp
= NULL
;
698 lck_mtx_lock(tclass_lock
);
701 tfp
= find_tfp_by_pid(pid
);
703 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
707 free_tclass_for_proc(tfp
);
711 lck_mtx_unlock(tclass_lock
);
717 * Setting options requires privileges
719 __private_extern__
int
720 so_set_tcdbg(struct socket
*so
, struct so_tcdbg
*so_tcdbg
)
724 if ((so
->so_state
& SS_PRIV
) == 0) {
728 socket_unlock(so
, 0);
730 switch (so_tcdbg
->so_tcdbg_cmd
) {
732 error
= set_pid_tclass(so_tcdbg
);
736 error
= set_pname_tclass(so_tcdbg
);
740 error
= purge_tclass_for_proc();
744 error
= flush_tclass_for_proc();
747 case SO_TCDBG_DELETE
:
748 error
= delete_tclass_for_pid_pname(so_tcdbg
);
751 case SO_TCDBG_TCFLUSH_PID
:
752 error
= flush_pid_tclass(so_tcdbg
);
766 * Not required to be privileged to get
768 __private_extern__
int
769 sogetopt_tcdbg(struct socket
*so
, struct sockopt
*sopt
)
772 struct so_tcdbg so_tcdbg
;
774 size_t len
= sopt
->sopt_valsize
;
776 error
= sooptcopyin(sopt
, &so_tcdbg
, sizeof(struct so_tcdbg
),
777 sizeof(struct so_tcdbg
));
782 sopt
->sopt_valsize
= len
;
784 socket_unlock(so
, 0);
786 switch (so_tcdbg
.so_tcdbg_cmd
) {
788 error
= get_pid_tclass(&so_tcdbg
);
792 error
= get_pname_tclass(&so_tcdbg
);
796 lck_mtx_lock(tclass_lock
);
797 so_tcdbg
.so_tcdbg_count
= tfp_count
;
798 lck_mtx_unlock(tclass_lock
);
801 case SO_TCDBG_LIST
: {
802 struct tclass_for_proc
*tfp
;
804 struct so_tcdbg
*ptr
;
806 lck_mtx_lock(tclass_lock
);
807 if ((alloc_count
= tfp_count
) == 0) {
808 lck_mtx_unlock(tclass_lock
);
812 len
= alloc_count
* sizeof(struct so_tcdbg
);
813 lck_mtx_unlock(tclass_lock
);
815 buf
= _MALLOC(len
, M_TEMP
, M_WAITOK
| M_ZERO
);
821 lck_mtx_lock(tclass_lock
);
823 ptr
= (struct so_tcdbg
*)buf
;
824 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
825 if (++n
> alloc_count
) {
828 if (tfp
->tfp_pid
!= -1) {
829 ptr
->so_tcdbg_cmd
= SO_TCDBG_PID
;
830 ptr
->so_tcdbg_pid
= tfp
->tfp_pid
;
832 ptr
->so_tcdbg_cmd
= SO_TCDBG_PNAME
;
833 ptr
->so_tcdbg_pid
= -1;
834 strlcpy(ptr
->so_tcdbg_pname
,
836 sizeof(ptr
->so_tcdbg_pname
));
838 ptr
->so_tcdbg_tclass
= tfp
->tfp_class
;
839 ptr
->so_tcbbg_qos_mode
= tfp
->tfp_qos_mode
;
843 lck_mtx_unlock(tclass_lock
);
856 error
= sooptcopyout(sopt
, &so_tcdbg
,
857 sizeof(struct so_tcdbg
));
859 error
= sooptcopyout(sopt
, buf
, len
);
866 #endif /* (DEVELOPMENT || DEBUG) */
869 so_get_netsvc_marking_level(struct socket
*so
)
871 int marking_level
= NETSVC_MRKNG_UNKNOWN
;
872 struct ifnet
*ifp
= NULL
;
874 switch (SOCK_DOM(so
)) {
876 struct inpcb
*inp
= sotoinpcb(so
);
879 ifp
= inp
->inp_last_outifp
;
884 struct in6pcb
*in6p
= sotoin6pcb(so
);
887 ifp
= in6p
->in6p_last_outifp
;
895 if ((ifp
->if_eflags
& IFEF_QOSMARKING_ENABLED
) != 0) {
896 if ((so
->so_flags1
& SOF1_QOSMARKING_ALLOWED
)) {
897 marking_level
= NETSVC_MRKNG_LVL_L3L2_ALL
;
899 marking_level
= NETSVC_MRKNG_LVL_L3L2_BK
;
902 marking_level
= NETSVC_MRKNG_LVL_L2
;
905 return marking_level
;
908 __private_extern__
int
909 so_set_traffic_class(struct socket
*so
, int optval
)
913 if (optval
< SO_TC_BE
|| optval
> SO_TC_CTL
) {
927 if (!SO_VALID_TC(optval
)) {
934 int oldval
= so
->so_traffic_class
;
936 VERIFY(SO_VALID_TC(optval
));
937 so
->so_traffic_class
= optval
;
939 if ((SOCK_DOM(so
) == PF_INET
||
940 SOCK_DOM(so
) == PF_INET6
) &&
941 SOCK_TYPE(so
) == SOCK_STREAM
) {
942 set_tcp_stream_priority(so
);
945 if ((SOCK_DOM(so
) == PF_INET
||
946 SOCK_DOM(so
) == PF_INET6
) &&
947 optval
!= oldval
&& (optval
== SO_TC_BK_SYS
||
948 oldval
== SO_TC_BK_SYS
)) {
950 * If the app switches from BK_SYS to something
951 * else, resume the socket if it was suspended.
953 if (oldval
== SO_TC_BK_SYS
) {
954 inp_reset_fc_state(so
->so_pcb
);
957 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
958 "[%d,%d] opportunistic %s\n", so
->last_pid
,
959 (uint64_t)VM_KERNEL_ADDRPERM(so
),
960 SOCK_DOM(so
), SOCK_TYPE(so
),
961 (optval
== SO_TC_BK_SYS
) ? "ON" : "OFF");
968 __private_extern__
int
969 so_set_net_service_type(struct socket
*so
, int netsvctype
)
974 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype
)) {
978 sotc
= sotc_by_netservicetype
[netsvctype
];
979 error
= so_set_traffic_class(so
, sotc
);
983 so
->so_netsvctype
= netsvctype
;
984 so
->so_flags1
|= SOF1_TC_NET_SERV_TYPE
;
989 __private_extern__
void
990 so_set_default_traffic_class(struct socket
*so
)
992 so
->so_traffic_class
= SO_TC_BE
;
994 if ((SOCK_DOM(so
) == PF_INET
|| SOCK_DOM(so
) == PF_INET6
)) {
995 if (net_qos_policy_restricted
== 0) {
996 so
->so_flags1
|= SOF1_QOSMARKING_ALLOWED
;
998 #if (DEVELOPMENT || DEBUG)
1000 set_tclass_for_curr_proc(so
);
1002 #endif /* (DEVELOPMENT || DEBUG) */
1006 __private_extern__
int
1007 so_set_opportunistic(struct socket
*so
, int optval
)
1009 return so_set_traffic_class(so
, (optval
== 0) ?
1010 SO_TC_BE
: SO_TC_BK_SYS
);
1013 __private_extern__
int
1014 so_get_opportunistic(struct socket
*so
)
1016 return so
->so_traffic_class
== SO_TC_BK_SYS
;
1019 __private_extern__
int
1020 so_tc_from_control(struct mbuf
*control
, int *out_netsvctype
)
1023 int sotc
= SO_TC_UNSPEC
;
1025 *out_netsvctype
= _NET_SERVICE_TYPE_UNSPEC
;
1027 for (cm
= M_FIRST_CMSGHDR(control
);
1028 is_cmsg_valid(control
, cm
);
1029 cm
= M_NXT_CMSGHDR(control
, cm
)) {
1032 if (cm
->cmsg_level
!= SOL_SOCKET
||
1033 cm
->cmsg_len
!= CMSG_LEN(sizeof(int))) {
1036 val
= *(int *)(void *)CMSG_DATA(cm
);
1038 * The first valid option wins
1040 switch (cm
->cmsg_type
) {
1041 case SO_TRAFFIC_CLASS
:
1042 if (SO_VALID_TC(val
)) {
1046 } else if (val
< SO_TC_NET_SERVICE_OFFSET
) {
1050 * Handle the case SO_NET_SERVICE_TYPE values are
1051 * passed using SO_TRAFFIC_CLASS
1053 val
= val
- SO_TC_NET_SERVICE_OFFSET
;
1055 case SO_NET_SERVICE_TYPE
:
1056 if (!IS_VALID_NET_SERVICE_TYPE(val
)) {
1059 *out_netsvctype
= val
;
1060 sotc
= sotc_by_netservicetype
[val
];
1071 __private_extern__
int
1072 so_tos_from_control(struct mbuf
*control
)
1075 int tos
= IPTOS_UNSPEC
;
1077 for (cm
= M_FIRST_CMSGHDR(control
);
1078 is_cmsg_valid(control
, cm
);
1079 cm
= M_NXT_CMSGHDR(control
, cm
)) {
1080 if (cm
->cmsg_len
!= CMSG_LEN(sizeof(int))) {
1084 if ((cm
->cmsg_level
== IPPROTO_IP
&&
1085 cm
->cmsg_type
== IP_TOS
) ||
1086 (cm
->cmsg_level
== IPPROTO_IPV6
&&
1087 cm
->cmsg_type
== IPV6_TCLASS
)) {
1088 tos
= *(int *)(void *)CMSG_DATA(cm
) & IPTOS_MASK
;
1089 /* The first valid option wins */
1097 __private_extern__
void
1098 so_recv_data_stat(struct socket
*so
, struct mbuf
*m
, size_t off
)
1100 uint32_t mtc
= m_get_traffic_class(m
);
1102 if (mtc
>= SO_TC_STATS_MAX
) {
1106 so
->so_tc_stats
[mtc
].rxpackets
+= 1;
1107 so
->so_tc_stats
[mtc
].rxbytes
+=
1108 ((m
->m_flags
& M_PKTHDR
) ? m
->m_pkthdr
.len
: 0) + off
;
1111 __private_extern__
void
1112 so_inc_recv_data_stat(struct socket
*so
, size_t pkts
, size_t bytes
,
1115 if (mtc
>= SO_TC_STATS_MAX
) {
1119 so
->so_tc_stats
[mtc
].rxpackets
+= pkts
;
1120 so
->so_tc_stats
[mtc
].rxbytes
+= bytes
;
1124 so_throttle_best_effort(struct socket
*so
, struct ifnet
*ifp
)
1126 uint32_t uptime
= net_uptime();
1127 return soissrcbesteffort(so
) &&
1128 net_io_policy_throttle_best_effort
== 1 &&
1129 ifp
->if_rt_sendts
> 0 &&
1130 (int)(uptime
- ifp
->if_rt_sendts
) <= TCP_BG_SWITCH_TIME
;
1133 __private_extern__
void
1134 set_tcp_stream_priority(struct socket
*so
)
1136 struct inpcb
*inp
= sotoinpcb(so
);
1137 struct tcpcb
*tp
= intotcpcb(inp
);
1138 struct ifnet
*outifp
;
1139 u_char old_cc
= tp
->tcp_cc_index
;
1140 int recvbg
= IS_TCP_RECV_BG(so
);
1141 bool is_local
= false, fg_active
= false;
1144 VERIFY((SOCK_CHECK_DOM(so
, PF_INET
) ||
1145 SOCK_CHECK_DOM(so
, PF_INET6
)) &&
1146 SOCK_CHECK_TYPE(so
, SOCK_STREAM
) &&
1147 SOCK_CHECK_PROTO(so
, IPPROTO_TCP
));
1149 /* Return if the socket is in a terminal state */
1150 if (inp
->inp_state
== INPCB_STATE_DEAD
) {
1154 outifp
= inp
->inp_last_outifp
;
1155 uptime
= net_uptime();
1158 * If the socket was marked as a background socket or if the
1159 * traffic class is set to background with traffic class socket
1160 * option then make both send and recv side of the stream to be
1161 * background. The variable sotcdb which can be set with sysctl
1162 * is used to disable these settings for testing.
1164 if (outifp
== NULL
|| (outifp
->if_flags
& IFF_LOOPBACK
)) {
1168 /* Check if there has been recent foreground activity */
1169 if (outifp
!= NULL
) {
1171 * If the traffic source is background, check if
1172 * it can be switched to foreground. This can
1173 * happen when there is no indication of foreground
1176 if (soissrcbackground(so
) && outifp
->if_fg_sendts
> 0 &&
1177 (int)(uptime
- outifp
->if_fg_sendts
) <= TCP_BG_SWITCH_TIME
) {
1182 * The traffic source is best-effort -- check if
1183 * the policy to throttle best effort is enabled
1184 * and there was realtime activity on this
1185 * interface recently. If this is true, enable
1186 * algorithms that respond to increased latency
1187 * on best-effort traffic.
1189 if (so_throttle_best_effort(so
, outifp
)) {
1195 * System initiated background traffic like cloud uploads should
1196 * always use background delay sensitive algorithms. This will
1197 * make the stream more responsive to other streams on the user's
1198 * network and it will minimize latency induced.
1200 if (fg_active
|| IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) {
1202 * If the interface that the connection is using is
1203 * loopback, do not use background congestion
1204 * control algorithm.
1206 * If there has been recent foreground activity or if
1207 * there was an indication that a foreground application
1208 * is going to use networking (net_io_policy_throttled),
1209 * switch the background streams to use background
1210 * congestion control algorithm. Otherwise, even background
1211 * flows can move into foreground.
1213 if ((sotcdb
& SOTCDB_NO_SENDTCPBG
) != 0 || is_local
||
1214 !IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) {
1215 if (old_cc
== TCP_CC_ALGO_BACKGROUND_INDEX
) {
1216 tcp_set_foreground_cc(so
);
1219 if (old_cc
!= TCP_CC_ALGO_BACKGROUND_INDEX
) {
1220 tcp_set_background_cc(so
);
1224 /* Set receive side background flags */
1225 if ((sotcdb
& SOTCDB_NO_RECVTCPBG
) != 0 || is_local
||
1226 !IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
)) {
1227 tcp_clear_recv_bg(so
);
1229 tcp_set_recv_bg(so
);
1232 tcp_clear_recv_bg(so
);
1233 if (old_cc
== TCP_CC_ALGO_BACKGROUND_INDEX
) {
1234 tcp_set_foreground_cc(so
);
1238 if (old_cc
!= tp
->tcp_cc_index
|| recvbg
!= IS_TCP_RECV_BG(so
)) {
1239 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1240 "%s recv\n", so
->last_pid
,
1241 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1242 SOCK_DOM(so
), SOCK_TYPE(so
),
1243 (tp
->tcp_cc_index
== TCP_CC_ALGO_BACKGROUND_INDEX
) ?
1244 "background" : "foreground",
1245 IS_TCP_RECV_BG(so
) ? "background" : "foreground");
1250 * Set traffic class to an IPv4 or IPv6 packet
1252 * - set the DSCP code following the WMM mapping
1254 __private_extern__
void
1255 set_packet_service_class(struct mbuf
*m
, struct socket
*so
,
1256 int sotc
, uint32_t flags
)
1258 mbuf_svc_class_t msc
= MBUF_SC_BE
; /* Best effort by default */
1259 struct inpcb
*inp
= sotoinpcb(so
); /* in6pcb and inpcb are the same */
1261 if (!(m
->m_flags
& M_PKTHDR
)) {
1266 * Here is the precedence:
1267 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1268 * 2) Traffic class passed via ancillary data to sendmsg(2)
1269 * 3) Traffic class socket option last
1271 if (sotc
!= SO_TC_UNSPEC
) {
1272 VERIFY(SO_VALID_TC(sotc
));
1273 msc
= so_tc2msc(sotc
);
1274 /* Assert because tc must have been valid */
1275 VERIFY(MBUF_VALID_SC(msc
));
1279 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1280 * best effort is set, depress the priority.
1282 if (!IS_MBUF_SC_BACKGROUND(msc
) && soisthrottled(so
)) {
1286 if (IS_MBUF_SC_BESTEFFORT(msc
) && inp
->inp_last_outifp
!= NULL
&&
1287 so_throttle_best_effort(so
, inp
->inp_last_outifp
)) {
1291 if (soissrcbackground(so
)) {
1292 m
->m_pkthdr
.pkt_flags
|= PKTF_SO_BACKGROUND
;
1295 if (soissrcrealtime(so
) || IS_MBUF_SC_REALTIME(msc
)) {
1296 m
->m_pkthdr
.pkt_flags
|= PKTF_SO_REALTIME
;
1299 * Set the traffic class in the mbuf packet header svc field
1301 if (sotcdb
& SOTCDB_NO_MTC
) {
1306 * Elevate service class if the packet is a pure TCP ACK.
1307 * We can do this only when the flow is not a background
1308 * flow and the outgoing interface supports
1309 * transmit-start model.
1311 if (!IS_MBUF_SC_BACKGROUND(msc
) &&
1312 (flags
& (PKT_SCF_TCP_ACK
| PKT_SCF_TCP_SYN
)) != 0) {
1316 (void) m_set_service_class(m
, msc
);
1319 * Set the privileged traffic auxiliary flag if applicable,
1322 if (!(sotcdb
& SOTCDB_NO_PRIVILEGED
) && soisprivilegedtraffic(so
) &&
1323 msc
!= MBUF_SC_UNSPEC
) {
1324 m
->m_pkthdr
.pkt_flags
|= PKTF_PRIO_PRIVILEGED
;
1326 m
->m_pkthdr
.pkt_flags
&= ~PKTF_PRIO_PRIVILEGED
;
1331 * For TCP with background traffic class switch CC algo based on sysctl
1333 if (so
->so_type
== SOCK_STREAM
) {
1334 set_tcp_stream_priority(so
);
1337 so_tc_update_stats(m
, so
, msc
);
1340 __private_extern__
void
1341 so_tc_update_stats(struct mbuf
*m
, struct socket
*so
, mbuf_svc_class_t msc
)
1343 mbuf_traffic_class_t mtc
;
1346 * Assume socket and mbuf traffic class values are the same
1347 * Also assume the socket lock is held. Note that the stats
1348 * at the socket layer are reduced down to the legacy traffic
1349 * classes; we could/should potentially expand so_tc_stats[].
1351 mtc
= MBUF_SC2TC(msc
);
1352 VERIFY(mtc
< SO_TC_STATS_MAX
);
1353 so
->so_tc_stats
[mtc
].txpackets
+= 1;
1354 so
->so_tc_stats
[mtc
].txbytes
+= m
->m_pkthdr
.len
;
1357 __private_extern__
void
1358 socket_tclass_init(void)
1360 _CASSERT(_SO_TC_MAX
== SO_TC_STATS_MAX
);
1362 tclass_lck_grp_attr
= lck_grp_attr_alloc_init();
1363 tclass_lck_grp
= lck_grp_alloc_init("tclass", tclass_lck_grp_attr
);
1364 tclass_lck_attr
= lck_attr_alloc_init();
1365 lck_mtx_init(tclass_lock
, tclass_lck_grp
, tclass_lck_attr
);
/*
 * Fragment of a function returning mbuf_svc_class_t.  The visible lines
 * declare a local msc, assign MBUF_SC_BK_SYS and MBUF_SC_UNSPEC to it, and
 * carry a case label for SO_TC_NETSVC_SIG.
 * NOTE(review): the declarator and most of the switch body are absent from
 * this extract; presumably this is so_tc2msc(), which other code in this
 * file calls -- confirm against the full source.
 */
1368 __private_extern__ mbuf_svc_class_t
1371 mbuf_svc_class_t msc
;
1375 msc
= MBUF_SC_BK_SYS
;
1400 case SO_TC_NETSVC_SIG
:
1412 msc
= MBUF_SC_UNSPEC
;
/*
 * Translate an mbuf service class (svc) into an int socket traffic class
 * value.  The visible lines return SO_TC_BK_SYS for MBUF_SC_BK_SYS and
 * contain a return of SO_TC_NETSVC_SIG and a case for MBUF_SC_UNSPEC.
 * NOTE(review): the remaining case labels and returns are not present in
 * this extract -- confirm the full mapping against the complete source.
 */
1419 __private_extern__
int
1420 so_svc2tc(mbuf_svc_class_t svc
)
1423 case MBUF_SC_BK_SYS
:
1424 return SO_TC_BK_SYS
;
1440 return SO_TC_NETSVC_SIG
;
1445 case MBUF_SC_UNSPEC
:
/*
 * Update the socket's SOF_USELRO flag from a traffic class option value.
 *
 * so:     socket whose so_flags is updated.
 * optval: traffic class value; SO_TC_AV sets SOF_USELRO.
 *
 * The other visible path clears SOF_USELRO when it is set and, if the
 * socket's TCP control block has TF_LRO_OFFLOADED, calls
 * tcp_lro_remove_state() and clears that flag as well.
 * NOTE(review): the line joining the two paths (presumably an else), any
 * NULL check on inp, and the trailing arguments of tcp_lro_remove_state()
 * are not present in this extract -- confirm against the full source.
 */
1452 * LRO is turned on for AV streaming class.
1455 so_set_lro(struct socket
*so
, int optval
)
1457 if (optval
== SO_TC_AV
) {
1458 so
->so_flags
|= SOF_USELRO
;
1460 if (so
->so_flags
& SOF_USELRO
) {
1461 /* transition to non LRO class */
1462 so
->so_flags
&= ~SOF_USELRO
;
1463 struct inpcb
*inp
= sotoinpcb(so
);
1464 struct tcpcb
*tp
= NULL
;
1466 tp
= intotcpcb(inp
);
/* Only tear down LRO state that was actually offloaded for this flow */
1467 if (tp
&& (tp
->t_flagsext
& TF_LRO_OFFLOADED
)) {
1468 tcp_lro_remove_state(inp
->inp_laddr
,
1472 tp
->t_flagsext
&= ~TF_LRO_OFFLOADED
;
/*
 * Map a socket traffic class value (sotc) to an index; the one visible
 * return yields SOTCIX_BK_SYS.  Callers in this file compare the result
 * against SIZE_T_MAX before using it as an array index.
 * NOTE(review): the return type and all other cases are not present in
 * this extract; presumably SIZE_T_MAX is what is returned for an unknown
 * traffic class -- confirm.
 */
1480 sotc_index(int sotc
)
1484 return SOTCIX_BK_SYS
;
1514 * Unknown traffic class value
/*
 * Map a service class value to a DSCP value, starting from the default
 * _DSCP_DF and switching on svc_class.
 * NOTE(review): only the first case label (MBUF_SC_BK_SYS) is visible in
 * this extract; the per-class DSCP assignments and the return are not --
 * confirm against the full source.
 */
1520 fastlane_sc_to_dscp(uint32_t svc_class
)
1522 uint8_t dscp
= _DSCP_DF
;
1524 switch (svc_class
) {
1525 case MBUF_SC_BK_SYS
:
/*
 * Map a service class value to a DSCP value, starting from the default
 * _DSCP_DF and switching on svc_class.  The trailing comment on each case
 * names the category that service class is matched with; two categories
 * are noted as having no service class defined.
 * NOTE(review): the DSCP assigned in each case and the return statement
 * are not present in this extract -- confirm against the full source.
 */
1568 rfc4594_sc_to_dscp(uint32_t svc_class
)
1570 uint8_t dscp
= _DSCP_DF
;
1572 switch (svc_class
) {
1573 case MBUF_SC_BK_SYS
: /* Low-Priority Data */
1578 case MBUF_SC_BE
: /* Standard */
1581 case MBUF_SC_RD
: /* Low-Latency Data */
1585 /* SVC_CLASS Not Defined: High-Throughput Data */
1587 case MBUF_SC_OAM
: /* OAM */
1591 /* SVC_CLASS Not Defined: Broadcast Video */
1593 case MBUF_SC_AV
: /* Multimedia Streaming */
1596 case MBUF_SC_RV
: /* Real-Time Interactive */
1599 case MBUF_SC_VI
: /* Multimedia Conferencing */
1602 case MBUF_SC_SIG
: /* Signaling */
1606 case MBUF_SC_VO
: /* Telephony */
1609 case MBUF_SC_CTL
: /* Network Control*/
/*
 * Map a DSCP value to an mbuf traffic class; the result variable starts
 * out as MBUF_TC_BE.
 * NOTE(review): the body that selects a non-default class and the return
 * statement are not present in this extract -- confirm against the full
 * source.
 */
1620 mbuf_traffic_class_t
1621 rfc4594_dscp_to_tc(uint8_t dscp
)
1623 mbuf_traffic_class_t tc
= MBUF_TC_BE
;
/*
 * Populate a net_qos_dscp_map from a table of (netsvctype, dscp) pairs.
 *
 * net_qos_dscp_map:    destination; both its netsvctype_to_dscp[] and
 *                      sotc_to_dscp[] tables are written.
 * netsvctype_dscp_map: input table; _NET_SERVICE_TYPE_COUNT entries are
 *                      read.
 *
 * Every input entry is validated (service type valid, dscp no larger than
 * _MAX_DSCP) before anything is written.
 * NOTE(review): the return type, the local declarations and the statements
 * taken when a check fails are not present in this extract; callers in
 * this file store the result in a variable named error -- confirm.
 */
1653 * Pass NULL ifp for default map
1656 set_netsvctype_dscp_map(struct net_qos_dscp_map
*net_qos_dscp_map
,
1657 const struct netsvctype_dscp_map
*netsvctype_dscp_map
)
1663 * Do not accept more that max number of distinct DSCPs
1665 if (net_qos_dscp_map
== NULL
|| netsvctype_dscp_map
== NULL
) {
1670 * Validate input parameters
1672 for (i
= 0; i
< _NET_SERVICE_TYPE_COUNT
; i
++) {
1673 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map
[i
].netsvctype
)) {
1676 if (netsvctype_dscp_map
[i
].dscp
> _MAX_DSCP
) {
/* Record the DSCP of each input entry under that entry's service type */
1681 for (i
= 0; i
< _NET_SERVICE_TYPE_COUNT
; i
++) {
1682 netsvctype
= netsvctype_dscp_map
[i
].netsvctype
;
1684 net_qos_dscp_map
->netsvctype_to_dscp
[netsvctype
] =
1685 netsvctype_dscp_map
[i
].dscp
;
/* Derive the per-socket-traffic-class DSCP table from the service types */
1687 for (netsvctype
= 0; netsvctype
< _NET_SERVICE_TYPE_COUNT
; netsvctype
++) {
1688 switch (netsvctype
) {
1689 case NET_SERVICE_TYPE_BE
:
1690 case NET_SERVICE_TYPE_BK
:
1691 case NET_SERVICE_TYPE_VI
:
1692 case NET_SERVICE_TYPE_VO
:
1693 case NET_SERVICE_TYPE_RV
:
1694 case NET_SERVICE_TYPE_AV
:
1695 case NET_SERVICE_TYPE_OAM
:
1696 case NET_SERVICE_TYPE_RD
: {
1699 sotcix
= sotc_index(sotc_by_netservicetype
[netsvctype
]);
1700 if (sotcix
!= SIZE_T_MAX
) {
/*
 * NOTE(review): the DSCP is read from the input table at position
 * netsvctype, not from the netsvctype_to_dscp[] table filled in above;
 * the two agree only if input entry i describes service type i -- confirm
 * that callers guarantee this ordering.
 */
1701 net_qos_dscp_map
->sotc_to_dscp
[sotcix
] =
1702 netsvctype_dscp_map
[netsvctype
].dscp
;
1706 case NET_SERVICE_TYPE_SIG
:
1707 /* Signaling does not have its own traffic class */
1710 /* We should not be here */
1714 /* Network control socket traffic class is always best effort */
1715 net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_CTL
] = _DSCP_DF
;
1717 /* Backround socket traffic class DSCP same as backround system */
1718 net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_BK
] =
1719 net_qos_dscp_map
->sotc_to_dscp
[SOTCIX_BK_SYS
];
/*
 * Copy the service-type-to-DSCP table of fastlane_net_qos_dscp_map into a
 * caller-supplied array.
 *
 * out_count:           on input, the capacity of netsvctype_dscp_map; at
 *                      most MIN(_NET_SERVICE_TYPE_COUNT, *out_count)
 *                      entries are written.
 * netsvctype_dscp_map: output array; entry i receives service type i and
 *                      its DSCP.
 *
 * NOTE(review): the statements taken when a check fails, the update of
 * *out_count on output and the return are not present in this extract.
 * The capacity is compared against _MAX_DSCP although it counts service
 * types -- confirm that this bound is the intended one.
 */
1725 * out_count is an input/ouput parameter
1728 get_netsvctype_dscp_map(size_t *out_count
,
1729 struct netsvctype_dscp_map
*netsvctype_dscp_map
)
1732 struct net_qos_dscp_map
*net_qos_dscp_map
= NULL
;
1735 * Do not accept more that max number of distinct DSCPs
1737 if (out_count
== NULL
|| netsvctype_dscp_map
== NULL
) {
1740 if (*out_count
> _MAX_DSCP
) {
1744 net_qos_dscp_map
= &fastlane_net_qos_dscp_map
;
1746 for (i
= 0; i
< MIN(_NET_SERVICE_TYPE_COUNT
, *out_count
); i
++) {
1747 netsvctype_dscp_map
[i
].netsvctype
= i
;
1748 netsvctype_dscp_map
[i
].dscp
= net_qos_dscp_map
->netsvctype_to_dscp
[i
];
/*
 * Initialization sequence for the QoS maps: load the Fastlane and RFC 4594
 * DSCP maps from their static tables, then install the default
 * DSCP-to-Wi-Fi-access-category map with the clear argument set to 1.
 * NOTE(review): the enclosing function header and whatever is done with
 * error after each call are not present in this extract -- confirm against
 * the full source.
 */
1760 error
= set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map
,
1761 fastlane_netsvctype_dscp_map
);
1764 error
= set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map
,
1765 rfc4594_netsvctype_dscp_map
);
1768 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map
, 1);
/*
 * sysctl handler exposing the default service-type-to-DSCP map.
 *
 * Read side: when the caller supplies an output buffer with a non-zero
 * length, the map is fetched with get_netsvctype_dscp_map() into a local
 * array and copied out, truncated to the caller's buffer length.  When no
 * output buffer is supplied, a size of _NET_SERVICE_TYPE_COUNT entries is
 * computed.
 * NOTE(review): the local declarations, the left-hand side of the size
 * computation, the error checks and the body of the newptr branch are not
 * present in this extract -- confirm against the full source.
 */
1772 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1774 #pragma unused(oidp, arg1, arg2)
1777 struct netsvctype_dscp_map netsvctype_dscp_map
[_NET_SERVICE_TYPE_COUNT
] = {};
1780 if (req
->oldptr
== USER_ADDR_NULL
) {
1782 _NET_SERVICE_TYPE_COUNT
* sizeof(struct netsvctype_dscp_map
);
1783 } else if (req
->oldlen
> 0) {
1784 count
= _NET_SERVICE_TYPE_COUNT
;
1785 error
= get_netsvctype_dscp_map(&count
, netsvctype_dscp_map
);
/* Never copy out more than the caller's buffer can hold */
1789 len
= count
* sizeof(struct netsvctype_dscp_map
);
1790 error
= SYSCTL_OUT(req
, netsvctype_dscp_map
,
1791 MIN(len
, req
->oldlen
));
1797 if (req
->newptr
!= USER_ADDR_NULL
) {
/*
 * Select the DSCP for an outgoing packet and, in some cases, adjust the
 * packet's mbuf service class.
 *
 * m:           packet whose service class may be changed.
 * ifp:         outgoing interface; its QoS marking flag and mode are read.
 * qos_allowed: when FALSE, classes other than best effort and background
 *              have the mbuf service class forced to MBUF_SC_BE, except
 *              for SO_TC_CTL.
 * sotc:        socket traffic class, used when netsvctype does not apply.
 * netsvctype:  network service type; used for the lookup when it is valid
 *              and not NET_SERVICE_TYPE_BE.
 * dscp_inout:  current DSCP on input; compared with the computed value.
 *
 * With QoS marking enabled on ifp, the DSCP comes from the Fastlane or the
 * RFC 4594 map according to if_qosmarking_mode (any other mode panics).
 * Otherwise, on a Wi-Fi infrastructure interface with a non-default DSCP,
 * a best effort packet takes its service class from
 * wifi_dscp_to_msc_array[].
 * NOTE(review): the declaration of dscp, the statements run when a check
 * fails, the update of *dscp_inout and the return are not present in this
 * extract -- confirm against the full source.
 */
1804 __private_extern__ errno_t
1805 set_packet_qos(struct mbuf
*m
, struct ifnet
*ifp
, boolean_t qos_allowed
,
1806 int sotc
, int netsvctype
, uint8_t *dscp_inout
)
1808 if (ifp
== NULL
|| dscp_inout
== NULL
) {
1812 if ((ifp
->if_eflags
& IFEF_QOSMARKING_ENABLED
) != 0 &&
1813 ifp
->if_qosmarking_mode
!= IFRTYPE_QOSMARKING_MODE_NONE
) {
1815 const struct net_qos_dscp_map
*net_qos_dscp_map
= NULL
;
/* Pick the DSCP table that matches the interface's marking mode */
1817 switch (ifp
->if_qosmarking_mode
) {
1818 case IFRTYPE_QOSMARKING_FASTLANE
:
1819 net_qos_dscp_map
= &fastlane_net_qos_dscp_map
;
1821 case IFRTYPE_QOSMARKING_RFC4594
:
1822 net_qos_dscp_map
= &rfc4594_net_qos_dscp_map
;
1825 panic("invalid QoS marking type");
1830 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1835 * For DSCP use the network service type is specified, otherwise
1836 * use the socket traffic class
1838 * When not whitelisted by the policy, set DSCP only for best
1839 * effort and background, and set the mbuf service class to
1840 * best effort as well so the packet will be queued and
1841 * scheduled at a lower priority.
1842 * We still want to prioritize control traffic on the interface
1843 * so we do not change the mbuf service class for SO_TC_CTL
1845 if (IS_VALID_NET_SERVICE_TYPE(netsvctype
) &&
1846 netsvctype
!= NET_SERVICE_TYPE_BE
) {
1847 dscp
= net_qos_dscp_map
->netsvctype_to_dscp
[netsvctype
];
1849 if (qos_allowed
== FALSE
&&
1850 netsvctype
!= NET_SERVICE_TYPE_BE
&&
1851 netsvctype
!= NET_SERVICE_TYPE_BK
) {
1853 if (sotc
!= SO_TC_CTL
) {
1854 m_set_service_class(m
, MBUF_SC_BE
);
1857 } else if (sotc
!= SO_TC_UNSPEC
) {
1858 size_t sotcix
= sotc_index(sotc
);
1859 if (sotcix
!= SIZE_T_MAX
) {
1860 dscp
= net_qos_dscp_map
->sotc_to_dscp
[sotcix
];
1862 if (qos_allowed
== FALSE
&& sotc
!= SO_TC_BE
&&
1863 sotc
!= SO_TC_BK
&& sotc
!= SO_TC_BK_SYS
&&
1864 sotc
!= SO_TC_CTL
) {
/*
 * NOTE(review): if this test is nested inside the condition just above,
 * which already requires sotc != SO_TC_CTL, it is always true there --
 * confirm the nesting against the full source.
 */
1866 if (sotc
!= SO_TC_CTL
) {
1867 m_set_service_class(m
, MBUF_SC_BE
);
1872 if (net_qos_verbose
!= 0) {
1873 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1874 __func__
, qos_allowed
, sotc
, netsvctype
, dscp
);
1877 if (*dscp_inout
!= dscp
) {
1880 } else if (*dscp_inout
!= _DSCP_DF
&& IFNET_IS_WIFI_INFRA(ifp
)) {
1881 mbuf_svc_class_t msc
= m_get_service_class(m
);
1884 * For WiFi infra, when the mbuf service class is best effort
1885 * and the DSCP is not default, set the service class based
1888 if (msc
== MBUF_SC_BE
) {
/*
 * NOTE(review): the table is indexed directly by *dscp_inout; this
 * assumes callers never pass a value beyond the table's size -- confirm.
 */
1889 msc
= wifi_dscp_to_msc_array
[*dscp_inout
];
1891 if (msc
!= MBUF_SC_BE
) {
1892 m_set_service_class(m
, msc
);
1894 if (net_qos_verbose
!= 0) {
1895 printf("%s set msc %u for dscp %u\n",
1896 __func__
, msc
, *dscp_inout
);
/*
 * Load wifi_dscp_to_msc_array[] from a table of (dscp, msc) entries.
 *
 * map:   DSCP_ARRAY_SIZE entries are read.
 * clear: callers in this file pass 1 or 0; the array is zeroed with
 *        bzero() on one path.
 *
 * Entries whose dscp exceeds _MAX_DSCP or whose msc is MBUF_SC_UNSPEC are
 * tested for separately.  Each remaining entry stores one of MBUF_SC_BK,
 * MBUF_SC_BE, MBUF_SC_VI or MBUF_SC_VO at index elem->dscp, chosen by a
 * switch on elem->msc.
 * NOTE(review): the test on clear, the action taken for rejected entries
 * and all case labels except MBUF_SC_BK_SYS are not present in this
 * extract -- confirm against the full source.
 */
1906 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map
*map
, int clear
)
1911 bzero(wifi_dscp_to_msc_array
, sizeof(wifi_dscp_to_msc_array
));
1914 for (i
= 0; i
< DSCP_ARRAY_SIZE
; i
++) {
1915 const struct dcsp_msc_map
*elem
= map
+ i
;
1917 if (elem
->dscp
> _MAX_DSCP
|| elem
->msc
== MBUF_SC_UNSPEC
) {
1920 switch (elem
->msc
) {
1921 case MBUF_SC_BK_SYS
:
1923 wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_BK
;
1929 wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_BE
;
1934 wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_VI
;
1938 wifi_dscp_to_msc_array
[elem
->dscp
] = MBUF_SC_VO
;
/*
 * Convert count (netsvctype, dscp) entries into (dscp, msc) entries.
 *
 * netsvctype_dscp_map: input entries; the netsvctype field is checked with
 *                      SO_VALID_TC() and passed to so_tc2msc(), i.e. it is
 *                      handled as a socket traffic class here.
 * count:               number of input entries to validate and convert.
 * dcsp_msc_map:        output; DSCP_ARRAY_SIZE entries are zeroed, then
 *                      entries 0..count-1 are filled in.
 *
 * All entries are validated before the output is touched.
 * NOTE(review): the return type and the statements taken when a check
 * fails are not present in this extract.  Only DSCP_ARRAY_SIZE output
 * entries are zeroed while count entries are written, so this assumes
 * count <= DSCP_ARRAY_SIZE -- confirm against callers.
 */
1945 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map
*netsvctype_dscp_map
,
1946 size_t count
, struct dcsp_msc_map
*dcsp_msc_map
)
1952 * Validate input parameters
1954 for (i
= 0; i
< count
; i
++) {
1955 if (!SO_VALID_TC(netsvctype_dscp_map
[i
].netsvctype
)) {
1959 if (netsvctype_dscp_map
[i
].dscp
> _MAX_DSCP
) {
1965 bzero(dcsp_msc_map
, DSCP_ARRAY_SIZE
* sizeof(struct dcsp_msc_map
));
1967 for (i
= 0; i
< count
; i
++) {
1968 dcsp_msc_map
[i
].dscp
= netsvctype_dscp_map
[i
].dscp
;
1969 dcsp_msc_map
[i
].msc
= so_tc2msc(netsvctype_dscp_map
[i
].netsvctype
);
/*
 * sysctl handler for reading and replacing the DSCP-to-Wi-Fi-access-
 * category map held in wifi_dscp_to_msc_array[].
 *
 * Read side: when an output buffer with a non-zero length is supplied, one
 * entry per DSCP index is built, with so_svc2tc() converting the stored
 * service class, and copied out truncated to the caller's buffer length.
 *
 * Write side: proc_suser() is called on the current process, the input
 * length is checked against the size of DSCP_ARRAY_SIZE entries and capped
 * to what was supplied, the input is copied in, converted with
 * dscp_msc_map_from_netsvctype_dscp_map() and applied with
 * set_dscp_to_wifi_ac_map(..., 0).
 * NOTE(review): the local declarations, the error checks after each call,
 * the bodies of several branches and the return are not present in this
 * extract -- confirm against the full source.
 */
1976 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1978 #pragma unused(oidp, arg1, arg2)
1980 size_t len
= DSCP_ARRAY_SIZE
* sizeof(struct netsvctype_dscp_map
);
1981 struct netsvctype_dscp_map netsvctype_dscp_map
[DSCP_ARRAY_SIZE
] = {};
1982 struct dcsp_msc_map dcsp_msc_map
[DSCP_ARRAY_SIZE
];
1986 if (req
->oldptr
== USER_ADDR_NULL
) {
1988 } else if (req
->oldlen
> 0) {
/* Export one entry per DSCP index, as a socket traffic class value */
1989 for (i
= 0; i
< DSCP_ARRAY_SIZE
; i
++) {
1990 netsvctype_dscp_map
[i
].dscp
= i
;
1991 netsvctype_dscp_map
[i
].netsvctype
=
1992 so_svc2tc(wifi_dscp_to_msc_array
[i
]);
1994 error
= SYSCTL_OUT(req
, netsvctype_dscp_map
,
1995 MIN(len
, req
->oldlen
));
2001 if (req
->newptr
== USER_ADDR_NULL
) {
2005 error
= proc_suser(current_proc());
2011 * Check input length
2013 if (req
->newlen
> len
) {
2018 * Cap the number of entries to copy from input buffer
2020 if (len
> req
->newlen
) {
2023 error
= SYSCTL_IN(req
, netsvctype_dscp_map
, len
);
/* Number of whole entries contained in the copied-in length */
2027 count
= len
/ sizeof(struct netsvctype_dscp_map
);
2028 bzero(dcsp_msc_map
, sizeof(dcsp_msc_map
));
2029 error
= dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map
, count
,
2034 set_dscp_to_wifi_ac_map(dcsp_msc_map
, 0);
/*
 * sysctl handler that reinstalls the default DSCP-to-Wi-Fi-access-category
 * map.  The integer is processed with sysctl_handle_int(); the error /
 * no-new-value case is tested before the default map is installed with the
 * clear argument set to 1.
 * NOTE(review): the declarations of error and val, any test on val, the
 * body of the error branch and the return are not present in this extract
 * -- confirm against the full source.
 */
2040 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
2042 #pragma unused(oidp, arg1, arg2)
2046 error
= sysctl_handle_int(oidp
, &val
, 0, req
);
2047 if (error
|| !req
->newptr
) {
2051 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map
, 1);
/*
 * Advise whether a transfer should use the background (BK) service type.
 *
 * p:      calling process (not referenced in the visible lines).
 * arg:    arg->param is a user address from which a struct net_qos_param
 *         of exactly arg->param_len == sizeof(qos_arg) bytes is copied in.
 * retval: receives RETURN_USE_BK (1) or RETURN_USE_DEFAULT (0); it starts
 *         as RETURN_USE_DEFAULT.
 *
 * Visible checks that set *retval to RETURN_USE_BK:
 *  - uplink transfer and either primary interface has IFXF_LOW_INTERNET_UL;
 *  - either primary interface has IFXF_LOW_INTERNET_DL;
 *  - both primary interfaces exist and are expensive;
 *  - both primary interfaces exist and are constrained;
 *  - nq_transfer_size of at least 5 MiB.
 * NOTE(review): the third parameter's declaration, the returns, the else
 * branches and the bodies of the nq_use_expensive / nq_use_constrained
 * tests are not present in this extract, so how these checks chain cannot
 * be confirmed here.  The in-body note speaks of "either" interface being
 * non-cellular, while the visible conditions require both interfaces to be
 * expensive (or constrained) -- confirm which is intended.
 */
2057 * Returns whether a large upload or download transfer should be marked as
2058 * BK service type for network activity. This is a system level
2059 * hint/suggestion to classify application traffic based on statistics
2060 * collected from the current network attachment
2062 * Returns 1 for BK and 0 for default
2066 net_qos_guideline(struct proc
*p
, struct net_qos_guideline_args
*arg
,
2070 #define RETURN_USE_BK 1
2071 #define RETURN_USE_DEFAULT 0
2072 struct net_qos_param qos_arg
;
2073 struct ifnet
*ipv4_primary
, *ipv6_primary
;
/* Reject a missing parameter block or one of unexpected size */
2076 if (arg
->param
== USER_ADDR_NULL
|| retval
== NULL
||
2077 arg
->param_len
!= sizeof(qos_arg
)) {
2080 err
= copyin(arg
->param
, (caddr_t
) &qos_arg
, sizeof(qos_arg
));
2085 *retval
= RETURN_USE_DEFAULT
;
/* Look up the current primary interface for each address family */
2086 ipv4_primary
= ifindex2ifnet
[get_primary_ifscope(AF_INET
)];
2087 ipv6_primary
= ifindex2ifnet
[get_primary_ifscope(AF_INET6
)];
2090 * If either of the interfaces is in Low Internet mode, enable
2091 * background delay based algorithms on this transfer
2093 if (qos_arg
.nq_uplink
) {
2094 if ((ipv4_primary
!= NULL
&&
2095 (ipv4_primary
->if_xflags
& IFXF_LOW_INTERNET_UL
)) ||
2096 (ipv6_primary
!= NULL
&&
2097 (ipv6_primary
->if_xflags
& IFXF_LOW_INTERNET_UL
))) {
2098 *retval
= RETURN_USE_BK
;
2102 if ((ipv4_primary
!= NULL
&&
2103 (ipv4_primary
->if_xflags
& IFXF_LOW_INTERNET_DL
)) ||
2104 (ipv6_primary
!= NULL
&&
2105 (ipv6_primary
->if_xflags
& IFXF_LOW_INTERNET_DL
))) {
2106 *retval
= RETURN_USE_BK
;
2112 * Some times IPv4 and IPv6 primary interfaces can be different.
2113 * In this case, if either of them is non-cellular, we should mark
2114 * the transfer as BK as it can potentially get used based on
2115 * the host name resolution
2117 if (ipv4_primary
!= NULL
&& IFNET_IS_EXPENSIVE(ipv4_primary
) &&
2118 ipv6_primary
!= NULL
&& IFNET_IS_EXPENSIVE(ipv6_primary
)) {
2119 if (qos_arg
.nq_use_expensive
) {
2122 *retval
= RETURN_USE_BK
;
2126 if (ipv4_primary
!= NULL
&& IFNET_IS_CONSTRAINED(ipv4_primary
) &&
2127 ipv6_primary
!= NULL
&& IFNET_IS_CONSTRAINED(ipv6_primary
)) {
2128 if (qos_arg
.nq_use_constrained
) {
2131 *retval
= RETURN_USE_BK
;
/* Transfers of 5 MiB or more are treated as large */
2135 if (qos_arg
.nq_transfer_size
>= 5 * 1024 * 1024) {
2136 *retval
= RETURN_USE_BK
;
2141 #undef RETURN_USE_BK
2142 #undef RETURN_USE_DEFAULT