]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_tclass.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / netinet / in_tclass.c
1 /*
2 * Copyright (c) 2009-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
34 #include <sys/proc.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
40 #include <sys/mbuf.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
44
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
48
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/in_tclass.h>
62
63 struct net_qos_dscp_map {
64 uint8_t sotc_to_dscp[SO_TC_MAX];
65 uint8_t netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];
66 };
67
68 struct dcsp_msc_map {
69 uint8_t dscp;
70 mbuf_svc_class_t msc;
71 };
72 static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
73 static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
74 static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
75 struct dcsp_msc_map *);
76
77 static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
78 static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */
79 static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */
80 decl_lck_mtx_data(static, tclass_lock_data);
81 static lck_mtx_t *tclass_lock = &tclass_lock_data;
82
83 SYSCTL_NODE(_net, OID_AUTO, qos,
84 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");
85
86 static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
87 SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
88 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
89 0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");
90
91 static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
92 SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
93 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
94 0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");
95
96 static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
97 SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
98 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
99 0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");
100
101 int net_qos_verbose = 0;
102 SYSCTL_INT(_net_qos, OID_AUTO, verbose,
103 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
104
105 /*
106 * Fastlane QoS policy:
107 * By Default allow all apps to get traffic class to DSCP mapping
108 */
109 SYSCTL_NODE(_net_qos, OID_AUTO, policy,
110 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
111
112 int net_qos_policy_restricted = 0;
113 SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
114 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");
115
116 int net_qos_policy_restrict_avapps = 0;
117 SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
118 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");
119
120 int net_qos_policy_wifi_enabled = 0;
121 SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
122 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");
123
124 int net_qos_policy_capable_enabled = 0;
125 SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
126 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
127
128 /*
129 * Socket traffic class from network service type
130 */
131 const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
132 SO_TC_BE, /* NET_SERVICE_TYPE_BE */
133 SO_TC_BK_SYS, /* NET_SERVICE_TYPE_BK */
134 SO_TC_VI, /* NET_SERVICE_TYPE_SIG */
135 SO_TC_VI, /* NET_SERVICE_TYPE_VI */
136 SO_TC_VO, /* NET_SERVICE_TYPE_VO */
137 SO_TC_RV, /* NET_SERVICE_TYPE_RV */
138 SO_TC_AV, /* NET_SERVICE_TYPE_AV */
139 SO_TC_OAM, /* NET_SERVICE_TYPE_OAM */
140 SO_TC_RD /* NET_SERVICE_TYPE_RD */
141 };
142
143 /*
144 * DSCP mappings for QoS Fastlane as based on network service types
145 */
146 static const
147 struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
148 { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
149 { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
150 { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
151 { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
152 { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
153 { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
154 { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
155 { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
156 { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
157 };
158
159
160 /*
161 * DSCP mappings for QoS RFC4594 as based on network service types
162 */
163 static const
164 struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
165 { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
166 { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
167 { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
168 { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
169 { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
170 { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
171 { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
172 { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
173 { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
174 };
175
176 static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
177 static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
178
179 /*
180 * The size is one more than the max because DSCP start at zero
181 */
182 #define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
183
184 /*
185 * The DSCP to UP mapping (via mbuf service class) for WiFi follows is the mapping
186 * that implemented at the 802.11 driver level when the mbuf service class is
187 * MBUF_SC_BE.
188 *
189 * This clashes with the recommended mapping documented by the IETF document
190 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
191 * binary compatibility. Applications should use the network service type socket
192 * option instead to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
193 */
194 static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
195 { .dscp = _DSCP_DF, .msc = MBUF_SC_BE }, /* RFC 2474 Standard */
196 { .dscp = 1, .msc = MBUF_SC_BE }, /* */
197 { .dscp = 2, .msc = MBUF_SC_BE }, /* */
198 { .dscp = 3, .msc = MBUF_SC_BE }, /* */
199 { .dscp = 4, .msc = MBUF_SC_BE }, /* */
200 { .dscp = 5, .msc = MBUF_SC_BE }, /* */
201 { .dscp = 6, .msc = MBUF_SC_BE }, /* */
202 { .dscp = 7, .msc = MBUF_SC_BE }, /* */
203
204 { .dscp = _DSCP_CS1, .msc = MBUF_SC_BK }, /* RFC 3662 Low-Priority Data */
205 { .dscp = 9, .msc = MBUF_SC_BK }, /* */
206 { .dscp = _DSCP_AF11, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
207 { .dscp = 11, .msc = MBUF_SC_BK }, /* */
208 { .dscp = _DSCP_AF12, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
209 { .dscp = 13, .msc = MBUF_SC_BK }, /* */
210 { .dscp = _DSCP_AF13, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
211 { .dscp = 15, .msc = MBUF_SC_BK }, /* */
212
213 { .dscp = _DSCP_CS2, .msc = MBUF_SC_BK }, /* RFC 4594 OAM */
214 { .dscp = 17, .msc = MBUF_SC_BK }, /* */
215 { .dscp = _DSCP_AF21, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
216 { .dscp = 19, .msc = MBUF_SC_BK }, /* */
217 { .dscp = _DSCP_AF22, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
218 { .dscp = 21, .msc = MBUF_SC_BK }, /* */
219 { .dscp = _DSCP_AF23, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
220 { .dscp = 23, .msc = MBUF_SC_BK }, /* */
221
222 { .dscp = _DSCP_CS3, .msc = MBUF_SC_BE }, /* RFC 2474 Broadcast Video */
223 { .dscp = 25, .msc = MBUF_SC_BE }, /* */
224 { .dscp = _DSCP_AF31, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
225 { .dscp = 27, .msc = MBUF_SC_BE }, /* */
226 { .dscp = _DSCP_AF32, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
227 { .dscp = 29, .msc = MBUF_SC_BE }, /* */
228 { .dscp = _DSCP_AF33, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
229 { .dscp = 31, .msc = MBUF_SC_BE }, /* */
230
231 { .dscp = _DSCP_CS4, .msc = MBUF_SC_VI }, /* RFC 2474 Real-Time Interactive */
232 { .dscp = 33, .msc = MBUF_SC_VI }, /* */
233 { .dscp = _DSCP_AF41, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
234 { .dscp = 35, .msc = MBUF_SC_VI }, /* */
235 { .dscp = _DSCP_AF42, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
236 { .dscp = 37, .msc = MBUF_SC_VI }, /* */
237 { .dscp = _DSCP_AF43, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
238 { .dscp = 39, .msc = MBUF_SC_VI }, /* */
239
240 { .dscp = _DSCP_CS5, .msc = MBUF_SC_VI }, /* RFC 2474 Signaling */
241 { .dscp = 41, .msc = MBUF_SC_VI }, /* */
242 { .dscp = 42, .msc = MBUF_SC_VI }, /* */
243 { .dscp = 43, .msc = MBUF_SC_VI }, /* */
244 { .dscp = _DSCP_VA, .msc = MBUF_SC_VI }, /* RFC 5865 VOICE-ADMIT */
245 { .dscp = 45, .msc = MBUF_SC_VI }, /* */
246 { .dscp = _DSCP_EF, .msc = MBUF_SC_VI }, /* RFC 3246 Telephony */
247 { .dscp = 47, .msc = MBUF_SC_VI }, /* */
248
249 { .dscp = _DSCP_CS6, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
250 { .dscp = 49, .msc = MBUF_SC_VO }, /* */
251 { .dscp = 50, .msc = MBUF_SC_VO }, /* */
252 { .dscp = 51, .msc = MBUF_SC_VO }, /* */
253 { .dscp = 52, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Sigma */
254 { .dscp = 53, .msc = MBUF_SC_VO }, /* */
255 { .dscp = 54, .msc = MBUF_SC_VO }, /* */
256 { .dscp = 55, .msc = MBUF_SC_VO }, /* */
257
258 { .dscp = _DSCP_CS7, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
259 { .dscp = 57, .msc = MBUF_SC_VO }, /* */
260 { .dscp = 58, .msc = MBUF_SC_VO }, /* */
261 { .dscp = 59, .msc = MBUF_SC_VO }, /* */
262 { .dscp = 60, .msc = MBUF_SC_VO }, /* */
263 { .dscp = 61, .msc = MBUF_SC_VO }, /* */
264 { .dscp = 62, .msc = MBUF_SC_VO }, /* */
265 { .dscp = 63, .msc = MBUF_SC_VO }, /* */
266
267 { .dscp = 255, .msc = MBUF_SC_UNSPEC } /* invalid DSCP to mark last entry */
268 };
269
270 mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
271
/*
 * Background connections may switch to foreground TCP congestion control
 * once the interface has seen no foreground activity for this many seconds.
 */
#define TCP_BG_SWITCH_TIME 2 /* seconds */
278
279 #if (DEVELOPMENT || DEBUG)
280
281 static int tfp_count = 0;
282
283 static TAILQ_HEAD(, tclass_for_proc) tfp_head =
284 TAILQ_HEAD_INITIALIZER(tfp_head);
285
286 struct tclass_for_proc {
287 TAILQ_ENTRY(tclass_for_proc) tfp_link;
288 int tfp_class;
289 pid_t tfp_pid;
290 char tfp_pname[(2 * MAXCOMLEN) + 1];
291 uint32_t tfp_qos_mode;
292 };
293
/* Forward declarations for the traffic class debugging helpers below */
static int	get_pid_tclass(struct so_tcdbg *);
static int	get_pname_tclass(struct so_tcdbg *);
static int	set_pid_tclass(struct so_tcdbg *);
static int	set_pname_tclass(struct so_tcdbg *);
static int	flush_pid_tclass(struct so_tcdbg *);
static int	purge_tclass_for_proc(void);
static int	flush_tclass_for_proc(void);
static void	set_tclass_for_curr_proc(struct socket *);
302
303 /*
304 * Must be called with tclass_lock held
305 */
306 static struct tclass_for_proc *
307 find_tfp_by_pid(pid_t pid)
308 {
309 struct tclass_for_proc *tfp;
310
311 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
312 if (tfp->tfp_pid == pid) {
313 break;
314 }
315 }
316 return tfp;
317 }
318
319 /*
320 * Must be called with tclass_lock held
321 */
322 static struct tclass_for_proc *
323 find_tfp_by_pname(const char *pname)
324 {
325 struct tclass_for_proc *tfp;
326
327 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
328 if (strncmp(pname, tfp->tfp_pname,
329 sizeof(tfp->tfp_pname)) == 0) {
330 break;
331 }
332 }
333 return tfp;
334 }
335
336 __private_extern__ void
337 set_tclass_for_curr_proc(struct socket *so)
338 {
339 struct tclass_for_proc *tfp = NULL;
340 proc_t p = current_proc(); /* Not ref counted */
341 pid_t pid = proc_pid(p);
342 char *pname = proc_best_name(p);
343
344 lck_mtx_lock(tclass_lock);
345
346 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
347 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
348 strncmp(pname, tfp->tfp_pname,
349 sizeof(tfp->tfp_pname)) == 0)) {
350 if (tfp->tfp_class != SO_TC_UNSPEC) {
351 so->so_traffic_class = (uint16_t)tfp->tfp_class;
352 }
353
354 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
355 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
356 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
357 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
358 }
359 break;
360 }
361 }
362
363 lck_mtx_unlock(tclass_lock);
364 }
365
366 /*
367 * Purge entries with PIDs of exited processes
368 */
369 int
370 purge_tclass_for_proc(void)
371 {
372 int error = 0;
373 struct tclass_for_proc *tfp, *tvar;
374
375 lck_mtx_lock(tclass_lock);
376
377 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
378 proc_t p;
379
380 if (tfp->tfp_pid == -1) {
381 continue;
382 }
383 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
384 tfp_count--;
385 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
386
387 _FREE(tfp, M_TEMP);
388 } else {
389 proc_rele(p);
390 }
391 }
392
393 lck_mtx_unlock(tclass_lock);
394
395 return error;
396 }
397
398 /*
399 * Remove one entry
400 * Must be called with tclass_lock held
401 */
402 static void
403 free_tclass_for_proc(struct tclass_for_proc *tfp)
404 {
405 if (tfp == NULL) {
406 return;
407 }
408 tfp_count--;
409 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
410 _FREE(tfp, M_TEMP);
411 }
412
413 /*
414 * Remove all entries
415 */
416 int
417 flush_tclass_for_proc(void)
418 {
419 int error = 0;
420 struct tclass_for_proc *tfp, *tvar;
421
422 lck_mtx_lock(tclass_lock);
423
424 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
425 free_tclass_for_proc(tfp);
426 }
427
428 lck_mtx_unlock(tclass_lock);
429
430 return error;
431 }
432
433 /*
434 * Must be called with tclass_lock held
435 */
436 static struct tclass_for_proc *
437 alloc_tclass_for_proc(pid_t pid, const char *pname)
438 {
439 struct tclass_for_proc *tfp;
440
441 if (pid == -1 && pname == NULL) {
442 return NULL;
443 }
444
445 tfp = _MALLOC(sizeof(struct tclass_for_proc), M_TEMP, M_NOWAIT | M_ZERO);
446 if (tfp == NULL) {
447 return NULL;
448 }
449
450 tfp->tfp_pid = pid;
451 /*
452 * Add per pid entries before per proc name so we can find
453 * a specific instance of a process before the general name base entry.
454 */
455 if (pid != -1) {
456 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
457 } else {
458 strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
459 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
460 }
461
462 tfp_count++;
463
464 return tfp;
465 }
466
467 /*
468 * SO_TC_UNSPEC for tclass means to remove the entry
469 */
470 int
471 set_pid_tclass(struct so_tcdbg *so_tcdbg)
472 {
473 int error = EINVAL;
474 proc_t p = NULL;
475 struct tclass_for_proc *tfp;
476 pid_t pid = so_tcdbg->so_tcdbg_pid;
477 int tclass = so_tcdbg->so_tcdbg_tclass;
478 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
479
480 p = proc_find(pid);
481 if (p == NULL) {
482 printf("%s proc_find(%d) failed\n", __func__, pid);
483 goto done;
484 }
485
486 /* Need a tfp */
487 lck_mtx_lock(tclass_lock);
488
489 tfp = find_tfp_by_pid(pid);
490 if (tfp == NULL) {
491 tfp = alloc_tclass_for_proc(pid, NULL);
492 if (tfp == NULL) {
493 lck_mtx_unlock(tclass_lock);
494 error = ENOBUFS;
495 goto done;
496 }
497 }
498 tfp->tfp_class = tclass;
499 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
500
501 lck_mtx_unlock(tclass_lock);
502
503 if (tfp != NULL) {
504 struct fileproc *fp;
505
506 fdt_foreach(fp, p) {
507 struct socket *so;
508
509 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
510 continue;
511 }
512
513 so = (struct socket *)fp->fp_glob->fg_data;
514 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
515 continue;
516 }
517
518 socket_lock(so, 1);
519 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
520 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
521 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
522 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
523 }
524 socket_unlock(so, 1);
525
526 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
527 error = sock_setsockopt(so, SOL_SOCKET,
528 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
529 }
530 if (tclass != SO_TC_UNSPEC) {
531 error = sock_setsockopt(so, SOL_SOCKET,
532 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
533 }
534 }
535
536 proc_fdunlock(p);
537 }
538
539 error = 0;
540 done:
541 if (p != NULL) {
542 proc_rele(p);
543 }
544
545 return error;
546 }
547
548 int
549 set_pname_tclass(struct so_tcdbg *so_tcdbg)
550 {
551 int error = EINVAL;
552 struct tclass_for_proc *tfp;
553
554 lck_mtx_lock(tclass_lock);
555
556 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
557 if (tfp == NULL) {
558 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
559 if (tfp == NULL) {
560 lck_mtx_unlock(tclass_lock);
561 error = ENOBUFS;
562 goto done;
563 }
564 }
565 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
566 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
567
568 lck_mtx_unlock(tclass_lock);
569
570 error = 0;
571 done:
572
573 return error;
574 }
575
576 static int
577 flush_pid_tclass(struct so_tcdbg *so_tcdbg)
578 {
579 pid_t pid = so_tcdbg->so_tcdbg_pid;
580 int tclass = so_tcdbg->so_tcdbg_tclass;
581 struct fileproc *fp;
582 proc_t p;
583 int error;
584
585 p = proc_find(pid);
586 if (p == PROC_NULL) {
587 printf("%s proc_find(%d) failed\n", __func__, pid);
588 return EINVAL;
589 }
590
591 proc_fdlock(p);
592
593 fdt_foreach(fp, p) {
594 struct socket *so;
595
596 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
597 continue;
598 }
599
600 so = (struct socket *)fp->fp_glob->fg_data;
601 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
602 sizeof(tclass));
603 if (error != 0) {
604 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
605 "tclass=%d) failed %d\n", __func__,
606 (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass,
607 error);
608 }
609 }
610
611 proc_fdunlock(p);
612
613 proc_rele(p);
614 return 0;
615 }
616
617 int
618 get_pid_tclass(struct so_tcdbg *so_tcdbg)
619 {
620 int error = EINVAL;
621 proc_t p = NULL;
622 struct tclass_for_proc *tfp;
623 pid_t pid = so_tcdbg->so_tcdbg_pid;
624
625 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
626
627 p = proc_find(pid);
628 if (p == NULL) {
629 printf("%s proc_find(%d) failed\n", __func__, pid);
630 goto done;
631 }
632
633 /* Need a tfp */
634 lck_mtx_lock(tclass_lock);
635
636 tfp = find_tfp_by_pid(pid);
637 if (tfp != NULL) {
638 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
639 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
640 error = 0;
641 }
642 lck_mtx_unlock(tclass_lock);
643 done:
644 if (p != NULL) {
645 proc_rele(p);
646 }
647
648 return error;
649 }
650
651 int
652 get_pname_tclass(struct so_tcdbg *so_tcdbg)
653 {
654 int error = EINVAL;
655 struct tclass_for_proc *tfp;
656
657 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
658
659 /* Need a tfp */
660 lck_mtx_lock(tclass_lock);
661
662 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
663 if (tfp != NULL) {
664 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
665 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
666 error = 0;
667 }
668 lck_mtx_unlock(tclass_lock);
669
670 return error;
671 }
672
673 static int
674 delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
675 {
676 int error = EINVAL;
677 pid_t pid = so_tcdbg->so_tcdbg_pid;
678 struct tclass_for_proc *tfp = NULL;
679
680 lck_mtx_lock(tclass_lock);
681
682 if (pid != -1) {
683 tfp = find_tfp_by_pid(pid);
684 } else {
685 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
686 }
687
688 if (tfp != NULL) {
689 free_tclass_for_proc(tfp);
690 error = 0;
691 }
692
693 lck_mtx_unlock(tclass_lock);
694
695 return error;
696 }
697
698 /*
699 * Setting options requires privileges
700 */
701 __private_extern__ int
702 so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
703 {
704 int error = 0;
705
706 if ((so->so_state & SS_PRIV) == 0) {
707 return EPERM;
708 }
709
710 socket_unlock(so, 0);
711
712 switch (so_tcdbg->so_tcdbg_cmd) {
713 case SO_TCDBG_PID:
714 error = set_pid_tclass(so_tcdbg);
715 break;
716
717 case SO_TCDBG_PNAME:
718 error = set_pname_tclass(so_tcdbg);
719 break;
720
721 case SO_TCDBG_PURGE:
722 error = purge_tclass_for_proc();
723 break;
724
725 case SO_TCDBG_FLUSH:
726 error = flush_tclass_for_proc();
727 break;
728
729 case SO_TCDBG_DELETE:
730 error = delete_tclass_for_pid_pname(so_tcdbg);
731 break;
732
733 case SO_TCDBG_TCFLUSH_PID:
734 error = flush_pid_tclass(so_tcdbg);
735 break;
736
737 default:
738 error = EINVAL;
739 break;
740 }
741
742 socket_lock(so, 0);
743
744 return error;
745 }
746
747 /*
748 * Not required to be privileged to get
749 */
750 __private_extern__ int
751 sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
752 {
753 int error = 0;
754 struct so_tcdbg so_tcdbg;
755 void *buf = NULL;
756 size_t len = sopt->sopt_valsize;
757
758 error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
759 sizeof(struct so_tcdbg));
760 if (error != 0) {
761 return error;
762 }
763
764 sopt->sopt_valsize = len;
765
766 socket_unlock(so, 0);
767
768 switch (so_tcdbg.so_tcdbg_cmd) {
769 case SO_TCDBG_PID:
770 error = get_pid_tclass(&so_tcdbg);
771 break;
772
773 case SO_TCDBG_PNAME:
774 error = get_pname_tclass(&so_tcdbg);
775 break;
776
777 case SO_TCDBG_COUNT:
778 lck_mtx_lock(tclass_lock);
779 so_tcdbg.so_tcdbg_count = tfp_count;
780 lck_mtx_unlock(tclass_lock);
781 break;
782
783 case SO_TCDBG_LIST: {
784 struct tclass_for_proc *tfp;
785 int n, alloc_count;
786 struct so_tcdbg *ptr;
787
788 lck_mtx_lock(tclass_lock);
789 if ((alloc_count = tfp_count) == 0) {
790 lck_mtx_unlock(tclass_lock);
791 error = EINVAL;
792 break;
793 }
794 len = alloc_count * sizeof(struct so_tcdbg);
795 lck_mtx_unlock(tclass_lock);
796
797 buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
798 if (buf == NULL) {
799 error = ENOBUFS;
800 break;
801 }
802
803 lck_mtx_lock(tclass_lock);
804 n = 0;
805 ptr = (struct so_tcdbg *)buf;
806 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
807 if (++n > alloc_count) {
808 break;
809 }
810 if (tfp->tfp_pid != -1) {
811 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
812 ptr->so_tcdbg_pid = tfp->tfp_pid;
813 } else {
814 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
815 ptr->so_tcdbg_pid = -1;
816 strlcpy(ptr->so_tcdbg_pname,
817 tfp->tfp_pname,
818 sizeof(ptr->so_tcdbg_pname));
819 }
820 ptr->so_tcdbg_tclass = tfp->tfp_class;
821 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
822 ptr++;
823 }
824
825 lck_mtx_unlock(tclass_lock);
826 }
827 break;
828
829 default:
830 error = EINVAL;
831 break;
832 }
833
834 socket_lock(so, 0);
835
836 if (error == 0) {
837 if (buf == NULL) {
838 error = sooptcopyout(sopt, &so_tcdbg,
839 sizeof(struct so_tcdbg));
840 } else {
841 error = sooptcopyout(sopt, buf, len);
842 _FREE(buf, M_TEMP);
843 }
844 }
845 return error;
846 }
847
848 #endif /* (DEVELOPMENT || DEBUG) */
849
850 int
851 so_get_netsvc_marking_level(struct socket *so)
852 {
853 int marking_level = NETSVC_MRKNG_UNKNOWN;
854 struct ifnet *ifp = NULL;
855
856 switch (SOCK_DOM(so)) {
857 case PF_INET: {
858 struct inpcb *inp = sotoinpcb(so);
859
860 if (inp != NULL) {
861 ifp = inp->inp_last_outifp;
862 }
863 break;
864 }
865 case PF_INET6: {
866 struct in6pcb *in6p = sotoin6pcb(so);
867
868 if (in6p != NULL) {
869 ifp = in6p->in6p_last_outifp;
870 }
871 break;
872 }
873 default:
874 break;
875 }
876 if (ifp != NULL) {
877 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
878 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
879 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
880 } else {
881 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
882 }
883 } else {
884 marking_level = NETSVC_MRKNG_LVL_L2;
885 }
886 }
887 return marking_level;
888 }
889
890 __private_extern__ int
891 so_set_traffic_class(struct socket *so, int optval)
892 {
893 int error = 0;
894
895 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
896 error = EINVAL;
897 } else {
898 switch (optval) {
899 case _SO_TC_BK:
900 optval = SO_TC_BK;
901 break;
902 case _SO_TC_VI:
903 optval = SO_TC_VI;
904 break;
905 case _SO_TC_VO:
906 optval = SO_TC_VO;
907 break;
908 default:
909 if (!SO_VALID_TC(optval)) {
910 error = EINVAL;
911 }
912 break;
913 }
914
915 if (error == 0) {
916 int oldval = so->so_traffic_class;
917
918 VERIFY(SO_VALID_TC(optval));
919 so->so_traffic_class = (uint16_t)optval;
920
921 if ((SOCK_DOM(so) == PF_INET ||
922 SOCK_DOM(so) == PF_INET6) &&
923 SOCK_TYPE(so) == SOCK_STREAM) {
924 set_tcp_stream_priority(so);
925 }
926
927 if ((SOCK_DOM(so) == PF_INET ||
928 SOCK_DOM(so) == PF_INET6) &&
929 optval != oldval && (optval == SO_TC_BK_SYS ||
930 oldval == SO_TC_BK_SYS)) {
931 /*
932 * If the app switches from BK_SYS to something
933 * else, resume the socket if it was suspended.
934 */
935 if (oldval == SO_TC_BK_SYS) {
936 inp_reset_fc_state(so->so_pcb);
937 }
938
939 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
940 "[%d,%d] opportunistic %s\n", so->last_pid,
941 (uint64_t)VM_KERNEL_ADDRPERM(so),
942 SOCK_DOM(so), SOCK_TYPE(so),
943 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
944 }
945 }
946 }
947 return error;
948 }
949
950 __private_extern__ int
951 so_set_net_service_type(struct socket *so, int netsvctype)
952 {
953 int sotc;
954 int error;
955
956 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
957 return EINVAL;
958 }
959
960 sotc = sotc_by_netservicetype[netsvctype];
961 error = so_set_traffic_class(so, sotc);
962 if (error != 0) {
963 return error;
964 }
965 so->so_netsvctype = (int8_t)netsvctype;
966 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
967
968 return 0;
969 }
970
971 __private_extern__ void
972 so_set_default_traffic_class(struct socket *so)
973 {
974 so->so_traffic_class = SO_TC_BE;
975
976 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
977 if (net_qos_policy_restricted == 0) {
978 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
979 }
980 #if (DEVELOPMENT || DEBUG)
981 if (tfp_count > 0) {
982 set_tclass_for_curr_proc(so);
983 }
984 #endif /* (DEVELOPMENT || DEBUG) */
985 }
986 }
987
988 __private_extern__ int
989 so_set_opportunistic(struct socket *so, int optval)
990 {
991 return so_set_traffic_class(so, (optval == 0) ?
992 SO_TC_BE : SO_TC_BK_SYS);
993 }
994
995 __private_extern__ int
996 so_get_opportunistic(struct socket *so)
997 {
998 return so->so_traffic_class == SO_TC_BK_SYS;
999 }
1000
1001 __private_extern__ int
1002 so_tc_from_control(struct mbuf *control, int *out_netsvctype)
1003 {
1004 struct cmsghdr *cm;
1005 int sotc = SO_TC_UNSPEC;
1006
1007 *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
1008
1009 for (cm = M_FIRST_CMSGHDR(control);
1010 is_cmsg_valid(control, cm);
1011 cm = M_NXT_CMSGHDR(control, cm)) {
1012 int val;
1013
1014 if (cm->cmsg_level != SOL_SOCKET ||
1015 cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1016 continue;
1017 }
1018 val = *(int *)(void *)CMSG_DATA(cm);
1019 /*
1020 * The first valid option wins
1021 */
1022 switch (cm->cmsg_type) {
1023 case SO_TRAFFIC_CLASS:
1024 if (SO_VALID_TC(val)) {
1025 sotc = val;
1026 return sotc;
1027 /* NOT REACHED */
1028 } else if (val < SO_TC_NET_SERVICE_OFFSET) {
1029 break;
1030 }
1031 /*
1032 * Handle the case SO_NET_SERVICE_TYPE values are
1033 * passed using SO_TRAFFIC_CLASS
1034 */
1035 val = val - SO_TC_NET_SERVICE_OFFSET;
1036 OS_FALLTHROUGH;
1037 case SO_NET_SERVICE_TYPE:
1038 if (!IS_VALID_NET_SERVICE_TYPE(val)) {
1039 break;
1040 }
1041 *out_netsvctype = val;
1042 sotc = sotc_by_netservicetype[val];
1043 return sotc;
1044 /* NOT REACHED */
1045 default:
1046 break;
1047 }
1048 }
1049
1050 return sotc;
1051 }
1052
1053 __private_extern__ int
1054 so_tos_from_control(struct mbuf *control)
1055 {
1056 struct cmsghdr *cm;
1057 int tos = IPTOS_UNSPEC;
1058
1059 for (cm = M_FIRST_CMSGHDR(control);
1060 is_cmsg_valid(control, cm);
1061 cm = M_NXT_CMSGHDR(control, cm)) {
1062 if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1063 continue;
1064 }
1065
1066 if ((cm->cmsg_level == IPPROTO_IP &&
1067 cm->cmsg_type == IP_TOS) ||
1068 (cm->cmsg_level == IPPROTO_IPV6 &&
1069 cm->cmsg_type == IPV6_TCLASS)) {
1070 tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
1071 /* The first valid option wins */
1072 break;
1073 }
1074 }
1075
1076 return tos;
1077 }
1078
1079 __private_extern__ void
1080 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1081 {
1082 uint32_t mtc = m_get_traffic_class(m);
1083
1084 if (mtc >= SO_TC_STATS_MAX) {
1085 mtc = MBUF_TC_BE;
1086 }
1087
1088 so->so_tc_stats[mtc].rxpackets += 1;
1089 so->so_tc_stats[mtc].rxbytes +=
1090 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
1091 }
1092
1093 __private_extern__ void
1094 so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1095 uint32_t mtc)
1096 {
1097 if (mtc >= SO_TC_STATS_MAX) {
1098 mtc = MBUF_TC_BE;
1099 }
1100
1101 so->so_tc_stats[mtc].rxpackets += pkts;
1102 so->so_tc_stats[mtc].rxbytes += bytes;
1103 }
1104
1105 static inline int
1106 so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1107 {
1108 uint32_t uptime = (uint32_t)net_uptime();
1109 return soissrcbesteffort(so) &&
1110 net_io_policy_throttle_best_effort == 1 &&
1111 ifp->if_rt_sendts > 0 &&
1112 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
1113 }
1114
1115 __private_extern__ void
1116 set_tcp_stream_priority(struct socket *so)
1117 {
1118 struct inpcb *inp = sotoinpcb(so);
1119 struct tcpcb *tp = intotcpcb(inp);
1120 struct ifnet *outifp;
1121 u_char old_cc = tp->tcp_cc_index;
1122 int recvbg = IS_TCP_RECV_BG(so);
1123 bool is_local = false, fg_active = false;
1124 uint32_t uptime;
1125
1126 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1127 SOCK_CHECK_DOM(so, PF_INET6)) &&
1128 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1129 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
1130
1131 /* Return if the socket is in a terminal state */
1132 if (inp->inp_state == INPCB_STATE_DEAD) {
1133 return;
1134 }
1135
1136 outifp = inp->inp_last_outifp;
1137 uptime = (uint32_t)net_uptime();
1138
1139 /*
1140 * If the socket was marked as a background socket or if the
1141 * traffic class is set to background with traffic class socket
1142 * option then make both send and recv side of the stream to be
1143 * background. The variable sotcdb which can be set with sysctl
1144 * is used to disable these settings for testing.
1145 */
1146 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
1147 is_local = true;
1148 }
1149
1150 /* Check if there has been recent foreground activity */
1151 if (outifp != NULL) {
1152 /*
1153 * If the traffic source is background, check if
1154 * if it can be switched to foreground. This can
1155 * happen when there is no indication of foreground
1156 * activity.
1157 */
1158 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1159 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
1160 fg_active = true;
1161 }
1162
1163 /*
1164 * The traffic source is best-effort -- check if
1165 * the policy to throttle best effort is enabled
1166 * and there was realtime activity on this
1167 * interface recently. If this is true, enable
1168 * algorithms that respond to increased latency
1169 * on best-effort traffic.
1170 */
1171 if (so_throttle_best_effort(so, outifp)) {
1172 fg_active = true;
1173 }
1174 }
1175
1176 /*
1177 * System initiated background traffic like cloud uploads should
1178 * always use background delay sensitive algorithms. This will
1179 * make the stream more responsive to other streams on the user's
1180 * network and it will minimize latency induced.
1181 */
1182 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1183 /*
1184 * If the interface that the connection is using is
1185 * loopback, do not use background congestion
1186 * control algorithm.
1187 *
1188 * If there has been recent foreground activity or if
1189 * there was an indication that a foreground application
1190 * is going to use networking (net_io_policy_throttled),
1191 * switch the backgroung streams to use background
1192 * congestion control algorithm. Otherwise, even background
1193 * flows can move into foreground.
1194 */
1195 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local ||
1196 !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1197 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1198 tcp_set_foreground_cc(so);
1199 }
1200 } else {
1201 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
1202 tcp_set_background_cc(so);
1203 }
1204 }
1205
1206 /* Set receive side background flags */
1207 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local ||
1208 !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1209 tcp_clear_recv_bg(so);
1210 } else {
1211 tcp_set_recv_bg(so);
1212 }
1213 } else {
1214 tcp_clear_recv_bg(so);
1215 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1216 tcp_set_foreground_cc(so);
1217 }
1218 }
1219
1220 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
1221 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1222 "%s recv\n", so->last_pid,
1223 (uint64_t)VM_KERNEL_ADDRPERM(so),
1224 SOCK_DOM(so), SOCK_TYPE(so),
1225 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1226 "background" : "foreground",
1227 IS_TCP_RECV_BG(so) ? "background" : "foreground");
1228 }
1229 }
1230
1231 /*
1232 * Set traffic class to an IPv4 or IPv6 packet
1233 * - mark the mbuf
1234 * - set the DSCP code following the WMM mapping
1235 */
1236 __private_extern__ void
1237 set_packet_service_class(struct mbuf *m, struct socket *so,
1238 int sotc, uint32_t flags)
1239 {
1240 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
1241 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
1242
1243 if (!(m->m_flags & M_PKTHDR)) {
1244 return;
1245 }
1246
1247 /*
1248 * Here is the precedence:
1249 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1250 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1251 * 3) Traffic class socket option last
1252 */
1253 if (sotc != SO_TC_UNSPEC) {
1254 VERIFY(SO_VALID_TC(sotc));
1255 msc = so_tc2msc(sotc);
1256 /* Assert because tc must have been valid */
1257 VERIFY(MBUF_VALID_SC(msc));
1258 }
1259
1260 /*
1261 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1262 * best effort is set, depress the priority.
1263 */
1264 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
1265 msc = MBUF_SC_BK;
1266 }
1267
1268 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1269 so_throttle_best_effort(so, inp->inp_last_outifp)) {
1270 msc = MBUF_SC_BK;
1271 }
1272
1273 if (soissrcbackground(so)) {
1274 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
1275 }
1276
1277 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
1278 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
1279 }
1280 /*
1281 * Set the traffic class in the mbuf packet header svc field
1282 */
1283 if (sotcdb & SOTCDB_NO_MTC) {
1284 goto no_mbtc;
1285 }
1286
1287 /*
1288 * Elevate service class if the packet is a pure TCP ACK.
1289 * We can do this only when the flow is not a background
1290 * flow and the outgoing interface supports
1291 * transmit-start model.
1292 */
1293 if (!IS_MBUF_SC_BACKGROUND(msc) &&
1294 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
1295 msc = MBUF_SC_CTL;
1296 }
1297
1298 (void) m_set_service_class(m, msc);
1299
1300 /*
1301 * Set the privileged traffic auxiliary flag if applicable,
1302 * or clear it.
1303 */
1304 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1305 msc != MBUF_SC_UNSPEC) {
1306 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
1307 } else {
1308 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
1309 }
1310
1311 no_mbtc:
1312 /*
1313 * For TCP with background traffic class switch CC algo based on sysctl
1314 */
1315 if (so->so_type == SOCK_STREAM) {
1316 set_tcp_stream_priority(so);
1317 }
1318
1319 so_tc_update_stats(m, so, msc);
1320 }
1321
1322 __private_extern__ void
1323 so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1324 {
1325 mbuf_traffic_class_t mtc;
1326
1327 /*
1328 * Assume socket and mbuf traffic class values are the same
1329 * Also assume the socket lock is held. Note that the stats
1330 * at the socket layer are reduced down to the legacy traffic
1331 * classes; we could/should potentially expand so_tc_stats[].
1332 */
1333 mtc = MBUF_SC2TC(msc);
1334 VERIFY(mtc < SO_TC_STATS_MAX);
1335 so->so_tc_stats[mtc].txpackets += 1;
1336 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
1337 }
1338
1339 __private_extern__ void
1340 socket_tclass_init(void)
1341 {
1342 _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);
1343
1344 tclass_lck_grp_attr = lck_grp_attr_alloc_init();
1345 tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
1346 tclass_lck_attr = lck_attr_alloc_init();
1347 lck_mtx_init(tclass_lock, tclass_lck_grp, tclass_lck_attr);
1348 }
1349
1350 __private_extern__ mbuf_svc_class_t
1351 so_tc2msc(int tc)
1352 {
1353 mbuf_svc_class_t msc;
1354
1355 switch (tc) {
1356 case SO_TC_BK_SYS:
1357 msc = MBUF_SC_BK_SYS;
1358 break;
1359 case SO_TC_BK:
1360 case _SO_TC_BK:
1361 msc = MBUF_SC_BK;
1362 break;
1363 case SO_TC_BE:
1364 msc = MBUF_SC_BE;
1365 break;
1366 case SO_TC_RD:
1367 msc = MBUF_SC_RD;
1368 break;
1369 case SO_TC_OAM:
1370 msc = MBUF_SC_OAM;
1371 break;
1372 case SO_TC_AV:
1373 msc = MBUF_SC_AV;
1374 break;
1375 case SO_TC_RV:
1376 msc = MBUF_SC_RV;
1377 break;
1378 case SO_TC_VI:
1379 case _SO_TC_VI:
1380 msc = MBUF_SC_VI;
1381 break;
1382 case SO_TC_NETSVC_SIG:
1383 msc = MBUF_SC_SIG;
1384 break;
1385 case SO_TC_VO:
1386 case _SO_TC_VO:
1387 msc = MBUF_SC_VO;
1388 break;
1389 case SO_TC_CTL:
1390 msc = MBUF_SC_CTL;
1391 break;
1392 case SO_TC_ALL:
1393 default:
1394 msc = MBUF_SC_UNSPEC;
1395 break;
1396 }
1397
1398 return msc;
1399 }
1400
1401 __private_extern__ int
1402 so_svc2tc(mbuf_svc_class_t svc)
1403 {
1404 switch (svc) {
1405 case MBUF_SC_BK_SYS:
1406 return SO_TC_BK_SYS;
1407 case MBUF_SC_BK:
1408 return SO_TC_BK;
1409 case MBUF_SC_BE:
1410 return SO_TC_BE;
1411 case MBUF_SC_RD:
1412 return SO_TC_RD;
1413 case MBUF_SC_OAM:
1414 return SO_TC_OAM;
1415 case MBUF_SC_AV:
1416 return SO_TC_AV;
1417 case MBUF_SC_RV:
1418 return SO_TC_RV;
1419 case MBUF_SC_VI:
1420 return SO_TC_VI;
1421 case MBUF_SC_SIG:
1422 return SO_TC_NETSVC_SIG;
1423 case MBUF_SC_VO:
1424 return SO_TC_VO;
1425 case MBUF_SC_CTL:
1426 return SO_TC_CTL;
1427 case MBUF_SC_UNSPEC:
1428 default:
1429 return SO_TC_BE;
1430 }
1431 }
1432
1433 static size_t
1434 sotc_index(int sotc)
1435 {
1436 switch (sotc) {
1437 case SO_TC_BK_SYS:
1438 return SOTCIX_BK_SYS;
1439 case _SO_TC_BK:
1440 case SO_TC_BK:
1441 return SOTCIX_BK;
1442
1443 case SO_TC_BE:
1444 return SOTCIX_BE;
1445 case SO_TC_RD:
1446 return SOTCIX_RD;
1447 case SO_TC_OAM:
1448 return SOTCIX_OAM;
1449
1450 case SO_TC_AV:
1451 return SOTCIX_AV;
1452 case SO_TC_RV:
1453 return SOTCIX_RV;
1454 case _SO_TC_VI:
1455 case SO_TC_VI:
1456 return SOTCIX_VI;
1457
1458 case _SO_TC_VO:
1459 case SO_TC_VO:
1460 return SOTCIX_VO;
1461 case SO_TC_CTL:
1462 return SOTCIX_CTL;
1463
1464 default:
1465 break;
1466 }
1467 /*
1468 * Unknown traffic class value
1469 */
1470 return SIZE_T_MAX;
1471 }
1472
1473 uint8_t
1474 fastlane_sc_to_dscp(uint32_t svc_class)
1475 {
1476 uint8_t dscp = _DSCP_DF;
1477
1478 switch (svc_class) {
1479 case MBUF_SC_BK_SYS:
1480 case MBUF_SC_BK:
1481 dscp = _DSCP_AF11;
1482 break;
1483
1484 case MBUF_SC_BE:
1485 dscp = _DSCP_DF;
1486 break;
1487 case MBUF_SC_RD:
1488 dscp = _DSCP_AF21;
1489 break;
1490 case MBUF_SC_OAM:
1491 dscp = _DSCP_CS2;
1492 break;
1493
1494 case MBUF_SC_AV:
1495 dscp = _DSCP_AF31;
1496 break;
1497 case MBUF_SC_RV:
1498 dscp = _DSCP_CS4;
1499 break;
1500 case MBUF_SC_VI:
1501 dscp = _DSCP_AF41;
1502 break;
1503 case MBUF_SC_SIG:
1504 dscp = _DSCP_CS3;
1505 break;
1506
1507 case MBUF_SC_VO:
1508 dscp = _DSCP_EF;
1509 break;
1510 case MBUF_SC_CTL:
1511 dscp = _DSCP_DF;
1512 break;
1513 default:
1514 dscp = _DSCP_DF;
1515 break;
1516 }
1517
1518 return dscp;
1519 }
1520
1521 uint8_t
1522 rfc4594_sc_to_dscp(uint32_t svc_class)
1523 {
1524 uint8_t dscp = _DSCP_DF;
1525
1526 switch (svc_class) {
1527 case MBUF_SC_BK_SYS: /* Low-Priority Data */
1528 case MBUF_SC_BK:
1529 dscp = _DSCP_CS1;
1530 break;
1531
1532 case MBUF_SC_BE: /* Standard */
1533 dscp = _DSCP_DF;
1534 break;
1535 case MBUF_SC_RD: /* Low-Latency Data */
1536 dscp = _DSCP_AF21;
1537 break;
1538
1539 /* SVC_CLASS Not Defined: High-Throughput Data */
1540
1541 case MBUF_SC_OAM: /* OAM */
1542 dscp = _DSCP_CS2;
1543 break;
1544
1545 /* SVC_CLASS Not Defined: Broadcast Video */
1546
1547 case MBUF_SC_AV: /* Multimedia Streaming */
1548 dscp = _DSCP_AF31;
1549 break;
1550 case MBUF_SC_RV: /* Real-Time Interactive */
1551 dscp = _DSCP_CS4;
1552 break;
1553 case MBUF_SC_VI: /* Multimedia Conferencing */
1554 dscp = _DSCP_AF41;
1555 break;
1556 case MBUF_SC_SIG: /* Signaling */
1557 dscp = _DSCP_CS5;
1558 break;
1559
1560 case MBUF_SC_VO: /* Telephony */
1561 dscp = _DSCP_EF;
1562 break;
1563 case MBUF_SC_CTL: /* Network Control*/
1564 dscp = _DSCP_CS6;
1565 break;
1566 default:
1567 dscp = _DSCP_DF;
1568 break;
1569 }
1570
1571 return dscp;
1572 }
1573
1574 mbuf_traffic_class_t
1575 rfc4594_dscp_to_tc(uint8_t dscp)
1576 {
1577 mbuf_traffic_class_t tc = MBUF_TC_BE;
1578
1579 switch (dscp) {
1580 case _DSCP_CS1:
1581 tc = MBUF_TC_BK;
1582 break;
1583 case _DSCP_DF:
1584 case _DSCP_AF21:
1585 case _DSCP_CS2:
1586 tc = MBUF_TC_BE;
1587 break;
1588 case _DSCP_AF31:
1589 case _DSCP_CS4:
1590 case _DSCP_AF41:
1591 case _DSCP_CS5:
1592 tc = MBUF_TC_VI;
1593 break;
1594 case _DSCP_EF:
1595 case _DSCP_CS6:
1596 tc = MBUF_TC_VO;
1597 break;
1598 default:
1599 tc = MBUF_TC_BE;
1600 break;
1601 }
1602
1603 return tc;
1604 }
1605
1606 /*
1607 * Pass NULL ifp for default map
1608 */
1609 static errno_t
1610 set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
1611 const struct netsvctype_dscp_map *netsvctype_dscp_map)
1612 {
1613 size_t i;
1614 int netsvctype;
1615
1616 /*
1617 * Do not accept more that max number of distinct DSCPs
1618 */
1619 if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
1620 return EINVAL;
1621 }
1622
1623 /*
1624 * Validate input parameters
1625 */
1626 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1627 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1628 return EINVAL;
1629 }
1630 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1631 return EINVAL;
1632 }
1633 }
1634
1635 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1636 netsvctype = netsvctype_dscp_map[i].netsvctype;
1637
1638 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1639 netsvctype_dscp_map[i].dscp;
1640 }
1641 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1642 switch (netsvctype) {
1643 case NET_SERVICE_TYPE_BE:
1644 case NET_SERVICE_TYPE_BK:
1645 case NET_SERVICE_TYPE_VI:
1646 case NET_SERVICE_TYPE_VO:
1647 case NET_SERVICE_TYPE_RV:
1648 case NET_SERVICE_TYPE_AV:
1649 case NET_SERVICE_TYPE_OAM:
1650 case NET_SERVICE_TYPE_RD: {
1651 size_t sotcix;
1652
1653 sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1654 if (sotcix != SIZE_T_MAX) {
1655 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1656 netsvctype_dscp_map[netsvctype].dscp;
1657 }
1658 break;
1659 }
1660 case NET_SERVICE_TYPE_SIG:
1661 /* Signaling does not have its own traffic class */
1662 break;
1663 default:
1664 /* We should not be here */
1665 ASSERT(0);
1666 }
1667 }
1668 if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) {
1669 /* Network control socket traffic class is always best effort for fastlane*/
1670 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1671 } else {
1672 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6;
1673 }
1674
1675 /* Backround socket traffic class DSCP same as backround system */
1676 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK] =
1677 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS];
1678
1679 return 0;
1680 }
1681
1682 static size_t
1683 get_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map)
1684 {
1685 struct net_qos_dscp_map *net_qos_dscp_map;
1686 int i;
1687
1688 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1689
1690 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1691 netsvctype_dscp_map[i].netsvctype = i;
1692 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1693 }
1694
1695 return i * sizeof(struct netsvctype_dscp_map);
1696 }
1697
1698 void
1699 net_qos_map_init()
1700 {
1701 errno_t error;
1702
1703 error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map,
1704 fastlane_netsvctype_dscp_map);
1705 ASSERT(error == 0);
1706
1707 error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map,
1708 rfc4594_netsvctype_dscp_map);
1709 ASSERT(error == 0);
1710
1711 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1712 }
1713
1714 int
1715 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1716 {
1717 #pragma unused(oidp, arg1, arg2)
1718 int error = 0;
1719
1720 if (req->oldptr == USER_ADDR_NULL) {
1721 req->oldidx =
1722 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1723 } else if (req->oldlen > 0) {
1724 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
1725 size_t len;
1726
1727 len = get_netsvctype_dscp_map(netsvctype_dscp_map);
1728
1729 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1730 MIN(len, req->oldlen));
1731 if (error != 0) {
1732 goto done;
1733 }
1734 }
1735
1736 if (req->newptr != USER_ADDR_NULL) {
1737 error = EPERM;
1738 }
1739 done:
1740 return error;
1741 }
1742
1743 __private_extern__ errno_t
1744 set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1745 int sotc, int netsvctype, uint8_t *dscp_inout)
1746 {
1747 if (ifp == NULL || dscp_inout == NULL) {
1748 return EINVAL;
1749 }
1750
1751 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1752 ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1753 uint8_t dscp;
1754 const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1755
1756 switch (ifp->if_qosmarking_mode) {
1757 case IFRTYPE_QOSMARKING_FASTLANE:
1758 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1759 break;
1760 case IFRTYPE_QOSMARKING_RFC4594:
1761 net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1762 break;
1763 default:
1764 panic("invalid QoS marking type");
1765 /* NOTREACHED */
1766 }
1767
1768 /*
1769 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1770 */
1771 dscp = _DSCP_DF;
1772
1773 /*
1774 * For DSCP use the network service type is specified, otherwise
1775 * use the socket traffic class
1776 *
1777 * When not whitelisted by the policy, set DSCP only for best
1778 * effort and background, and set the mbuf service class to
1779 * best effort as well so the packet will be queued and
1780 * scheduled at a lower priority.
1781 * We still want to prioritize control traffic on the interface
1782 * so we do not change the mbuf service class for SO_TC_CTL
1783 */
1784 if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
1785 netsvctype != NET_SERVICE_TYPE_BE) {
1786 dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
1787
1788 if (qos_allowed == FALSE &&
1789 netsvctype != NET_SERVICE_TYPE_BE &&
1790 netsvctype != NET_SERVICE_TYPE_BK) {
1791 dscp = _DSCP_DF;
1792 if (sotc != SO_TC_CTL) {
1793 m_set_service_class(m, MBUF_SC_BE);
1794 }
1795 }
1796 } else if (sotc != SO_TC_UNSPEC) {
1797 size_t sotcix = sotc_index(sotc);
1798 if (sotcix != SIZE_T_MAX) {
1799 dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
1800
1801 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1802 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1803 sotc != SO_TC_CTL) {
1804 dscp = _DSCP_DF;
1805 if (sotc != SO_TC_CTL) {
1806 m_set_service_class(m, MBUF_SC_BE);
1807 }
1808 }
1809 }
1810 }
1811 if (net_qos_verbose != 0) {
1812 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1813 __func__, qos_allowed, sotc, netsvctype, dscp);
1814 }
1815
1816 if (*dscp_inout != dscp) {
1817 *dscp_inout = dscp;
1818 }
1819 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1820 mbuf_svc_class_t msc = m_get_service_class(m);
1821
1822 /*
1823 * For WiFi infra, when the mbuf service class is best effort
1824 * and the DSCP is not default, set the service class based
1825 * on DSCP
1826 */
1827 if (msc == MBUF_SC_BE) {
1828 msc = wifi_dscp_to_msc_array[*dscp_inout];
1829
1830 if (msc != MBUF_SC_BE) {
1831 m_set_service_class(m, msc);
1832
1833 if (net_qos_verbose != 0) {
1834 printf("%s set msc %u for dscp %u\n",
1835 __func__, msc, *dscp_inout);
1836 }
1837 }
1838 }
1839 }
1840
1841 return 0;
1842 }
1843
1844 static void
1845 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1846 {
1847 int i;
1848
1849 if (clear) {
1850 bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
1851 }
1852
1853 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1854 const struct dcsp_msc_map *elem = map + i;
1855
1856 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
1857 break;
1858 }
1859 switch (elem->msc) {
1860 case MBUF_SC_BK_SYS:
1861 case MBUF_SC_BK:
1862 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1863 break;
1864 default:
1865 case MBUF_SC_BE:
1866 case MBUF_SC_RD:
1867 case MBUF_SC_OAM:
1868 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1869 break;
1870 case MBUF_SC_AV:
1871 case MBUF_SC_RV:
1872 case MBUF_SC_VI:
1873 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1874 break;
1875 case MBUF_SC_VO:
1876 case MBUF_SC_CTL:
1877 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1878 break;
1879 }
1880 }
1881 }
1882
1883 static errno_t
1884 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1885 size_t count, struct dcsp_msc_map *dcsp_msc_map)
1886 {
1887 errno_t error = 0;
1888 uint32_t i;
1889
1890 /*
1891 * Validate input parameters
1892 */
1893 for (i = 0; i < count; i++) {
1894 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1895 error = EINVAL;
1896 goto done;
1897 }
1898 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1899 error = EINVAL;
1900 goto done;
1901 }
1902 }
1903
1904 bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1905
1906 for (i = 0; i < count; i++) {
1907 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1908 dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1909 }
1910 done:
1911 return error;
1912 }
1913
1914 int
1915 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1916 {
1917 #pragma unused(oidp, arg1, arg2)
1918 int error = 0;
1919 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1920 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
1921 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1922 size_t count;
1923
1924 if (req->oldptr == USER_ADDR_NULL) {
1925 req->oldidx = len;
1926 } else if (req->oldlen > 0) {
1927 uint8_t i;
1928
1929 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1930 netsvctype_dscp_map[i].dscp = i;
1931 netsvctype_dscp_map[i].netsvctype =
1932 so_svc2tc(wifi_dscp_to_msc_array[i]);
1933 }
1934 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1935 MIN(len, req->oldlen));
1936 if (error != 0) {
1937 goto done;
1938 }
1939 }
1940
1941 if (req->newptr == USER_ADDR_NULL) {
1942 goto done;
1943 }
1944
1945 error = proc_suser(current_proc());
1946 if (error != 0) {
1947 goto done;
1948 }
1949
1950 /*
1951 * Check input length
1952 */
1953 if (req->newlen > len) {
1954 error = EINVAL;
1955 goto done;
1956 }
1957 /*
1958 * Cap the number of entries to copy from input buffer
1959 */
1960 if (len > req->newlen) {
1961 len = req->newlen;
1962 }
1963 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1964 if (error != 0) {
1965 goto done;
1966 }
1967 count = len / sizeof(struct netsvctype_dscp_map);
1968 bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
1969 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
1970 dcsp_msc_map);
1971 if (error != 0) {
1972 goto done;
1973 }
1974 set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
1975 done:
1976 return error;
1977 }
1978
1979 int
1980 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1981 {
1982 #pragma unused(oidp, arg1, arg2)
1983 int error = 0;
1984 int val = 0;
1985
1986 error = sysctl_handle_int(oidp, &val, 0, req);
1987 if (error || !req->newptr) {
1988 return error;
1989 }
1990
1991 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1992
1993 return 0;
1994 }
1995
1996 /*
1997 * Returns whether a large upload or download transfer should be marked as
1998 * BK service type for network activity. This is a system level
1999 * hint/suggestion to classify application traffic based on statistics
2000 * collected from the current network attachment
2001 *
2002 * Returns 1 for BK and 0 for default
2003 */
2004
2005 int
2006 net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
2007 int *retval)
2008 {
2009 #pragma unused(p)
2010 #define RETURN_USE_BK 1
2011 #define RETURN_USE_DEFAULT 0
2012 struct net_qos_param qos_arg;
2013 struct ifnet *ipv4_primary, *ipv6_primary;
2014 int err = 0;
2015
2016 if (arg->param == USER_ADDR_NULL || retval == NULL ||
2017 arg->param_len != sizeof(qos_arg)) {
2018 return EINVAL;
2019 }
2020 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
2021 if (err != 0) {
2022 return err;
2023 }
2024
2025 *retval = RETURN_USE_DEFAULT;
2026 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
2027 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
2028
2029 /*
2030 * If either of the interfaces is in Low Internet mode, enable
2031 * background delay based algorithms on this transfer
2032 */
2033 if (qos_arg.nq_uplink) {
2034 if ((ipv4_primary != NULL &&
2035 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
2036 (ipv6_primary != NULL &&
2037 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
2038 *retval = RETURN_USE_BK;
2039 return 0;
2040 }
2041 } else {
2042 if ((ipv4_primary != NULL &&
2043 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
2044 (ipv6_primary != NULL &&
2045 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
2046 *retval = RETURN_USE_BK;
2047 return 0;
2048 }
2049 }
2050
2051 /*
2052 * Some times IPv4 and IPv6 primary interfaces can be different.
2053 * In this case, if either of them is non-cellular, we should mark
2054 * the transfer as BK as it can potentially get used based on
2055 * the host name resolution
2056 */
2057 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2058 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2059 if (qos_arg.nq_use_expensive) {
2060 return 0;
2061 } else {
2062 *retval = RETURN_USE_BK;
2063 return 0;
2064 }
2065 }
2066 if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2067 ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2068 if (qos_arg.nq_use_constrained) {
2069 return 0;
2070 } else {
2071 *retval = RETURN_USE_BK;
2072 return 0;
2073 }
2074 }
2075 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2076 *retval = RETURN_USE_BK;
2077 return 0;
2078 }
2079
2080
2081 #undef RETURN_USE_BK
2082 #undef RETURN_USE_DEFAULT
2083 return 0;
2084 }