]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_tclass.c
6c939c5aa5ccc6fdc539ddfd8a7eac17d231ef4b
[apple/xnu.git] / bsd / netinet / in_tclass.c
1 /*
2 * Copyright (c) 2009-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
34 #include <sys/proc.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
40 #include <sys/mbuf.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
44
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
48
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/lro_ext.h>
62 #include <netinet/in_tclass.h>
63
64 struct net_qos_dscp_map {
65 uint8_t sotc_to_dscp[SO_TC_MAX];
66 uint8_t netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];
67 };
68
69 struct dcsp_msc_map {
70 uint8_t dscp;
71 mbuf_svc_class_t msc;
72 };
73 static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
74 static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
75 static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
76 struct dcsp_msc_map *);
77
78 static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
79 static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */
80 static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */
81 decl_lck_mtx_data(static, tclass_lock_data);
82 static lck_mtx_t *tclass_lock = &tclass_lock_data;
83
84 SYSCTL_NODE(_net, OID_AUTO, qos,
85 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");
86
87 static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
88 SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
89 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
90 0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");
91
92 static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
93 SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
94 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
95 0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");
96
97 static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
98 SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
99 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
100 0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");
101
102 int net_qos_verbose = 0;
103 SYSCTL_INT(_net_qos, OID_AUTO, verbose,
104 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
105
106 /*
107 * Fastlane QoS policy:
108 * By Default allow all apps to get traffic class to DSCP mapping
109 */
110 SYSCTL_NODE(_net_qos, OID_AUTO, policy,
111 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
112
113 int net_qos_policy_restricted = 0;
114 SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
115 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");
116
117 int net_qos_policy_restrict_avapps = 0;
118 SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
119 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");
120
121 int net_qos_policy_wifi_enabled = 0;
122 SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
123 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");
124
125 int net_qos_policy_capable_enabled = 0;
126 SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
127 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
128
129 /*
130 * Socket traffic class from network service type
131 */
132 const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
133 SO_TC_BE, /* NET_SERVICE_TYPE_BE */
134 SO_TC_BK_SYS, /* NET_SERVICE_TYPE_BK */
135 SO_TC_VI, /* NET_SERVICE_TYPE_SIG */
136 SO_TC_VI, /* NET_SERVICE_TYPE_VI */
137 SO_TC_VO, /* NET_SERVICE_TYPE_VO */
138 SO_TC_RV, /* NET_SERVICE_TYPE_RV */
139 SO_TC_AV, /* NET_SERVICE_TYPE_AV */
140 SO_TC_OAM, /* NET_SERVICE_TYPE_OAM */
141 SO_TC_RD /* NET_SERVICE_TYPE_RD */
142 };
143
144 /*
145 * DSCP mappings for QoS Fastlane as based on network service types
146 */
147 static const
148 struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
149 { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
150 { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
151 { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
152 { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
153 { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
154 { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
155 { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
156 { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
157 { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
158 };
159
160
161 /*
162 * DSCP mappings for QoS RFC4594 as based on network service types
163 */
164 static const
165 struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
166 { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
167 { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
168 { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
169 { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
170 { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
171 { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
172 { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
173 { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
174 { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
175 };
176
177 static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
178 static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
179
180 /*
181 * The size is one more than the max because DSCP start at zero
182 */
183 #define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
184
185 /*
186 * The DSCP to UP mapping (via mbuf service class) for WiFi follows is the mapping
187 * that implemented at the 802.11 driver level when the mbuf service class is
188 * MBUF_SC_BE.
189 *
190 * This clashes with the recommended mapping documented by the IETF document
191 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
192 * binary compatibility. Applications should use the network service type socket
193 * option instead to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
194 */
195 static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
196 { .dscp = _DSCP_DF, .msc = MBUF_SC_BE }, /* RFC 2474 Standard */
197 { .dscp = 1, .msc = MBUF_SC_BE }, /* */
198 { .dscp = 2, .msc = MBUF_SC_BE }, /* */
199 { .dscp = 3, .msc = MBUF_SC_BE }, /* */
200 { .dscp = 4, .msc = MBUF_SC_BE }, /* */
201 { .dscp = 5, .msc = MBUF_SC_BE }, /* */
202 { .dscp = 6, .msc = MBUF_SC_BE }, /* */
203 { .dscp = 7, .msc = MBUF_SC_BE }, /* */
204
205 { .dscp = _DSCP_CS1, .msc = MBUF_SC_BK }, /* RFC 3662 Low-Priority Data */
206 { .dscp = 9, .msc = MBUF_SC_BK }, /* */
207 { .dscp = _DSCP_AF11, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
208 { .dscp = 11, .msc = MBUF_SC_BK }, /* */
209 { .dscp = _DSCP_AF12, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
210 { .dscp = 13, .msc = MBUF_SC_BK }, /* */
211 { .dscp = _DSCP_AF13, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
212 { .dscp = 15, .msc = MBUF_SC_BK }, /* */
213
214 { .dscp = _DSCP_CS2, .msc = MBUF_SC_BK }, /* RFC 4594 OAM */
215 { .dscp = 17, .msc = MBUF_SC_BK }, /* */
216 { .dscp = _DSCP_AF21, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
217 { .dscp = 19, .msc = MBUF_SC_BK }, /* */
218 { .dscp = _DSCP_AF22, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
219 { .dscp = 21, .msc = MBUF_SC_BK }, /* */
220 { .dscp = _DSCP_AF23, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
221 { .dscp = 23, .msc = MBUF_SC_BK }, /* */
222
223 { .dscp = _DSCP_CS3, .msc = MBUF_SC_BE }, /* RFC 2474 Broadcast Video */
224 { .dscp = 25, .msc = MBUF_SC_BE }, /* */
225 { .dscp = _DSCP_AF31, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
226 { .dscp = 27, .msc = MBUF_SC_BE }, /* */
227 { .dscp = _DSCP_AF32, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
228 { .dscp = 29, .msc = MBUF_SC_BE }, /* */
229 { .dscp = _DSCP_AF33, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
230 { .dscp = 31, .msc = MBUF_SC_BE }, /* */
231
232 { .dscp = _DSCP_CS4, .msc = MBUF_SC_VI }, /* RFC 2474 Real-Time Interactive */
233 { .dscp = 33, .msc = MBUF_SC_VI }, /* */
234 { .dscp = _DSCP_AF41, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
235 { .dscp = 35, .msc = MBUF_SC_VI }, /* */
236 { .dscp = _DSCP_AF42, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
237 { .dscp = 37, .msc = MBUF_SC_VI }, /* */
238 { .dscp = _DSCP_AF43, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
239 { .dscp = 39, .msc = MBUF_SC_VI }, /* */
240
241 { .dscp = _DSCP_CS5, .msc = MBUF_SC_VI }, /* RFC 2474 Signaling */
242 { .dscp = 41, .msc = MBUF_SC_VI }, /* */
243 { .dscp = 42, .msc = MBUF_SC_VI }, /* */
244 { .dscp = 43, .msc = MBUF_SC_VI }, /* */
245 { .dscp = _DSCP_VA, .msc = MBUF_SC_VI }, /* RFC 5865 VOICE-ADMIT */
246 { .dscp = 45, .msc = MBUF_SC_VI }, /* */
247 { .dscp = _DSCP_EF, .msc = MBUF_SC_VI }, /* RFC 3246 Telephony */
248 { .dscp = 47, .msc = MBUF_SC_VI }, /* */
249
250 { .dscp = _DSCP_CS6, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
251 { .dscp = 49, .msc = MBUF_SC_VO }, /* */
252 { .dscp = 50, .msc = MBUF_SC_VO }, /* */
253 { .dscp = 51, .msc = MBUF_SC_VO }, /* */
254 { .dscp = 52, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Sigma */
255 { .dscp = 53, .msc = MBUF_SC_VO }, /* */
256 { .dscp = 54, .msc = MBUF_SC_VO }, /* */
257 { .dscp = 55, .msc = MBUF_SC_VO }, /* */
258
259 { .dscp = _DSCP_CS7, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
260 { .dscp = 57, .msc = MBUF_SC_VO }, /* */
261 { .dscp = 58, .msc = MBUF_SC_VO }, /* */
262 { .dscp = 59, .msc = MBUF_SC_VO }, /* */
263 { .dscp = 60, .msc = MBUF_SC_VO }, /* */
264 { .dscp = 61, .msc = MBUF_SC_VO }, /* */
265 { .dscp = 62, .msc = MBUF_SC_VO }, /* */
266 { .dscp = 63, .msc = MBUF_SC_VO }, /* */
267
268 { .dscp = 255, .msc = MBUF_SC_UNSPEC } /* invalid DSCP to mark last entry */
269 };
270
271 mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
272
273 /*
274 * If there is no foreground activity on the interface for bg_switch_time
275 * seconds, the background connections can switch to foreground TCP
276 * congestion control.
277 */
278 #define TCP_BG_SWITCH_TIME 2 /* seconds */
279
280 #if (DEVELOPMENT || DEBUG)
281
282 static int tfp_count = 0;
283
284 static TAILQ_HEAD(, tclass_for_proc) tfp_head =
285 TAILQ_HEAD_INITIALIZER(tfp_head);
286
287 struct tclass_for_proc {
288 TAILQ_ENTRY(tclass_for_proc) tfp_link;
289 int tfp_class;
290 pid_t tfp_pid;
291 char tfp_pname[(2 * MAXCOMLEN) + 1];
292 uint32_t tfp_qos_mode;
293 };
294
295 static int get_pid_tclass(struct so_tcdbg *);
296 static int get_pname_tclass(struct so_tcdbg *);
297 static int set_pid_tclass(struct so_tcdbg *);
298 static int set_pname_tclass(struct so_tcdbg *);
299 static int flush_pid_tclass(struct so_tcdbg *);
300 static int purge_tclass_for_proc(void);
301 static int flush_tclass_for_proc(void);
302 static void set_tclass_for_curr_proc(struct socket *);
303
304 /*
305 * Must be called with tclass_lock held
306 */
307 static struct tclass_for_proc *
308 find_tfp_by_pid(pid_t pid)
309 {
310 struct tclass_for_proc *tfp;
311
312 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
313 if (tfp->tfp_pid == pid) {
314 break;
315 }
316 }
317 return tfp;
318 }
319
320 /*
321 * Must be called with tclass_lock held
322 */
323 static struct tclass_for_proc *
324 find_tfp_by_pname(const char *pname)
325 {
326 struct tclass_for_proc *tfp;
327
328 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
329 if (strncmp(pname, tfp->tfp_pname,
330 sizeof(tfp->tfp_pname)) == 0) {
331 break;
332 }
333 }
334 return tfp;
335 }
336
337 __private_extern__ void
338 set_tclass_for_curr_proc(struct socket *so)
339 {
340 struct tclass_for_proc *tfp = NULL;
341 proc_t p = current_proc(); /* Not ref counted */
342 pid_t pid = proc_pid(p);
343 char *pname = proc_best_name(p);
344
345 lck_mtx_lock(tclass_lock);
346
347 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
348 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
349 strncmp(pname, tfp->tfp_pname,
350 sizeof(tfp->tfp_pname)) == 0)) {
351 if (tfp->tfp_class != SO_TC_UNSPEC) {
352 so->so_traffic_class = tfp->tfp_class;
353 }
354
355 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
356 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
357 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
358 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
359 }
360 break;
361 }
362 }
363
364 lck_mtx_unlock(tclass_lock);
365 }
366
367 /*
368 * Purge entries with PIDs of exited processes
369 */
370 int
371 purge_tclass_for_proc(void)
372 {
373 int error = 0;
374 struct tclass_for_proc *tfp, *tvar;
375
376 lck_mtx_lock(tclass_lock);
377
378 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
379 proc_t p;
380
381 if (tfp->tfp_pid == -1) {
382 continue;
383 }
384 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
385 tfp_count--;
386 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
387
388 _FREE(tfp, M_TEMP);
389 } else {
390 proc_rele(p);
391 }
392 }
393
394 lck_mtx_unlock(tclass_lock);
395
396 return error;
397 }
398
399 /*
400 * Remove one entry
401 * Must be called with tclass_lock held
402 */
403 static void
404 free_tclass_for_proc(struct tclass_for_proc *tfp)
405 {
406 if (tfp == NULL) {
407 return;
408 }
409 tfp_count--;
410 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
411 _FREE(tfp, M_TEMP);
412 }
413
414 /*
415 * Remove all entries
416 */
417 int
418 flush_tclass_for_proc(void)
419 {
420 int error = 0;
421 struct tclass_for_proc *tfp, *tvar;
422
423 lck_mtx_lock(tclass_lock);
424
425 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
426 free_tclass_for_proc(tfp);
427 }
428
429 lck_mtx_unlock(tclass_lock);
430
431 return error;
432 }
433
434 /*
435 * Must be called with tclass_lock held
436 */
437 static struct tclass_for_proc *
438 alloc_tclass_for_proc(pid_t pid, const char *pname)
439 {
440 struct tclass_for_proc *tfp;
441
442 if (pid == -1 && pname == NULL) {
443 return NULL;
444 }
445
446 tfp = _MALLOC(sizeof(struct tclass_for_proc), M_TEMP, M_NOWAIT | M_ZERO);
447 if (tfp == NULL) {
448 return NULL;
449 }
450
451 tfp->tfp_pid = pid;
452 /*
453 * Add per pid entries before per proc name so we can find
454 * a specific instance of a process before the general name base entry.
455 */
456 if (pid != -1) {
457 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
458 } else {
459 strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
460 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
461 }
462
463 tfp_count++;
464
465 return tfp;
466 }
467
468 /*
469 * SO_TC_UNSPEC for tclass means to remove the entry
470 */
471 int
472 set_pid_tclass(struct so_tcdbg *so_tcdbg)
473 {
474 int error = EINVAL;
475 proc_t p = NULL;
476 struct filedesc *fdp;
477 struct fileproc *fp;
478 struct tclass_for_proc *tfp;
479 int i;
480 pid_t pid = so_tcdbg->so_tcdbg_pid;
481 int tclass = so_tcdbg->so_tcdbg_tclass;
482 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
483
484 p = proc_find(pid);
485 if (p == NULL) {
486 printf("%s proc_find(%d) failed\n", __func__, pid);
487 goto done;
488 }
489
490 /* Need a tfp */
491 lck_mtx_lock(tclass_lock);
492
493 tfp = find_tfp_by_pid(pid);
494 if (tfp == NULL) {
495 tfp = alloc_tclass_for_proc(pid, NULL);
496 if (tfp == NULL) {
497 lck_mtx_unlock(tclass_lock);
498 error = ENOBUFS;
499 goto done;
500 }
501 }
502 tfp->tfp_class = tclass;
503 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
504
505 lck_mtx_unlock(tclass_lock);
506
507 if (tfp != NULL) {
508 proc_fdlock(p);
509
510 fdp = p->p_fd;
511 for (i = 0; i < fdp->fd_nfiles; i++) {
512 struct socket *so;
513
514 fp = fdp->fd_ofiles[i];
515 if (fp == NULL ||
516 (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
517 FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) {
518 continue;
519 }
520
521 so = (struct socket *)fp->f_fglob->fg_data;
522 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
523 continue;
524 }
525
526 socket_lock(so, 1);
527 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
528 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
529 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
530 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
531 }
532 socket_unlock(so, 1);
533
534 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
535 error = sock_setsockopt(so, SOL_SOCKET,
536 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
537 }
538 if (tclass != SO_TC_UNSPEC) {
539 error = sock_setsockopt(so, SOL_SOCKET,
540 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
541 }
542 }
543
544 proc_fdunlock(p);
545 }
546
547 error = 0;
548 done:
549 if (p != NULL) {
550 proc_rele(p);
551 }
552
553 return error;
554 }
555
556 int
557 set_pname_tclass(struct so_tcdbg *so_tcdbg)
558 {
559 int error = EINVAL;
560 struct tclass_for_proc *tfp;
561
562 lck_mtx_lock(tclass_lock);
563
564 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
565 if (tfp == NULL) {
566 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
567 if (tfp == NULL) {
568 lck_mtx_unlock(tclass_lock);
569 error = ENOBUFS;
570 goto done;
571 }
572 }
573 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
574 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
575
576 lck_mtx_unlock(tclass_lock);
577
578 error = 0;
579 done:
580
581 return error;
582 }
583
584 static int
585 flush_pid_tclass(struct so_tcdbg *so_tcdbg)
586 {
587 pid_t pid = so_tcdbg->so_tcdbg_pid;
588 int tclass = so_tcdbg->so_tcdbg_tclass;
589 struct filedesc *fdp;
590 int error = EINVAL;
591 proc_t p;
592 int i;
593
594 p = proc_find(pid);
595 if (p == PROC_NULL) {
596 printf("%s proc_find(%d) failed\n", __func__, pid);
597 goto done;
598 }
599
600 proc_fdlock(p);
601 fdp = p->p_fd;
602 for (i = 0; i < fdp->fd_nfiles; i++) {
603 struct socket *so;
604 struct fileproc *fp;
605
606 fp = fdp->fd_ofiles[i];
607 if (fp == NULL ||
608 (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
609 FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) {
610 continue;
611 }
612
613 so = (struct socket *)fp->f_fglob->fg_data;
614 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
615 sizeof(tclass));
616 if (error != 0) {
617 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
618 "tclass=%d) failed %d\n", __func__,
619 (uint64_t)VM_KERNEL_ADDRPERM(so), i, tclass,
620 error);
621 error = 0;
622 }
623 }
624 proc_fdunlock(p);
625
626 error = 0;
627 done:
628 if (p != PROC_NULL) {
629 proc_rele(p);
630 }
631
632 return error;
633 }
634
635 int
636 get_pid_tclass(struct so_tcdbg *so_tcdbg)
637 {
638 int error = EINVAL;
639 proc_t p = NULL;
640 struct tclass_for_proc *tfp;
641 pid_t pid = so_tcdbg->so_tcdbg_pid;
642
643 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
644
645 p = proc_find(pid);
646 if (p == NULL) {
647 printf("%s proc_find(%d) failed\n", __func__, pid);
648 goto done;
649 }
650
651 /* Need a tfp */
652 lck_mtx_lock(tclass_lock);
653
654 tfp = find_tfp_by_pid(pid);
655 if (tfp != NULL) {
656 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
657 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
658 error = 0;
659 }
660 lck_mtx_unlock(tclass_lock);
661 done:
662 if (p != NULL) {
663 proc_rele(p);
664 }
665
666 return error;
667 }
668
669 int
670 get_pname_tclass(struct so_tcdbg *so_tcdbg)
671 {
672 int error = EINVAL;
673 struct tclass_for_proc *tfp;
674
675 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
676
677 /* Need a tfp */
678 lck_mtx_lock(tclass_lock);
679
680 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
681 if (tfp != NULL) {
682 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
683 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
684 error = 0;
685 }
686 lck_mtx_unlock(tclass_lock);
687
688 return error;
689 }
690
691 static int
692 delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
693 {
694 int error = EINVAL;
695 pid_t pid = so_tcdbg->so_tcdbg_pid;
696 struct tclass_for_proc *tfp = NULL;
697
698 lck_mtx_lock(tclass_lock);
699
700 if (pid != -1) {
701 tfp = find_tfp_by_pid(pid);
702 } else {
703 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
704 }
705
706 if (tfp != NULL) {
707 free_tclass_for_proc(tfp);
708 error = 0;
709 }
710
711 lck_mtx_unlock(tclass_lock);
712
713 return error;
714 }
715
716 /*
717 * Setting options requires privileges
718 */
719 __private_extern__ int
720 so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
721 {
722 int error = 0;
723
724 if ((so->so_state & SS_PRIV) == 0) {
725 return EPERM;
726 }
727
728 socket_unlock(so, 0);
729
730 switch (so_tcdbg->so_tcdbg_cmd) {
731 case SO_TCDBG_PID:
732 error = set_pid_tclass(so_tcdbg);
733 break;
734
735 case SO_TCDBG_PNAME:
736 error = set_pname_tclass(so_tcdbg);
737 break;
738
739 case SO_TCDBG_PURGE:
740 error = purge_tclass_for_proc();
741 break;
742
743 case SO_TCDBG_FLUSH:
744 error = flush_tclass_for_proc();
745 break;
746
747 case SO_TCDBG_DELETE:
748 error = delete_tclass_for_pid_pname(so_tcdbg);
749 break;
750
751 case SO_TCDBG_TCFLUSH_PID:
752 error = flush_pid_tclass(so_tcdbg);
753 break;
754
755 default:
756 error = EINVAL;
757 break;
758 }
759
760 socket_lock(so, 0);
761
762 return error;
763 }
764
765 /*
766 * Not required to be privileged to get
767 */
768 __private_extern__ int
769 sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
770 {
771 int error = 0;
772 struct so_tcdbg so_tcdbg;
773 void *buf = NULL;
774 size_t len = sopt->sopt_valsize;
775
776 error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
777 sizeof(struct so_tcdbg));
778 if (error != 0) {
779 return error;
780 }
781
782 sopt->sopt_valsize = len;
783
784 socket_unlock(so, 0);
785
786 switch (so_tcdbg.so_tcdbg_cmd) {
787 case SO_TCDBG_PID:
788 error = get_pid_tclass(&so_tcdbg);
789 break;
790
791 case SO_TCDBG_PNAME:
792 error = get_pname_tclass(&so_tcdbg);
793 break;
794
795 case SO_TCDBG_COUNT:
796 lck_mtx_lock(tclass_lock);
797 so_tcdbg.so_tcdbg_count = tfp_count;
798 lck_mtx_unlock(tclass_lock);
799 break;
800
801 case SO_TCDBG_LIST: {
802 struct tclass_for_proc *tfp;
803 int n, alloc_count;
804 struct so_tcdbg *ptr;
805
806 lck_mtx_lock(tclass_lock);
807 if ((alloc_count = tfp_count) == 0) {
808 lck_mtx_unlock(tclass_lock);
809 error = EINVAL;
810 break;
811 }
812 len = alloc_count * sizeof(struct so_tcdbg);
813 lck_mtx_unlock(tclass_lock);
814
815 buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
816 if (buf == NULL) {
817 error = ENOBUFS;
818 break;
819 }
820
821 lck_mtx_lock(tclass_lock);
822 n = 0;
823 ptr = (struct so_tcdbg *)buf;
824 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
825 if (++n > alloc_count) {
826 break;
827 }
828 if (tfp->tfp_pid != -1) {
829 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
830 ptr->so_tcdbg_pid = tfp->tfp_pid;
831 } else {
832 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
833 ptr->so_tcdbg_pid = -1;
834 strlcpy(ptr->so_tcdbg_pname,
835 tfp->tfp_pname,
836 sizeof(ptr->so_tcdbg_pname));
837 }
838 ptr->so_tcdbg_tclass = tfp->tfp_class;
839 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
840 ptr++;
841 }
842
843 lck_mtx_unlock(tclass_lock);
844 }
845 break;
846
847 default:
848 error = EINVAL;
849 break;
850 }
851
852 socket_lock(so, 0);
853
854 if (error == 0) {
855 if (buf == NULL) {
856 error = sooptcopyout(sopt, &so_tcdbg,
857 sizeof(struct so_tcdbg));
858 } else {
859 error = sooptcopyout(sopt, buf, len);
860 _FREE(buf, M_TEMP);
861 }
862 }
863 return error;
864 }
865
866 #endif /* (DEVELOPMENT || DEBUG) */
867
868 int
869 so_get_netsvc_marking_level(struct socket *so)
870 {
871 int marking_level = NETSVC_MRKNG_UNKNOWN;
872 struct ifnet *ifp = NULL;
873
874 switch (SOCK_DOM(so)) {
875 case PF_INET: {
876 struct inpcb *inp = sotoinpcb(so);
877
878 if (inp != NULL) {
879 ifp = inp->inp_last_outifp;
880 }
881 break;
882 }
883 case PF_INET6: {
884 struct in6pcb *in6p = sotoin6pcb(so);
885
886 if (in6p != NULL) {
887 ifp = in6p->in6p_last_outifp;
888 }
889 break;
890 }
891 default:
892 break;
893 }
894 if (ifp != NULL) {
895 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
896 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
897 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
898 } else {
899 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
900 }
901 } else {
902 marking_level = NETSVC_MRKNG_LVL_L2;
903 }
904 }
905 return marking_level;
906 }
907
908 __private_extern__ int
909 so_set_traffic_class(struct socket *so, int optval)
910 {
911 int error = 0;
912
913 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
914 error = EINVAL;
915 } else {
916 switch (optval) {
917 case _SO_TC_BK:
918 optval = SO_TC_BK;
919 break;
920 case _SO_TC_VI:
921 optval = SO_TC_VI;
922 break;
923 case _SO_TC_VO:
924 optval = SO_TC_VO;
925 break;
926 default:
927 if (!SO_VALID_TC(optval)) {
928 error = EINVAL;
929 }
930 break;
931 }
932
933 if (error == 0) {
934 int oldval = so->so_traffic_class;
935
936 VERIFY(SO_VALID_TC(optval));
937 so->so_traffic_class = optval;
938
939 if ((SOCK_DOM(so) == PF_INET ||
940 SOCK_DOM(so) == PF_INET6) &&
941 SOCK_TYPE(so) == SOCK_STREAM) {
942 set_tcp_stream_priority(so);
943 }
944
945 if ((SOCK_DOM(so) == PF_INET ||
946 SOCK_DOM(so) == PF_INET6) &&
947 optval != oldval && (optval == SO_TC_BK_SYS ||
948 oldval == SO_TC_BK_SYS)) {
949 /*
950 * If the app switches from BK_SYS to something
951 * else, resume the socket if it was suspended.
952 */
953 if (oldval == SO_TC_BK_SYS) {
954 inp_reset_fc_state(so->so_pcb);
955 }
956
957 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
958 "[%d,%d] opportunistic %s\n", so->last_pid,
959 (uint64_t)VM_KERNEL_ADDRPERM(so),
960 SOCK_DOM(so), SOCK_TYPE(so),
961 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
962 }
963 }
964 }
965 return error;
966 }
967
968 __private_extern__ int
969 so_set_net_service_type(struct socket *so, int netsvctype)
970 {
971 int sotc;
972 int error;
973
974 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
975 return EINVAL;
976 }
977
978 sotc = sotc_by_netservicetype[netsvctype];
979 error = so_set_traffic_class(so, sotc);
980 if (error != 0) {
981 return error;
982 }
983 so->so_netsvctype = netsvctype;
984 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
985
986 return 0;
987 }
988
989 __private_extern__ void
990 so_set_default_traffic_class(struct socket *so)
991 {
992 so->so_traffic_class = SO_TC_BE;
993
994 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
995 if (net_qos_policy_restricted == 0) {
996 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
997 }
998 #if (DEVELOPMENT || DEBUG)
999 if (tfp_count > 0) {
1000 set_tclass_for_curr_proc(so);
1001 }
1002 #endif /* (DEVELOPMENT || DEBUG) */
1003 }
1004 }
1005
1006 __private_extern__ int
1007 so_set_opportunistic(struct socket *so, int optval)
1008 {
1009 return so_set_traffic_class(so, (optval == 0) ?
1010 SO_TC_BE : SO_TC_BK_SYS);
1011 }
1012
1013 __private_extern__ int
1014 so_get_opportunistic(struct socket *so)
1015 {
1016 return so->so_traffic_class == SO_TC_BK_SYS;
1017 }
1018
1019 __private_extern__ int
1020 so_tc_from_control(struct mbuf *control, int *out_netsvctype)
1021 {
1022 struct cmsghdr *cm;
1023 int sotc = SO_TC_UNSPEC;
1024
1025 *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
1026
1027 for (cm = M_FIRST_CMSGHDR(control);
1028 is_cmsg_valid(control, cm);
1029 cm = M_NXT_CMSGHDR(control, cm)) {
1030 int val;
1031
1032 if (cm->cmsg_level != SOL_SOCKET ||
1033 cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1034 continue;
1035 }
1036 val = *(int *)(void *)CMSG_DATA(cm);
1037 /*
1038 * The first valid option wins
1039 */
1040 switch (cm->cmsg_type) {
1041 case SO_TRAFFIC_CLASS:
1042 if (SO_VALID_TC(val)) {
1043 sotc = val;
1044 return sotc;
1045 /* NOT REACHED */
1046 } else if (val < SO_TC_NET_SERVICE_OFFSET) {
1047 break;
1048 }
1049 /*
1050 * Handle the case SO_NET_SERVICE_TYPE values are
1051 * passed using SO_TRAFFIC_CLASS
1052 */
1053 val = val - SO_TC_NET_SERVICE_OFFSET;
1054 /* FALLTHROUGH */
1055 case SO_NET_SERVICE_TYPE:
1056 if (!IS_VALID_NET_SERVICE_TYPE(val)) {
1057 break;
1058 }
1059 *out_netsvctype = val;
1060 sotc = sotc_by_netservicetype[val];
1061 return sotc;
1062 /* NOT REACHED */
1063 default:
1064 break;
1065 }
1066 }
1067
1068 return sotc;
1069 }
1070
1071 __private_extern__ void
1072 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1073 {
1074 uint32_t mtc = m_get_traffic_class(m);
1075
1076 if (mtc >= SO_TC_STATS_MAX) {
1077 mtc = MBUF_TC_BE;
1078 }
1079
1080 so->so_tc_stats[mtc].rxpackets += 1;
1081 so->so_tc_stats[mtc].rxbytes +=
1082 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
1083 }
1084
1085 __private_extern__ void
1086 so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1087 uint32_t mtc)
1088 {
1089 if (mtc >= SO_TC_STATS_MAX) {
1090 mtc = MBUF_TC_BE;
1091 }
1092
1093 so->so_tc_stats[mtc].rxpackets += pkts;
1094 so->so_tc_stats[mtc].rxbytes += bytes;
1095 }
1096
1097 static inline int
1098 so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1099 {
1100 uint32_t uptime = net_uptime();
1101 return soissrcbesteffort(so) &&
1102 net_io_policy_throttle_best_effort == 1 &&
1103 ifp->if_rt_sendts > 0 &&
1104 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
1105 }
1106
1107 __private_extern__ void
1108 set_tcp_stream_priority(struct socket *so)
1109 {
1110 struct inpcb *inp = sotoinpcb(so);
1111 struct tcpcb *tp = intotcpcb(inp);
1112 struct ifnet *outifp;
1113 u_char old_cc = tp->tcp_cc_index;
1114 int recvbg = IS_TCP_RECV_BG(so);
1115 bool is_local = false, fg_active = false;
1116 uint32_t uptime;
1117
1118 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1119 SOCK_CHECK_DOM(so, PF_INET6)) &&
1120 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1121 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
1122
1123 /* Return if the socket is in a terminal state */
1124 if (inp->inp_state == INPCB_STATE_DEAD) {
1125 return;
1126 }
1127
1128 outifp = inp->inp_last_outifp;
1129 uptime = net_uptime();
1130
1131 /*
1132 * If the socket was marked as a background socket or if the
1133 * traffic class is set to background with traffic class socket
1134 * option then make both send and recv side of the stream to be
1135 * background. The variable sotcdb which can be set with sysctl
1136 * is used to disable these settings for testing.
1137 */
1138 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
1139 is_local = true;
1140 }
1141
1142 /* Check if there has been recent foreground activity */
1143 if (outifp != NULL) {
1144 /*
1145 * If the traffic source is background, check if
1146 * if it can be switched to foreground. This can
1147 * happen when there is no indication of foreground
1148 * activity.
1149 */
1150 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1151 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
1152 fg_active = true;
1153 }
1154
1155 /*
1156 * The traffic source is best-effort -- check if
1157 * the policy to throttle best effort is enabled
1158 * and there was realtime activity on this
1159 * interface recently. If this is true, enable
1160 * algorithms that respond to increased latency
1161 * on best-effort traffic.
1162 */
1163 if (so_throttle_best_effort(so, outifp)) {
1164 fg_active = true;
1165 }
1166 }
1167
1168 /*
1169 * System initiated background traffic like cloud uploads should
1170 * always use background delay sensitive algorithms. This will
1171 * make the stream more responsive to other streams on the user's
1172 * network and it will minimize latency induced.
1173 */
1174 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1175 /*
1176 * If the interface that the connection is using is
1177 * loopback, do not use background congestion
1178 * control algorithm.
1179 *
1180 * If there has been recent foreground activity or if
1181 * there was an indication that a foreground application
1182 * is going to use networking (net_io_policy_throttled),
1183 * switch the backgroung streams to use background
1184 * congestion control algorithm. Otherwise, even background
1185 * flows can move into foreground.
1186 */
1187 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local ||
1188 !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1189 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1190 tcp_set_foreground_cc(so);
1191 }
1192 } else {
1193 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
1194 tcp_set_background_cc(so);
1195 }
1196 }
1197
1198 /* Set receive side background flags */
1199 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local ||
1200 !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1201 tcp_clear_recv_bg(so);
1202 } else {
1203 tcp_set_recv_bg(so);
1204 }
1205 } else {
1206 tcp_clear_recv_bg(so);
1207 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1208 tcp_set_foreground_cc(so);
1209 }
1210 }
1211
1212 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
1213 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1214 "%s recv\n", so->last_pid,
1215 (uint64_t)VM_KERNEL_ADDRPERM(so),
1216 SOCK_DOM(so), SOCK_TYPE(so),
1217 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1218 "background" : "foreground",
1219 IS_TCP_RECV_BG(so) ? "background" : "foreground");
1220 }
1221 }
1222
1223 /*
1224 * Set traffic class to an IPv4 or IPv6 packet
1225 * - mark the mbuf
1226 * - set the DSCP code following the WMM mapping
1227 */
1228 __private_extern__ void
1229 set_packet_service_class(struct mbuf *m, struct socket *so,
1230 int sotc, uint32_t flags)
1231 {
1232 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
1233 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
1234
1235 if (!(m->m_flags & M_PKTHDR)) {
1236 return;
1237 }
1238
1239 /*
1240 * Here is the precedence:
1241 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1242 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1243 * 3) Traffic class socket option last
1244 */
1245 if (sotc != SO_TC_UNSPEC) {
1246 VERIFY(SO_VALID_TC(sotc));
1247 msc = so_tc2msc(sotc);
1248 /* Assert because tc must have been valid */
1249 VERIFY(MBUF_VALID_SC(msc));
1250 }
1251
1252 /*
1253 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1254 * best effort is set, depress the priority.
1255 */
1256 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
1257 msc = MBUF_SC_BK;
1258 }
1259
1260 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1261 so_throttle_best_effort(so, inp->inp_last_outifp)) {
1262 msc = MBUF_SC_BK;
1263 }
1264
1265 if (soissrcbackground(so)) {
1266 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
1267 }
1268
1269 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
1270 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
1271 }
1272 /*
1273 * Set the traffic class in the mbuf packet header svc field
1274 */
1275 if (sotcdb & SOTCDB_NO_MTC) {
1276 goto no_mbtc;
1277 }
1278
1279 /*
1280 * Elevate service class if the packet is a pure TCP ACK.
1281 * We can do this only when the flow is not a background
1282 * flow and the outgoing interface supports
1283 * transmit-start model.
1284 */
1285 if (!IS_MBUF_SC_BACKGROUND(msc) &&
1286 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
1287 msc = MBUF_SC_CTL;
1288 }
1289
1290 (void) m_set_service_class(m, msc);
1291
1292 /*
1293 * Set the privileged traffic auxiliary flag if applicable,
1294 * or clear it.
1295 */
1296 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1297 msc != MBUF_SC_UNSPEC) {
1298 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
1299 } else {
1300 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
1301 }
1302
1303 no_mbtc:
1304 /*
1305 * For TCP with background traffic class switch CC algo based on sysctl
1306 */
1307 if (so->so_type == SOCK_STREAM) {
1308 set_tcp_stream_priority(so);
1309 }
1310
1311 so_tc_update_stats(m, so, msc);
1312 }
1313
1314 __private_extern__ void
1315 so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1316 {
1317 mbuf_traffic_class_t mtc;
1318
1319 /*
1320 * Assume socket and mbuf traffic class values are the same
1321 * Also assume the socket lock is held. Note that the stats
1322 * at the socket layer are reduced down to the legacy traffic
1323 * classes; we could/should potentially expand so_tc_stats[].
1324 */
1325 mtc = MBUF_SC2TC(msc);
1326 VERIFY(mtc < SO_TC_STATS_MAX);
1327 so->so_tc_stats[mtc].txpackets += 1;
1328 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
1329 }
1330
1331 __private_extern__ void
1332 socket_tclass_init(void)
1333 {
1334 _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);
1335
1336 tclass_lck_grp_attr = lck_grp_attr_alloc_init();
1337 tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
1338 tclass_lck_attr = lck_attr_alloc_init();
1339 lck_mtx_init(tclass_lock, tclass_lck_grp, tclass_lck_attr);
1340 }
1341
1342 __private_extern__ mbuf_svc_class_t
1343 so_tc2msc(int tc)
1344 {
1345 mbuf_svc_class_t msc;
1346
1347 switch (tc) {
1348 case SO_TC_BK_SYS:
1349 msc = MBUF_SC_BK_SYS;
1350 break;
1351 case SO_TC_BK:
1352 case _SO_TC_BK:
1353 msc = MBUF_SC_BK;
1354 break;
1355 case SO_TC_BE:
1356 msc = MBUF_SC_BE;
1357 break;
1358 case SO_TC_RD:
1359 msc = MBUF_SC_RD;
1360 break;
1361 case SO_TC_OAM:
1362 msc = MBUF_SC_OAM;
1363 break;
1364 case SO_TC_AV:
1365 msc = MBUF_SC_AV;
1366 break;
1367 case SO_TC_RV:
1368 msc = MBUF_SC_RV;
1369 break;
1370 case SO_TC_VI:
1371 case _SO_TC_VI:
1372 msc = MBUF_SC_VI;
1373 break;
1374 case SO_TC_NETSVC_SIG:
1375 msc = MBUF_SC_SIG;
1376 break;
1377 case SO_TC_VO:
1378 case _SO_TC_VO:
1379 msc = MBUF_SC_VO;
1380 break;
1381 case SO_TC_CTL:
1382 msc = MBUF_SC_CTL;
1383 break;
1384 case SO_TC_ALL:
1385 default:
1386 msc = MBUF_SC_UNSPEC;
1387 break;
1388 }
1389
1390 return msc;
1391 }
1392
1393 __private_extern__ int
1394 so_svc2tc(mbuf_svc_class_t svc)
1395 {
1396 switch (svc) {
1397 case MBUF_SC_BK_SYS:
1398 return SO_TC_BK_SYS;
1399 case MBUF_SC_BK:
1400 return SO_TC_BK;
1401 case MBUF_SC_BE:
1402 return SO_TC_BE;
1403 case MBUF_SC_RD:
1404 return SO_TC_RD;
1405 case MBUF_SC_OAM:
1406 return SO_TC_OAM;
1407 case MBUF_SC_AV:
1408 return SO_TC_AV;
1409 case MBUF_SC_RV:
1410 return SO_TC_RV;
1411 case MBUF_SC_VI:
1412 return SO_TC_VI;
1413 case MBUF_SC_SIG:
1414 return SO_TC_NETSVC_SIG;
1415 case MBUF_SC_VO:
1416 return SO_TC_VO;
1417 case MBUF_SC_CTL:
1418 return SO_TC_CTL;
1419 case MBUF_SC_UNSPEC:
1420 default:
1421 return SO_TC_BE;
1422 }
1423 }
1424
1425 /*
1426 * LRO is turned on for AV streaming class.
1427 */
1428 void
1429 so_set_lro(struct socket *so, int optval)
1430 {
1431 if (optval == SO_TC_AV) {
1432 so->so_flags |= SOF_USELRO;
1433 } else {
1434 if (so->so_flags & SOF_USELRO) {
1435 /* transition to non LRO class */
1436 so->so_flags &= ~SOF_USELRO;
1437 struct inpcb *inp = sotoinpcb(so);
1438 struct tcpcb *tp = NULL;
1439 if (inp) {
1440 tp = intotcpcb(inp);
1441 if (tp && (tp->t_flagsext & TF_LRO_OFFLOADED)) {
1442 tcp_lro_remove_state(inp->inp_laddr,
1443 inp->inp_faddr,
1444 inp->inp_lport,
1445 inp->inp_fport);
1446 tp->t_flagsext &= ~TF_LRO_OFFLOADED;
1447 }
1448 }
1449 }
1450 }
1451 }
1452
1453 static size_t
1454 sotc_index(int sotc)
1455 {
1456 switch (sotc) {
1457 case SO_TC_BK_SYS:
1458 return SOTCIX_BK_SYS;
1459 case _SO_TC_BK:
1460 case SO_TC_BK:
1461 return SOTCIX_BK;
1462
1463 case SO_TC_BE:
1464 return SOTCIX_BE;
1465 case SO_TC_RD:
1466 return SOTCIX_RD;
1467 case SO_TC_OAM:
1468 return SOTCIX_OAM;
1469
1470 case SO_TC_AV:
1471 return SOTCIX_AV;
1472 case SO_TC_RV:
1473 return SOTCIX_RV;
1474 case _SO_TC_VI:
1475 case SO_TC_VI:
1476 return SOTCIX_VI;
1477
1478 case _SO_TC_VO:
1479 case SO_TC_VO:
1480 return SOTCIX_VO;
1481 case SO_TC_CTL:
1482 return SOTCIX_CTL;
1483
1484 default:
1485 break;
1486 }
1487 /*
1488 * Unknown traffic class value
1489 */
1490 return SIZE_T_MAX;
1491 }
1492
1493 uint8_t
1494 fastlane_sc_to_dscp(uint32_t svc_class)
1495 {
1496 uint8_t dscp = _DSCP_DF;
1497
1498 switch (svc_class) {
1499 case MBUF_SC_BK_SYS:
1500 case MBUF_SC_BK:
1501 dscp = _DSCP_AF11;
1502 break;
1503
1504 case MBUF_SC_BE:
1505 dscp = _DSCP_DF;
1506 break;
1507 case MBUF_SC_RD:
1508 dscp = _DSCP_AF21;
1509 break;
1510 case MBUF_SC_OAM:
1511 dscp = _DSCP_CS2;
1512 break;
1513
1514 case MBUF_SC_AV:
1515 dscp = _DSCP_AF31;
1516 break;
1517 case MBUF_SC_RV:
1518 dscp = _DSCP_CS4;
1519 break;
1520 case MBUF_SC_VI:
1521 dscp = _DSCP_AF41;
1522 break;
1523 case MBUF_SC_SIG:
1524 dscp = _DSCP_CS3;
1525 break;
1526
1527 case MBUF_SC_VO:
1528 dscp = _DSCP_EF;
1529 break;
1530 case MBUF_SC_CTL:
1531 dscp = _DSCP_DF;
1532 break;
1533 default:
1534 dscp = _DSCP_DF;
1535 break;
1536 }
1537
1538 return dscp;
1539 }
1540
1541 uint8_t
1542 rfc4594_sc_to_dscp(uint32_t svc_class)
1543 {
1544 uint8_t dscp = _DSCP_DF;
1545
1546 switch (svc_class) {
1547 case MBUF_SC_BK_SYS: /* Low-Priority Data */
1548 case MBUF_SC_BK:
1549 dscp = _DSCP_CS1;
1550 break;
1551
1552 case MBUF_SC_BE: /* Standard */
1553 dscp = _DSCP_DF;
1554 break;
1555 case MBUF_SC_RD: /* Low-Latency Data */
1556 dscp = _DSCP_AF21;
1557 break;
1558
1559 /* SVC_CLASS Not Defined: High-Throughput Data */
1560
1561 case MBUF_SC_OAM: /* OAM */
1562 dscp = _DSCP_CS2;
1563 break;
1564
1565 /* SVC_CLASS Not Defined: Broadcast Video */
1566
1567 case MBUF_SC_AV: /* Multimedia Streaming */
1568 dscp = _DSCP_AF31;
1569 break;
1570 case MBUF_SC_RV: /* Real-Time Interactive */
1571 dscp = _DSCP_CS4;
1572 break;
1573 case MBUF_SC_VI: /* Multimedia Conferencing */
1574 dscp = _DSCP_AF41;
1575 break;
1576 case MBUF_SC_SIG: /* Signaling */
1577 dscp = _DSCP_CS5;
1578 break;
1579
1580 case MBUF_SC_VO: /* Telephony */
1581 dscp = _DSCP_EF;
1582 break;
1583 case MBUF_SC_CTL: /* Network Control*/
1584 dscp = _DSCP_CS6;
1585 break;
1586 default:
1587 dscp = _DSCP_DF;
1588 break;
1589 }
1590
1591 return dscp;
1592 }
1593
1594 mbuf_traffic_class_t
1595 rfc4594_dscp_to_tc(uint8_t dscp)
1596 {
1597 mbuf_traffic_class_t tc = MBUF_TC_BE;
1598
1599 switch (dscp) {
1600 case _DSCP_CS1:
1601 tc = MBUF_TC_BK;
1602 break;
1603 case _DSCP_DF:
1604 case _DSCP_AF21:
1605 case _DSCP_CS2:
1606 tc = MBUF_TC_BE;
1607 break;
1608 case _DSCP_AF31:
1609 case _DSCP_CS4:
1610 case _DSCP_AF41:
1611 case _DSCP_CS5:
1612 tc = MBUF_TC_VI;
1613 break;
1614 case _DSCP_EF:
1615 case _DSCP_CS6:
1616 tc = MBUF_TC_VO;
1617 break;
1618 default:
1619 tc = MBUF_TC_BE;
1620 break;
1621 }
1622
1623 return tc;
1624 }
1625
1626 /*
1627 * Pass NULL ifp for default map
1628 */
1629 static errno_t
1630 set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
1631 const struct netsvctype_dscp_map *netsvctype_dscp_map)
1632 {
1633 size_t i;
1634 int netsvctype;
1635
1636 /*
1637 * Do not accept more that max number of distinct DSCPs
1638 */
1639 if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
1640 return EINVAL;
1641 }
1642
1643 /*
1644 * Validate input parameters
1645 */
1646 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1647 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1648 return EINVAL;
1649 }
1650 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1651 return EINVAL;
1652 }
1653 }
1654
1655 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1656 netsvctype = netsvctype_dscp_map[i].netsvctype;
1657
1658 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1659 netsvctype_dscp_map[i].dscp;
1660 }
1661 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1662 switch (netsvctype) {
1663 case NET_SERVICE_TYPE_BE:
1664 case NET_SERVICE_TYPE_BK:
1665 case NET_SERVICE_TYPE_VI:
1666 case NET_SERVICE_TYPE_VO:
1667 case NET_SERVICE_TYPE_RV:
1668 case NET_SERVICE_TYPE_AV:
1669 case NET_SERVICE_TYPE_OAM:
1670 case NET_SERVICE_TYPE_RD: {
1671 size_t sotcix;
1672
1673 sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1674 if (sotcix != SIZE_T_MAX) {
1675 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1676 netsvctype_dscp_map[netsvctype].dscp;
1677 }
1678 break;
1679 }
1680 case NET_SERVICE_TYPE_SIG:
1681 /* Signaling does not have its own traffic class */
1682 break;
1683 default:
1684 /* We should not be here */
1685 ASSERT(0);
1686 }
1687 }
1688 /* Network control socket traffic class is always best effort */
1689 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1690
1691 /* Backround socket traffic class DSCP same as backround system */
1692 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK] =
1693 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS];
1694
1695 return 0;
1696 }
1697
1698 /*
1699 * out_count is an input/ouput parameter
1700 */
1701 static errno_t
1702 get_netsvctype_dscp_map(size_t *out_count,
1703 struct netsvctype_dscp_map *netsvctype_dscp_map)
1704 {
1705 size_t i;
1706 struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1707
1708 /*
1709 * Do not accept more that max number of distinct DSCPs
1710 */
1711 if (out_count == NULL || netsvctype_dscp_map == NULL) {
1712 return EINVAL;
1713 }
1714 if (*out_count > _MAX_DSCP) {
1715 return EINVAL;
1716 }
1717
1718 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1719
1720 for (i = 0; i < MIN(_NET_SERVICE_TYPE_COUNT, *out_count); i++) {
1721 netsvctype_dscp_map[i].netsvctype = i;
1722 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1723 }
1724 *out_count = i;
1725
1726 return 0;
1727 }
1728
1729 void
1730 net_qos_map_init()
1731 {
1732 errno_t error;
1733
1734 error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map,
1735 fastlane_netsvctype_dscp_map);
1736 ASSERT(error == 0);
1737
1738 error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map,
1739 rfc4594_netsvctype_dscp_map);
1740 ASSERT(error == 0);
1741
1742 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1743 }
1744
1745 int
1746 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1747 {
1748 #pragma unused(oidp, arg1, arg2)
1749 int error = 0;
1750 size_t len;
1751 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
1752 size_t count;
1753
1754 if (req->oldptr == USER_ADDR_NULL) {
1755 req->oldidx =
1756 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1757 } else if (req->oldlen > 0) {
1758 count = _NET_SERVICE_TYPE_COUNT;
1759 error = get_netsvctype_dscp_map(&count, netsvctype_dscp_map);
1760 if (error != 0) {
1761 goto done;
1762 }
1763 len = count * sizeof(struct netsvctype_dscp_map);
1764 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1765 MIN(len, req->oldlen));
1766 if (error != 0) {
1767 goto done;
1768 }
1769 }
1770
1771 if (req->newptr != USER_ADDR_NULL) {
1772 error = EPERM;
1773 }
1774 done:
1775 return error;
1776 }
1777
1778 __private_extern__ errno_t
1779 set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1780 int sotc, int netsvctype, uint8_t *dscp_inout)
1781 {
1782 if (ifp == NULL || dscp_inout == NULL) {
1783 return EINVAL;
1784 }
1785
1786 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1787 ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1788 uint8_t dscp;
1789 const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1790
1791 switch (ifp->if_qosmarking_mode) {
1792 case IFRTYPE_QOSMARKING_FASTLANE:
1793 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1794 break;
1795 case IFRTYPE_QOSMARKING_RFC4594:
1796 net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1797 break;
1798 default:
1799 panic("invalid QoS marking type");
1800 /* NOTREACHED */
1801 }
1802
1803 /*
1804 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1805 */
1806 dscp = _DSCP_DF;
1807
1808 /*
1809 * For DSCP use the network service type is specified, otherwise
1810 * use the socket traffic class
1811 *
1812 * When not whitelisted by the policy, set DSCP only for best
1813 * effort and background, and set the mbuf service class to
1814 * best effort as well so the packet will be queued and
1815 * scheduled at a lower priority.
1816 * We still want to prioritize control traffic on the interface
1817 * so we do not change the mbuf service class for SO_TC_CTL
1818 */
1819 if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
1820 netsvctype != NET_SERVICE_TYPE_BE) {
1821 dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
1822
1823 if (qos_allowed == FALSE &&
1824 netsvctype != NET_SERVICE_TYPE_BE &&
1825 netsvctype != NET_SERVICE_TYPE_BK) {
1826 dscp = _DSCP_DF;
1827 if (sotc != SO_TC_CTL) {
1828 m_set_service_class(m, MBUF_SC_BE);
1829 }
1830 }
1831 } else if (sotc != SO_TC_UNSPEC) {
1832 size_t sotcix = sotc_index(sotc);
1833 if (sotcix != SIZE_T_MAX) {
1834 dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
1835
1836 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1837 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1838 sotc != SO_TC_CTL) {
1839 dscp = _DSCP_DF;
1840 if (sotc != SO_TC_CTL) {
1841 m_set_service_class(m, MBUF_SC_BE);
1842 }
1843 }
1844 }
1845 }
1846 if (net_qos_verbose != 0) {
1847 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1848 __func__, qos_allowed, sotc, netsvctype, dscp);
1849 }
1850
1851 if (*dscp_inout != dscp) {
1852 *dscp_inout = dscp;
1853 }
1854 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1855 mbuf_svc_class_t msc = m_get_service_class(m);
1856
1857 /*
1858 * For WiFi infra, when the mbuf service class is best effort
1859 * and the DSCP is not default, set the service class based
1860 * on DSCP
1861 */
1862 if (msc == MBUF_SC_BE) {
1863 msc = wifi_dscp_to_msc_array[*dscp_inout];
1864
1865 if (msc != MBUF_SC_BE) {
1866 m_set_service_class(m, msc);
1867
1868 if (net_qos_verbose != 0) {
1869 printf("%s set msc %u for dscp %u\n",
1870 __func__, msc, *dscp_inout);
1871 }
1872 }
1873 }
1874 }
1875
1876 return 0;
1877 }
1878
1879 static void
1880 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1881 {
1882 int i;
1883
1884 if (clear) {
1885 bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
1886 }
1887
1888 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1889 const struct dcsp_msc_map *elem = map + i;
1890
1891 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
1892 break;
1893 }
1894 switch (elem->msc) {
1895 case MBUF_SC_BK_SYS:
1896 case MBUF_SC_BK:
1897 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1898 break;
1899 default:
1900 case MBUF_SC_BE:
1901 case MBUF_SC_RD:
1902 case MBUF_SC_OAM:
1903 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1904 break;
1905 case MBUF_SC_AV:
1906 case MBUF_SC_RV:
1907 case MBUF_SC_VI:
1908 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1909 break;
1910 case MBUF_SC_VO:
1911 case MBUF_SC_CTL:
1912 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1913 break;
1914 }
1915 }
1916 }
1917
1918 static errno_t
1919 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1920 size_t count, struct dcsp_msc_map *dcsp_msc_map)
1921 {
1922 errno_t error = 0;
1923 uint32_t i;
1924
1925 /*
1926 * Validate input parameters
1927 */
1928 for (i = 0; i < count; i++) {
1929 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1930 error = EINVAL;
1931 goto done;
1932 }
1933 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1934 error = EINVAL;
1935 goto done;
1936 }
1937 }
1938
1939 bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1940
1941 for (i = 0; i < count; i++) {
1942 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1943 dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1944 }
1945 done:
1946 return error;
1947 }
1948
1949 int
1950 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1951 {
1952 #pragma unused(oidp, arg1, arg2)
1953 int error = 0;
1954 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1955 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
1956 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1957 size_t count;
1958 uint32_t i;
1959
1960 if (req->oldptr == USER_ADDR_NULL) {
1961 req->oldidx = len;
1962 } else if (req->oldlen > 0) {
1963 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1964 netsvctype_dscp_map[i].dscp = i;
1965 netsvctype_dscp_map[i].netsvctype =
1966 so_svc2tc(wifi_dscp_to_msc_array[i]);
1967 }
1968 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1969 MIN(len, req->oldlen));
1970 if (error != 0) {
1971 goto done;
1972 }
1973 }
1974
1975 if (req->newptr == USER_ADDR_NULL) {
1976 goto done;
1977 }
1978
1979 error = proc_suser(current_proc());
1980 if (error != 0) {
1981 goto done;
1982 }
1983
1984 /*
1985 * Check input length
1986 */
1987 if (req->newlen > len) {
1988 error = EINVAL;
1989 goto done;
1990 }
1991 /*
1992 * Cap the number of entries to copy from input buffer
1993 */
1994 if (len > req->newlen) {
1995 len = req->newlen;
1996 }
1997 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1998 if (error != 0) {
1999 goto done;
2000 }
2001 count = len / sizeof(struct netsvctype_dscp_map);
2002 bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
2003 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
2004 dcsp_msc_map);
2005 if (error != 0) {
2006 goto done;
2007 }
2008 set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
2009 done:
2010 return error;
2011 }
2012
2013 int
2014 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
2015 {
2016 #pragma unused(oidp, arg1, arg2)
2017 int error = 0;
2018 int val = 0;
2019
2020 error = sysctl_handle_int(oidp, &val, 0, req);
2021 if (error || !req->newptr) {
2022 return error;
2023 }
2024
2025 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
2026
2027 return 0;
2028 }
2029
2030 /*
2031 * Returns whether a large upload or download transfer should be marked as
2032 * BK service type for network activity. This is a system level
2033 * hint/suggestion to classify application traffic based on statistics
2034 * collected from the current network attachment
2035 *
2036 * Returns 1 for BK and 0 for default
2037 */
2038
2039 int
2040 net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
2041 int *retval)
2042 {
2043 #pragma unused(p)
2044 #define RETURN_USE_BK 1
2045 #define RETURN_USE_DEFAULT 0
2046 struct net_qos_param qos_arg;
2047 struct ifnet *ipv4_primary, *ipv6_primary;
2048 int err = 0;
2049
2050 if (arg->param == USER_ADDR_NULL || retval == NULL ||
2051 arg->param_len != sizeof(qos_arg)) {
2052 return EINVAL;
2053 }
2054 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
2055 if (err != 0) {
2056 return err;
2057 }
2058
2059 *retval = RETURN_USE_DEFAULT;
2060 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
2061 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
2062
2063 /*
2064 * If either of the interfaces is in Low Internet mode, enable
2065 * background delay based algorithms on this transfer
2066 */
2067 if (qos_arg.nq_uplink) {
2068 if ((ipv4_primary != NULL &&
2069 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
2070 (ipv6_primary != NULL &&
2071 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
2072 *retval = RETURN_USE_BK;
2073 return 0;
2074 }
2075 } else {
2076 if ((ipv4_primary != NULL &&
2077 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
2078 (ipv6_primary != NULL &&
2079 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
2080 *retval = RETURN_USE_BK;
2081 return 0;
2082 }
2083 }
2084
2085 /*
2086 * Some times IPv4 and IPv6 primary interfaces can be different.
2087 * In this case, if either of them is non-cellular, we should mark
2088 * the transfer as BK as it can potentially get used based on
2089 * the host name resolution
2090 */
2091 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2092 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2093 if (qos_arg.nq_use_expensive) {
2094 return 0;
2095 } else {
2096 *retval = RETURN_USE_BK;
2097 return 0;
2098 }
2099 }
2100 if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2101 ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2102 if (qos_arg.nq_use_constrained) {
2103 return 0;
2104 } else {
2105 *retval = RETURN_USE_BK;
2106 return 0;
2107 }
2108 }
2109 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2110 *retval = RETURN_USE_BK;
2111 return 0;
2112 }
2113
2114
2115 #undef RETURN_USE_BK
2116 #undef RETURN_USE_DEFAULT
2117 return 0;
2118 }