]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_tclass.c
xnu-4570.1.46.tar.gz
[apple/xnu.git] / bsd / netinet / in_tclass.c
CommitLineData
6d2010ae 1/*
5ba3f43e 2 * Copyright (c) 2009-2017 Apple Inc. All rights reserved.
6d2010ae
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/types.h>
32#include <sys/filedesc.h>
33#include <sys/file_internal.h>
34#include <sys/proc.h>
35#include <sys/socket.h>
36#include <sys/socketvar.h>
37#include <sys/errno.h>
38#include <sys/protosw.h>
39#include <sys/domain.h>
40#include <sys/mbuf.h>
41#include <sys/queue.h>
39037602 42#include <sys/sysctl.h>
5ba3f43e 43#include <sys/sysproto.h>
6d2010ae
A
44
45#include <net/if.h>
39037602 46#include <net/if_var.h>
6d2010ae
A
47#include <net/route.h>
48
49#include <netinet/in.h>
50#include <netinet/in_var.h>
51#include <netinet/in_pcb.h>
52#include <netinet/ip.h>
53#include <netinet/ip_var.h>
54#include <netinet/ip6.h>
55#include <netinet6/ip6_var.h>
56#include <netinet/udp.h>
57#include <netinet/udp_var.h>
58#include <netinet/tcp.h>
59#include <netinet/tcp_var.h>
60#include <netinet/tcp_cc.h>
39236c6e 61#include <netinet/lro_ext.h>
39037602 62#include <netinet/in_tclass.h>
6d2010ae 63
39037602
A
64struct dcsp_msc_map {
65 u_int8_t dscp;
66 mbuf_svc_class_t msc;
67};
68static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
69static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
70static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
71 struct dcsp_msc_map *);
72
73static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
74static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */
75static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */
76decl_lck_mtx_data(static, tclass_lock_data);
77static lck_mtx_t *tclass_lock = &tclass_lock_data;
78
79SYSCTL_NODE(_net, OID_AUTO, qos,
80 CTLFLAG_RW|CTLFLAG_LOCKED, 0, "QoS");
81
82static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
83SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
84 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
85 0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");
86
87static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
88SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
89 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
90 0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");
91
92static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
93SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
94 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
95 0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");
96
97int net_qos_verbose = 0;
98SYSCTL_INT(_net_qos, OID_AUTO, verbose,
99 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
100
101/*
102 * Fastlane QoS policy:
103 * By Default allow all apps to get traffic class to DSCP mapping
104 */
105SYSCTL_NODE(_net_qos, OID_AUTO, policy,
106 CTLFLAG_RW|CTLFLAG_LOCKED, 0, "");
107
108int net_qos_policy_restricted = 0;
109SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
110 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");
111
112int net_qos_policy_restrict_avapps = 0;
113SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
114 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");
115
116int net_qos_policy_wifi_enabled = 0;
117SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
118 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");
119
120int net_qos_policy_capable_enabled = 0;
121SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
122 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
123
124/*
125 * Socket traffic class from network service type
126 */
127const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
128 SO_TC_BE, /* NET_SERVICE_TYPE_BE */
129 SO_TC_BK_SYS, /* NET_SERVICE_TYPE_BK */
130 SO_TC_VI, /* NET_SERVICE_TYPE_SIG */
131 SO_TC_VI, /* NET_SERVICE_TYPE_VI */
132 SO_TC_VO, /* NET_SERVICE_TYPE_VO */
133 SO_TC_RV, /* NET_SERVICE_TYPE_RV */
134 SO_TC_AV, /* NET_SERVICE_TYPE_AV */
135 SO_TC_OAM, /* NET_SERVICE_TYPE_OAM */
136 SO_TC_RD /* NET_SERVICE_TYPE_RD */
137};
138
139/*
140 * DSCP mappings for QoS Fastlane as based on network service types
141 */
142static const
143struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
144 { NET_SERVICE_TYPE_BE, _DSCP_DF },
145 { NET_SERVICE_TYPE_BK, _DSCP_AF11 },
146 { NET_SERVICE_TYPE_SIG, _DSCP_CS3 },
147 { NET_SERVICE_TYPE_VI, _DSCP_AF41 },
148 { NET_SERVICE_TYPE_VO, _DSCP_EF },
149 { NET_SERVICE_TYPE_RV, _DSCP_CS4 },
150 { NET_SERVICE_TYPE_AV, _DSCP_AF31 },
151 { NET_SERVICE_TYPE_OAM, _DSCP_CS2 },
152 { NET_SERVICE_TYPE_RD, _DSCP_AF21 },
153};
154
155static struct net_qos_dscp_map default_net_qos_dscp_map;
156
157/*
158 * The size is one more than the max because DSCP start at zero
159 */
160#define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
161
162/*
163 * The DSCP to UP mapping (via mbuf service class) for WiFi follows is the mapping
164 * that implemented at the 802.11 driver level when the mbuf service class is
165 * MBUF_SC_BE.
166 *
167 * This clashes with the recommended mapping documented by the IETF document
168 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
169 * binary compatibility. Applications should use the network service type socket
170 * option instead to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
171 */
172static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
173 { _DSCP_DF, MBUF_SC_BE }, /* RFC 2474 Standard */
174 { 1, MBUF_SC_BE }, /* */
175 { 2, MBUF_SC_BE }, /* */
176 { 3, MBUF_SC_BE }, /* */
177 { 4, MBUF_SC_BE }, /* */
178 { 5, MBUF_SC_BE }, /* */
179 { 6, MBUF_SC_BE }, /* */
180 { 7, MBUF_SC_BE }, /* */
181
182 { _DSCP_CS1, MBUF_SC_BK }, /* RFC 3662 Low-Priority Data */
183 { 9, MBUF_SC_BK }, /* */
184 { _DSCP_AF11, MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
185 { 11, MBUF_SC_BK }, /* */
186 { _DSCP_AF12, MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
187 { 13, MBUF_SC_BK }, /* */
188 { _DSCP_AF13, MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
189 { 15, MBUF_SC_BK }, /* */
190
191 { _DSCP_CS2, MBUF_SC_BK }, /* RFC 4594 OAM */
192 { 17, MBUF_SC_BK }, /* */
193 { _DSCP_AF21, MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
194 { 19, MBUF_SC_BK }, /* */
195 { _DSCP_AF22, MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
196 { 21, MBUF_SC_BK }, /* */
197 { _DSCP_AF23, MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
198 { 23, MBUF_SC_BK }, /* */
199
200 { _DSCP_CS3, MBUF_SC_BE }, /* RFC 2474 Broadcast Video */
201 { 25, MBUF_SC_BE }, /* */
202 { _DSCP_AF31, MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
203 { 27, MBUF_SC_BE }, /* */
204 { _DSCP_AF32, MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
205 { 29, MBUF_SC_BE }, /* */
206 { _DSCP_AF33, MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
207 { 31, MBUF_SC_BE }, /* */
208
209 { _DSCP_CS4, MBUF_SC_VI }, /* RFC 2474 Real-Time Interactive */
210 { 33, MBUF_SC_VI }, /* */
211 { _DSCP_AF41, MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
212 { 35, MBUF_SC_VI }, /* */
213 { _DSCP_AF42, MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
214 { 37, MBUF_SC_VI }, /* */
215 { _DSCP_AF43, MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
216 { 39, MBUF_SC_VI }, /* */
217
218 { _DSCP_CS5, MBUF_SC_VI }, /* RFC 2474 Signaling */
219 { 41, MBUF_SC_VI }, /* */
220 { 42, MBUF_SC_VI }, /* */
221 { 43, MBUF_SC_VI }, /* */
222 { _DSCP_VA, MBUF_SC_VI }, /* RFC 5865 VOICE-ADMIT */
223 { 45, MBUF_SC_VI }, /* */
224 { _DSCP_EF, MBUF_SC_VI }, /* RFC 3246 Telephony */
225 { 47, MBUF_SC_VI }, /* */
226
227 { _DSCP_CS6, MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
228 { 49, MBUF_SC_VO }, /* */
229 { 50, MBUF_SC_VO }, /* */
230 { 51, MBUF_SC_VO }, /* */
231 { 52, MBUF_SC_VO }, /* Wi-Fi WMM Certification: Sigma */
232 { 53, MBUF_SC_VO }, /* */
233 { 54, MBUF_SC_VO }, /* */
234 { 55, MBUF_SC_VO }, /* */
235
236 { _DSCP_CS7, MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
237 { 57, MBUF_SC_VO }, /* */
238 { 58, MBUF_SC_VO }, /* */
239 { 59, MBUF_SC_VO }, /* */
240 { 60, MBUF_SC_VO }, /* */
241 { 61, MBUF_SC_VO }, /* */
242 { 62, MBUF_SC_VO }, /* */
243 { 63, MBUF_SC_VO }, /* */
244
245 { 255, MBUF_SC_UNSPEC } /* invalid DSCP to mark last entry */
246};
247
248mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
249
/*
 * If there has been no foreground activity on the interface for
 * TCP_BG_SWITCH_TIME seconds, background connections can switch to
 * foreground TCP congestion control.
 */
#define	TCP_BG_SWITCH_TIME	2	/* seconds */
256
257#if (DEVELOPMENT || DEBUG)
258
259extern char *proc_best_name(proc_t p);
6d2010ae
A
260
261static int tfp_count = 0;
262
316670eb
A
263static TAILQ_HEAD(, tclass_for_proc) tfp_head =
264 TAILQ_HEAD_INITIALIZER(tfp_head);
6d2010ae
A
265
266struct tclass_for_proc {
267 TAILQ_ENTRY(tclass_for_proc) tfp_link;
39037602
A
268 int tfp_class;
269 pid_t tfp_pid;
270 char tfp_pname[(2 * MAXCOMLEN) + 1];
271 u_int32_t tfp_qos_mode;
6d2010ae
A
272};
273
316670eb
A
/* Handlers for the traffic class debugging commands. */
static int get_pid_tclass(struct so_tcdbg *);
static int get_pname_tclass(struct so_tcdbg *);
static int set_pid_tclass(struct so_tcdbg *);
static int set_pname_tclass(struct so_tcdbg *);
static int flush_pid_tclass(struct so_tcdbg *);
static int purge_tclass_for_proc(void);
static int flush_tclass_for_proc(void);
static void set_tclass_for_curr_proc(struct socket *);
39236c6e 282
6d2010ae
A
283/*
284 * Must be called with tclass_lock held
285 */
286static struct tclass_for_proc *
287find_tfp_by_pid(pid_t pid)
288{
289 struct tclass_for_proc *tfp;
316670eb 290
6d2010ae
A
291 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
292 if (tfp->tfp_pid == pid)
293 break;
294 }
316670eb 295 return (tfp);
6d2010ae
A
296}
297
298/*
299 * Must be called with tclass_lock held
300 */
301static struct tclass_for_proc *
302find_tfp_by_pname(const char *pname)
303{
304 struct tclass_for_proc *tfp;
316670eb 305
6d2010ae 306 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
316670eb
A
307 if (strncmp(pname, tfp->tfp_pname,
308 sizeof (tfp->tfp_pname)) == 0)
6d2010ae
A
309 break;
310 }
316670eb 311 return (tfp);
6d2010ae
A
312}
313
39037602
A
314__private_extern__ void
315set_tclass_for_curr_proc(struct socket *so)
6d2010ae 316{
316670eb 317 struct tclass_for_proc *tfp = NULL;
6d2010ae
A
318 proc_t p = current_proc(); /* Not ref counted */
319 pid_t pid = proc_pid(p);
39037602 320 char *pname = proc_best_name(p);
316670eb 321
6d2010ae 322 lck_mtx_lock(tclass_lock);
316670eb 323
6d2010ae 324 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
316670eb
A
325 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
326 strncmp(pname, tfp->tfp_pname,
327 sizeof (tfp->tfp_pname)) == 0)) {
39037602
A
328 if (tfp->tfp_class != SO_TC_UNSPEC)
329 so->so_traffic_class = tfp->tfp_class;
330
331 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE)
332 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
333 else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE)
334 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
6d2010ae 335 break;
316670eb 336 }
6d2010ae
A
337 }
338
339 lck_mtx_unlock(tclass_lock);
6d2010ae
A
340}
341
342/*
343 * Purge entries with PIDs of exited processes
344 */
345int
346purge_tclass_for_proc(void)
347{
348 int error = 0;
349 struct tclass_for_proc *tfp, *tvar;
350
351 lck_mtx_lock(tclass_lock);
352
353 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
354 proc_t p;
316670eb 355
6d2010ae
A
356 if (tfp->tfp_pid == -1)
357 continue;
358 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
359 tfp_count--;
360 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
316670eb 361
6d2010ae
A
362 _FREE(tfp, M_TEMP);
363 } else {
364 proc_rele(p);
365 }
366 }
367
368 lck_mtx_unlock(tclass_lock);
316670eb
A
369
370 return (error);
6d2010ae
A
371}
372
373/*
374 * Remove one entry
375 * Must be called with tclass_lock held
376 */
377static void
378free_tclass_for_proc(struct tclass_for_proc *tfp)
379{
380 if (tfp == NULL)
381 return;
382 tfp_count--;
383 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
384 _FREE(tfp, M_TEMP);
385}
386
387/*
388 * Remove all entries
389 */
390int
391flush_tclass_for_proc(void)
392{
393 int error = 0;
394 struct tclass_for_proc *tfp, *tvar;
395
396 lck_mtx_lock(tclass_lock);
397
398 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
399 free_tclass_for_proc(tfp);
400 }
316670eb 401
6d2010ae 402 lck_mtx_unlock(tclass_lock);
316670eb
A
403
404 return (error);
6d2010ae
A
405
406}
407
408/*
409 * Must be called with tclass_lock held
410 */
411static struct tclass_for_proc *
316670eb 412alloc_tclass_for_proc(pid_t pid, const char *pname)
6d2010ae
A
413{
414 struct tclass_for_proc *tfp;
316670eb 415
6d2010ae 416 if (pid == -1 && pname == NULL)
316670eb 417 return (NULL);
6d2010ae 418
316670eb 419 tfp = _MALLOC(sizeof (struct tclass_for_proc), M_TEMP, M_NOWAIT|M_ZERO);
6d2010ae 420 if (tfp == NULL)
316670eb
A
421 return (NULL);
422
6d2010ae 423 tfp->tfp_pid = pid;
6d2010ae 424 /*
316670eb 425 * Add per pid entries before per proc name so we can find
6d2010ae
A
426 * a specific instance of a process before the general name base entry.
427 */
428 if (pid != -1) {
429 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
430 } else {
316670eb 431 strlcpy(tfp->tfp_pname, pname, sizeof (tfp->tfp_pname));
6d2010ae
A
432 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
433 }
316670eb 434
6d2010ae
A
435 tfp_count++;
436
316670eb 437 return (tfp);
6d2010ae
A
438}
439
440/*
39037602 441 * SO_TC_UNSPEC for tclass means to remove the entry
6d2010ae 442 */
316670eb
A
443int
444set_pid_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
445{
446 int error = EINVAL;
447 proc_t p = NULL;
448 struct filedesc *fdp;
449 struct fileproc *fp;
450 struct tclass_for_proc *tfp;
451 int i;
316670eb
A
452 pid_t pid = so_tcdbg->so_tcdbg_pid;
453 int tclass = so_tcdbg->so_tcdbg_tclass;
39037602 454 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
6d2010ae
A
455
456 p = proc_find(pid);
457 if (p == NULL) {
316670eb 458 printf("%s proc_find(%d) failed\n", __func__, pid);
6d2010ae
A
459 goto done;
460 }
316670eb 461
6d2010ae
A
462 /* Need a tfp */
463 lck_mtx_lock(tclass_lock);
316670eb 464
6d2010ae 465 tfp = find_tfp_by_pid(pid);
316670eb
A
466 if (tfp == NULL) {
467 tfp = alloc_tclass_for_proc(pid, NULL);
6d2010ae 468 if (tfp == NULL) {
316670eb
A
469 lck_mtx_unlock(tclass_lock);
470 error = ENOBUFS;
471 goto done;
6d2010ae
A
472 }
473 }
316670eb 474 tfp->tfp_class = tclass;
39037602 475 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
316670eb 476
6d2010ae
A
477 lck_mtx_unlock(tclass_lock);
478
479 if (tfp != NULL) {
480 proc_fdlock(p);
316670eb 481
6d2010ae
A
482 fdp = p->p_fd;
483 for (i = 0; i < fdp->fd_nfiles; i++) {
484 struct socket *so;
316670eb 485
6d2010ae 486 fp = fdp->fd_ofiles[i];
316670eb
A
487 if (fp == NULL ||
488 (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
39236c6e 489 FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET)
6d2010ae 490 continue;
316670eb 491
6d2010ae 492 so = (struct socket *)fp->f_fglob->fg_data;
39236c6e 493 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)
6d2010ae 494 continue;
39037602 495
6d2010ae 496 socket_lock(so, 1);
39037602
A
497 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE)
498 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
499 else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE)
500 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
316670eb 501 socket_unlock(so, 1);
39037602
A
502
503 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC)
504 error = sock_setsockopt(so, SOL_SOCKET,
505 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
506 if (tclass != SO_TC_UNSPEC)
507 error = sock_setsockopt(so, SOL_SOCKET,
508 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
509
6d2010ae 510 }
316670eb 511
6d2010ae
A
512 proc_fdunlock(p);
513 }
316670eb
A
514
515 error = 0;
6d2010ae
A
516done:
517 if (p != NULL)
518 proc_rele(p);
316670eb
A
519
520 return (error);
6d2010ae
A
521}
522
316670eb
A
523int
524set_pname_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
525{
526 int error = EINVAL;
527 struct tclass_for_proc *tfp;
528
529 lck_mtx_lock(tclass_lock);
316670eb
A
530
531 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
532 if (tfp == NULL) {
533 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
6d2010ae 534 if (tfp == NULL) {
316670eb
A
535 lck_mtx_unlock(tclass_lock);
536 error = ENOBUFS;
537 goto done;
6d2010ae
A
538 }
539 }
316670eb 540 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
39037602 541 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
316670eb 542
6d2010ae 543 lck_mtx_unlock(tclass_lock);
316670eb
A
544
545 error = 0;
6d2010ae 546done:
316670eb
A
547
548 return (error);
6d2010ae
A
549}
550
316670eb
A
551static int
552flush_pid_tclass(struct so_tcdbg *so_tcdbg)
553{
554 pid_t pid = so_tcdbg->so_tcdbg_pid;
555 int tclass = so_tcdbg->so_tcdbg_tclass;
556 struct filedesc *fdp;
557 int error = EINVAL;
558 proc_t p;
559 int i;
560
561 p = proc_find(pid);
562 if (p == PROC_NULL) {
563 printf("%s proc_find(%d) failed\n", __func__, pid);
564 goto done;
565 }
566
567 proc_fdlock(p);
568 fdp = p->p_fd;
569 for (i = 0; i < fdp->fd_nfiles; i++) {
570 struct socket *so;
571 struct fileproc *fp;
572
573 fp = fdp->fd_ofiles[i];
574 if (fp == NULL ||
575 (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
39236c6e 576 FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET)
316670eb
A
577 continue;
578
579 so = (struct socket *)fp->f_fglob->fg_data;
580 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
581 sizeof (tclass));
582 if (error != 0) {
39236c6e
A
583 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
584 "tclass=%d) failed %d\n", __func__,
585 (uint64_t)VM_KERNEL_ADDRPERM(so), i, tclass,
316670eb
A
586 error);
587 error = 0;
588 }
589 }
590 proc_fdunlock(p);
591
592 error = 0;
593done:
594 if (p != PROC_NULL)
595 proc_rele(p);
596
597 return (error);
598}
599
600int
601get_pid_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
602{
603 int error = EINVAL;
604 proc_t p = NULL;
605 struct tclass_for_proc *tfp;
316670eb
A
606 pid_t pid = so_tcdbg->so_tcdbg_pid;
607
39037602 608 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
6d2010ae
A
609
610 p = proc_find(pid);
611 if (p == NULL) {
316670eb 612 printf("%s proc_find(%d) failed\n", __func__, pid);
6d2010ae
A
613 goto done;
614 }
316670eb 615
6d2010ae
A
616 /* Need a tfp */
617 lck_mtx_lock(tclass_lock);
316670eb 618
6d2010ae
A
619 tfp = find_tfp_by_pid(pid);
620 if (tfp != NULL) {
316670eb 621 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
39037602 622 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
6d2010ae
A
623 error = 0;
624 }
625 lck_mtx_unlock(tclass_lock);
626done:
627 if (p != NULL)
628 proc_rele(p);
316670eb
A
629
630 return (error);
6d2010ae
A
631}
632
316670eb
A
633int
634get_pname_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
635{
636 int error = EINVAL;
637 struct tclass_for_proc *tfp;
316670eb 638
39037602 639 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
6d2010ae
A
640
641 /* Need a tfp */
642 lck_mtx_lock(tclass_lock);
316670eb
A
643
644 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
6d2010ae 645 if (tfp != NULL) {
316670eb 646 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
39037602 647 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
6d2010ae
A
648 error = 0;
649 }
650 lck_mtx_unlock(tclass_lock);
316670eb
A
651
652 return (error);
6d2010ae
A
653}
654
316670eb
A
655static int
656delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
657{
658 int error = EINVAL;
659 pid_t pid = so_tcdbg->so_tcdbg_pid;
660 struct tclass_for_proc *tfp = NULL;
661
662 lck_mtx_lock(tclass_lock);
6d2010ae 663
316670eb
A
664 if (pid != -1)
665 tfp = find_tfp_by_pid(pid);
666 else
667 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
668
669 if (tfp != NULL) {
670 free_tclass_for_proc(tfp);
671 error = 0;
672 }
673
674 lck_mtx_unlock(tclass_lock);
675
676 return (error);
677}
6d2010ae
A
678
679/*
680 * Setting options requires privileges
681 */
316670eb 682__private_extern__ int
6d2010ae
A
683so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
684{
685 int error = 0;
316670eb 686
6d2010ae 687 if ((so->so_state & SS_PRIV) == 0)
316670eb 688 return (EPERM);
6d2010ae
A
689
690 socket_unlock(so, 0);
691
692 switch (so_tcdbg->so_tcdbg_cmd) {
693 case SO_TCDBG_PID:
316670eb 694 error = set_pid_tclass(so_tcdbg);
6d2010ae 695 break;
316670eb 696
6d2010ae 697 case SO_TCDBG_PNAME:
316670eb 698 error = set_pname_tclass(so_tcdbg);
6d2010ae 699 break;
316670eb 700
6d2010ae
A
701 case SO_TCDBG_PURGE:
702 error = purge_tclass_for_proc();
703 break;
316670eb 704
6d2010ae
A
705 case SO_TCDBG_FLUSH:
706 error = flush_tclass_for_proc();
707 break;
316670eb
A
708
709 case SO_TCDBG_DELETE:
710 error = delete_tclass_for_pid_pname(so_tcdbg);
711 break;
712
713 case SO_TCDBG_TCFLUSH_PID:
714 error = flush_pid_tclass(so_tcdbg);
715 break;
716
6d2010ae
A
717 default:
718 error = EINVAL;
719 break;
6d2010ae
A
720 }
721
722 socket_lock(so, 0);
723
316670eb 724 return (error);
6d2010ae
A
725}
726
727/*
728 * Not required to be privileged to get
729 */
316670eb 730__private_extern__ int
6d2010ae
A
731sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
732{
733 int error = 0;
734 struct so_tcdbg so_tcdbg;
735 void *buf = NULL;
736 size_t len = sopt->sopt_valsize;
737
316670eb
A
738 error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg),
739 sizeof (struct so_tcdbg));
6d2010ae 740 if (error != 0)
316670eb
A
741 return (error);
742
6d2010ae 743 sopt->sopt_valsize = len;
316670eb 744
6d2010ae
A
745 socket_unlock(so, 0);
746
747 switch (so_tcdbg.so_tcdbg_cmd) {
748 case SO_TCDBG_PID:
316670eb 749 error = get_pid_tclass(&so_tcdbg);
6d2010ae 750 break;
316670eb 751
6d2010ae 752 case SO_TCDBG_PNAME:
316670eb 753 error = get_pname_tclass(&so_tcdbg);
6d2010ae 754 break;
316670eb 755
6d2010ae
A
756 case SO_TCDBG_COUNT:
757 lck_mtx_lock(tclass_lock);
758 so_tcdbg.so_tcdbg_count = tfp_count;
759 lck_mtx_unlock(tclass_lock);
760 break;
761
762 case SO_TCDBG_LIST: {
763 struct tclass_for_proc *tfp;
764 int n, alloc_count;
765 struct so_tcdbg *ptr;
766
767 lck_mtx_lock(tclass_lock);
768 if ((alloc_count = tfp_count) == 0) {
769 lck_mtx_unlock(tclass_lock);
770 error = EINVAL;
771 break;
772 }
316670eb 773 len = alloc_count * sizeof (struct so_tcdbg);
6d2010ae
A
774 lck_mtx_unlock(tclass_lock);
775
776 buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
777 if (buf == NULL) {
778 error = ENOBUFS;
779 break;
780 }
781
782 lck_mtx_lock(tclass_lock);
783 n = 0;
784 ptr = (struct so_tcdbg *)buf;
785 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
786 if (++n > alloc_count)
787 break;
788 if (tfp->tfp_pid != -1) {
789 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
790 ptr->so_tcdbg_pid = tfp->tfp_pid;
791 } else {
792 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
793 ptr->so_tcdbg_pid = -1;
316670eb
A
794 strlcpy(ptr->so_tcdbg_pname,
795 tfp->tfp_pname,
796 sizeof (ptr->so_tcdbg_pname));
6d2010ae
A
797 }
798 ptr->so_tcdbg_tclass = tfp->tfp_class;
39037602 799 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
6d2010ae
A
800 ptr++;
801 }
316670eb 802
6d2010ae
A
803 lck_mtx_unlock(tclass_lock);
804 }
805 break;
316670eb 806
6d2010ae
A
807 default:
808 error = EINVAL;
809 break;
6d2010ae
A
810 }
811
812 socket_lock(so, 0);
813
814 if (error == 0) {
815 if (buf == NULL) {
316670eb
A
816 error = sooptcopyout(sopt, &so_tcdbg,
817 sizeof (struct so_tcdbg));
6d2010ae
A
818 } else {
819 error = sooptcopyout(sopt, buf, len);
820 _FREE(buf, M_TEMP);
821 }
822 }
316670eb 823 return (error);
6d2010ae
A
824}
825
39037602
A
826#endif /* (DEVELOPMENT || DEBUG) */
827
828int
829so_get_netsvc_marking_level(struct socket *so)
830{
831 int marking_level = NETSVC_MRKNG_UNKNOWN;
832 struct ifnet *ifp = NULL;
833
834 switch (SOCK_DOM(so)) {
835 case PF_INET: {
836 struct inpcb *inp = sotoinpcb(so);
837
838 if (inp != NULL)
839 ifp = inp->inp_last_outifp;
840 break;
841 }
842 case PF_INET6: {
843 struct in6pcb *in6p = sotoin6pcb(so);
844
845 if (in6p != NULL)
846 ifp = in6p->in6p_last_outifp;
847 break;
848 }
849 default:
850 break;
851 }
852 if (ifp != NULL) {
853 if ((ifp->if_eflags &
854 (IFEF_QOSMARKING_ENABLED | IFEF_QOSMARKING_CAPABLE)) ==
855 (IFEF_QOSMARKING_ENABLED | IFEF_QOSMARKING_CAPABLE)) {
856 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED))
857 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
858 else
859 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
860 } else {
861 marking_level = NETSVC_MRKNG_LVL_L2;
862 }
863 }
864 return (marking_level);
865}
6d2010ae
A
866
867__private_extern__ int
868so_set_traffic_class(struct socket *so, int optval)
869{
870 int error = 0;
316670eb
A
871
872 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
6d2010ae
A
873 error = EINVAL;
874 } else {
316670eb
A
875 switch (optval) {
876 case _SO_TC_BK:
877 optval = SO_TC_BK;
878 break;
879 case _SO_TC_VI:
880 optval = SO_TC_VI;
881 break;
882 case _SO_TC_VO:
883 optval = SO_TC_VO;
884 break;
885 default:
886 if (!SO_VALID_TC(optval))
887 error = EINVAL;
888 break;
889 }
890
891 if (error == 0) {
892 int oldval = so->so_traffic_class;
893
894 VERIFY(SO_VALID_TC(optval));
895 so->so_traffic_class = optval;
896
39236c6e
A
897 if ((SOCK_DOM(so) == PF_INET ||
898 SOCK_DOM(so) == PF_INET6) &&
899 SOCK_TYPE(so) == SOCK_STREAM)
316670eb
A
900 set_tcp_stream_priority(so);
901
39236c6e
A
902 if ((SOCK_DOM(so) == PF_INET ||
903 SOCK_DOM(so) == PF_INET6) &&
316670eb
A
904 optval != oldval && (optval == SO_TC_BK_SYS ||
905 oldval == SO_TC_BK_SYS)) {
906 /*
907 * If the app switches from BK_SYS to something
908 * else, resume the socket if it was suspended.
909 */
910 if (oldval == SO_TC_BK_SYS)
911 inp_reset_fc_state(so->so_pcb);
912
39037602 913 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
39236c6e
A
914 "[%d,%d] opportunistic %s\n", so->last_pid,
915 (uint64_t)VM_KERNEL_ADDRPERM(so),
916 SOCK_DOM(so), SOCK_TYPE(so),
39037602 917 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
316670eb 918 }
6d2010ae
A
919 }
920 }
316670eb 921 return (error);
6d2010ae
A
922}
923
39037602
A
924__private_extern__ int
925so_set_net_service_type(struct socket *so, int netsvctype)
926{
927 int sotc;
928 int error;
929
930 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype))
931 return (EINVAL);
932
933 sotc = sotc_by_netservicetype[netsvctype];
934 error = so_set_traffic_class(so, sotc);
935 if (error != 0)
936 return (error);
937 so->so_netsvctype = netsvctype;
938 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
939
940 return (0);
941}
942
6d2010ae
A
943__private_extern__ void
944so_set_default_traffic_class(struct socket *so)
945{
39037602
A
946 so->so_traffic_class = SO_TC_BE;
947
948 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
949 if (net_qos_policy_restricted == 0)
950 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
951#if (DEVELOPMENT || DEBUG)
952 if (tfp_count > 0)
953 set_tclass_for_curr_proc(so);
954#endif /* (DEVELOPMENT || DEBUG) */
6d2010ae 955 }
6d2010ae
A
956}
957
316670eb
A
958__private_extern__ int
959so_set_opportunistic(struct socket *so, int optval)
960{
961 return (so_set_traffic_class(so, (optval == 0) ?
962 SO_TC_BE : SO_TC_BK_SYS));
963}
6d2010ae
A
964
965__private_extern__ int
316670eb
A
966so_get_opportunistic(struct socket *so)
967{
968 return (so->so_traffic_class == SO_TC_BK_SYS);
969}
970
39037602
A
971__private_extern__ int
972so_tc_from_control(struct mbuf *control, int *out_netsvctype)
6d2010ae
A
973{
974 struct cmsghdr *cm;
39037602
A
975 int sotc = SO_TC_UNSPEC;
976
977 *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
316670eb
A
978
979 for (cm = M_FIRST_CMSGHDR(control); cm != NULL;
980 cm = M_NXT_CMSGHDR(control, cm)) {
39037602 981 int val;
6d2010ae 982
316670eb 983 if (cm->cmsg_len < sizeof (struct cmsghdr))
6d2010ae 984 break;
6d2010ae 985 if (cm->cmsg_level != SOL_SOCKET ||
39037602
A
986 cm->cmsg_len != CMSG_LEN(sizeof(int)))
987 continue;
988 val = *(int *)(void *)CMSG_DATA(cm);
989 /*
990 * The first valid option wins
991 */
992 switch (cm->cmsg_type) {
993 case SO_TRAFFIC_CLASS:
994 if (SO_VALID_TC(val)) {
995 sotc = val;
996 return (sotc);
997 /* NOT REACHED */
998 } else if (val < SO_TC_NET_SERVICE_OFFSET) {
999 break;
1000 }
1001 /*
1002 * Handle the case SO_NET_SERVICE_TYPE values are
1003 * passed using SO_TRAFFIC_CLASS
1004 */
1005 val = val - SO_TC_NET_SERVICE_OFFSET;
1006 /* FALLTHROUGH */
1007 case SO_NET_SERVICE_TYPE:
1008 if (!IS_VALID_NET_SERVICE_TYPE(val))
1009 break;
1010 *out_netsvctype = val;
1011 sotc = sotc_by_netservicetype[val];
1012 return (sotc);
1013 /* NOT REACHED */
1014 default:
1015 break;
1016 }
6d2010ae 1017 }
316670eb 1018
39037602 1019 return (sotc);
6d2010ae
A
1020}
1021
1022__private_extern__ void
1023so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1024{
39037602 1025 uint32_t mtc = m_get_traffic_class(m);
6d2010ae 1026
39037602
A
1027 if (mtc >= SO_TC_STATS_MAX)
1028 mtc = MBUF_TC_BE;
6d2010ae 1029
39037602
A
1030 so->so_tc_stats[mtc].rxpackets += 1;
1031 so->so_tc_stats[mtc].rxbytes +=
316670eb 1032 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
6d2010ae
A
1033}
1034
fe8ab488 1035__private_extern__ void
39037602
A
1036so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1037 uint32_t mtc)
fe8ab488 1038{
39037602
A
1039 if (mtc >= SO_TC_STATS_MAX)
1040 mtc = MBUF_TC_BE;
fe8ab488 1041
39037602
A
1042 so->so_tc_stats[mtc].rxpackets += pkts;
1043 so->so_tc_stats[mtc].rxbytes += bytes;
fe8ab488 1044}
3e170ce0
A
1045
1046static inline int
1047so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1048{
1049 u_int32_t uptime = net_uptime();
1050 return (soissrcbesteffort(so) &&
1051 net_io_policy_throttle_best_effort == 1 &&
1052 ifp->if_rt_sendts > 0 &&
1053 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME);
1054}
39037602 1055
6d2010ae
A
1056__private_extern__ void
1057set_tcp_stream_priority(struct socket *so)
1058{
39236c6e
A
1059 struct inpcb *inp = sotoinpcb(so);
1060 struct tcpcb *tp = intotcpcb(inp);
1061 struct ifnet *outifp;
1062 u_char old_cc = tp->tcp_cc_index;
316670eb 1063 int recvbg = IS_TCP_RECV_BG(so);
3e170ce0 1064 bool is_local = false, fg_active = false;
39236c6e
A
1065 u_int32_t uptime;
1066
39037602
A
1067 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1068 SOCK_CHECK_DOM(so, PF_INET6)) &&
1069 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1070 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
fe8ab488 1071
39037602 1072 /* Return if the socket is in a terminal state */
fe8ab488
A
1073 if (inp->inp_state == INPCB_STATE_DEAD)
1074 return;
1075
39236c6e
A
1076 outifp = inp->inp_last_outifp;
1077 uptime = net_uptime();
6d2010ae 1078
316670eb
A
1079 /*
1080 * If the socket was marked as a background socket or if the
1081 * traffic class is set to background with traffic class socket
1082 * option then make both send and recv side of the stream to be
1083 * background. The variable sotcdb which can be set with sysctl
6d2010ae
A
1084 * is used to disable these settings for testing.
1085 */
3e170ce0
A
1086 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK))
1087 is_local = true;
1088
1089 /* Check if there has been recent foreground activity */
1090 if (outifp != NULL) {
1091 /*
1092 * If the traffic source is background, check if
39037602 1093 * if it can be switched to foreground. This can
3e170ce0
A
1094 * happen when there is no indication of foreground
1095 * activity.
1096 */
5ba3f43e
A
1097 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1098 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME)
39236c6e
A
1099 fg_active = true;
1100
3e170ce0
A
1101 /*
1102 * The traffic source is best-effort -- check if
1103 * the policy to throttle best effort is enabled
1104 * and there was realtime activity on this
1105 * interface recently. If this is true, enable
1106 * algorithms that respond to increased latency
1107 * on best-effort traffic.
39037602 1108 */
3e170ce0
A
1109 if (so_throttle_best_effort(so, outifp))
1110 fg_active = true;
1111 }
1112
1113 /*
1114 * System initiated background traffic like cloud uploads should
1115 * always use background delay sensitive algorithms. This will
1116 * make the stream more responsive to other streams on the user's
1117 * network and it will minimize latency induced.
1118 */
1119 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
39236c6e
A
1120 /*
1121 * If the interface that the connection is using is
1122 * loopback, do not use background congestion
1123 * control algorithm.
1124 *
39037602
A
1125 * If there has been recent foreground activity or if
1126 * there was an indication that a foreground application
39236c6e 1127 * is going to use networking (net_io_policy_throttled),
39037602 1128 * switch the backgroung streams to use background
39236c6e
A
1129 * congestion control algorithm. Otherwise, even background
1130 * flows can move into foreground.
1131 */
3e170ce0
A
1132 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local ||
1133 !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
316670eb 1134 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
6d2010ae
A
1135 tcp_set_foreground_cc(so);
1136 } else {
316670eb 1137 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX)
6d2010ae
A
1138 tcp_set_background_cc(so);
1139 }
316670eb 1140
6d2010ae 1141 /* Set receive side background flags */
3e170ce0
A
1142 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local ||
1143 !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
316670eb 1144 tcp_clear_recv_bg(so);
3e170ce0 1145 } else {
316670eb 1146 tcp_set_recv_bg(so);
3e170ce0 1147 }
6d2010ae 1148 } else {
316670eb
A
1149 tcp_clear_recv_bg(so);
1150 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
6d2010ae
A
1151 tcp_set_foreground_cc(so);
1152 }
316670eb
A
1153
1154 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
39037602
A
1155 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1156 "%s recv\n", so->last_pid,
1157 (uint64_t)VM_KERNEL_ADDRPERM(so),
1158 SOCK_DOM(so), SOCK_TYPE(so),
1159 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1160 "background" : "foreground",
1161 IS_TCP_RECV_BG(so) ? "background" : "foreground");
316670eb 1162 }
6d2010ae
A
1163}
1164
1165/*
1166 * Set traffic class to an IPv4 or IPv6 packet
1167 * - mark the mbuf
1168 * - set the DSCP code following the WMM mapping
1169 */
1170__private_extern__ void
316670eb 1171set_packet_service_class(struct mbuf *m, struct socket *so,
39037602 1172 int sotc, u_int32_t flags)
6d2010ae 1173{
316670eb
A
1174 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
1175 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
316670eb 1176
6d2010ae
A
1177 if (!(m->m_flags & M_PKTHDR))
1178 return;
316670eb
A
1179
1180 /*
6d2010ae
A
1181 * Here is the precedence:
1182 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1183 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1184 * 3) Traffic class socket option last
1185 */
39037602
A
1186 if (sotc != SO_TC_UNSPEC) {
1187 VERIFY(SO_VALID_TC(sotc));
1188 msc = so_tc2msc(sotc);
316670eb
A
1189 /* Assert because tc must have been valid */
1190 VERIFY(MBUF_VALID_SC(msc));
6d2010ae 1191 }
316670eb
A
1192
1193 /*
3e170ce0
A
1194 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1195 * best effort is set, depress the priority.
316670eb 1196 */
3e170ce0
A
1197 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so))
1198 msc = MBUF_SC_BK;
1199
1200 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1201 so_throttle_best_effort(so, inp->inp_last_outifp))
316670eb
A
1202 msc = MBUF_SC_BK;
1203
39236c6e
A
1204 if (soissrcbackground(so))
1205 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
3e170ce0
A
1206
1207 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc))
1208 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
6d2010ae 1209 /*
316670eb 1210 * Set the traffic class in the mbuf packet header svc field
6d2010ae 1211 */
316670eb 1212 if (sotcdb & SOTCDB_NO_MTC)
6d2010ae 1213 goto no_mbtc;
316670eb 1214
39037602
A
1215 /*
1216 * Elevate service class if the packet is a pure TCP ACK.
316670eb 1217 * We can do this only when the flow is not a background
39037602 1218 * flow and the outgoing interface supports
316670eb
A
1219 * transmit-start model.
1220 */
39037602
A
1221 if (!IS_MBUF_SC_BACKGROUND(msc) &&
1222 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0)
316670eb
A
1223 msc = MBUF_SC_CTL;
1224
1225 (void) m_set_service_class(m, msc);
1226
1227 /*
39037602 1228 * Set the privileged traffic auxiliary flag if applicable,
39236c6e 1229 * or clear it.
316670eb
A
1230 */
1231 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1232 msc != MBUF_SC_UNSPEC)
39236c6e 1233 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
316670eb 1234 else
39236c6e 1235 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
316670eb 1236
6d2010ae 1237no_mbtc:
6d2010ae
A
1238 /*
1239 * For TCP with background traffic class switch CC algo based on sysctl
1240 */
316670eb 1241 if (so->so_type == SOCK_STREAM)
6d2010ae 1242 set_tcp_stream_priority(so);
316670eb
A
1243
1244 so_tc_update_stats(m, so, msc);
1245}
1246
1247__private_extern__ void
1248so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1249{
1250 mbuf_traffic_class_t mtc;
1251
6d2010ae
A
1252 /*
1253 * Assume socket and mbuf traffic class values are the same
316670eb
A
1254 * Also assume the socket lock is held. Note that the stats
1255 * at the socket layer are reduced down to the legacy traffic
1256 * classes; we could/should potentially expand so_tc_stats[].
6d2010ae 1257 */
316670eb
A
1258 mtc = MBUF_SC2TC(msc);
1259 VERIFY(mtc < SO_TC_STATS_MAX);
6d2010ae
A
1260 so->so_tc_stats[mtc].txpackets += 1;
1261 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
6d2010ae
A
1262}
1263
1264__private_extern__ void
1265socket_tclass_init(void)
1266{
39037602 1267 _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);
39236c6e 1268
6d2010ae
A
1269 tclass_lck_grp_attr = lck_grp_attr_alloc_init();
1270 tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
1271 tclass_lck_attr = lck_attr_alloc_init();
316670eb
A
1272 lck_mtx_init(tclass_lock, tclass_lck_grp, tclass_lck_attr);
1273}
1274
1275__private_extern__ mbuf_svc_class_t
1276so_tc2msc(int tc)
1277{
1278 mbuf_svc_class_t msc;
1279
1280 switch (tc) {
1281 case SO_TC_BK_SYS:
1282 msc = MBUF_SC_BK_SYS;
1283 break;
1284 case SO_TC_BK:
1285 case _SO_TC_BK:
1286 msc = MBUF_SC_BK;
1287 break;
1288 case SO_TC_BE:
1289 msc = MBUF_SC_BE;
1290 break;
1291 case SO_TC_RD:
1292 msc = MBUF_SC_RD;
1293 break;
1294 case SO_TC_OAM:
1295 msc = MBUF_SC_OAM;
1296 break;
1297 case SO_TC_AV:
1298 msc = MBUF_SC_AV;
1299 break;
1300 case SO_TC_RV:
1301 msc = MBUF_SC_RV;
1302 break;
1303 case SO_TC_VI:
1304 case _SO_TC_VI:
1305 msc = MBUF_SC_VI;
1306 break;
1307 case SO_TC_VO:
1308 case _SO_TC_VO:
1309 msc = MBUF_SC_VO;
1310 break;
1311 case SO_TC_CTL:
1312 msc = MBUF_SC_CTL;
1313 break;
1314 case SO_TC_ALL:
1315 default:
1316 msc = MBUF_SC_UNSPEC;
1317 break;
6d2010ae 1318 }
316670eb
A
1319
1320 return (msc);
6d2010ae
A
1321}
1322
316670eb
A
1323__private_extern__ int
1324so_svc2tc(mbuf_svc_class_t svc)
1325{
1326 switch (svc) {
316670eb 1327 case MBUF_SC_BK_SYS:
39037602 1328 return (SO_TC_BK_SYS);
316670eb 1329 case MBUF_SC_BK:
39037602 1330 return (SO_TC_BK);
316670eb 1331 case MBUF_SC_BE:
39037602 1332 return (SO_TC_BE);
316670eb 1333 case MBUF_SC_RD:
39037602 1334 return (SO_TC_RD);
316670eb 1335 case MBUF_SC_OAM:
39037602 1336 return (SO_TC_OAM);
316670eb 1337 case MBUF_SC_AV:
39037602 1338 return (SO_TC_AV);
316670eb 1339 case MBUF_SC_RV:
39037602 1340 return (SO_TC_RV);
316670eb 1341 case MBUF_SC_VI:
39037602 1342 return (SO_TC_VI);
316670eb 1343 case MBUF_SC_VO:
39037602 1344 return (SO_TC_VO);
316670eb 1345 case MBUF_SC_CTL:
39037602
A
1346 return (SO_TC_CTL);
1347 case MBUF_SC_UNSPEC:
316670eb 1348 default:
39037602 1349 return (SO_TC_BE);
316670eb
A
1350 }
1351}
1352
1353/*
39236c6e 1354 * LRO is turned on for AV streaming class.
316670eb 1355 */
39236c6e 1356void
316670eb
A
1357so_set_lro(struct socket *so, int optval)
1358{
39236c6e 1359 if (optval == SO_TC_AV) {
316670eb
A
1360 so->so_flags |= SOF_USELRO;
1361 } else {
39236c6e
A
1362 if (so->so_flags & SOF_USELRO) {
1363 /* transition to non LRO class */
1364 so->so_flags &= ~SOF_USELRO;
1365 struct inpcb *inp = sotoinpcb(so);
1366 struct tcpcb *tp = NULL;
1367 if (inp) {
1368 tp = intotcpcb(inp);
1369 if (tp && (tp->t_flagsext & TF_LRO_OFFLOADED)) {
1370 tcp_lro_remove_state(inp->inp_laddr,
1371 inp->inp_faddr,
39037602 1372 inp->inp_lport,
39236c6e 1373 inp->inp_fport);
39037602 1374 tp->t_flagsext &= ~TF_LRO_OFFLOADED;
39236c6e
A
1375 }
1376 }
1377 }
316670eb
A
1378 }
1379}
6d2010ae 1380
39037602
A
1381static size_t
1382sotc_index(int sotc)
1383{
1384 switch (sotc) {
1385 case SO_TC_BK_SYS:
1386 return (SOTCIX_BK_SYS);
1387 case _SO_TC_BK:
1388 case SO_TC_BK:
1389 return (SOTCIX_BK);
1390
1391 case SO_TC_BE:
1392 return (SOTCIX_BE);
1393 case SO_TC_RD:
1394 return (SOTCIX_RD);
1395 case SO_TC_OAM:
1396 return (SOTCIX_OAM);
1397
1398 case SO_TC_AV:
1399 return (SOTCIX_AV);
1400 case SO_TC_RV:
1401 return (SOTCIX_RV);
1402 case _SO_TC_VI:
1403 case SO_TC_VI:
1404 return (SOTCIX_VI);
1405
1406 case _SO_TC_VO:
1407 case SO_TC_VO:
1408 return (SOTCIX_VO);
1409 case SO_TC_CTL:
1410 return (SOTCIX_CTL);
1411
1412 default:
1413 break;
1414 }
1415 /*
1416 * Unknown traffic class value
1417 */
1418 return (SIZE_T_MAX);
1419}
1420
1421/*
1422 * Pass NULL ifp for default map
1423 */
1424static errno_t
1425set_netsvctype_dscp_map(size_t in_count,
1426 const struct netsvctype_dscp_map *netsvctype_dscp_map)
1427{
1428 size_t i;
1429 struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1430 int netsvctype;
1431
1432 /*
1433 * Do not accept more that max number of distinct DSCPs
1434 */
1435 if (in_count > _MAX_DSCP || netsvctype_dscp_map == NULL)
1436 return (EINVAL);
1437
1438 /*
1439 * Validate input parameters
1440 */
1441 for (i = 0; i < in_count; i++) {
1442 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype))
1443 return (EINVAL);
1444 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP)
1445 return (EINVAL);
1446 }
1447
1448 net_qos_dscp_map = &default_net_qos_dscp_map;
1449
1450 for (i = 0; i < in_count; i++) {
1451 netsvctype = netsvctype_dscp_map[i].netsvctype;
1452
1453 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1454 netsvctype_dscp_map[i].dscp;
1455 }
1456 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1457 switch (netsvctype) {
1458 case NET_SERVICE_TYPE_BE:
1459 case NET_SERVICE_TYPE_BK:
1460 case NET_SERVICE_TYPE_VI:
1461 case NET_SERVICE_TYPE_VO:
1462 case NET_SERVICE_TYPE_RV:
1463 case NET_SERVICE_TYPE_AV:
1464 case NET_SERVICE_TYPE_OAM:
1465 case NET_SERVICE_TYPE_RD: {
1466 int sotcix;
1467
1468 sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1469 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1470 netsvctype_dscp_map[netsvctype].dscp;
1471 break;
1472 }
1473 case NET_SERVICE_TYPE_SIG:
1474 /* Signaling does not have its own traffic class */
1475 break;
1476 default:
1477 /* We should not be here */
1478 ASSERT(0);
1479 }
1480 }
1481 /* Network control socket traffic class is always best effort */
1482 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1483
1484 /* Backround socket traffic class DSCP same as backround system */
1485 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK] =
1486 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS];
1487
1488 return (0);
1489}
1490
1491/*
1492 * out_count is an input/ouput parameter
1493 */
1494static errno_t
1495get_netsvctype_dscp_map(size_t *out_count,
1496 struct netsvctype_dscp_map *netsvctype_dscp_map)
1497{
1498 size_t i;
1499 struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1500
1501 /*
1502 * Do not accept more that max number of distinct DSCPs
1503 */
1504 if (out_count == NULL || netsvctype_dscp_map == NULL)
1505 return (EINVAL);
1506 if (*out_count > _MAX_DSCP)
1507 return (EINVAL);
1508
1509 net_qos_dscp_map = &default_net_qos_dscp_map;
1510
1511 for (i = 0; i < MIN(_NET_SERVICE_TYPE_COUNT, *out_count); i++) {
1512 netsvctype_dscp_map[i].netsvctype = i;
1513 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1514
1515 }
1516 *out_count = i;
1517
1518 return (0);
1519}
1520
1521void
1522net_qos_map_init()
1523{
1524 errno_t error;
1525
1526 /*
1527 * By default use the Fastlane DSCP mappngs
1528 */
1529 error = set_netsvctype_dscp_map(_NET_SERVICE_TYPE_COUNT,
1530 fastlane_netsvctype_dscp_map);
1531 ASSERT(error == 0);
1532
1533 /*
1534 * No DSCP mapping for network control
1535 */
1536 default_net_qos_dscp_map.sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1537
1538 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1539}
1540
1541int
1542sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1543{
1544#pragma unused(oidp, arg1, arg2)
1545 int error = 0;
1546 const size_t max_netsvctype_to_dscp_map_len =
1547 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1548 size_t len;
1549 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT];
1550 size_t count;
1551
1552 if (req->oldptr == USER_ADDR_NULL) {
1553 req->oldidx =
1554 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1555 } else if (req->oldlen > 0) {
1556 count = _NET_SERVICE_TYPE_COUNT;
1557 error = get_netsvctype_dscp_map(&count, netsvctype_dscp_map);
1558 if (error != 0)
1559 goto done;
1560 len = count * sizeof(struct netsvctype_dscp_map);
1561 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1562 MIN(len, req->oldlen));
1563 if (error != 0)
1564 goto done;
1565 }
1566
1567 if (req->newptr == USER_ADDR_NULL)
1568 goto done;
1569
1570 error = proc_suser(current_proc());
1571 if (error != 0)
1572 goto done;
1573
1574 /*
1575 * Check input length
1576 */
1577 if (req->newlen > max_netsvctype_to_dscp_map_len) {
1578 error = EINVAL;
1579 goto done;
1580 }
1581 /*
1582 * Cap the number of entries to copy from input buffer
1583 */
1584 error = SYSCTL_IN(req, netsvctype_dscp_map, req->newlen);
1585 if (error != 0)
1586 goto done;
1587
1588 count = req->newlen / sizeof(struct netsvctype_dscp_map);
1589 error = set_netsvctype_dscp_map(count, netsvctype_dscp_map);
1590done:
1591 return (error);
1592}
1593
1594__private_extern__ errno_t
1595set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1596 int sotc, int netsvctype, u_int8_t *dscp_inout)
1597{
1598 if (ifp == NULL || dscp_inout == NULL)
1599 return (EINVAL);
1600
1601 if ((ifp->if_eflags &
1602 (IFEF_QOSMARKING_ENABLED | IFEF_QOSMARKING_CAPABLE)) ==
1603 (IFEF_QOSMARKING_ENABLED | IFEF_QOSMARKING_CAPABLE)) {
1604 u_int8_t dscp;
1605
1606 /*
1607 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1608 */
1609 dscp = _DSCP_DF;
1610
1611 /*
1612 * For DSCP use the network service type is specified, otherwise
1613 * use the socket traffic class
1614 *
1615 * When not whitelisted by the policy, set DSCP only for best
1616 * effort and background, and set the mbuf service class to
1617 * best effort as well so the packet will be queued and
1618 * scheduled at a lower priority.
1619 * We still want to prioritize control traffic on the interface
1620 * so we do not change the mbuf service class for SO_TC_CTL
1621 */
1622 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC &&
1623 netsvctype != NET_SERVICE_TYPE_BE) {
1624 dscp = default_net_qos_dscp_map.netsvctype_to_dscp[netsvctype];
1625
1626 if (qos_allowed == FALSE &&
1627 netsvctype != NET_SERVICE_TYPE_BE &&
1628 netsvctype != NET_SERVICE_TYPE_BK) {
1629 dscp = _DSCP_DF;
1630 if (sotc != SO_TC_CTL)
1631 m_set_service_class(m, MBUF_SC_BE);
1632 }
1633 } else {
1634 size_t sotcix = sotc_index(sotc);
1635
1636 dscp = default_net_qos_dscp_map.sotc_to_dscp[sotcix];
1637
1638 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1639 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1640 sotc != SO_TC_CTL) {
1641 dscp = _DSCP_DF;
1642 if (sotc != SO_TC_CTL)
1643 m_set_service_class(m, MBUF_SC_BE);
1644 }
1645 }
1646 if (net_qos_verbose != 0)
1647 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1648 __func__, qos_allowed, sotc, netsvctype, dscp);
1649
1650 if (*dscp_inout != dscp) {
1651 *dscp_inout = dscp;
1652 }
1653 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1654 mbuf_svc_class_t msc = m_get_service_class(m);
1655
1656 /*
1657 * For WiFi infra, when the mbuf service class is best effort
1658 * and the DSCP is not default, set the service class based
1659 * on DSCP
1660 */
1661 if (msc == MBUF_SC_BE) {
1662 msc = wifi_dscp_to_msc_array[*dscp_inout];
1663
1664 if (msc != MBUF_SC_BE) {
1665 m_set_service_class(m, msc);
1666
1667 if (net_qos_verbose != 0)
1668 printf("%s set msc %u for dscp %u\n",
1669 __func__, msc, *dscp_inout);
1670 }
1671 }
1672 }
1673
1674 return (0);
1675}
1676
1677static void
1678set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1679{
1680 int i;
1681
1682 if (clear)
1683 bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
1684
1685 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1686 const struct dcsp_msc_map *elem = map + i;
1687
1688 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC)
1689 break;
1690 switch (elem->msc) {
1691 case MBUF_SC_BK_SYS:
1692 case MBUF_SC_BK:
1693 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1694 break;
1695 default:
1696 case MBUF_SC_BE:
1697 case MBUF_SC_RD:
1698 case MBUF_SC_OAM:
1699 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1700 break;
1701 case MBUF_SC_AV:
1702 case MBUF_SC_RV:
1703 case MBUF_SC_VI:
1704 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1705 break;
1706 case MBUF_SC_VO:
1707 case MBUF_SC_CTL:
1708 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1709 break;
1710 }
1711 }
1712}
1713
1714static errno_t
1715dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1716 size_t count, struct dcsp_msc_map *dcsp_msc_map)
1717{
1718 errno_t error = 0;
1719 u_int32_t i;
1720
1721 /*
1722 * Validate input parameters
1723 */
1724 for (i = 0; i < count; i++) {
1725 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1726 error = EINVAL;
1727 goto done;
1728 }
1729 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1730 error = EINVAL;
1731 goto done;
1732 }
1733 }
1734
1735 bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1736
1737 for (i = 0; i < count; i++) {
1738 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1739 dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1740 }
1741done:
1742 return (error);
1743}
1744
1745int
1746sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1747{
1748#pragma unused(oidp, arg1, arg2)
1749 int error = 0;
1750 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1751 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE];
1752 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1753 size_t count;
1754 u_int32_t i;
1755
1756 if (req->oldptr == USER_ADDR_NULL) {
1757 req->oldidx = len;
1758 } else if (req->oldlen > 0) {
1759 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1760 netsvctype_dscp_map[i].dscp = i;
1761 netsvctype_dscp_map[i].netsvctype =
1762 so_svc2tc(wifi_dscp_to_msc_array[i]);
1763 }
1764 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1765 MIN(len, req->oldlen));
1766 if (error != 0)
1767 goto done;
1768 }
1769
1770 if (req->newptr == USER_ADDR_NULL)
1771 goto done;
1772
1773 error = proc_suser(current_proc());
1774 if (error != 0)
1775 goto done;
1776
1777 /*
1778 * Check input length
1779 */
1780 if (req->newlen > len) {
1781 error = EINVAL;
1782 goto done;
1783 }
1784 /*
1785 * Cap the number of entries to copy from input buffer
1786 */
1787 if (len > req->newlen)
1788 len = req->newlen;
1789 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1790 if (error != 0) {
1791 goto done;
1792 }
1793 count = len / sizeof(struct netsvctype_dscp_map);
1794 bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
1795 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
1796 dcsp_msc_map);
1797 if (error != 0) {
1798 goto done;
1799 }
1800 set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
1801done:
1802 return (error);
1803}
1804
1805int
1806sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1807{
1808#pragma unused(oidp, arg1, arg2)
1809 int error = 0;
1810 int val = 0;
1811
1812 error = sysctl_handle_int(oidp, &val, 0, req);
1813 if (error || !req->newptr)
1814 return (error);
1815
1816 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1817
1818 return (0);
1819}
5ba3f43e
A
1820
1821/*
1822 * Returns whether a large upload or download transfer should be marked as
1823 * BK service type for network activity. This is a system level
1824 * hint/suggestion to classify application traffic based on statistics
1825 * collected from the current network attachment
1826 *
1827 * Returns 1 for BK and 0 for default
1828 */
1829
1830int
1831net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
1832 int *retval)
1833{
1834#pragma unused(p)
1835#define RETURN_USE_BK 1
1836#define RETURN_USE_DEFAULT 0
1837 struct net_qos_param qos_arg;
1838 struct ifnet *ipv4_primary, *ipv6_primary;
1839 int err = 0;
1840
1841 if (arg->param == USER_ADDR_NULL || retval == NULL ||
1842 arg->param_len != sizeof (qos_arg)) {
1843 return (EINVAL);
1844 }
1845 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof (qos_arg));
1846 if (err != 0)
1847 return (err);
1848
1849 *retval = RETURN_USE_DEFAULT;
1850 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
1851 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
1852
1853 /*
1854 * If either of the interfaces is in Low Internet mode, enable
1855 * background delay based algorithms on this transfer
1856 */
1857 if (qos_arg.nq_uplink) {
1858 if ((ipv4_primary != NULL &&
1859 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
1860 (ipv6_primary != NULL &&
1861 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
1862 *retval = RETURN_USE_BK;
1863 return (0);
1864 }
1865 } else {
1866 if ((ipv4_primary != NULL &&
1867 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
1868 (ipv6_primary != NULL &&
1869 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
1870 *retval = RETURN_USE_BK;
1871 return (0);
1872 }
1873 }
1874
1875 /*
1876 * Some times IPv4 and IPv6 primary interfaces can be different.
1877 * In this case, if either of them is non-cellular, we should mark
1878 * the transfer as BK as it can potentially get used based on
1879 * the host name resolution
1880 */
1881 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
1882 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
1883 if (qos_arg.nq_use_expensive) {
1884 return (0);
1885 } else {
1886 *retval = RETURN_USE_BK;
1887 return (0);
1888 }
1889 }
1890 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
1891 *retval = RETURN_USE_BK;
1892 return (0);
1893 }
1894
1895
1896#undef RETURN_USE_BK
1897#undef RETURN_USE_DEFAULT
1898 return (0);
1899}