]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_tclass.c
xnu-6153.41.3.tar.gz
[apple/xnu.git] / bsd / netinet / in_tclass.c
CommitLineData
6d2010ae 1/*
0a7de745 2 * Copyright (c) 2009-2019 Apple Inc. All rights reserved.
6d2010ae
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/types.h>
32#include <sys/filedesc.h>
33#include <sys/file_internal.h>
34#include <sys/proc.h>
35#include <sys/socket.h>
36#include <sys/socketvar.h>
37#include <sys/errno.h>
38#include <sys/protosw.h>
39#include <sys/domain.h>
40#include <sys/mbuf.h>
41#include <sys/queue.h>
39037602 42#include <sys/sysctl.h>
5ba3f43e 43#include <sys/sysproto.h>
6d2010ae
A
44
45#include <net/if.h>
39037602 46#include <net/if_var.h>
6d2010ae
A
47#include <net/route.h>
48
49#include <netinet/in.h>
50#include <netinet/in_var.h>
51#include <netinet/in_pcb.h>
52#include <netinet/ip.h>
53#include <netinet/ip_var.h>
54#include <netinet/ip6.h>
55#include <netinet6/ip6_var.h>
56#include <netinet/udp.h>
57#include <netinet/udp_var.h>
58#include <netinet/tcp.h>
59#include <netinet/tcp_var.h>
60#include <netinet/tcp_cc.h>
39236c6e 61#include <netinet/lro_ext.h>
39037602 62#include <netinet/in_tclass.h>
6d2010ae 63
cb323159
A
/*
 * Pair of DSCP lookup tables: sotc_to_dscp has SO_TC_MAX slots and
 * netsvctype_to_dscp has _NET_SERVICE_TYPE_COUNT slots.
 * NOTE(review): presumably indexed by socket traffic class index and by
 * network service type respectively -- confirm against the code that fills
 * these tables.
 */
64struct net_qos_dscp_map {
65 uint8_t sotc_to_dscp[SO_TC_MAX];
66 uint8_t netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];
67};
68
/* One table entry pairing a DSCP value with an mbuf service class. */
39037602 69struct dcsp_msc_map {
cb323159 70 uint8_t dscp;
0a7de745 71 mbuf_svc_class_t msc;
39037602
A
72};
/* Forward declarations of file-local helpers. */
73static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
74static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
75static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
76 struct dcsp_msc_map *);
77
/*
 * Lock group/attribute handles and the mutex itself.  tclass_lock is the
 * mutex that the tclass_for_proc list helpers in this file require to be
 * held while the list is walked or modified.
 */
78static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
0a7de745
A
79static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */
80static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */
39037602
A
81decl_lck_mtx_data(static, tclass_lock_data);
82static lck_mtx_t *tclass_lock = &tclass_lock_data;
83
/* "qos" sysctl node, registered under the _net parent. */
84SYSCTL_NODE(_net, OID_AUTO, qos,
0a7de745 85 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");
39037602
A
86
/* Read/write struct-typed OID served by sysctl_default_netsvctype_to_dscp_map. */
87static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
88SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
89 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
90 0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");
91
/* Read/write struct-typed OID served by sysctl_dscp_to_wifi_ac_map. */
92static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
93SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
94 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
95 0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");
96
/* Write-only integer OID served by sysctl_reset_dscp_to_wifi_ac_map. */
97static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
98SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
99 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
100 0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");
101
/* Integer knob exported read/write as "verbose"; defaults to 0. */
102int net_qos_verbose = 0;
103SYSCTL_INT(_net_qos, OID_AUTO, verbose,
104 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
105
106/*
107 * Fastlane QoS policy:
108 * By Default allow all apps to get traffic class to DSCP mapping
109 */
/* "policy" sysctl node, registered under the qos node. */
110SYSCTL_NODE(_net_qos, OID_AUTO, policy,
0a7de745 111 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
39037602
A
112
/*
 * When 0 (the default), so_set_default_traffic_class() sets
 * SOF1_QOSMARKING_ALLOWED on new PF_INET/PF_INET6 sockets.
 */
113int net_qos_policy_restricted = 0;
114SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
115 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");
116
/* The three knobs below default to 0; their consumers are outside this section. */
117int net_qos_policy_restrict_avapps = 0;
118SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
119 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");
120
121int net_qos_policy_wifi_enabled = 0;
122SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
123 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");
124
125int net_qos_policy_capable_enabled = 0;
126SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
127 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
128
129/*
130 * Socket traffic class from network service type
131 */
/*
 * Indexed by network service type: so_set_net_service_type() and
 * so_tc_from_control() index this table with a value that has passed
 * IS_VALID_NET_SERVICE_TYPE().  Note that both SIG and VI map to SO_TC_VI.
 */
132const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
0a7de745
A
133 SO_TC_BE, /* NET_SERVICE_TYPE_BE */
134 SO_TC_BK_SYS, /* NET_SERVICE_TYPE_BK */
135 SO_TC_VI, /* NET_SERVICE_TYPE_SIG */
136 SO_TC_VI, /* NET_SERVICE_TYPE_VI */
137 SO_TC_VO, /* NET_SERVICE_TYPE_VO */
138 SO_TC_RV, /* NET_SERVICE_TYPE_RV */
139 SO_TC_AV, /* NET_SERVICE_TYPE_AV */
140 SO_TC_OAM, /* NET_SERVICE_TYPE_OAM */
141 SO_TC_RD /* NET_SERVICE_TYPE_RD */
39037602
A
142};
143
144/*
145 * DSCP mappings for QoS Fastlane as based on network service types
146 */
/*
 * One entry per network service type.  Compared with
 * rfc4594_netsvctype_dscp_map, only the BK (AF11 here) and SIG (CS3 here)
 * entries differ.
 */
147static const
148struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
cb323159
A
149 { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
150 { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
151 { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
152 { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
153 { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
154 { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
155 { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
156 { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
157 { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
158};
159
160
161/*
162 * DSCP mappings for QoS RFC4594 as based on network service types
163 */
/*
 * One entry per network service type.  Compared with
 * fastlane_netsvctype_dscp_map, only the BK (CS1 here) and SIG (CS5 here)
 * entries differ.
 */
164static const
165struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
166 { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
167 { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
168 { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
169 { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
170 { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
171 { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
172 { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
173 { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
174 { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
39037602
A
175};
176
cb323159
A
/*
 * Per-policy lookup tables; as objects with static storage duration they
 * start out zero-filled.
 * NOTE(review): presumably populated from the *_netsvctype_dscp_map tables
 * during initialization -- the code that fills them is not in this section.
 */
177static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
178static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
39037602
A
179
180/*
181 * The size is one more than the max because DSCP start at zero
182 */
0a7de745 183#define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
39037602
A
184
185/*
186 * The DSCP to UP mapping (via mbuf service class) for WiFi that follows is the
187 * mapping implemented at the 802.11 driver level when the mbuf service class
188 * is MBUF_SC_BE.
189 *
190 * This clashes with the recommended mapping documented by the IETF document
191 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
192 * binary compatibility. Applications should use the network service type socket
193 * option to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
194 */
/*
 * Lists DSCP values 0 through 63 in ascending order, eight per group, and
 * ends with a sentinel entry (dscp = 255, MBUF_SC_UNSPEC) that marks the end
 * of the table.
 */
195static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
cb323159
A
196 { .dscp = _DSCP_DF, .msc = MBUF_SC_BE }, /* RFC 2474 Standard */
197 { .dscp = 1, .msc = MBUF_SC_BE }, /* */
198 { .dscp = 2, .msc = MBUF_SC_BE }, /* */
199 { .dscp = 3, .msc = MBUF_SC_BE }, /* */
200 { .dscp = 4, .msc = MBUF_SC_BE }, /* */
201 { .dscp = 5, .msc = MBUF_SC_BE }, /* */
202 { .dscp = 6, .msc = MBUF_SC_BE }, /* */
203 { .dscp = 7, .msc = MBUF_SC_BE }, /* */
204
205 { .dscp = _DSCP_CS1, .msc = MBUF_SC_BK }, /* RFC 3662 Low-Priority Data */
206 { .dscp = 9, .msc = MBUF_SC_BK }, /* */
207 { .dscp = _DSCP_AF11, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
208 { .dscp = 11, .msc = MBUF_SC_BK }, /* */
209 { .dscp = _DSCP_AF12, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
210 { .dscp = 13, .msc = MBUF_SC_BK }, /* */
211 { .dscp = _DSCP_AF13, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
212 { .dscp = 15, .msc = MBUF_SC_BK }, /* */
213
214 { .dscp = _DSCP_CS2, .msc = MBUF_SC_BK }, /* RFC 4594 OAM */
215 { .dscp = 17, .msc = MBUF_SC_BK }, /* */
216 { .dscp = _DSCP_AF21, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
217 { .dscp = 19, .msc = MBUF_SC_BK }, /* */
218 { .dscp = _DSCP_AF22, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
219 { .dscp = 21, .msc = MBUF_SC_BK }, /* */
220 { .dscp = _DSCP_AF23, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
221 { .dscp = 23, .msc = MBUF_SC_BK }, /* */
222
223 { .dscp = _DSCP_CS3, .msc = MBUF_SC_BE }, /* RFC 2474 Broadcast Video */
224 { .dscp = 25, .msc = MBUF_SC_BE }, /* */
225 { .dscp = _DSCP_AF31, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
226 { .dscp = 27, .msc = MBUF_SC_BE }, /* */
227 { .dscp = _DSCP_AF32, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
228 { .dscp = 29, .msc = MBUF_SC_BE }, /* */
229 { .dscp = _DSCP_AF33, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
230 { .dscp = 31, .msc = MBUF_SC_BE }, /* */
231
232 { .dscp = _DSCP_CS4, .msc = MBUF_SC_VI }, /* RFC 2474 Real-Time Interactive */
233 { .dscp = 33, .msc = MBUF_SC_VI }, /* */
234 { .dscp = _DSCP_AF41, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
235 { .dscp = 35, .msc = MBUF_SC_VI }, /* */
236 { .dscp = _DSCP_AF42, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
237 { .dscp = 37, .msc = MBUF_SC_VI }, /* */
238 { .dscp = _DSCP_AF43, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
239 { .dscp = 39, .msc = MBUF_SC_VI }, /* */
240
241 { .dscp = _DSCP_CS5, .msc = MBUF_SC_VI }, /* RFC 2474 Signaling */
242 { .dscp = 41, .msc = MBUF_SC_VI }, /* */
243 { .dscp = 42, .msc = MBUF_SC_VI }, /* */
244 { .dscp = 43, .msc = MBUF_SC_VI }, /* */
245 { .dscp = _DSCP_VA, .msc = MBUF_SC_VI }, /* RFC 5865 VOICE-ADMIT */
246 { .dscp = 45, .msc = MBUF_SC_VI }, /* */
247 { .dscp = _DSCP_EF, .msc = MBUF_SC_VI }, /* RFC 3246 Telephony */
248 { .dscp = 47, .msc = MBUF_SC_VI }, /* */
249
250 { .dscp = _DSCP_CS6, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
251 { .dscp = 49, .msc = MBUF_SC_VO }, /* */
252 { .dscp = 50, .msc = MBUF_SC_VO }, /* */
253 { .dscp = 51, .msc = MBUF_SC_VO }, /* */
254 { .dscp = 52, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Sigma */
255 { .dscp = 53, .msc = MBUF_SC_VO }, /* */
256 { .dscp = 54, .msc = MBUF_SC_VO }, /* */
257 { .dscp = 55, .msc = MBUF_SC_VO }, /* */
258
259 { .dscp = _DSCP_CS7, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
260 { .dscp = 57, .msc = MBUF_SC_VO }, /* */
261 { .dscp = 58, .msc = MBUF_SC_VO }, /* */
262 { .dscp = 59, .msc = MBUF_SC_VO }, /* */
263 { .dscp = 60, .msc = MBUF_SC_VO }, /* */
264 { .dscp = 61, .msc = MBUF_SC_VO }, /* */
265 { .dscp = 62, .msc = MBUF_SC_VO }, /* */
266 { .dscp = 63, .msc = MBUF_SC_VO }, /* */
267
268 { .dscp = 255, .msc = MBUF_SC_UNSPEC } /* invalid DSCP to mark last entry */
39037602
A
269};
270
/*
 * Holds one mbuf service class per slot, DSCP_ARRAY_SIZE slots in total.
 * NOTE(review): presumably indexed by DSCP value and filled by
 * set_dscp_to_wifi_ac_map() -- confirm; the writer is not in this section.
 */
271mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
272
273/*
274 * If there is no foreground activity on the interface for bg_switch_time
275 * seconds, the background connections can switch to foreground TCP
276 * congestion control.
277 */
0a7de745 278#define TCP_BG_SWITCH_TIME 2 /* seconds */
39037602
A
279
280#if (DEVELOPMENT || DEBUG)
281
6d2010ae
A
/* Number of entries currently linked on tfp_head. */
282static int tfp_count = 0;
283
316670eb
A
/* List of per-process overrides; walked and modified under tclass_lock. */
284static TAILQ_HEAD(, tclass_for_proc) tfp_head =
285 TAILQ_HEAD_INITIALIZER(tfp_head);
6d2010ae
A
286
/*
 * One traffic class / QoS marking override, keyed either by pid or, when
 * tfp_pid is -1, by process name.
 */
287struct tclass_for_proc {
0a7de745
A
288 TAILQ_ENTRY(tclass_for_proc) tfp_link;
289 int tfp_class; /* SO_TC_UNSPEC leaves the socket's traffic class untouched */
290 pid_t tfp_pid; /* -1 for entries matched by name */
291 char tfp_pname[(2 * MAXCOMLEN) + 1]; /* only filled in for name-based entries */
cb323159 292 uint32_t tfp_qos_mode; /* compared against QOS_MODE_MARKING_POLICY_* */
6d2010ae
A
293};
294
316670eb
A
/* Forward declarations for the debug helpers defined below. */
295static int get_pid_tclass(struct so_tcdbg *);
296static int get_pname_tclass(struct so_tcdbg *);
297static int set_pid_tclass(struct so_tcdbg *);
298static int set_pname_tclass(struct so_tcdbg *);
299static int flush_pid_tclass(struct so_tcdbg *);
6d2010ae
A
300static int purge_tclass_for_proc(void);
301static int flush_tclass_for_proc(void);
39037602 302static void set_tclass_for_curr_proc(struct socket *);
39236c6e 303
6d2010ae
A
304/*
305 * Must be called with tclass_lock held
306 */
307static struct tclass_for_proc *
308find_tfp_by_pid(pid_t pid)
309{
310 struct tclass_for_proc *tfp;
316670eb 311
6d2010ae 312 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
0a7de745 313 if (tfp->tfp_pid == pid) {
6d2010ae 314 break;
0a7de745 315 }
6d2010ae 316 }
0a7de745 317 return tfp;
6d2010ae
A
318}
319
320/*
321 * Must be called with tclass_lock held
322 */
323static struct tclass_for_proc *
324find_tfp_by_pname(const char *pname)
325{
326 struct tclass_for_proc *tfp;
316670eb 327
6d2010ae 328 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
316670eb 329 if (strncmp(pname, tfp->tfp_pname,
0a7de745 330 sizeof(tfp->tfp_pname)) == 0) {
6d2010ae 331 break;
0a7de745 332 }
6d2010ae 333 }
0a7de745 334 return tfp;
6d2010ae
A
335}
336
39037602
A
337__private_extern__ void
338set_tclass_for_curr_proc(struct socket *so)
6d2010ae 339{
316670eb 340 struct tclass_for_proc *tfp = NULL;
0a7de745 341 proc_t p = current_proc(); /* Not ref counted */
6d2010ae 342 pid_t pid = proc_pid(p);
39037602 343 char *pname = proc_best_name(p);
316670eb 344
6d2010ae 345 lck_mtx_lock(tclass_lock);
316670eb 346
6d2010ae 347 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
316670eb
A
348 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
349 strncmp(pname, tfp->tfp_pname,
0a7de745
A
350 sizeof(tfp->tfp_pname)) == 0)) {
351 if (tfp->tfp_class != SO_TC_UNSPEC) {
39037602 352 so->so_traffic_class = tfp->tfp_class;
0a7de745 353 }
39037602 354
0a7de745 355 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
39037602 356 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
0a7de745 357 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
39037602 358 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
0a7de745 359 }
6d2010ae 360 break;
316670eb 361 }
6d2010ae
A
362 }
363
364 lck_mtx_unlock(tclass_lock);
6d2010ae
A
365}
366
367/*
368 * Purge entries with PIDs of exited processes
369 */
370int
371purge_tclass_for_proc(void)
372{
373 int error = 0;
374 struct tclass_for_proc *tfp, *tvar;
375
376 lck_mtx_lock(tclass_lock);
377
378 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
379 proc_t p;
316670eb 380
0a7de745 381 if (tfp->tfp_pid == -1) {
6d2010ae 382 continue;
0a7de745 383 }
6d2010ae
A
384 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
385 tfp_count--;
386 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
316670eb 387
6d2010ae
A
388 _FREE(tfp, M_TEMP);
389 } else {
390 proc_rele(p);
391 }
392 }
393
394 lck_mtx_unlock(tclass_lock);
316670eb 395
0a7de745 396 return error;
6d2010ae
A
397}
398
399/*
400 * Remove one entry
401 * Must be called with tclass_lock held
402 */
403static void
404free_tclass_for_proc(struct tclass_for_proc *tfp)
405{
0a7de745 406 if (tfp == NULL) {
6d2010ae 407 return;
0a7de745 408 }
6d2010ae
A
409 tfp_count--;
410 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
411 _FREE(tfp, M_TEMP);
412}
413
414/*
415 * Remove all entries
416 */
417int
418flush_tclass_for_proc(void)
419{
420 int error = 0;
421 struct tclass_for_proc *tfp, *tvar;
422
423 lck_mtx_lock(tclass_lock);
424
425 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
426 free_tclass_for_proc(tfp);
427 }
316670eb 428
6d2010ae 429 lck_mtx_unlock(tclass_lock);
316670eb 430
0a7de745 431 return error;
6d2010ae
A
432}
433
434/*
435 * Must be called with tclass_lock held
436 */
437static struct tclass_for_proc *
316670eb 438alloc_tclass_for_proc(pid_t pid, const char *pname)
6d2010ae
A
439{
440 struct tclass_for_proc *tfp;
316670eb 441
0a7de745
A
442 if (pid == -1 && pname == NULL) {
443 return NULL;
444 }
6d2010ae 445
0a7de745
A
446 tfp = _MALLOC(sizeof(struct tclass_for_proc), M_TEMP, M_NOWAIT | M_ZERO);
447 if (tfp == NULL) {
448 return NULL;
449 }
316670eb 450
6d2010ae 451 tfp->tfp_pid = pid;
6d2010ae 452 /*
316670eb 453 * Add per pid entries before per proc name so we can find
6d2010ae
A
454 * a specific instance of a process before the general name base entry.
455 */
456 if (pid != -1) {
457 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
458 } else {
0a7de745 459 strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
6d2010ae
A
460 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
461 }
316670eb 462
6d2010ae
A
463 tfp_count++;
464
0a7de745 465 return tfp;
6d2010ae
A
466}
467
468/*
39037602 469 * SO_TC_UNSPEC for tclass means to remove the entry
6d2010ae 470 */
316670eb
A
471int
472set_pid_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
473{
474 int error = EINVAL;
475 proc_t p = NULL;
476 struct filedesc *fdp;
477 struct fileproc *fp;
478 struct tclass_for_proc *tfp;
479 int i;
316670eb
A
480 pid_t pid = so_tcdbg->so_tcdbg_pid;
481 int tclass = so_tcdbg->so_tcdbg_tclass;
39037602 482 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
6d2010ae
A
483
484 p = proc_find(pid);
485 if (p == NULL) {
316670eb 486 printf("%s proc_find(%d) failed\n", __func__, pid);
6d2010ae
A
487 goto done;
488 }
316670eb 489
6d2010ae
A
490 /* Need a tfp */
491 lck_mtx_lock(tclass_lock);
316670eb 492
6d2010ae 493 tfp = find_tfp_by_pid(pid);
316670eb
A
494 if (tfp == NULL) {
495 tfp = alloc_tclass_for_proc(pid, NULL);
6d2010ae 496 if (tfp == NULL) {
316670eb
A
497 lck_mtx_unlock(tclass_lock);
498 error = ENOBUFS;
499 goto done;
6d2010ae
A
500 }
501 }
316670eb 502 tfp->tfp_class = tclass;
39037602 503 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
316670eb 504
6d2010ae
A
505 lck_mtx_unlock(tclass_lock);
506
507 if (tfp != NULL) {
508 proc_fdlock(p);
316670eb 509
6d2010ae
A
510 fdp = p->p_fd;
511 for (i = 0; i < fdp->fd_nfiles; i++) {
512 struct socket *so;
316670eb 513
6d2010ae 514 fp = fdp->fd_ofiles[i];
316670eb
A
515 if (fp == NULL ||
516 (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
0a7de745 517 FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) {
6d2010ae 518 continue;
0a7de745 519 }
316670eb 520
6d2010ae 521 so = (struct socket *)fp->f_fglob->fg_data;
0a7de745 522 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
6d2010ae 523 continue;
0a7de745 524 }
39037602 525
6d2010ae 526 socket_lock(so, 1);
0a7de745 527 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
39037602 528 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
0a7de745 529 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
39037602 530 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
0a7de745 531 }
316670eb 532 socket_unlock(so, 1);
39037602 533
0a7de745 534 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
39037602
A
535 error = sock_setsockopt(so, SOL_SOCKET,
536 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
0a7de745
A
537 }
538 if (tclass != SO_TC_UNSPEC) {
39037602
A
539 error = sock_setsockopt(so, SOL_SOCKET,
540 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
0a7de745 541 }
6d2010ae 542 }
316670eb 543
6d2010ae
A
544 proc_fdunlock(p);
545 }
316670eb
A
546
547 error = 0;
6d2010ae 548done:
0a7de745 549 if (p != NULL) {
6d2010ae 550 proc_rele(p);
0a7de745 551 }
316670eb 552
0a7de745 553 return error;
6d2010ae
A
554}
555
316670eb
A
556int
557set_pname_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
558{
559 int error = EINVAL;
560 struct tclass_for_proc *tfp;
561
562 lck_mtx_lock(tclass_lock);
316670eb
A
563
564 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
565 if (tfp == NULL) {
566 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
6d2010ae 567 if (tfp == NULL) {
316670eb
A
568 lck_mtx_unlock(tclass_lock);
569 error = ENOBUFS;
570 goto done;
6d2010ae
A
571 }
572 }
316670eb 573 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
39037602 574 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
316670eb 575
6d2010ae 576 lck_mtx_unlock(tclass_lock);
316670eb
A
577
578 error = 0;
6d2010ae 579done:
316670eb 580
0a7de745 581 return error;
6d2010ae
A
582}
583
316670eb
A
584static int
585flush_pid_tclass(struct so_tcdbg *so_tcdbg)
586{
587 pid_t pid = so_tcdbg->so_tcdbg_pid;
588 int tclass = so_tcdbg->so_tcdbg_tclass;
589 struct filedesc *fdp;
590 int error = EINVAL;
591 proc_t p;
592 int i;
593
594 p = proc_find(pid);
595 if (p == PROC_NULL) {
596 printf("%s proc_find(%d) failed\n", __func__, pid);
597 goto done;
598 }
599
600 proc_fdlock(p);
601 fdp = p->p_fd;
602 for (i = 0; i < fdp->fd_nfiles; i++) {
603 struct socket *so;
604 struct fileproc *fp;
605
606 fp = fdp->fd_ofiles[i];
607 if (fp == NULL ||
608 (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
0a7de745 609 FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) {
316670eb 610 continue;
0a7de745 611 }
316670eb
A
612
613 so = (struct socket *)fp->f_fglob->fg_data;
614 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
0a7de745 615 sizeof(tclass));
316670eb 616 if (error != 0) {
39236c6e
A
617 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
618 "tclass=%d) failed %d\n", __func__,
619 (uint64_t)VM_KERNEL_ADDRPERM(so), i, tclass,
316670eb
A
620 error);
621 error = 0;
622 }
623 }
624 proc_fdunlock(p);
625
626 error = 0;
627done:
0a7de745 628 if (p != PROC_NULL) {
316670eb 629 proc_rele(p);
0a7de745 630 }
316670eb 631
0a7de745 632 return error;
316670eb
A
633}
634
635int
636get_pid_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
637{
638 int error = EINVAL;
639 proc_t p = NULL;
640 struct tclass_for_proc *tfp;
316670eb
A
641 pid_t pid = so_tcdbg->so_tcdbg_pid;
642
39037602 643 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
6d2010ae
A
644
645 p = proc_find(pid);
646 if (p == NULL) {
316670eb 647 printf("%s proc_find(%d) failed\n", __func__, pid);
6d2010ae
A
648 goto done;
649 }
316670eb 650
6d2010ae
A
651 /* Need a tfp */
652 lck_mtx_lock(tclass_lock);
316670eb 653
6d2010ae
A
654 tfp = find_tfp_by_pid(pid);
655 if (tfp != NULL) {
316670eb 656 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
39037602 657 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
6d2010ae
A
658 error = 0;
659 }
660 lck_mtx_unlock(tclass_lock);
661done:
0a7de745 662 if (p != NULL) {
6d2010ae 663 proc_rele(p);
0a7de745 664 }
316670eb 665
0a7de745 666 return error;
6d2010ae
A
667}
668
316670eb
A
669int
670get_pname_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
671{
672 int error = EINVAL;
673 struct tclass_for_proc *tfp;
316670eb 674
39037602 675 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
6d2010ae
A
676
677 /* Need a tfp */
678 lck_mtx_lock(tclass_lock);
316670eb
A
679
680 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
6d2010ae 681 if (tfp != NULL) {
316670eb 682 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
39037602 683 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
6d2010ae
A
684 error = 0;
685 }
686 lck_mtx_unlock(tclass_lock);
316670eb 687
0a7de745 688 return error;
6d2010ae
A
689}
690
316670eb
A
691static int
692delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
693{
694 int error = EINVAL;
695 pid_t pid = so_tcdbg->so_tcdbg_pid;
696 struct tclass_for_proc *tfp = NULL;
697
698 lck_mtx_lock(tclass_lock);
6d2010ae 699
0a7de745 700 if (pid != -1) {
316670eb 701 tfp = find_tfp_by_pid(pid);
0a7de745 702 } else {
316670eb 703 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
0a7de745 704 }
316670eb
A
705
706 if (tfp != NULL) {
707 free_tclass_for_proc(tfp);
708 error = 0;
709 }
710
711 lck_mtx_unlock(tclass_lock);
712
0a7de745 713 return error;
316670eb 714}
6d2010ae
A
715
716/*
717 * Setting options requires privileges
718 */
316670eb 719__private_extern__ int
6d2010ae
A
720so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
721{
722 int error = 0;
316670eb 723
0a7de745
A
724 if ((so->so_state & SS_PRIV) == 0) {
725 return EPERM;
726 }
6d2010ae
A
727
728 socket_unlock(so, 0);
729
730 switch (so_tcdbg->so_tcdbg_cmd) {
0a7de745
A
731 case SO_TCDBG_PID:
732 error = set_pid_tclass(so_tcdbg);
733 break;
316670eb 734
0a7de745
A
735 case SO_TCDBG_PNAME:
736 error = set_pname_tclass(so_tcdbg);
737 break;
316670eb 738
0a7de745
A
739 case SO_TCDBG_PURGE:
740 error = purge_tclass_for_proc();
741 break;
316670eb 742
0a7de745
A
743 case SO_TCDBG_FLUSH:
744 error = flush_tclass_for_proc();
745 break;
316670eb 746
0a7de745
A
747 case SO_TCDBG_DELETE:
748 error = delete_tclass_for_pid_pname(so_tcdbg);
749 break;
316670eb 750
0a7de745
A
751 case SO_TCDBG_TCFLUSH_PID:
752 error = flush_pid_tclass(so_tcdbg);
753 break;
316670eb 754
0a7de745
A
755 default:
756 error = EINVAL;
757 break;
6d2010ae
A
758 }
759
760 socket_lock(so, 0);
761
0a7de745 762 return error;
6d2010ae
A
763}
764
765/*
766 * Not required to be privileged to get
767 */
316670eb 768__private_extern__ int
6d2010ae
A
769sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
770{
771 int error = 0;
772 struct so_tcdbg so_tcdbg;
773 void *buf = NULL;
774 size_t len = sopt->sopt_valsize;
775
0a7de745
A
776 error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
777 sizeof(struct so_tcdbg));
778 if (error != 0) {
779 return error;
780 }
316670eb 781
6d2010ae 782 sopt->sopt_valsize = len;
316670eb 783
6d2010ae
A
784 socket_unlock(so, 0);
785
786 switch (so_tcdbg.so_tcdbg_cmd) {
0a7de745
A
787 case SO_TCDBG_PID:
788 error = get_pid_tclass(&so_tcdbg);
789 break;
316670eb 790
0a7de745
A
791 case SO_TCDBG_PNAME:
792 error = get_pname_tclass(&so_tcdbg);
793 break;
316670eb 794
0a7de745
A
795 case SO_TCDBG_COUNT:
796 lck_mtx_lock(tclass_lock);
797 so_tcdbg.so_tcdbg_count = tfp_count;
798 lck_mtx_unlock(tclass_lock);
799 break;
6d2010ae 800
0a7de745
A
801 case SO_TCDBG_LIST: {
802 struct tclass_for_proc *tfp;
803 int n, alloc_count;
804 struct so_tcdbg *ptr;
6d2010ae 805
0a7de745
A
806 lck_mtx_lock(tclass_lock);
807 if ((alloc_count = tfp_count) == 0) {
6d2010ae 808 lck_mtx_unlock(tclass_lock);
0a7de745
A
809 error = EINVAL;
810 break;
811 }
812 len = alloc_count * sizeof(struct so_tcdbg);
813 lck_mtx_unlock(tclass_lock);
6d2010ae 814
0a7de745
A
815 buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
816 if (buf == NULL) {
817 error = ENOBUFS;
818 break;
819 }
820
821 lck_mtx_lock(tclass_lock);
822 n = 0;
823 ptr = (struct so_tcdbg *)buf;
824 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
825 if (++n > alloc_count) {
6d2010ae
A
826 break;
827 }
0a7de745
A
828 if (tfp->tfp_pid != -1) {
829 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
830 ptr->so_tcdbg_pid = tfp->tfp_pid;
831 } else {
832 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
833 ptr->so_tcdbg_pid = -1;
834 strlcpy(ptr->so_tcdbg_pname,
835 tfp->tfp_pname,
836 sizeof(ptr->so_tcdbg_pname));
6d2010ae 837 }
0a7de745
A
838 ptr->so_tcdbg_tclass = tfp->tfp_class;
839 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
840 ptr++;
841 }
316670eb 842
0a7de745
A
843 lck_mtx_unlock(tclass_lock);
844 }
845 break;
316670eb 846
0a7de745
A
847 default:
848 error = EINVAL;
849 break;
6d2010ae
A
850 }
851
852 socket_lock(so, 0);
853
854 if (error == 0) {
855 if (buf == NULL) {
316670eb 856 error = sooptcopyout(sopt, &so_tcdbg,
0a7de745 857 sizeof(struct so_tcdbg));
6d2010ae
A
858 } else {
859 error = sooptcopyout(sopt, buf, len);
860 _FREE(buf, M_TEMP);
861 }
862 }
0a7de745 863 return error;
6d2010ae
A
864}
865
39037602
A
866#endif /* (DEVELOPMENT || DEBUG) */
867
868int
869so_get_netsvc_marking_level(struct socket *so)
870{
871 int marking_level = NETSVC_MRKNG_UNKNOWN;
872 struct ifnet *ifp = NULL;
873
874 switch (SOCK_DOM(so)) {
0a7de745
A
875 case PF_INET: {
876 struct inpcb *inp = sotoinpcb(so);
39037602 877
0a7de745
A
878 if (inp != NULL) {
879 ifp = inp->inp_last_outifp;
39037602 880 }
0a7de745
A
881 break;
882 }
883 case PF_INET6: {
884 struct in6pcb *in6p = sotoin6pcb(so);
39037602 885
0a7de745
A
886 if (in6p != NULL) {
887 ifp = in6p->in6p_last_outifp;
39037602 888 }
0a7de745
A
889 break;
890 }
891 default:
892 break;
39037602
A
893 }
894 if (ifp != NULL) {
cb323159 895 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
0a7de745 896 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
39037602 897 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
0a7de745 898 } else {
39037602 899 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
0a7de745 900 }
39037602
A
901 } else {
902 marking_level = NETSVC_MRKNG_LVL_L2;
903 }
904 }
0a7de745 905 return marking_level;
39037602 906}
6d2010ae
A
907
908__private_extern__ int
909so_set_traffic_class(struct socket *so, int optval)
910{
911 int error = 0;
316670eb
A
912
913 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
6d2010ae
A
914 error = EINVAL;
915 } else {
316670eb
A
916 switch (optval) {
917 case _SO_TC_BK:
918 optval = SO_TC_BK;
919 break;
920 case _SO_TC_VI:
921 optval = SO_TC_VI;
922 break;
923 case _SO_TC_VO:
924 optval = SO_TC_VO;
925 break;
926 default:
0a7de745 927 if (!SO_VALID_TC(optval)) {
316670eb 928 error = EINVAL;
0a7de745 929 }
316670eb
A
930 break;
931 }
932
933 if (error == 0) {
934 int oldval = so->so_traffic_class;
935
936 VERIFY(SO_VALID_TC(optval));
937 so->so_traffic_class = optval;
938
39236c6e
A
939 if ((SOCK_DOM(so) == PF_INET ||
940 SOCK_DOM(so) == PF_INET6) &&
0a7de745 941 SOCK_TYPE(so) == SOCK_STREAM) {
316670eb 942 set_tcp_stream_priority(so);
0a7de745 943 }
316670eb 944
39236c6e
A
945 if ((SOCK_DOM(so) == PF_INET ||
946 SOCK_DOM(so) == PF_INET6) &&
316670eb
A
947 optval != oldval && (optval == SO_TC_BK_SYS ||
948 oldval == SO_TC_BK_SYS)) {
949 /*
950 * If the app switches from BK_SYS to something
951 * else, resume the socket if it was suspended.
952 */
0a7de745 953 if (oldval == SO_TC_BK_SYS) {
316670eb 954 inp_reset_fc_state(so->so_pcb);
0a7de745 955 }
316670eb 956
39037602 957 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
39236c6e
A
958 "[%d,%d] opportunistic %s\n", so->last_pid,
959 (uint64_t)VM_KERNEL_ADDRPERM(so),
960 SOCK_DOM(so), SOCK_TYPE(so),
39037602 961 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
316670eb 962 }
6d2010ae
A
963 }
964 }
0a7de745 965 return error;
6d2010ae
A
966}
967
39037602
A
968__private_extern__ int
969so_set_net_service_type(struct socket *so, int netsvctype)
970{
971 int sotc;
972 int error;
973
0a7de745
A
974 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
975 return EINVAL;
976 }
39037602
A
977
978 sotc = sotc_by_netservicetype[netsvctype];
979 error = so_set_traffic_class(so, sotc);
0a7de745
A
980 if (error != 0) {
981 return error;
982 }
39037602
A
983 so->so_netsvctype = netsvctype;
984 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
985
0a7de745 986 return 0;
39037602
A
987}
988
6d2010ae
A
989__private_extern__ void
990so_set_default_traffic_class(struct socket *so)
991{
39037602
A
992 so->so_traffic_class = SO_TC_BE;
993
994 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
0a7de745 995 if (net_qos_policy_restricted == 0) {
39037602 996 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
0a7de745 997 }
39037602 998#if (DEVELOPMENT || DEBUG)
0a7de745 999 if (tfp_count > 0) {
39037602 1000 set_tclass_for_curr_proc(so);
0a7de745 1001 }
39037602 1002#endif /* (DEVELOPMENT || DEBUG) */
6d2010ae 1003 }
6d2010ae
A
1004}
1005
316670eb
A
1006__private_extern__ int
1007so_set_opportunistic(struct socket *so, int optval)
1008{
0a7de745
A
1009 return so_set_traffic_class(so, (optval == 0) ?
1010 SO_TC_BE : SO_TC_BK_SYS);
316670eb 1011}
6d2010ae
A
1012
1013__private_extern__ int
316670eb
A
1014so_get_opportunistic(struct socket *so)
1015{
0a7de745 1016 return so->so_traffic_class == SO_TC_BK_SYS;
316670eb
A
1017}
1018
39037602
A
1019__private_extern__ int
1020so_tc_from_control(struct mbuf *control, int *out_netsvctype)
6d2010ae
A
1021{
1022 struct cmsghdr *cm;
39037602
A
1023 int sotc = SO_TC_UNSPEC;
1024
1025 *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
316670eb 1026
0a7de745
A
1027 for (cm = M_FIRST_CMSGHDR(control);
1028 is_cmsg_valid(control, cm);
316670eb 1029 cm = M_NXT_CMSGHDR(control, cm)) {
0a7de745 1030 int val;
6d2010ae 1031
6d2010ae 1032 if (cm->cmsg_level != SOL_SOCKET ||
0a7de745
A
1033 cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1034 continue;
1035 }
39037602
A
1036 val = *(int *)(void *)CMSG_DATA(cm);
1037 /*
1038 * The first valid option wins
1039 */
1040 switch (cm->cmsg_type) {
0a7de745
A
1041 case SO_TRAFFIC_CLASS:
1042 if (SO_VALID_TC(val)) {
1043 sotc = val;
1044 return sotc;
39037602 1045 /* NOT REACHED */
0a7de745 1046 } else if (val < SO_TC_NET_SERVICE_OFFSET) {
39037602 1047 break;
0a7de745
A
1048 }
1049 /*
1050 * Handle the case SO_NET_SERVICE_TYPE values are
1051 * passed using SO_TRAFFIC_CLASS
1052 */
1053 val = val - SO_TC_NET_SERVICE_OFFSET;
1054 /* FALLTHROUGH */
1055 case SO_NET_SERVICE_TYPE:
1056 if (!IS_VALID_NET_SERVICE_TYPE(val)) {
1057 break;
1058 }
1059 *out_netsvctype = val;
1060 sotc = sotc_by_netservicetype[val];
1061 return sotc;
1062 /* NOT REACHED */
1063 default:
1064 break;
39037602 1065 }
6d2010ae 1066 }
316670eb 1067
0a7de745 1068 return sotc;
6d2010ae
A
1069}
1070
94ff46dc
A
1071__private_extern__ int
1072so_tos_from_control(struct mbuf *control)
1073{
1074 struct cmsghdr *cm;
1075 int tos = IPTOS_UNSPEC;
1076
1077 for (cm = M_FIRST_CMSGHDR(control);
1078 is_cmsg_valid(control, cm);
1079 cm = M_NXT_CMSGHDR(control, cm)) {
1080 if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1081 continue;
1082 }
1083
1084 if ((cm->cmsg_level == IPPROTO_IP &&
1085 cm->cmsg_type == IP_TOS) ||
1086 (cm->cmsg_level == IPPROTO_IPV6 &&
1087 cm->cmsg_type == IPV6_TCLASS)) {
1088 tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
1089 /* The first valid option wins */
1090 break;
1091 }
1092 }
1093
1094 return tos;
1095}
1096
6d2010ae
A
1097__private_extern__ void
1098so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1099{
39037602 1100 uint32_t mtc = m_get_traffic_class(m);
6d2010ae 1101
0a7de745 1102 if (mtc >= SO_TC_STATS_MAX) {
39037602 1103 mtc = MBUF_TC_BE;
0a7de745 1104 }
6d2010ae 1105
39037602
A
1106 so->so_tc_stats[mtc].rxpackets += 1;
1107 so->so_tc_stats[mtc].rxbytes +=
316670eb 1108 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
6d2010ae
A
1109}
1110
fe8ab488 1111__private_extern__ void
39037602
A
1112so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1113 uint32_t mtc)
fe8ab488 1114{
0a7de745 1115 if (mtc >= SO_TC_STATS_MAX) {
39037602 1116 mtc = MBUF_TC_BE;
0a7de745 1117 }
fe8ab488 1118
39037602
A
1119 so->so_tc_stats[mtc].rxpackets += pkts;
1120 so->so_tc_stats[mtc].rxbytes += bytes;
fe8ab488 1121}
3e170ce0
A
1122
1123static inline int
1124so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1125{
cb323159 1126 uint32_t uptime = net_uptime();
0a7de745
A
1127 return soissrcbesteffort(so) &&
1128 net_io_policy_throttle_best_effort == 1 &&
1129 ifp->if_rt_sendts > 0 &&
1130 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
3e170ce0 1131}
39037602 1132
6d2010ae
A
1133__private_extern__ void
1134set_tcp_stream_priority(struct socket *so)
1135{
39236c6e
A
1136 struct inpcb *inp = sotoinpcb(so);
1137 struct tcpcb *tp = intotcpcb(inp);
1138 struct ifnet *outifp;
1139 u_char old_cc = tp->tcp_cc_index;
316670eb 1140 int recvbg = IS_TCP_RECV_BG(so);
3e170ce0 1141 bool is_local = false, fg_active = false;
cb323159 1142 uint32_t uptime;
39236c6e 1143
39037602
A
1144 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1145 SOCK_CHECK_DOM(so, PF_INET6)) &&
1146 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1147 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
fe8ab488 1148
39037602 1149 /* Return if the socket is in a terminal state */
0a7de745 1150 if (inp->inp_state == INPCB_STATE_DEAD) {
fe8ab488 1151 return;
0a7de745 1152 }
fe8ab488 1153
39236c6e
A
1154 outifp = inp->inp_last_outifp;
1155 uptime = net_uptime();
6d2010ae 1156
316670eb
A
1157 /*
1158 * If the socket was marked as a background socket or if the
1159 * traffic class is set to background with traffic class socket
1160 * option then make both send and recv side of the stream to be
1161 * background. The variable sotcdb which can be set with sysctl
6d2010ae
A
1162 * is used to disable these settings for testing.
1163 */
0a7de745 1164 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
3e170ce0 1165 is_local = true;
0a7de745 1166 }
3e170ce0
A
1167
1168 /* Check if there has been recent foreground activity */
1169 if (outifp != NULL) {
1170 /*
1171 * If the traffic source is background, check if
39037602 1172 * if it can be switched to foreground. This can
3e170ce0
A
1173 * happen when there is no indication of foreground
1174 * activity.
1175 */
5ba3f43e 1176 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
0a7de745 1177 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
39236c6e 1178 fg_active = true;
0a7de745 1179 }
39236c6e 1180
3e170ce0
A
1181 /*
1182 * The traffic source is best-effort -- check if
1183 * the policy to throttle best effort is enabled
1184 * and there was realtime activity on this
1185 * interface recently. If this is true, enable
1186 * algorithms that respond to increased latency
1187 * on best-effort traffic.
39037602 1188 */
0a7de745 1189 if (so_throttle_best_effort(so, outifp)) {
3e170ce0 1190 fg_active = true;
0a7de745 1191 }
3e170ce0
A
1192 }
1193
1194 /*
1195 * System initiated background traffic like cloud uploads should
1196 * always use background delay sensitive algorithms. This will
1197 * make the stream more responsive to other streams on the user's
1198 * network and it will minimize latency induced.
1199 */
1200 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
39236c6e
A
1201 /*
1202 * If the interface that the connection is using is
1203 * loopback, do not use background congestion
1204 * control algorithm.
1205 *
39037602
A
1206 * If there has been recent foreground activity or if
1207 * there was an indication that a foreground application
39236c6e 1208 * is going to use networking (net_io_policy_throttled),
39037602 1209 * switch the backgroung streams to use background
39236c6e
A
1210 * congestion control algorithm. Otherwise, even background
1211 * flows can move into foreground.
1212 */
3e170ce0
A
1213 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local ||
1214 !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
0a7de745 1215 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
6d2010ae 1216 tcp_set_foreground_cc(so);
0a7de745 1217 }
6d2010ae 1218 } else {
0a7de745 1219 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
6d2010ae 1220 tcp_set_background_cc(so);
0a7de745 1221 }
6d2010ae 1222 }
316670eb 1223
6d2010ae 1224 /* Set receive side background flags */
3e170ce0
A
1225 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local ||
1226 !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
316670eb 1227 tcp_clear_recv_bg(so);
3e170ce0 1228 } else {
316670eb 1229 tcp_set_recv_bg(so);
3e170ce0 1230 }
6d2010ae 1231 } else {
316670eb 1232 tcp_clear_recv_bg(so);
0a7de745 1233 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
6d2010ae 1234 tcp_set_foreground_cc(so);
0a7de745 1235 }
6d2010ae 1236 }
316670eb
A
1237
1238 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
39037602
A
1239 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1240 "%s recv\n", so->last_pid,
1241 (uint64_t)VM_KERNEL_ADDRPERM(so),
1242 SOCK_DOM(so), SOCK_TYPE(so),
1243 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1244 "background" : "foreground",
1245 IS_TCP_RECV_BG(so) ? "background" : "foreground");
316670eb 1246 }
6d2010ae
A
1247}
1248
1249/*
1250 * Set traffic class to an IPv4 or IPv6 packet
1251 * - mark the mbuf
1252 * - set the DSCP code following the WMM mapping
1253 */
1254__private_extern__ void
316670eb 1255set_packet_service_class(struct mbuf *m, struct socket *so,
cb323159 1256 int sotc, uint32_t flags)
6d2010ae 1257{
0a7de745 1258 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
316670eb 1259 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
316670eb 1260
0a7de745 1261 if (!(m->m_flags & M_PKTHDR)) {
6d2010ae 1262 return;
0a7de745 1263 }
316670eb
A
1264
1265 /*
6d2010ae
A
1266 * Here is the precedence:
1267 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1268 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1269 * 3) Traffic class socket option last
1270 */
39037602
A
1271 if (sotc != SO_TC_UNSPEC) {
1272 VERIFY(SO_VALID_TC(sotc));
1273 msc = so_tc2msc(sotc);
316670eb
A
1274 /* Assert because tc must have been valid */
1275 VERIFY(MBUF_VALID_SC(msc));
6d2010ae 1276 }
316670eb
A
1277
1278 /*
3e170ce0
A
1279 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1280 * best effort is set, depress the priority.
316670eb 1281 */
0a7de745 1282 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
3e170ce0 1283 msc = MBUF_SC_BK;
0a7de745 1284 }
3e170ce0
A
1285
1286 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
0a7de745 1287 so_throttle_best_effort(so, inp->inp_last_outifp)) {
316670eb 1288 msc = MBUF_SC_BK;
0a7de745 1289 }
316670eb 1290
0a7de745 1291 if (soissrcbackground(so)) {
39236c6e 1292 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
0a7de745 1293 }
3e170ce0 1294
0a7de745 1295 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
3e170ce0 1296 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
0a7de745 1297 }
6d2010ae 1298 /*
316670eb 1299 * Set the traffic class in the mbuf packet header svc field
6d2010ae 1300 */
0a7de745 1301 if (sotcdb & SOTCDB_NO_MTC) {
6d2010ae 1302 goto no_mbtc;
0a7de745 1303 }
316670eb 1304
39037602
A
1305 /*
1306 * Elevate service class if the packet is a pure TCP ACK.
316670eb 1307 * We can do this only when the flow is not a background
39037602 1308 * flow and the outgoing interface supports
316670eb
A
1309 * transmit-start model.
1310 */
39037602 1311 if (!IS_MBUF_SC_BACKGROUND(msc) &&
0a7de745 1312 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
316670eb 1313 msc = MBUF_SC_CTL;
0a7de745 1314 }
316670eb
A
1315
1316 (void) m_set_service_class(m, msc);
1317
1318 /*
39037602 1319 * Set the privileged traffic auxiliary flag if applicable,
39236c6e 1320 * or clear it.
316670eb
A
1321 */
1322 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
0a7de745 1323 msc != MBUF_SC_UNSPEC) {
39236c6e 1324 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
0a7de745 1325 } else {
39236c6e 1326 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
0a7de745 1327 }
316670eb 1328
6d2010ae 1329no_mbtc:
6d2010ae
A
1330 /*
1331 * For TCP with background traffic class switch CC algo based on sysctl
1332 */
0a7de745 1333 if (so->so_type == SOCK_STREAM) {
6d2010ae 1334 set_tcp_stream_priority(so);
0a7de745 1335 }
316670eb
A
1336
1337 so_tc_update_stats(m, so, msc);
1338}
1339
1340__private_extern__ void
1341so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1342{
1343 mbuf_traffic_class_t mtc;
1344
6d2010ae
A
1345 /*
1346 * Assume socket and mbuf traffic class values are the same
316670eb
A
1347 * Also assume the socket lock is held. Note that the stats
1348 * at the socket layer are reduced down to the legacy traffic
1349 * classes; we could/should potentially expand so_tc_stats[].
6d2010ae 1350 */
316670eb
A
1351 mtc = MBUF_SC2TC(msc);
1352 VERIFY(mtc < SO_TC_STATS_MAX);
6d2010ae
A
1353 so->so_tc_stats[mtc].txpackets += 1;
1354 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
6d2010ae
A
1355}
1356
1357__private_extern__ void
1358socket_tclass_init(void)
1359{
39037602 1360 _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);
39236c6e 1361
6d2010ae
A
1362 tclass_lck_grp_attr = lck_grp_attr_alloc_init();
1363 tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
1364 tclass_lck_attr = lck_attr_alloc_init();
316670eb
A
1365 lck_mtx_init(tclass_lock, tclass_lck_grp, tclass_lck_attr);
1366}
1367
1368__private_extern__ mbuf_svc_class_t
1369so_tc2msc(int tc)
1370{
1371 mbuf_svc_class_t msc;
1372
1373 switch (tc) {
1374 case SO_TC_BK_SYS:
1375 msc = MBUF_SC_BK_SYS;
1376 break;
1377 case SO_TC_BK:
1378 case _SO_TC_BK:
1379 msc = MBUF_SC_BK;
1380 break;
1381 case SO_TC_BE:
1382 msc = MBUF_SC_BE;
1383 break;
1384 case SO_TC_RD:
1385 msc = MBUF_SC_RD;
1386 break;
1387 case SO_TC_OAM:
1388 msc = MBUF_SC_OAM;
1389 break;
1390 case SO_TC_AV:
1391 msc = MBUF_SC_AV;
1392 break;
1393 case SO_TC_RV:
1394 msc = MBUF_SC_RV;
1395 break;
1396 case SO_TC_VI:
1397 case _SO_TC_VI:
1398 msc = MBUF_SC_VI;
1399 break;
d9a64523
A
1400 case SO_TC_NETSVC_SIG:
1401 msc = MBUF_SC_SIG;
1402 break;
316670eb
A
1403 case SO_TC_VO:
1404 case _SO_TC_VO:
1405 msc = MBUF_SC_VO;
1406 break;
1407 case SO_TC_CTL:
1408 msc = MBUF_SC_CTL;
1409 break;
1410 case SO_TC_ALL:
1411 default:
1412 msc = MBUF_SC_UNSPEC;
1413 break;
6d2010ae 1414 }
316670eb 1415
0a7de745 1416 return msc;
6d2010ae
A
1417}
1418
316670eb
A
1419__private_extern__ int
1420so_svc2tc(mbuf_svc_class_t svc)
1421{
1422 switch (svc) {
316670eb 1423 case MBUF_SC_BK_SYS:
0a7de745 1424 return SO_TC_BK_SYS;
316670eb 1425 case MBUF_SC_BK:
0a7de745 1426 return SO_TC_BK;
316670eb 1427 case MBUF_SC_BE:
0a7de745 1428 return SO_TC_BE;
316670eb 1429 case MBUF_SC_RD:
0a7de745 1430 return SO_TC_RD;
316670eb 1431 case MBUF_SC_OAM:
0a7de745 1432 return SO_TC_OAM;
316670eb 1433 case MBUF_SC_AV:
0a7de745 1434 return SO_TC_AV;
316670eb 1435 case MBUF_SC_RV:
0a7de745 1436 return SO_TC_RV;
316670eb 1437 case MBUF_SC_VI:
0a7de745 1438 return SO_TC_VI;
d9a64523 1439 case MBUF_SC_SIG:
0a7de745 1440 return SO_TC_NETSVC_SIG;
316670eb 1441 case MBUF_SC_VO:
0a7de745 1442 return SO_TC_VO;
316670eb 1443 case MBUF_SC_CTL:
0a7de745 1444 return SO_TC_CTL;
39037602 1445 case MBUF_SC_UNSPEC:
316670eb 1446 default:
0a7de745 1447 return SO_TC_BE;
316670eb
A
1448 }
1449}
1450
1451/*
39236c6e 1452 * LRO is turned on for AV streaming class.
316670eb 1453 */
39236c6e 1454void
316670eb
A
1455so_set_lro(struct socket *so, int optval)
1456{
39236c6e 1457 if (optval == SO_TC_AV) {
316670eb
A
1458 so->so_flags |= SOF_USELRO;
1459 } else {
39236c6e
A
1460 if (so->so_flags & SOF_USELRO) {
1461 /* transition to non LRO class */
1462 so->so_flags &= ~SOF_USELRO;
1463 struct inpcb *inp = sotoinpcb(so);
1464 struct tcpcb *tp = NULL;
1465 if (inp) {
1466 tp = intotcpcb(inp);
1467 if (tp && (tp->t_flagsext & TF_LRO_OFFLOADED)) {
1468 tcp_lro_remove_state(inp->inp_laddr,
0a7de745
A
1469 inp->inp_faddr,
1470 inp->inp_lport,
1471 inp->inp_fport);
39037602 1472 tp->t_flagsext &= ~TF_LRO_OFFLOADED;
39236c6e
A
1473 }
1474 }
1475 }
316670eb
A
1476 }
1477}
6d2010ae 1478
39037602
A
1479static size_t
1480sotc_index(int sotc)
1481{
1482 switch (sotc) {
0a7de745
A
1483 case SO_TC_BK_SYS:
1484 return SOTCIX_BK_SYS;
1485 case _SO_TC_BK:
1486 case SO_TC_BK:
1487 return SOTCIX_BK;
39037602 1488
0a7de745
A
1489 case SO_TC_BE:
1490 return SOTCIX_BE;
1491 case SO_TC_RD:
1492 return SOTCIX_RD;
1493 case SO_TC_OAM:
1494 return SOTCIX_OAM;
39037602 1495
0a7de745
A
1496 case SO_TC_AV:
1497 return SOTCIX_AV;
1498 case SO_TC_RV:
1499 return SOTCIX_RV;
1500 case _SO_TC_VI:
1501 case SO_TC_VI:
1502 return SOTCIX_VI;
1503
1504 case _SO_TC_VO:
1505 case SO_TC_VO:
1506 return SOTCIX_VO;
1507 case SO_TC_CTL:
1508 return SOTCIX_CTL;
1509
1510 default:
1511 break;
39037602
A
1512 }
1513 /*
1514 * Unknown traffic class value
1515 */
0a7de745 1516 return SIZE_T_MAX;
39037602
A
1517}
1518
cb323159
A
1519uint8_t
1520fastlane_sc_to_dscp(uint32_t svc_class)
1521{
1522 uint8_t dscp = _DSCP_DF;
1523
1524 switch (svc_class) {
1525 case MBUF_SC_BK_SYS:
1526 case MBUF_SC_BK:
1527 dscp = _DSCP_AF11;
1528 break;
1529
1530 case MBUF_SC_BE:
1531 dscp = _DSCP_DF;
1532 break;
1533 case MBUF_SC_RD:
1534 dscp = _DSCP_AF21;
1535 break;
1536 case MBUF_SC_OAM:
1537 dscp = _DSCP_CS2;
1538 break;
1539
1540 case MBUF_SC_AV:
1541 dscp = _DSCP_AF31;
1542 break;
1543 case MBUF_SC_RV:
1544 dscp = _DSCP_CS4;
1545 break;
1546 case MBUF_SC_VI:
1547 dscp = _DSCP_AF41;
1548 break;
1549 case MBUF_SC_SIG:
1550 dscp = _DSCP_CS3;
1551 break;
1552
1553 case MBUF_SC_VO:
1554 dscp = _DSCP_EF;
1555 break;
1556 case MBUF_SC_CTL:
1557 dscp = _DSCP_DF;
1558 break;
1559 default:
1560 dscp = _DSCP_DF;
1561 break;
1562 }
1563
1564 return dscp;
1565}
1566
1567uint8_t
1568rfc4594_sc_to_dscp(uint32_t svc_class)
1569{
1570 uint8_t dscp = _DSCP_DF;
1571
1572 switch (svc_class) {
1573 case MBUF_SC_BK_SYS: /* Low-Priority Data */
1574 case MBUF_SC_BK:
1575 dscp = _DSCP_CS1;
1576 break;
1577
1578 case MBUF_SC_BE: /* Standard */
1579 dscp = _DSCP_DF;
1580 break;
1581 case MBUF_SC_RD: /* Low-Latency Data */
1582 dscp = _DSCP_AF21;
1583 break;
1584
1585 /* SVC_CLASS Not Defined: High-Throughput Data */
1586
1587 case MBUF_SC_OAM: /* OAM */
1588 dscp = _DSCP_CS2;
1589 break;
1590
1591 /* SVC_CLASS Not Defined: Broadcast Video */
1592
1593 case MBUF_SC_AV: /* Multimedia Streaming */
1594 dscp = _DSCP_AF31;
1595 break;
1596 case MBUF_SC_RV: /* Real-Time Interactive */
1597 dscp = _DSCP_CS4;
1598 break;
1599 case MBUF_SC_VI: /* Multimedia Conferencing */
1600 dscp = _DSCP_AF41;
1601 break;
1602 case MBUF_SC_SIG: /* Signaling */
1603 dscp = _DSCP_CS5;
1604 break;
1605
1606 case MBUF_SC_VO: /* Telephony */
1607 dscp = _DSCP_EF;
1608 break;
1609 case MBUF_SC_CTL: /* Network Control*/
1610 dscp = _DSCP_CS6;
1611 break;
1612 default:
1613 dscp = _DSCP_DF;
1614 break;
1615 }
1616
1617 return dscp;
1618}
1619
1620mbuf_traffic_class_t
1621rfc4594_dscp_to_tc(uint8_t dscp)
1622{
1623 mbuf_traffic_class_t tc = MBUF_TC_BE;
1624
1625 switch (dscp) {
1626 case _DSCP_CS1:
1627 tc = MBUF_TC_BK;
1628 break;
1629 case _DSCP_DF:
1630 case _DSCP_AF21:
1631 case _DSCP_CS2:
1632 tc = MBUF_TC_BE;
1633 break;
1634 case _DSCP_AF31:
1635 case _DSCP_CS4:
1636 case _DSCP_AF41:
1637 case _DSCP_CS5:
1638 tc = MBUF_TC_VI;
1639 break;
1640 case _DSCP_EF:
1641 case _DSCP_CS6:
1642 tc = MBUF_TC_VO;
1643 break;
1644 default:
1645 tc = MBUF_TC_BE;
1646 break;
1647 }
1648
1649 return tc;
1650}
1651
39037602
A
1652/*
1653 * Pass NULL ifp for default map
1654 */
1655static errno_t
cb323159 1656set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
39037602
A
1657 const struct netsvctype_dscp_map *netsvctype_dscp_map)
1658{
1659 size_t i;
39037602
A
1660 int netsvctype;
1661
1662 /*
1663 * Do not accept more that max number of distinct DSCPs
1664 */
cb323159 1665 if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
0a7de745
A
1666 return EINVAL;
1667 }
39037602
A
1668
1669 /*
1670 * Validate input parameters
1671 */
cb323159 1672 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
0a7de745
A
1673 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1674 return EINVAL;
1675 }
1676 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1677 return EINVAL;
1678 }
39037602
A
1679 }
1680
cb323159 1681 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
39037602
A
1682 netsvctype = netsvctype_dscp_map[i].netsvctype;
1683
1684 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1685 netsvctype_dscp_map[i].dscp;
1686 }
1687 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1688 switch (netsvctype) {
0a7de745
A
1689 case NET_SERVICE_TYPE_BE:
1690 case NET_SERVICE_TYPE_BK:
1691 case NET_SERVICE_TYPE_VI:
1692 case NET_SERVICE_TYPE_VO:
1693 case NET_SERVICE_TYPE_RV:
1694 case NET_SERVICE_TYPE_AV:
1695 case NET_SERVICE_TYPE_OAM:
1696 case NET_SERVICE_TYPE_RD: {
1697 size_t sotcix;
1698
1699 sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1700 if (sotcix != SIZE_T_MAX) {
1701 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1702 netsvctype_dscp_map[netsvctype].dscp;
39037602 1703 }
0a7de745
A
1704 break;
1705 }
1706 case NET_SERVICE_TYPE_SIG:
1707 /* Signaling does not have its own traffic class */
1708 break;
1709 default:
1710 /* We should not be here */
1711 ASSERT(0);
39037602
A
1712 }
1713 }
1714 /* Network control socket traffic class is always best effort */
1715 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1716
1717 /* Backround socket traffic class DSCP same as backround system */
1718 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK] =
0a7de745 1719 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS];
39037602 1720
0a7de745 1721 return 0;
39037602
A
1722}
1723
1724/*
1725 * out_count is an input/ouput parameter
1726 */
1727static errno_t
1728get_netsvctype_dscp_map(size_t *out_count,
1729 struct netsvctype_dscp_map *netsvctype_dscp_map)
1730{
1731 size_t i;
1732 struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1733
1734 /*
1735 * Do not accept more that max number of distinct DSCPs
1736 */
0a7de745
A
1737 if (out_count == NULL || netsvctype_dscp_map == NULL) {
1738 return EINVAL;
1739 }
1740 if (*out_count > _MAX_DSCP) {
1741 return EINVAL;
1742 }
39037602 1743
cb323159 1744 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
39037602
A
1745
1746 for (i = 0; i < MIN(_NET_SERVICE_TYPE_COUNT, *out_count); i++) {
1747 netsvctype_dscp_map[i].netsvctype = i;
1748 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
39037602
A
1749 }
1750 *out_count = i;
1751
0a7de745 1752 return 0;
39037602
A
1753}
1754
1755void
1756net_qos_map_init()
1757{
1758 errno_t error;
1759
cb323159 1760 error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map,
0a7de745 1761 fastlane_netsvctype_dscp_map);
39037602
A
1762 ASSERT(error == 0);
1763
cb323159
A
1764 error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map,
1765 rfc4594_netsvctype_dscp_map);
1766 ASSERT(error == 0);
39037602
A
1767
1768 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1769}
1770
1771int
1772sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1773{
1774#pragma unused(oidp, arg1, arg2)
1775 int error = 0;
39037602 1776 size_t len;
527f9951 1777 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
39037602
A
1778 size_t count;
1779
1780 if (req->oldptr == USER_ADDR_NULL) {
1781 req->oldidx =
1782 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1783 } else if (req->oldlen > 0) {
1784 count = _NET_SERVICE_TYPE_COUNT;
1785 error = get_netsvctype_dscp_map(&count, netsvctype_dscp_map);
0a7de745 1786 if (error != 0) {
39037602 1787 goto done;
0a7de745 1788 }
39037602
A
1789 len = count * sizeof(struct netsvctype_dscp_map);
1790 error = SYSCTL_OUT(req, netsvctype_dscp_map,
0a7de745
A
1791 MIN(len, req->oldlen));
1792 if (error != 0) {
39037602 1793 goto done;
0a7de745 1794 }
39037602
A
1795 }
1796
cb323159
A
1797 if (req->newptr != USER_ADDR_NULL) {
1798 error = EPERM;
0a7de745 1799 }
39037602 1800done:
0a7de745 1801 return error;
39037602
A
1802}
1803
1804__private_extern__ errno_t
1805set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
cb323159 1806 int sotc, int netsvctype, uint8_t *dscp_inout)
39037602 1807{
0a7de745
A
1808 if (ifp == NULL || dscp_inout == NULL) {
1809 return EINVAL;
1810 }
39037602 1811
cb323159
A
1812 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1813 ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1814 uint8_t dscp;
1815 const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1816
1817 switch (ifp->if_qosmarking_mode) {
1818 case IFRTYPE_QOSMARKING_FASTLANE:
1819 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1820 break;
1821 case IFRTYPE_QOSMARKING_RFC4594:
1822 net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1823 break;
1824 default:
1825 panic("invalid QoS marking type");
1826 /* NOTREACHED */
1827 }
39037602
A
1828
1829 /*
1830 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1831 */
1832 dscp = _DSCP_DF;
1833
1834 /*
1835 * For DSCP use the network service type is specified, otherwise
1836 * use the socket traffic class
1837 *
1838 * When not whitelisted by the policy, set DSCP only for best
1839 * effort and background, and set the mbuf service class to
1840 * best effort as well so the packet will be queued and
1841 * scheduled at a lower priority.
1842 * We still want to prioritize control traffic on the interface
1843 * so we do not change the mbuf service class for SO_TC_CTL
1844 */
a39ff7e2 1845 if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
39037602 1846 netsvctype != NET_SERVICE_TYPE_BE) {
cb323159 1847 dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
39037602
A
1848
1849 if (qos_allowed == FALSE &&
1850 netsvctype != NET_SERVICE_TYPE_BE &&
1851 netsvctype != NET_SERVICE_TYPE_BK) {
1852 dscp = _DSCP_DF;
0a7de745 1853 if (sotc != SO_TC_CTL) {
39037602 1854 m_set_service_class(m, MBUF_SC_BE);
0a7de745 1855 }
39037602 1856 }
a39ff7e2 1857 } else if (sotc != SO_TC_UNSPEC) {
39037602 1858 size_t sotcix = sotc_index(sotc);
a39ff7e2 1859 if (sotcix != SIZE_T_MAX) {
cb323159 1860 dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
a39ff7e2
A
1861
1862 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1863 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1864 sotc != SO_TC_CTL) {
1865 dscp = _DSCP_DF;
0a7de745 1866 if (sotc != SO_TC_CTL) {
a39ff7e2 1867 m_set_service_class(m, MBUF_SC_BE);
0a7de745 1868 }
a39ff7e2 1869 }
39037602
A
1870 }
1871 }
0a7de745 1872 if (net_qos_verbose != 0) {
39037602
A
1873 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1874 __func__, qos_allowed, sotc, netsvctype, dscp);
0a7de745 1875 }
39037602
A
1876
1877 if (*dscp_inout != dscp) {
1878 *dscp_inout = dscp;
1879 }
1880 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1881 mbuf_svc_class_t msc = m_get_service_class(m);
1882
1883 /*
1884 * For WiFi infra, when the mbuf service class is best effort
1885 * and the DSCP is not default, set the service class based
1886 * on DSCP
1887 */
1888 if (msc == MBUF_SC_BE) {
1889 msc = wifi_dscp_to_msc_array[*dscp_inout];
1890
1891 if (msc != MBUF_SC_BE) {
1892 m_set_service_class(m, msc);
1893
0a7de745 1894 if (net_qos_verbose != 0) {
39037602
A
1895 printf("%s set msc %u for dscp %u\n",
1896 __func__, msc, *dscp_inout);
0a7de745 1897 }
39037602
A
1898 }
1899 }
1900 }
1901
0a7de745 1902 return 0;
39037602
A
1903}
1904
1905static void
1906set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1907{
1908 int i;
1909
0a7de745 1910 if (clear) {
39037602 1911 bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
0a7de745 1912 }
39037602
A
1913
1914 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1915 const struct dcsp_msc_map *elem = map + i;
1916
0a7de745 1917 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
39037602 1918 break;
0a7de745 1919 }
39037602 1920 switch (elem->msc) {
0a7de745
A
1921 case MBUF_SC_BK_SYS:
1922 case MBUF_SC_BK:
1923 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1924 break;
1925 default:
1926 case MBUF_SC_BE:
1927 case MBUF_SC_RD:
1928 case MBUF_SC_OAM:
1929 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1930 break;
1931 case MBUF_SC_AV:
1932 case MBUF_SC_RV:
1933 case MBUF_SC_VI:
1934 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1935 break;
1936 case MBUF_SC_VO:
1937 case MBUF_SC_CTL:
1938 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1939 break;
39037602
A
1940 }
1941 }
1942}
1943
1944static errno_t
1945dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1946 size_t count, struct dcsp_msc_map *dcsp_msc_map)
1947{
1948 errno_t error = 0;
cb323159 1949 uint32_t i;
39037602
A
1950
1951 /*
1952 * Validate input parameters
1953 */
1954 for (i = 0; i < count; i++) {
1955 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1956 error = EINVAL;
1957 goto done;
1958 }
1959 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1960 error = EINVAL;
1961 goto done;
1962 }
1963 }
1964
1965 bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1966
1967 for (i = 0; i < count; i++) {
1968 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1969 dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1970 }
1971done:
0a7de745 1972 return error;
39037602
A
1973}
1974
1975int
1976sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1977{
1978#pragma unused(oidp, arg1, arg2)
1979 int error = 0;
1980 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
527f9951 1981 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
39037602
A
1982 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1983 size_t count;
cb323159 1984 uint32_t i;
39037602
A
1985
1986 if (req->oldptr == USER_ADDR_NULL) {
1987 req->oldidx = len;
1988 } else if (req->oldlen > 0) {
1989 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1990 netsvctype_dscp_map[i].dscp = i;
1991 netsvctype_dscp_map[i].netsvctype =
1992 so_svc2tc(wifi_dscp_to_msc_array[i]);
1993 }
1994 error = SYSCTL_OUT(req, netsvctype_dscp_map,
0a7de745
A
1995 MIN(len, req->oldlen));
1996 if (error != 0) {
39037602 1997 goto done;
0a7de745 1998 }
39037602
A
1999 }
2000
0a7de745 2001 if (req->newptr == USER_ADDR_NULL) {
39037602 2002 goto done;
0a7de745 2003 }
39037602
A
2004
2005 error = proc_suser(current_proc());
0a7de745 2006 if (error != 0) {
39037602 2007 goto done;
0a7de745 2008 }
39037602
A
2009
2010 /*
2011 * Check input length
2012 */
2013 if (req->newlen > len) {
2014 error = EINVAL;
2015 goto done;
2016 }
2017 /*
2018 * Cap the number of entries to copy from input buffer
2019 */
0a7de745 2020 if (len > req->newlen) {
39037602 2021 len = req->newlen;
0a7de745 2022 }
39037602
A
2023 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
2024 if (error != 0) {
2025 goto done;
2026 }
2027 count = len / sizeof(struct netsvctype_dscp_map);
2028 bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
2029 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
2030 dcsp_msc_map);
2031 if (error != 0) {
2032 goto done;
2033 }
2034 set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
2035done:
0a7de745 2036 return error;
39037602
A
2037}
2038
2039int
2040sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
2041{
2042#pragma unused(oidp, arg1, arg2)
2043 int error = 0;
2044 int val = 0;
2045
2046 error = sysctl_handle_int(oidp, &val, 0, req);
0a7de745
A
2047 if (error || !req->newptr) {
2048 return error;
2049 }
39037602
A
2050
2051 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
2052
0a7de745 2053 return 0;
39037602 2054}
5ba3f43e
A
2055
2056/*
2057 * Returns whether a large upload or download transfer should be marked as
2058 * BK service type for network activity. This is a system level
2059 * hint/suggestion to classify application traffic based on statistics
2060 * collected from the current network attachment
2061 *
2062 * Returns 1 for BK and 0 for default
2063 */
2064
2065int
2066net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
2067 int *retval)
2068{
2069#pragma unused(p)
0a7de745
A
2070#define RETURN_USE_BK 1
2071#define RETURN_USE_DEFAULT 0
5ba3f43e
A
2072 struct net_qos_param qos_arg;
2073 struct ifnet *ipv4_primary, *ipv6_primary;
2074 int err = 0;
2075
2076 if (arg->param == USER_ADDR_NULL || retval == NULL ||
0a7de745
A
2077 arg->param_len != sizeof(qos_arg)) {
2078 return EINVAL;
2079 }
2080 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
2081 if (err != 0) {
2082 return err;
5ba3f43e 2083 }
5ba3f43e
A
2084
2085 *retval = RETURN_USE_DEFAULT;
2086 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
2087 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
2088
2089 /*
2090 * If either of the interfaces is in Low Internet mode, enable
2091 * background delay based algorithms on this transfer
2092 */
2093 if (qos_arg.nq_uplink) {
2094 if ((ipv4_primary != NULL &&
2095 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
2096 (ipv6_primary != NULL &&
2097 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
2098 *retval = RETURN_USE_BK;
0a7de745 2099 return 0;
5ba3f43e
A
2100 }
2101 } else {
2102 if ((ipv4_primary != NULL &&
2103 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
2104 (ipv6_primary != NULL &&
2105 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
2106 *retval = RETURN_USE_BK;
0a7de745 2107 return 0;
5ba3f43e
A
2108 }
2109 }
2110
2111 /*
2112 * Some times IPv4 and IPv6 primary interfaces can be different.
2113 * In this case, if either of them is non-cellular, we should mark
2114 * the transfer as BK as it can potentially get used based on
2115 * the host name resolution
2116 */
2117 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2118 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2119 if (qos_arg.nq_use_expensive) {
0a7de745 2120 return 0;
5ba3f43e
A
2121 } else {
2122 *retval = RETURN_USE_BK;
0a7de745 2123 return 0;
5ba3f43e
A
2124 }
2125 }
cb323159
A
2126 if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2127 ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2128 if (qos_arg.nq_use_constrained) {
2129 return 0;
2130 } else {
2131 *retval = RETURN_USE_BK;
2132 return 0;
2133 }
2134 }
5ba3f43e
A
2135 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2136 *retval = RETURN_USE_BK;
0a7de745 2137 return 0;
5ba3f43e
A
2138 }
2139
2140
0a7de745
A
2141#undef RETURN_USE_BK
2142#undef RETURN_USE_DEFAULT
2143 return 0;
5ba3f43e 2144}