]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_tclass.c
xnu-2050.18.24.tar.gz
[apple/xnu.git] / bsd / netinet / in_tclass.c
CommitLineData
6d2010ae 1/*
316670eb 2 * Copyright (c) 2009-2012 Apple Inc. All rights reserved.
6d2010ae
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/types.h>
32#include <sys/filedesc.h>
33#include <sys/file_internal.h>
34#include <sys/proc.h>
35#include <sys/socket.h>
36#include <sys/socketvar.h>
37#include <sys/errno.h>
38#include <sys/protosw.h>
39#include <sys/domain.h>
40#include <sys/mbuf.h>
41#include <sys/queue.h>
42
43#include <net/if.h>
44#include <net/route.h>
45
46#include <netinet/in.h>
47#include <netinet/in_var.h>
48#include <netinet/in_pcb.h>
49#include <netinet/ip.h>
50#include <netinet/ip_var.h>
51#include <netinet/ip6.h>
52#include <netinet6/ip6_var.h>
53#include <netinet/udp.h>
54#include <netinet/udp_var.h>
55#include <netinet/tcp.h>
56#include <netinet/tcp_var.h>
57#include <netinet/tcp_cc.h>
58
59extern char *proc_name_address(void *p);
60
61static int tfp_count = 0;
62
316670eb
A
63static TAILQ_HEAD(, tclass_for_proc) tfp_head =
64 TAILQ_HEAD_INITIALIZER(tfp_head);
6d2010ae
A
65
66struct tclass_for_proc {
67 TAILQ_ENTRY(tclass_for_proc) tfp_link;
316670eb
A
68 int tfp_class;
69 pid_t tfp_pid;
70 char tfp_pname[MAXCOMLEN + 1];
6d2010ae
A
71};
72
316670eb
A
73static int dscp_code_from_mbuf_tclass(mbuf_traffic_class_t);
74static int get_pid_tclass(struct so_tcdbg *);
75static int get_pname_tclass(struct so_tcdbg *);
76static int set_pid_tclass(struct so_tcdbg *);
77static int set_pname_tclass(struct so_tcdbg *);
78static int flush_pid_tclass(struct so_tcdbg *);
6d2010ae
A
79static int purge_tclass_for_proc(void);
80static int flush_tclass_for_proc(void);
316670eb
A
81static void so_set_lro(struct socket*, int);
82int get_tclass_for_curr_proc(int *);
6d2010ae 83
316670eb
A
84static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
85static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */
86static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */
87decl_lck_mtx_data(static, tclass_lock_data);
88static lck_mtx_t *tclass_lock = &tclass_lock_data;
6d2010ae
A
89
90/*
91 * Must be called with tclass_lock held
92 */
93static struct tclass_for_proc *
94find_tfp_by_pid(pid_t pid)
95{
96 struct tclass_for_proc *tfp;
316670eb 97
6d2010ae
A
98 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
99 if (tfp->tfp_pid == pid)
100 break;
101 }
316670eb 102 return (tfp);
6d2010ae
A
103}
104
105/*
106 * Must be called with tclass_lock held
107 */
108static struct tclass_for_proc *
109find_tfp_by_pname(const char *pname)
110{
111 struct tclass_for_proc *tfp;
316670eb 112
6d2010ae 113 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
316670eb
A
114 if (strncmp(pname, tfp->tfp_pname,
115 sizeof (tfp->tfp_pname)) == 0)
6d2010ae
A
116 break;
117 }
316670eb 118 return (tfp);
6d2010ae
A
119}
120
316670eb
A
121__private_extern__ int
122get_tclass_for_curr_proc(int *sotc)
6d2010ae 123{
316670eb 124 struct tclass_for_proc *tfp = NULL;
6d2010ae
A
125 proc_t p = current_proc(); /* Not ref counted */
126 pid_t pid = proc_pid(p);
127 char *pname = proc_name_address(p);
316670eb
A
128
129 *sotc = -1;
130
6d2010ae 131 lck_mtx_lock(tclass_lock);
316670eb 132
6d2010ae 133 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
316670eb
A
134 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
135 strncmp(pname, tfp->tfp_pname,
136 sizeof (tfp->tfp_pname)) == 0)) {
137 *sotc = tfp->tfp_class;
6d2010ae 138 break;
316670eb 139 }
6d2010ae
A
140 }
141
142 lck_mtx_unlock(tclass_lock);
143
316670eb 144 return ((tfp == NULL) ? 0 : 1);
6d2010ae
A
145}
146
147/*
148 * Purge entries with PIDs of exited processes
149 */
150int
151purge_tclass_for_proc(void)
152{
153 int error = 0;
154 struct tclass_for_proc *tfp, *tvar;
155
156 lck_mtx_lock(tclass_lock);
157
158 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
159 proc_t p;
316670eb 160
6d2010ae
A
161 if (tfp->tfp_pid == -1)
162 continue;
163 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
164 tfp_count--;
165 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
316670eb 166
6d2010ae
A
167 _FREE(tfp, M_TEMP);
168 } else {
169 proc_rele(p);
170 }
171 }
172
173 lck_mtx_unlock(tclass_lock);
316670eb
A
174
175 return (error);
6d2010ae
A
176}
177
178/*
179 * Remove one entry
180 * Must be called with tclass_lock held
181 */
182static void
183free_tclass_for_proc(struct tclass_for_proc *tfp)
184{
185 if (tfp == NULL)
186 return;
187 tfp_count--;
188 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
189 _FREE(tfp, M_TEMP);
190}
191
192/*
193 * Remove all entries
194 */
195int
196flush_tclass_for_proc(void)
197{
198 int error = 0;
199 struct tclass_for_proc *tfp, *tvar;
200
201 lck_mtx_lock(tclass_lock);
202
203 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
204 free_tclass_for_proc(tfp);
205 }
316670eb 206
6d2010ae 207 lck_mtx_unlock(tclass_lock);
316670eb
A
208
209 return (error);
6d2010ae
A
210
211}
212
213/*
214 * Must be called with tclass_lock held
215 */
216static struct tclass_for_proc *
316670eb 217alloc_tclass_for_proc(pid_t pid, const char *pname)
6d2010ae
A
218{
219 struct tclass_for_proc *tfp;
316670eb 220
6d2010ae 221 if (pid == -1 && pname == NULL)
316670eb 222 return (NULL);
6d2010ae 223
316670eb 224 tfp = _MALLOC(sizeof (struct tclass_for_proc), M_TEMP, M_NOWAIT|M_ZERO);
6d2010ae 225 if (tfp == NULL)
316670eb
A
226 return (NULL);
227
6d2010ae 228 tfp->tfp_pid = pid;
6d2010ae 229 /*
316670eb 230 * Add per pid entries before per proc name so we can find
6d2010ae
A
231 * a specific instance of a process before the general name base entry.
232 */
233 if (pid != -1) {
234 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
235 } else {
316670eb 236 strlcpy(tfp->tfp_pname, pname, sizeof (tfp->tfp_pname));
6d2010ae
A
237 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
238 }
316670eb 239
6d2010ae
A
240 tfp_count++;
241
316670eb 242 return (tfp);
6d2010ae
A
243}
244
245/*
246 * -1 for tclass means to remove the entry
247 */
316670eb
A
248int
249set_pid_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
250{
251 int error = EINVAL;
252 proc_t p = NULL;
253 struct filedesc *fdp;
254 struct fileproc *fp;
255 struct tclass_for_proc *tfp;
256 int i;
316670eb
A
257 pid_t pid = so_tcdbg->so_tcdbg_pid;
258 int tclass = so_tcdbg->so_tcdbg_tclass;
6d2010ae
A
259
260 p = proc_find(pid);
261 if (p == NULL) {
316670eb 262 printf("%s proc_find(%d) failed\n", __func__, pid);
6d2010ae
A
263 goto done;
264 }
316670eb 265
6d2010ae
A
266 /* Need a tfp */
267 lck_mtx_lock(tclass_lock);
316670eb 268
6d2010ae 269 tfp = find_tfp_by_pid(pid);
316670eb
A
270 if (tfp == NULL) {
271 tfp = alloc_tclass_for_proc(pid, NULL);
6d2010ae 272 if (tfp == NULL) {
316670eb
A
273 lck_mtx_unlock(tclass_lock);
274 error = ENOBUFS;
275 goto done;
6d2010ae
A
276 }
277 }
316670eb
A
278 tfp->tfp_class = tclass;
279
6d2010ae
A
280 lck_mtx_unlock(tclass_lock);
281
282 if (tfp != NULL) {
283 proc_fdlock(p);
316670eb 284
6d2010ae
A
285 fdp = p->p_fd;
286 for (i = 0; i < fdp->fd_nfiles; i++) {
287 struct socket *so;
316670eb 288
6d2010ae 289 fp = fdp->fd_ofiles[i];
316670eb
A
290 if (fp == NULL ||
291 (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
292 fp->f_fglob->fg_type != DTYPE_SOCKET)
6d2010ae 293 continue;
316670eb 294
6d2010ae 295 so = (struct socket *)fp->f_fglob->fg_data;
316670eb
A
296 if (so->so_proto->pr_domain->dom_family != AF_INET &&
297 so->so_proto->pr_domain->dom_family != AF_INET6)
6d2010ae
A
298 continue;
299 socket_lock(so, 1);
316670eb
A
300 if (tclass != -1) {
301 error = so_set_traffic_class(so, tclass);
302 if (error != 0) {
303 printf("%s: so_set_traffic_class"
304 "(so=%p, fd=%d, tclass=%d) "
305 "failed %d\n", __func__,
306 so, i, tclass, error);
307 error = 0;
308 }
6d2010ae 309 }
316670eb 310 socket_unlock(so, 1);
6d2010ae 311 }
316670eb 312
6d2010ae
A
313 proc_fdunlock(p);
314 }
316670eb
A
315
316 error = 0;
6d2010ae
A
317done:
318 if (p != NULL)
319 proc_rele(p);
316670eb
A
320
321 return (error);
6d2010ae
A
322}
323
316670eb
A
324int
325set_pname_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
326{
327 int error = EINVAL;
328 struct tclass_for_proc *tfp;
329
330 lck_mtx_lock(tclass_lock);
316670eb
A
331
332 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
333 if (tfp == NULL) {
334 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
6d2010ae 335 if (tfp == NULL) {
316670eb
A
336 lck_mtx_unlock(tclass_lock);
337 error = ENOBUFS;
338 goto done;
6d2010ae
A
339 }
340 }
316670eb
A
341 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
342
6d2010ae 343 lck_mtx_unlock(tclass_lock);
316670eb
A
344
345 error = 0;
6d2010ae 346done:
316670eb
A
347
348 return (error);
6d2010ae
A
349}
350
316670eb
A
351static int
352flush_pid_tclass(struct so_tcdbg *so_tcdbg)
353{
354 pid_t pid = so_tcdbg->so_tcdbg_pid;
355 int tclass = so_tcdbg->so_tcdbg_tclass;
356 struct filedesc *fdp;
357 int error = EINVAL;
358 proc_t p;
359 int i;
360
361 p = proc_find(pid);
362 if (p == PROC_NULL) {
363 printf("%s proc_find(%d) failed\n", __func__, pid);
364 goto done;
365 }
366
367 proc_fdlock(p);
368 fdp = p->p_fd;
369 for (i = 0; i < fdp->fd_nfiles; i++) {
370 struct socket *so;
371 struct fileproc *fp;
372
373 fp = fdp->fd_ofiles[i];
374 if (fp == NULL ||
375 (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
376 fp->f_fglob->fg_type != DTYPE_SOCKET)
377 continue;
378
379 so = (struct socket *)fp->f_fglob->fg_data;
380 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
381 sizeof (tclass));
382 if (error != 0) {
383 printf("%s: setsockopt(SO_FLUSH) (so=%p, fd=%d, "
384 "tclass=%d) failed %d\n", __func__, so, i, tclass,
385 error);
386 error = 0;
387 }
388 }
389 proc_fdunlock(p);
390
391 error = 0;
392done:
393 if (p != PROC_NULL)
394 proc_rele(p);
395
396 return (error);
397}
398
399int
400get_pid_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
401{
402 int error = EINVAL;
403 proc_t p = NULL;
404 struct tclass_for_proc *tfp;
316670eb
A
405 pid_t pid = so_tcdbg->so_tcdbg_pid;
406
407 so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */
408 so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */
6d2010ae
A
409
410 p = proc_find(pid);
411 if (p == NULL) {
316670eb 412 printf("%s proc_find(%d) failed\n", __func__, pid);
6d2010ae
A
413 goto done;
414 }
316670eb 415
6d2010ae
A
416 /* Need a tfp */
417 lck_mtx_lock(tclass_lock);
316670eb 418
6d2010ae
A
419 tfp = find_tfp_by_pid(pid);
420 if (tfp != NULL) {
316670eb 421 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
6d2010ae
A
422 error = 0;
423 }
424 lck_mtx_unlock(tclass_lock);
425done:
426 if (p != NULL)
427 proc_rele(p);
316670eb
A
428
429 return (error);
6d2010ae
A
430}
431
316670eb
A
432int
433get_pname_tclass(struct so_tcdbg *so_tcdbg)
6d2010ae
A
434{
435 int error = EINVAL;
436 struct tclass_for_proc *tfp;
316670eb
A
437
438 so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */
439 so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */
6d2010ae
A
440
441 /* Need a tfp */
442 lck_mtx_lock(tclass_lock);
316670eb
A
443
444 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
6d2010ae 445 if (tfp != NULL) {
316670eb 446 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
6d2010ae
A
447 error = 0;
448 }
449 lck_mtx_unlock(tclass_lock);
316670eb
A
450
451 return (error);
6d2010ae
A
452}
453
316670eb
A
454static int
455delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
456{
457 int error = EINVAL;
458 pid_t pid = so_tcdbg->so_tcdbg_pid;
459 struct tclass_for_proc *tfp = NULL;
460
461 lck_mtx_lock(tclass_lock);
6d2010ae 462
316670eb
A
463 if (pid != -1)
464 tfp = find_tfp_by_pid(pid);
465 else
466 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
467
468 if (tfp != NULL) {
469 free_tclass_for_proc(tfp);
470 error = 0;
471 }
472
473 lck_mtx_unlock(tclass_lock);
474
475 return (error);
476}
6d2010ae
A
477
478/*
479 * Setting options requires privileges
480 */
316670eb 481__private_extern__ int
6d2010ae
A
482so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
483{
484 int error = 0;
316670eb 485
6d2010ae 486 if ((so->so_state & SS_PRIV) == 0)
316670eb 487 return (EPERM);
6d2010ae
A
488
489 socket_unlock(so, 0);
490
491 switch (so_tcdbg->so_tcdbg_cmd) {
492 case SO_TCDBG_PID:
316670eb 493 error = set_pid_tclass(so_tcdbg);
6d2010ae 494 break;
316670eb 495
6d2010ae 496 case SO_TCDBG_PNAME:
316670eb 497 error = set_pname_tclass(so_tcdbg);
6d2010ae 498 break;
316670eb 499
6d2010ae
A
500 case SO_TCDBG_PURGE:
501 error = purge_tclass_for_proc();
502 break;
316670eb 503
6d2010ae
A
504 case SO_TCDBG_FLUSH:
505 error = flush_tclass_for_proc();
506 break;
316670eb
A
507
508 case SO_TCDBG_DELETE:
509 error = delete_tclass_for_pid_pname(so_tcdbg);
510 break;
511
512 case SO_TCDBG_TCFLUSH_PID:
513 error = flush_pid_tclass(so_tcdbg);
514 break;
515
6d2010ae
A
516 default:
517 error = EINVAL;
518 break;
6d2010ae
A
519 }
520
521 socket_lock(so, 0);
522
316670eb 523 return (error);
6d2010ae
A
524}
525
526/*
527 * Not required to be privileged to get
528 */
316670eb 529__private_extern__ int
6d2010ae
A
530sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
531{
532 int error = 0;
533 struct so_tcdbg so_tcdbg;
534 void *buf = NULL;
535 size_t len = sopt->sopt_valsize;
536
316670eb
A
537 error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg),
538 sizeof (struct so_tcdbg));
6d2010ae 539 if (error != 0)
316670eb
A
540 return (error);
541
6d2010ae 542 sopt->sopt_valsize = len;
316670eb 543
6d2010ae
A
544 socket_unlock(so, 0);
545
546 switch (so_tcdbg.so_tcdbg_cmd) {
547 case SO_TCDBG_PID:
316670eb 548 error = get_pid_tclass(&so_tcdbg);
6d2010ae 549 break;
316670eb 550
6d2010ae 551 case SO_TCDBG_PNAME:
316670eb 552 error = get_pname_tclass(&so_tcdbg);
6d2010ae 553 break;
316670eb 554
6d2010ae
A
555 case SO_TCDBG_COUNT:
556 lck_mtx_lock(tclass_lock);
557 so_tcdbg.so_tcdbg_count = tfp_count;
558 lck_mtx_unlock(tclass_lock);
559 break;
560
561 case SO_TCDBG_LIST: {
562 struct tclass_for_proc *tfp;
563 int n, alloc_count;
564 struct so_tcdbg *ptr;
565
566 lck_mtx_lock(tclass_lock);
567 if ((alloc_count = tfp_count) == 0) {
568 lck_mtx_unlock(tclass_lock);
569 error = EINVAL;
570 break;
571 }
316670eb 572 len = alloc_count * sizeof (struct so_tcdbg);
6d2010ae
A
573 lck_mtx_unlock(tclass_lock);
574
575 buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
576 if (buf == NULL) {
577 error = ENOBUFS;
578 break;
579 }
580
581 lck_mtx_lock(tclass_lock);
582 n = 0;
583 ptr = (struct so_tcdbg *)buf;
584 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
585 if (++n > alloc_count)
586 break;
587 if (tfp->tfp_pid != -1) {
588 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
589 ptr->so_tcdbg_pid = tfp->tfp_pid;
590 } else {
591 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
592 ptr->so_tcdbg_pid = -1;
316670eb
A
593 strlcpy(ptr->so_tcdbg_pname,
594 tfp->tfp_pname,
595 sizeof (ptr->so_tcdbg_pname));
6d2010ae
A
596 }
597 ptr->so_tcdbg_tclass = tfp->tfp_class;
598 ptr++;
599 }
316670eb 600
6d2010ae
A
601 lck_mtx_unlock(tclass_lock);
602 }
603 break;
316670eb 604
6d2010ae
A
605 default:
606 error = EINVAL;
607 break;
6d2010ae
A
608 }
609
610 socket_lock(so, 0);
611
612 if (error == 0) {
613 if (buf == NULL) {
316670eb
A
614 error = sooptcopyout(sopt, &so_tcdbg,
615 sizeof (struct so_tcdbg));
6d2010ae
A
616 } else {
617 error = sooptcopyout(sopt, buf, len);
618 _FREE(buf, M_TEMP);
619 }
620 }
316670eb 621 return (error);
6d2010ae
A
622}
623
624
625__private_extern__ int
626so_set_traffic_class(struct socket *so, int optval)
627{
628 int error = 0;
316670eb
A
629
630 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
6d2010ae
A
631 error = EINVAL;
632 } else {
316670eb
A
633 switch (optval) {
634 case _SO_TC_BK:
635 optval = SO_TC_BK;
636 break;
637 case _SO_TC_VI:
638 optval = SO_TC_VI;
639 break;
640 case _SO_TC_VO:
641 optval = SO_TC_VO;
642 break;
643 default:
644 if (!SO_VALID_TC(optval))
645 error = EINVAL;
646 break;
647 }
648
649 if (error == 0) {
650 int oldval = so->so_traffic_class;
651
652 VERIFY(SO_VALID_TC(optval));
653 so->so_traffic_class = optval;
654
655 if ((INP_SOCKAF(so) == AF_INET ||
656 INP_SOCKAF(so) == AF_INET6) &&
657 INP_SOCKTYPE(so) == SOCK_STREAM) {
658 set_tcp_stream_priority(so);
659
660 /* Set/unset use of Large Receive Offload */
661 so_set_lro(so, optval);
662 }
663
664 if ((INP_SOCKAF(so) == AF_INET ||
665 INP_SOCKAF(so) == AF_INET6) &&
666 optval != oldval && (optval == SO_TC_BK_SYS ||
667 oldval == SO_TC_BK_SYS)) {
668 /*
669 * If the app switches from BK_SYS to something
670 * else, resume the socket if it was suspended.
671 */
672 if (oldval == SO_TC_BK_SYS)
673 inp_reset_fc_state(so->so_pcb);
674
675 SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] "
676 "opportunistic %s\n", so->last_pid,
677 so, INP_SOCKAF(so), INP_SOCKTYPE(so),
678 (optval == SO_TC_BK_SYS) ? "ON" : "OFF"));
679 }
6d2010ae
A
680 }
681 }
316670eb 682 return (error);
6d2010ae
A
683}
684
685__private_extern__ void
686so_set_default_traffic_class(struct socket *so)
687{
316670eb 688 int sotc = -1;
6d2010ae 689
316670eb
A
690 if (tfp_count > 0 &&
691 (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) {
692 get_tclass_for_curr_proc(&sotc);
6d2010ae 693 }
316670eb
A
694
695 so->so_traffic_class = (sotc != -1) ? sotc : SO_TC_BE;
6d2010ae
A
696}
697
316670eb
A
698__private_extern__ int
699so_set_opportunistic(struct socket *so, int optval)
700{
701 return (so_set_traffic_class(so, (optval == 0) ?
702 SO_TC_BE : SO_TC_BK_SYS));
703}
6d2010ae
A
704
705__private_extern__ int
316670eb
A
706so_get_opportunistic(struct socket *so)
707{
708 return (so->so_traffic_class == SO_TC_BK_SYS);
709}
710
711__private_extern__ mbuf_svc_class_t
712mbuf_service_class_from_control(struct mbuf *control)
6d2010ae
A
713{
714 struct cmsghdr *cm;
316670eb
A
715 mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
716
717 for (cm = M_FIRST_CMSGHDR(control); cm != NULL;
718 cm = M_NXT_CMSGHDR(control, cm)) {
6d2010ae
A
719 int tc;
720
316670eb 721 if (cm->cmsg_len < sizeof (struct cmsghdr))
6d2010ae 722 break;
316670eb 723
6d2010ae 724 if (cm->cmsg_level != SOL_SOCKET ||
316670eb 725 cm->cmsg_type != SO_TRAFFIC_CLASS)
6d2010ae 726 continue;
316670eb 727 if (cm->cmsg_len != CMSG_LEN(sizeof (int)))
6d2010ae 728 continue;
316670eb
A
729
730 tc = *(int *)(void *)CMSG_DATA(cm);
731 msc = so_tc2msc(tc);
732 if (MBUF_VALID_SC(msc))
733 break;
6d2010ae 734 }
316670eb
A
735
736 return (msc);
6d2010ae
A
737}
738
739__private_extern__ int
316670eb 740dscp_code_from_mbuf_tclass(mbuf_traffic_class_t mtc)
6d2010ae
A
741{
742 int dscp_code;
316670eb 743
6d2010ae
A
744 switch (mtc) {
745 default:
746 case MBUF_TC_BE:
747 dscp_code = 0;
748 break;
749 case MBUF_TC_BK:
750 dscp_code = 0x08;
751 break;
752 case MBUF_TC_VI:
753 dscp_code = 0x20;
754 break;
755 case MBUF_TC_VO:
756 dscp_code = 0x30;
757 break;
758 }
316670eb
A
759
760 return (dscp_code);
6d2010ae
A
761}
762
763__private_extern__ void
764so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
765{
316670eb 766 uint32_t sotc = m_get_traffic_class(m);
6d2010ae
A
767
768 if (sotc >= SO_TC_STATS_MAX)
769 sotc = SO_TC_BE;
6d2010ae 770
316670eb
A
771 so->so_tc_stats[sotc].rxpackets += 1;
772 so->so_tc_stats[sotc].rxbytes +=
773 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
6d2010ae
A
774}
775
776__private_extern__ void
777set_tcp_stream_priority(struct socket *so)
778{
779 struct tcpcb *tp = intotcpcb(sotoinpcb(so));
316670eb
A
780 int old_cc = tp->tcp_cc_index;
781 int recvbg = IS_TCP_RECV_BG(so);
6d2010ae 782
316670eb
A
783 /*
784 * If the socket was marked as a background socket or if the
785 * traffic class is set to background with traffic class socket
786 * option then make both send and recv side of the stream to be
787 * background. The variable sotcdb which can be set with sysctl
6d2010ae
A
788 * is used to disable these settings for testing.
789 */
316670eb 790 if (soisthrottled(so) || IS_SO_TC_BACKGROUND(so->so_traffic_class)) {
6d2010ae 791 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0) {
316670eb 792 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
6d2010ae
A
793 tcp_set_foreground_cc(so);
794 } else {
316670eb 795 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX)
6d2010ae
A
796 tcp_set_background_cc(so);
797 }
316670eb 798
6d2010ae 799 /* Set receive side background flags */
316670eb
A
800 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0)
801 tcp_clear_recv_bg(so);
802 else
803 tcp_set_recv_bg(so);
6d2010ae 804 } else {
316670eb
A
805 tcp_clear_recv_bg(so);
806 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
6d2010ae
A
807 tcp_set_foreground_cc(so);
808 }
316670eb
A
809
810 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
811 SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] TCP %s send; "
812 "%s recv\n", so->last_pid, so, INP_SOCKAF(so),
813 INP_SOCKTYPE(so),
814 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
815 "background" : "foreground",
816 IS_TCP_RECV_BG(so) ? "background" : "foreground"));
817 }
6d2010ae
A
818}
819
820/*
821 * Set traffic class to an IPv4 or IPv6 packet
822 * - mark the mbuf
823 * - set the DSCP code following the WMM mapping
824 */
825__private_extern__ void
316670eb
A
826set_packet_service_class(struct mbuf *m, struct socket *so,
827 mbuf_svc_class_t in_msc, u_int32_t flags)
6d2010ae 828{
316670eb
A
829 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
830 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
6d2010ae
A
831 struct ip *ip = mtod(m, struct ip *);
832#if INET6
833 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
834#endif /* INET6 */
316670eb
A
835 int isipv6 = ((flags & PKT_SCF_IPV6) != 0) ? 1 : 0;
836
6d2010ae
A
837 if (!(m->m_flags & M_PKTHDR))
838 return;
316670eb
A
839
840 /*
6d2010ae
A
841 * Here is the precedence:
842 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
843 * 2) Traffic class passed via ancillary data to sendmsdg(2)
844 * 3) Traffic class socket option last
845 */
316670eb
A
846 if (in_msc != MBUF_SC_UNSPEC) {
847 if (in_msc >= MBUF_SC_BE && in_msc <= MBUF_SC_CTL)
848 msc = in_msc;
6d2010ae 849 } else {
316670eb
A
850 VERIFY(SO_VALID_TC(so->so_traffic_class));
851 msc = so_tc2msc(so->so_traffic_class);
852 /* Assert because tc must have been valid */
853 VERIFY(MBUF_VALID_SC(msc));
6d2010ae 854 }
316670eb
A
855
856 /*
857 * If TRAFFIC_MGT_SO_BACKGROUND is set, depress the priority.
858 */
859 if (soisthrottled(so) && !IS_MBUF_SC_BACKGROUND(msc))
860 msc = MBUF_SC_BK;
861
6d2010ae 862 /*
316670eb 863 * Set the traffic class in the mbuf packet header svc field
6d2010ae 864 */
316670eb 865 if (sotcdb & SOTCDB_NO_MTC)
6d2010ae 866 goto no_mbtc;
316670eb
A
867
868 /* Elevate service class if the packet is a pure TCP ACK.
869 * We can do this only when the flow is not a background
870 * flow and the outgoing interface supports
871 * transmit-start model.
872 */
873 if (!IS_MBUF_SC_BACKGROUND(msc) && (flags & PKT_SCF_TCP_ACK))
874 msc = MBUF_SC_CTL;
875
876 (void) m_set_service_class(m, msc);
877
878 /*
879 * Set the privileged traffic auxiliary flag if applicable, or clear it.
880 */
881 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
882 msc != MBUF_SC_UNSPEC)
883 m->m_pkthdr.aux_flags |= MAUXF_PRIO_PRIVILEGED;
884 else
885 m->m_pkthdr.aux_flags &= ~MAUXF_PRIO_PRIVILEGED;
886
6d2010ae
A
887no_mbtc:
888 /*
316670eb 889 * Quick exit when best effort
6d2010ae 890 */
316670eb 891 if (msc == MBUF_SC_BE)
6d2010ae 892 goto no_dscp;
316670eb 893
6d2010ae 894 /*
316670eb
A
895 * The default behavior is for the networking stack to not set the
896 * DSCP code, based on SOTCDB_NO_DSCP being set. If the flag is
897 * cleared, set the DSCP code in IPv4 or IPv6 header only for local
898 * traffic, if it is not already set. <rdar://problem/11277343>
6d2010ae 899 */
316670eb 900 if (sotcdb & SOTCDB_NO_DSCP)
6d2010ae 901 goto no_dscp;
316670eb 902
6d2010ae 903 /*
316670eb
A
904 * Test if a IP TOS or IPV6 TCLASS has already been set
905 * on the socket or the raw packet.
6d2010ae 906 */
316670eb 907 if (!(sotcdb & SOTCDB_NO_DSCPTST)) {
6d2010ae 908#if INET6
316670eb
A
909 if (isipv6) {
910 if ((so->so_type == SOCK_RAW &&
911 (ip6->ip6_flow & htonl(0xff << 20)) != 0) ||
912 (inp->in6p_outputopts &&
913 inp->in6p_outputopts->ip6po_tclass != -1))
6d2010ae 914 goto no_dscp;
316670eb 915 } else
6d2010ae 916#endif /* INET6 */
316670eb
A
917 if ((so->so_type == SOCK_RAW &&
918 (inp->inp_flags & INP_HDRINCL)) ||
919 inp->inp_ip_tos != 0)
920 goto no_dscp;
6d2010ae 921 }
316670eb 922
6d2010ae
A
923 /*
924 * Test if destination is local
925 */
316670eb 926 if (!(sotcdb & SOTCDB_NO_LCLTST)) {
6d2010ae 927 int islocal = 0;
316670eb 928 struct rtentry *rt = inp->inp_route.ro_rt;
6d2010ae
A
929
930 if (so->so_type == SOCK_STREAM) {
316670eb 931 if (intotcpcb(inp)->t_flags & TF_LOCAL)
6d2010ae 932 islocal = 1;
316670eb
A
933 } else if (rt != NULL &&
934 (rt->rt_gateway->sa_family == AF_LINK ||
935 (rt->rt_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))) {
936 if (!(rt->rt_ifp->if_flags & IFF_POINTOPOINT))
6d2010ae 937 islocal = 1;
316670eb
A
938 } else
939#if INET6
940 if (isipv6 && in6addr_local(&ip6->ip6_dst)) {
941 islocal = 1;
942 } else
6d2010ae 943#endif /* INET6 */
316670eb
A
944 if (inaddr_local(ip->ip_dst)) {
945 islocal = 1;
6d2010ae
A
946 }
947 if (islocal == 0)
948 goto no_dscp;
949 }
950
951#if INET6
952 if (isipv6)
316670eb
A
953 ip6->ip6_flow |= htonl(dscp_code_from_mbuf_tclass(
954 m_get_traffic_class(m)) << 20);
6d2010ae
A
955 else
956#endif /* INET6 */
316670eb
A
957 ip->ip_tos |= dscp_code_from_mbuf_tclass(
958 m_get_traffic_class(m)) << 2;
959
6d2010ae
A
960no_dscp:
961 /*
962 * For TCP with background traffic class switch CC algo based on sysctl
963 */
316670eb 964 if (so->so_type == SOCK_STREAM)
6d2010ae 965 set_tcp_stream_priority(so);
316670eb
A
966
967 so_tc_update_stats(m, so, msc);
968}
969
970__private_extern__ void
971so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
972{
973 mbuf_traffic_class_t mtc;
974
6d2010ae
A
975 /*
976 * Assume socket and mbuf traffic class values are the same
316670eb
A
977 * Also assume the socket lock is held. Note that the stats
978 * at the socket layer are reduced down to the legacy traffic
979 * classes; we could/should potentially expand so_tc_stats[].
6d2010ae 980 */
316670eb
A
981 mtc = MBUF_SC2TC(msc);
982 VERIFY(mtc < SO_TC_STATS_MAX);
6d2010ae
A
983 so->so_tc_stats[mtc].txpackets += 1;
984 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
6d2010ae
A
985}
986
987__private_extern__ void
988socket_tclass_init(void)
989{
990 tclass_lck_grp_attr = lck_grp_attr_alloc_init();
991 tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
992 tclass_lck_attr = lck_attr_alloc_init();
316670eb
A
993 lck_mtx_init(tclass_lock, tclass_lck_grp, tclass_lck_attr);
994}
995
996__private_extern__ mbuf_svc_class_t
997so_tc2msc(int tc)
998{
999 mbuf_svc_class_t msc;
1000
1001 switch (tc) {
1002 case SO_TC_BK_SYS:
1003 msc = MBUF_SC_BK_SYS;
1004 break;
1005 case SO_TC_BK:
1006 case _SO_TC_BK:
1007 msc = MBUF_SC_BK;
1008 break;
1009 case SO_TC_BE:
1010 msc = MBUF_SC_BE;
1011 break;
1012 case SO_TC_RD:
1013 msc = MBUF_SC_RD;
1014 break;
1015 case SO_TC_OAM:
1016 msc = MBUF_SC_OAM;
1017 break;
1018 case SO_TC_AV:
1019 msc = MBUF_SC_AV;
1020 break;
1021 case SO_TC_RV:
1022 msc = MBUF_SC_RV;
1023 break;
1024 case SO_TC_VI:
1025 case _SO_TC_VI:
1026 msc = MBUF_SC_VI;
1027 break;
1028 case SO_TC_VO:
1029 case _SO_TC_VO:
1030 msc = MBUF_SC_VO;
1031 break;
1032 case SO_TC_CTL:
1033 msc = MBUF_SC_CTL;
1034 break;
1035 case SO_TC_ALL:
1036 default:
1037 msc = MBUF_SC_UNSPEC;
1038 break;
6d2010ae 1039 }
316670eb
A
1040
1041 return (msc);
6d2010ae
A
1042}
1043
316670eb
A
1044__private_extern__ int
1045so_svc2tc(mbuf_svc_class_t svc)
1046{
1047 switch (svc) {
1048 case MBUF_SC_UNSPEC:
1049 return SO_TC_BE;
1050 case MBUF_SC_BK_SYS:
1051 return SO_TC_BK_SYS;
1052 case MBUF_SC_BK:
1053 return SO_TC_BK;
1054 case MBUF_SC_BE:
1055 return SO_TC_BE;
1056 case MBUF_SC_RD:
1057 return SO_TC_RD;
1058 case MBUF_SC_OAM:
1059 return SO_TC_OAM;
1060 case MBUF_SC_AV:
1061 return SO_TC_AV;
1062 case MBUF_SC_RV:
1063 return SO_TC_RV;
1064 case MBUF_SC_VI:
1065 return SO_TC_VI;
1066 case MBUF_SC_VO:
1067 return SO_TC_VO;
1068 case MBUF_SC_CTL:
1069 return SO_TC_CTL;
1070 default:
1071 return SO_TC_BE;
1072 }
1073}
1074
1075/*
1076 * LRO is turned on for AV streaming and background classes.
1077 */
1078static void
1079so_set_lro(struct socket *so, int optval)
1080{
1081 if ((optval == SO_TC_BK) ||
1082 (optval == SO_TC_BK_SYS) ||
1083 (optval == SO_TC_AV)) {
1084 so->so_flags |= SOF_USELRO;
1085 } else {
1086 so->so_flags &= ~SOF_USELRO;
1087 }
1088}
6d2010ae 1089