2 * Copyright (c) 2009-2014 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
41 #include <sys/queue.h>
44 #include <net/route.h>
46 #include <netinet/in.h>
47 #include <netinet/in_var.h>
48 #include <netinet/in_pcb.h>
49 #include <netinet/ip.h>
50 #include <netinet/ip_var.h>
51 #include <netinet/ip6.h>
52 #include <netinet6/ip6_var.h>
53 #include <netinet/udp.h>
54 #include <netinet/udp_var.h>
55 #include <netinet/tcp.h>
56 #include <netinet/tcp_var.h>
57 #include <netinet/tcp_cc.h>
58 #include <netinet/lro_ext.h>
60 extern char *proc_name_address(void *p
);
62 static int tfp_count
= 0;
64 static TAILQ_HEAD(, tclass_for_proc
) tfp_head
=
65 TAILQ_HEAD_INITIALIZER(tfp_head
);
67 struct tclass_for_proc
{
68 TAILQ_ENTRY(tclass_for_proc
) tfp_link
;
71 char tfp_pname
[MAXCOMLEN
+ 1];
74 static int dscp_code_from_mbuf_tclass(mbuf_traffic_class_t
);
75 static int get_pid_tclass(struct so_tcdbg
*);
76 static int get_pname_tclass(struct so_tcdbg
*);
77 static int set_pid_tclass(struct so_tcdbg
*);
78 static int set_pname_tclass(struct so_tcdbg
*);
79 static int flush_pid_tclass(struct so_tcdbg
*);
80 static int purge_tclass_for_proc(void);
81 static int flush_tclass_for_proc(void);
82 int get_tclass_for_curr_proc(int *);
84 static lck_grp_attr_t
*tclass_lck_grp_attr
= NULL
; /* mutex group attributes */
85 static lck_grp_t
*tclass_lck_grp
= NULL
; /* mutex group definition */
86 static lck_attr_t
*tclass_lck_attr
= NULL
; /* mutex attributes */
87 decl_lck_mtx_data(static, tclass_lock_data
);
88 static lck_mtx_t
*tclass_lock
= &tclass_lock_data
;
91 * If there is no foreground activity on the interface for TCP_BG_SWITCH_TIME
92 * seconds, the background connections can switch to foreground TCP congestion control.
95 #define TCP_BG_SWITCH_TIME 2
98 * Must be called with tclass_lock held
100 static struct tclass_for_proc
*
101 find_tfp_by_pid(pid_t pid
)
103 struct tclass_for_proc
*tfp
;
105 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
106 if (tfp
->tfp_pid
== pid
)
113 * Must be called with tclass_lock held
115 static struct tclass_for_proc
*
116 find_tfp_by_pname(const char *pname
)
118 struct tclass_for_proc
*tfp
;
120 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
121 if (strncmp(pname
, tfp
->tfp_pname
,
122 sizeof (tfp
->tfp_pname
)) == 0)
128 __private_extern__
int
129 get_tclass_for_curr_proc(int *sotc
)
131 struct tclass_for_proc
*tfp
= NULL
;
132 proc_t p
= current_proc(); /* Not ref counted */
133 pid_t pid
= proc_pid(p
);
134 char *pname
= proc_name_address(p
);
138 lck_mtx_lock(tclass_lock
);
140 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
141 if ((tfp
->tfp_pid
== pid
) || (tfp
->tfp_pid
== -1 &&
142 strncmp(pname
, tfp
->tfp_pname
,
143 sizeof (tfp
->tfp_pname
)) == 0)) {
144 *sotc
= tfp
->tfp_class
;
149 lck_mtx_unlock(tclass_lock
);
151 return ((tfp
== NULL
) ? 0 : 1);
155 * Purge entries with PIDs of exited processes
158 purge_tclass_for_proc(void)
161 struct tclass_for_proc
*tfp
, *tvar
;
163 lck_mtx_lock(tclass_lock
);
165 TAILQ_FOREACH_SAFE(tfp
, &tfp_head
, tfp_link
, tvar
) {
168 if (tfp
->tfp_pid
== -1)
170 if ((p
= proc_find(tfp
->tfp_pid
)) == NULL
) {
172 TAILQ_REMOVE(&tfp_head
, tfp
, tfp_link
);
180 lck_mtx_unlock(tclass_lock
);
187 * Must be called with tclass_lock held
190 free_tclass_for_proc(struct tclass_for_proc
*tfp
)
195 TAILQ_REMOVE(&tfp_head
, tfp
, tfp_link
);
203 flush_tclass_for_proc(void)
206 struct tclass_for_proc
*tfp
, *tvar
;
208 lck_mtx_lock(tclass_lock
);
210 TAILQ_FOREACH_SAFE(tfp
, &tfp_head
, tfp_link
, tvar
) {
211 free_tclass_for_proc(tfp
);
214 lck_mtx_unlock(tclass_lock
);
221 * Must be called with tclass_lock held
223 static struct tclass_for_proc
*
224 alloc_tclass_for_proc(pid_t pid
, const char *pname
)
226 struct tclass_for_proc
*tfp
;
228 if (pid
== -1 && pname
== NULL
)
231 tfp
= _MALLOC(sizeof (struct tclass_for_proc
), M_TEMP
, M_NOWAIT
|M_ZERO
);
237 * Add per-pid entries before per-process-name entries so we can find
238 * a specific instance of a process before the general name-based entry.
241 TAILQ_INSERT_HEAD(&tfp_head
, tfp
, tfp_link
);
243 strlcpy(tfp
->tfp_pname
, pname
, sizeof (tfp
->tfp_pname
));
244 TAILQ_INSERT_TAIL(&tfp_head
, tfp
, tfp_link
);
253 * -1 for tclass means to remove the entry
256 set_pid_tclass(struct so_tcdbg
*so_tcdbg
)
260 struct filedesc
*fdp
;
262 struct tclass_for_proc
*tfp
;
264 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
265 int tclass
= so_tcdbg
->so_tcdbg_tclass
;
269 printf("%s proc_find(%d) failed\n", __func__
, pid
);
274 lck_mtx_lock(tclass_lock
);
276 tfp
= find_tfp_by_pid(pid
);
278 tfp
= alloc_tclass_for_proc(pid
, NULL
);
280 lck_mtx_unlock(tclass_lock
);
285 tfp
->tfp_class
= tclass
;
287 lck_mtx_unlock(tclass_lock
);
293 for (i
= 0; i
< fdp
->fd_nfiles
; i
++) {
296 fp
= fdp
->fd_ofiles
[i
];
298 (fdp
->fd_ofileflags
[i
] & UF_RESERVED
) != 0 ||
299 FILEGLOB_DTYPE(fp
->f_fglob
) != DTYPE_SOCKET
)
302 so
= (struct socket
*)fp
->f_fglob
->fg_data
;
303 if (SOCK_DOM(so
) != PF_INET
&& SOCK_DOM(so
) != PF_INET6
)
307 error
= so_set_traffic_class(so
, tclass
);
309 printf("%s: so_set_traffic_class"
310 "(so=0x%llx, fd=%d, tclass=%d) "
311 "failed %d\n", __func__
,
312 (uint64_t)VM_KERNEL_ADDRPERM(so
),
317 socket_unlock(so
, 1);
332 set_pname_tclass(struct so_tcdbg
*so_tcdbg
)
335 struct tclass_for_proc
*tfp
;
337 lck_mtx_lock(tclass_lock
);
339 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
341 tfp
= alloc_tclass_for_proc(-1, so_tcdbg
->so_tcdbg_pname
);
343 lck_mtx_unlock(tclass_lock
);
348 tfp
->tfp_class
= so_tcdbg
->so_tcdbg_tclass
;
350 lck_mtx_unlock(tclass_lock
);
359 flush_pid_tclass(struct so_tcdbg
*so_tcdbg
)
361 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
362 int tclass
= so_tcdbg
->so_tcdbg_tclass
;
363 struct filedesc
*fdp
;
369 if (p
== PROC_NULL
) {
370 printf("%s proc_find(%d) failed\n", __func__
, pid
);
376 for (i
= 0; i
< fdp
->fd_nfiles
; i
++) {
380 fp
= fdp
->fd_ofiles
[i
];
382 (fdp
->fd_ofileflags
[i
] & UF_RESERVED
) != 0 ||
383 FILEGLOB_DTYPE(fp
->f_fglob
) != DTYPE_SOCKET
)
386 so
= (struct socket
*)fp
->f_fglob
->fg_data
;
387 error
= sock_setsockopt(so
, SOL_SOCKET
, SO_FLUSH
, &tclass
,
390 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
391 "tclass=%d) failed %d\n", __func__
,
392 (uint64_t)VM_KERNEL_ADDRPERM(so
), i
, tclass
,
408 get_pid_tclass(struct so_tcdbg
*so_tcdbg
)
412 struct tclass_for_proc
*tfp
;
413 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
415 so_tcdbg
->so_tcdbg_tclass
= -1; /* Means not set */
416 so_tcdbg
->so_tcdbg_opportunistic
= -1; /* Means not set */
420 printf("%s proc_find(%d) failed\n", __func__
, pid
);
425 lck_mtx_lock(tclass_lock
);
427 tfp
= find_tfp_by_pid(pid
);
429 so_tcdbg
->so_tcdbg_tclass
= tfp
->tfp_class
;
432 lck_mtx_unlock(tclass_lock
);
441 get_pname_tclass(struct so_tcdbg
*so_tcdbg
)
444 struct tclass_for_proc
*tfp
;
446 so_tcdbg
->so_tcdbg_tclass
= -1; /* Means not set */
447 so_tcdbg
->so_tcdbg_opportunistic
= -1; /* Means not set */
450 lck_mtx_lock(tclass_lock
);
452 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
454 so_tcdbg
->so_tcdbg_tclass
= tfp
->tfp_class
;
457 lck_mtx_unlock(tclass_lock
);
463 delete_tclass_for_pid_pname(struct so_tcdbg
*so_tcdbg
)
466 pid_t pid
= so_tcdbg
->so_tcdbg_pid
;
467 struct tclass_for_proc
*tfp
= NULL
;
469 lck_mtx_lock(tclass_lock
);
472 tfp
= find_tfp_by_pid(pid
);
474 tfp
= find_tfp_by_pname(so_tcdbg
->so_tcdbg_pname
);
477 free_tclass_for_proc(tfp
);
481 lck_mtx_unlock(tclass_lock
);
487 * Setting options requires privileges
489 __private_extern__
int
490 so_set_tcdbg(struct socket
*so
, struct so_tcdbg
*so_tcdbg
)
494 if ((so
->so_state
& SS_PRIV
) == 0)
497 socket_unlock(so
, 0);
499 switch (so_tcdbg
->so_tcdbg_cmd
) {
501 error
= set_pid_tclass(so_tcdbg
);
505 error
= set_pname_tclass(so_tcdbg
);
509 error
= purge_tclass_for_proc();
513 error
= flush_tclass_for_proc();
516 case SO_TCDBG_DELETE
:
517 error
= delete_tclass_for_pid_pname(so_tcdbg
);
520 case SO_TCDBG_TCFLUSH_PID
:
521 error
= flush_pid_tclass(so_tcdbg
);
535 * Not required to be privileged to get
537 __private_extern__
int
538 sogetopt_tcdbg(struct socket
*so
, struct sockopt
*sopt
)
541 struct so_tcdbg so_tcdbg
;
543 size_t len
= sopt
->sopt_valsize
;
545 error
= sooptcopyin(sopt
, &so_tcdbg
, sizeof (struct so_tcdbg
),
546 sizeof (struct so_tcdbg
));
550 sopt
->sopt_valsize
= len
;
552 socket_unlock(so
, 0);
554 switch (so_tcdbg
.so_tcdbg_cmd
) {
556 error
= get_pid_tclass(&so_tcdbg
);
560 error
= get_pname_tclass(&so_tcdbg
);
564 lck_mtx_lock(tclass_lock
);
565 so_tcdbg
.so_tcdbg_count
= tfp_count
;
566 lck_mtx_unlock(tclass_lock
);
569 case SO_TCDBG_LIST
: {
570 struct tclass_for_proc
*tfp
;
572 struct so_tcdbg
*ptr
;
574 lck_mtx_lock(tclass_lock
);
575 if ((alloc_count
= tfp_count
) == 0) {
576 lck_mtx_unlock(tclass_lock
);
580 len
= alloc_count
* sizeof (struct so_tcdbg
);
581 lck_mtx_unlock(tclass_lock
);
583 buf
= _MALLOC(len
, M_TEMP
, M_WAITOK
| M_ZERO
);
589 lck_mtx_lock(tclass_lock
);
591 ptr
= (struct so_tcdbg
*)buf
;
592 TAILQ_FOREACH(tfp
, &tfp_head
, tfp_link
) {
593 if (++n
> alloc_count
)
595 if (tfp
->tfp_pid
!= -1) {
596 ptr
->so_tcdbg_cmd
= SO_TCDBG_PID
;
597 ptr
->so_tcdbg_pid
= tfp
->tfp_pid
;
599 ptr
->so_tcdbg_cmd
= SO_TCDBG_PNAME
;
600 ptr
->so_tcdbg_pid
= -1;
601 strlcpy(ptr
->so_tcdbg_pname
,
603 sizeof (ptr
->so_tcdbg_pname
));
605 ptr
->so_tcdbg_tclass
= tfp
->tfp_class
;
609 lck_mtx_unlock(tclass_lock
);
622 error
= sooptcopyout(sopt
, &so_tcdbg
,
623 sizeof (struct so_tcdbg
));
625 error
= sooptcopyout(sopt
, buf
, len
);
633 __private_extern__
int
634 so_set_traffic_class(struct socket
*so
, int optval
)
638 if (optval
< SO_TC_BE
|| optval
> SO_TC_CTL
) {
652 if (!SO_VALID_TC(optval
))
658 int oldval
= so
->so_traffic_class
;
660 VERIFY(SO_VALID_TC(optval
));
661 so
->so_traffic_class
= optval
;
663 if ((SOCK_DOM(so
) == PF_INET
||
664 SOCK_DOM(so
) == PF_INET6
) &&
665 SOCK_TYPE(so
) == SOCK_STREAM
)
666 set_tcp_stream_priority(so
);
668 if ((SOCK_DOM(so
) == PF_INET
||
669 SOCK_DOM(so
) == PF_INET6
) &&
670 optval
!= oldval
&& (optval
== SO_TC_BK_SYS
||
671 oldval
== SO_TC_BK_SYS
)) {
673 * If the app switches from BK_SYS to something
674 * else, resume the socket if it was suspended.
676 if (oldval
== SO_TC_BK_SYS
)
677 inp_reset_fc_state(so
->so_pcb
);
679 SOTHROTTLELOG(("throttle[%d]: so 0x%llx "
680 "[%d,%d] opportunistic %s\n", so
->last_pid
,
681 (uint64_t)VM_KERNEL_ADDRPERM(so
),
682 SOCK_DOM(so
), SOCK_TYPE(so
),
683 (optval
== SO_TC_BK_SYS
) ? "ON" : "OFF"));
690 __private_extern__
void
691 so_set_default_traffic_class(struct socket
*so
)
696 (SOCK_DOM(so
) == PF_INET
|| SOCK_DOM(so
) == PF_INET6
)) {
697 get_tclass_for_curr_proc(&sotc
);
700 so
->so_traffic_class
= (sotc
!= -1) ? sotc
: SO_TC_BE
;
703 __private_extern__
int
704 so_set_opportunistic(struct socket
*so
, int optval
)
706 return (so_set_traffic_class(so
, (optval
== 0) ?
707 SO_TC_BE
: SO_TC_BK_SYS
));
710 __private_extern__
int
711 so_get_opportunistic(struct socket
*so
)
713 return (so
->so_traffic_class
== SO_TC_BK_SYS
);
716 __private_extern__ mbuf_svc_class_t
717 mbuf_service_class_from_control(struct mbuf
*control
)
720 mbuf_svc_class_t msc
= MBUF_SC_UNSPEC
;
722 for (cm
= M_FIRST_CMSGHDR(control
); cm
!= NULL
;
723 cm
= M_NXT_CMSGHDR(control
, cm
)) {
726 if (cm
->cmsg_len
< sizeof (struct cmsghdr
))
729 if (cm
->cmsg_level
!= SOL_SOCKET
||
730 cm
->cmsg_type
!= SO_TRAFFIC_CLASS
)
732 if (cm
->cmsg_len
!= CMSG_LEN(sizeof (int)))
735 tc
= *(int *)(void *)CMSG_DATA(cm
);
737 if (MBUF_VALID_SC(msc
))
744 __private_extern__
int
745 dscp_code_from_mbuf_tclass(mbuf_traffic_class_t mtc
)
768 __private_extern__
void
769 so_recv_data_stat(struct socket
*so
, struct mbuf
*m
, size_t off
)
771 uint32_t sotc
= m_get_traffic_class(m
);
773 if (sotc
>= SO_TC_STATS_MAX
)
776 so
->so_tc_stats
[sotc
].rxpackets
+= 1;
777 so
->so_tc_stats
[sotc
].rxbytes
+=
778 ((m
->m_flags
& M_PKTHDR
) ? m
->m_pkthdr
.len
: 0) + off
;
781 __private_extern__
void
782 so_inc_recv_data_stat(struct socket
*so
, size_t pkts
, size_t bytes
, uint32_t tc
)
784 if (tc
>= SO_TC_STATS_MAX
)
787 so
->so_tc_stats
[tc
].rxpackets
+= pkts
;
788 so
->so_tc_stats
[tc
].rxbytes
+=bytes
;
790 __private_extern__
void
791 set_tcp_stream_priority(struct socket
*so
)
793 struct inpcb
*inp
= sotoinpcb(so
);
794 struct tcpcb
*tp
= intotcpcb(inp
);
795 struct ifnet
*outifp
;
796 u_char old_cc
= tp
->tcp_cc_index
;
797 int recvbg
= IS_TCP_RECV_BG(so
);
798 bool is_local
, fg_active
= false;
801 VERIFY((SOCK_CHECK_DOM(so
, PF_INET
)
802 || SOCK_CHECK_DOM(so
, PF_INET6
))
803 && SOCK_CHECK_TYPE(so
, SOCK_STREAM
)
804 && SOCK_CHECK_PROTO(so
, IPPROTO_TCP
));
806 /* Return if the socket is in a terminal state */
807 if (inp
->inp_state
== INPCB_STATE_DEAD
)
810 outifp
= inp
->inp_last_outifp
;
811 uptime
= net_uptime();
814 * If the socket was marked as a background socket or if the
815 * traffic class is set to background with traffic class socket
816 * option then make both send and recv side of the stream to be
817 * background. The variable sotcdb which can be set with sysctl
818 * is used to disable these settings for testing.
820 if (soissrcbackground(so
)) {
821 if (outifp
== NULL
|| (outifp
->if_flags
& IFF_LOOPBACK
))
826 /* Check if there has been recent foreground activity */
827 if ((outifp
!= NULL
&&
828 outifp
->if_fg_sendts
> 0 &&
829 (int)(uptime
- outifp
->if_fg_sendts
) <=
830 TCP_BG_SWITCH_TIME
) ||
831 net_io_policy_throttled
)
835 * If the interface that the connection is using is
836 * loopback, do not use background congestion
839 * If there has been recent foreground activity or if
840 * there was an indication that a foreground application
841 * is going to use networking (net_io_policy_throttled),
842 * switch the background streams to use background
843 * congestion control algorithm. Otherwise, even background
844 * flows can move into foreground.
846 if ((sotcdb
& SOTCDB_NO_SENDTCPBG
) != 0 ||
847 is_local
|| !fg_active
) {
848 if (old_cc
== TCP_CC_ALGO_BACKGROUND_INDEX
)
849 tcp_set_foreground_cc(so
);
851 if (old_cc
!= TCP_CC_ALGO_BACKGROUND_INDEX
)
852 tcp_set_background_cc(so
);
855 /* Set receive side background flags */
856 if ((sotcdb
& SOTCDB_NO_RECVTCPBG
) != 0 ||
857 is_local
|| !fg_active
)
858 tcp_clear_recv_bg(so
);
862 tcp_clear_recv_bg(so
);
863 if (old_cc
== TCP_CC_ALGO_BACKGROUND_INDEX
)
864 tcp_set_foreground_cc(so
);
867 if (old_cc
!= tp
->tcp_cc_index
|| recvbg
!= IS_TCP_RECV_BG(so
)) {
868 SOTHROTTLELOG(("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
869 "%s recv\n", so
->last_pid
, (uint64_t)VM_KERNEL_ADDRPERM(so
),
870 SOCK_DOM(so
), SOCK_TYPE(so
),
871 (tp
->tcp_cc_index
== TCP_CC_ALGO_BACKGROUND_INDEX
) ?
872 "background" : "foreground",
873 IS_TCP_RECV_BG(so
) ? "background" : "foreground"));
878 * Set the traffic class on an IPv4 or IPv6 packet
880 * - set the DSCP code following the WMM mapping
882 __private_extern__
void
883 set_packet_service_class(struct mbuf
*m
, struct socket
*so
,
884 mbuf_svc_class_t in_msc
, u_int32_t flags
)
886 mbuf_svc_class_t msc
= MBUF_SC_BE
; /* Best effort by default */
887 struct inpcb
*inp
= sotoinpcb(so
); /* in6pcb and inpcb are the same */
888 struct ip
*ip
= mtod(m
, struct ip
*);
890 struct ip6_hdr
*ip6
= mtod(m
, struct ip6_hdr
*);
892 int isipv6
= ((flags
& PKT_SCF_IPV6
) != 0) ? 1 : 0;
894 if (!(m
->m_flags
& M_PKTHDR
))
898 * Here is the precedence:
899 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
900 * 2) Traffic class passed via ancillary data to sendmsg(2)
901 * 3) Traffic class socket option last
903 if (in_msc
!= MBUF_SC_UNSPEC
) {
904 if (in_msc
>= MBUF_SC_BE
&& in_msc
<= MBUF_SC_CTL
)
907 VERIFY(SO_VALID_TC(so
->so_traffic_class
));
908 msc
= so_tc2msc(so
->so_traffic_class
);
909 /* Assert because tc must have been valid */
910 VERIFY(MBUF_VALID_SC(msc
));
914 * If TRAFFIC_MGT_SO_BACKGROUND is set, depress the priority.
916 if (soisthrottled(so
) && !IS_MBUF_SC_BACKGROUND(msc
))
919 if (soissrcbackground(so
))
920 m
->m_pkthdr
.pkt_flags
|= PKTF_SO_BACKGROUND
;
922 * Set the traffic class in the mbuf packet header svc field
924 if (sotcdb
& SOTCDB_NO_MTC
)
927 /* Elevate service class if the packet is a pure TCP ACK.
928 * We can do this only when the flow is not a background
929 * flow and the outgoing interface supports
930 * transmit-start model.
932 if (!IS_MBUF_SC_BACKGROUND(msc
) && (flags
& PKT_SCF_TCP_ACK
))
935 (void) m_set_service_class(m
, msc
);
938 * Set the privileged traffic auxiliary flag if applicable,
941 if (!(sotcdb
& SOTCDB_NO_PRIVILEGED
) && soisprivilegedtraffic(so
) &&
942 msc
!= MBUF_SC_UNSPEC
)
943 m
->m_pkthdr
.pkt_flags
|= PKTF_PRIO_PRIVILEGED
;
945 m
->m_pkthdr
.pkt_flags
&= ~PKTF_PRIO_PRIVILEGED
;
949 * Quick exit when best effort
951 if (msc
== MBUF_SC_BE
)
955 * The default behavior is for the networking stack to not set the
956 * DSCP code, based on SOTCDB_NO_DSCP being set. If the flag is
957 * cleared, set the DSCP code in IPv4 or IPv6 header only for local
958 * traffic, if it is not already set. <rdar://problem/11277343>
960 if (sotcdb
& SOTCDB_NO_DSCP
)
964 * Test if an IP TOS or IPv6 TCLASS has already been set
965 * on the socket or the raw packet.
967 if (!(sotcdb
& SOTCDB_NO_DSCPTST
)) {
970 if ((so
->so_type
== SOCK_RAW
&&
971 (ip6
->ip6_flow
& htonl(0xff << 20)) != 0) ||
972 (inp
->in6p_outputopts
&&
973 inp
->in6p_outputopts
->ip6po_tclass
!= -1))
977 if ((so
->so_type
== SOCK_RAW
&&
978 (inp
->inp_flags
& INP_HDRINCL
)) ||
979 inp
->inp_ip_tos
!= 0)
984 * Test if destination is local
986 if (!(sotcdb
& SOTCDB_NO_LCLTST
)) {
988 struct rtentry
*rt
= inp
->inp_route
.ro_rt
;
990 if (so
->so_type
== SOCK_STREAM
) {
991 if (intotcpcb(inp
)->t_flags
& TF_LOCAL
)
993 } else if (rt
!= NULL
&&
994 (rt
->rt_gateway
->sa_family
== AF_LINK
||
995 (rt
->rt_ifp
->if_flags
& (IFF_LOOPBACK
|IFF_POINTOPOINT
)))) {
996 if (!(rt
->rt_ifp
->if_flags
& IFF_POINTOPOINT
))
1000 if (isipv6
&& in6addr_local(&ip6
->ip6_dst
)) {
1004 if (inaddr_local(ip
->ip_dst
)) {
1013 ip6
->ip6_flow
|= htonl(dscp_code_from_mbuf_tclass(
1014 m_get_traffic_class(m
)) << 20);
1017 ip
->ip_tos
|= dscp_code_from_mbuf_tclass(
1018 m_get_traffic_class(m
)) << 2;
1022 * For TCP with background traffic class switch CC algo based on sysctl
1024 if (so
->so_type
== SOCK_STREAM
)
1025 set_tcp_stream_priority(so
);
1027 so_tc_update_stats(m
, so
, msc
);
1030 __private_extern__
void
1031 so_tc_update_stats(struct mbuf
*m
, struct socket
*so
, mbuf_svc_class_t msc
)
1033 mbuf_traffic_class_t mtc
;
1036 * Assume socket and mbuf traffic class values are the same
1037 * Also assume the socket lock is held. Note that the stats
1038 * at the socket layer are reduced down to the legacy traffic
1039 * classes; we could/should potentially expand so_tc_stats[].
1041 mtc
= MBUF_SC2TC(msc
);
1042 VERIFY(mtc
< SO_TC_STATS_MAX
);
1043 so
->so_tc_stats
[mtc
].txpackets
+= 1;
1044 so
->so_tc_stats
[mtc
].txbytes
+= m
->m_pkthdr
.len
;
1047 __private_extern__
void
1048 socket_tclass_init(void)
1050 _CASSERT(_SO_TC_MAX
== SO_TC_STATS_MAX
);
1052 tclass_lck_grp_attr
= lck_grp_attr_alloc_init();
1053 tclass_lck_grp
= lck_grp_alloc_init("tclass", tclass_lck_grp_attr
);
1054 tclass_lck_attr
= lck_attr_alloc_init();
1055 lck_mtx_init(tclass_lock
, tclass_lck_grp
, tclass_lck_attr
);
1058 __private_extern__ mbuf_svc_class_t
1061 mbuf_svc_class_t msc
;
1065 msc
= MBUF_SC_BK_SYS
;
1099 msc
= MBUF_SC_UNSPEC
;
1106 __private_extern__
int
1107 so_svc2tc(mbuf_svc_class_t svc
)
1110 case MBUF_SC_UNSPEC
:
1112 case MBUF_SC_BK_SYS
:
1113 return SO_TC_BK_SYS
;
1138 * LRO is turned on for AV streaming class.
1141 so_set_lro(struct socket
*so
, int optval
)
1143 if (optval
== SO_TC_AV
) {
1144 so
->so_flags
|= SOF_USELRO
;
1146 if (so
->so_flags
& SOF_USELRO
) {
1147 /* transition to non LRO class */
1148 so
->so_flags
&= ~SOF_USELRO
;
1149 struct inpcb
*inp
= sotoinpcb(so
);
1150 struct tcpcb
*tp
= NULL
;
1152 tp
= intotcpcb(inp
);
1153 if (tp
&& (tp
->t_flagsext
& TF_LRO_OFFLOADED
)) {
1154 tcp_lro_remove_state(inp
->inp_laddr
,
1158 tp
->t_flagsext
&= ~TF_LRO_OFFLOADED
;