/*
 * Copyright (c) 2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/kauth.h>

#include <kern/zalloc.h>

#include <netinet/in.h>

#include <net/pktsched/pktsched_fq_codel.h>
#include <net/classq/classq_fq_codel.h>
static struct zone *flowq_zone = NULL;
static size_t flowq_size;

#define	FQ_ZONE_MAX	(32 * 1024)	/* across all interfaces */
#define	FQ_SEQ_LT(a,b)	((int)((a)-(b)) < 0)
#define	FQ_SEQ_GT(a,b)	((int)((a)-(b)) > 0)
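/*
 * FQ_SEQ_LT/FQ_SEQ_GT compare TCP sequence numbers using a signed
 * difference so the result stays correct across 32-bit wraparound
 * (serial-number arithmetic).
 */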
void
fq_codel_init(void)
{
	if (flowq_zone != NULL)
		return;

	flowq_size = sizeof (fq_t);
	flowq_zone = zinit(flowq_size, FQ_ZONE_MAX * flowq_size,
	    0, "flowq_zone");
	if (flowq_zone == NULL) {
		panic("%s: failed to allocate flowq_zone", __func__);
		/* NOTREACHED */
	}
	zone_change(flowq_zone, Z_EXPAND, TRUE);
	zone_change(flowq_zone, Z_CALLERACCT, TRUE);
}
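/*
 * Allocate a flow queue from flowq_zone; the allocation may block only
 * when the caller passes M_WAITOK.
 */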
fq_t *
fq_alloc(int how)
{
	fq_t *fq = NULL;

	fq = (how == M_WAITOK) ? zalloc(flowq_zone) :
	    zalloc_noblock(flowq_zone);
	if (fq == NULL) {
		log(LOG_ERR, "%s: unable to allocate from flowq_zone\n",
		    __func__);
		return (NULL);
	}

	bzero(fq, flowq_size);
	MBUFQ_INIT(&fq->fq_mbufq);
	return (fq);
}
void
fq_destroy(fq_t *fq)
{
	VERIFY(MBUFQ_EMPTY(&fq->fq_mbufq));
	VERIFY(!(fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)));
	bzero(fq, flowq_size);
	zfree(flowq_zone, fq);
}
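/*
 * A flow is considered stalled when it has at least
 * FQ_MIN_FC_THRESHOLD_BYTES queued but nothing has been dequeued from it
 * for a full update interval; such a flow is marked delay-high so that
 * the enqueue path can flow-control or drop it.
 */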
static void
fq_detect_dequeue_stall(fq_if_t *fqs, fq_t *flowq, fq_if_classq_t *fq_cl,
    u_int64_t *now)
{
	u_int64_t maxgetqtime;

	if (FQ_IS_DELAYHIGH(flowq) || flowq->fq_getqtime == 0 ||
	    MBUFQ_EMPTY(&flowq->fq_mbufq) ||
	    flowq->fq_bytes < FQ_MIN_FC_THRESHOLD_BYTES)
		return;

	maxgetqtime = flowq->fq_getqtime + fqs->fqs_update_interval;
	if ((*now) > maxgetqtime) {
		/*
		 * there was no dequeue in an update interval worth of
		 * time. It means that the queue is stalled.
		 */
		FQ_SET_DELAY_HIGH(flowq);
		fq_cl->fcl_stat.fcl_dequeue_stall++;
	}
}
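/*
 * Drop the packet at the head of the flow queue and charge the drop to
 * the interface queue statistics.
 */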
void
fq_head_drop(fq_if_t *fqs, fq_t *fq)
{
	struct mbuf *m = NULL;
	struct ifclassq *ifq = fqs->fqs_ifq;

	m = fq_getq_flow(fqs, fq);
	if (m == NULL)
		return;

	IFCQ_DROP_ADD(ifq, 1, m_length(m));
	IFCQ_CONVERT_LOCK(ifq);
	m_freem(m);
}
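/*
 * Enqueue path: timestamp the packet, hash it to its flow queue, detect
 * dequeue stalls, and decide between enqueueing, flow-controlling the
 * flow, or dropping (early, forced, or from the head of the queue).
 */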
int
fq_addq(fq_if_t *fqs, struct mbuf *m, fq_if_classq_t *fq_cl)
{
	struct pkthdr *pkt = &m->m_pkthdr;
	int droptype = DTYPE_NODROP, fc_adv = 0, ret = CLASSQEQ_SUCCESS;
	u_int64_t now;
	fq_t *fq = NULL;

	VERIFY(!(pkt->pkt_flags & PKTF_PRIV_GUARDED));
	pkt->pkt_flags |= PKTF_PRIV_GUARDED;

	if (pkt->pkt_timestamp > 0) {
		now = pkt->pkt_timestamp;
	} else {
		now = mach_absolute_time();
		pkt->pkt_timestamp = now;
	}

	/* find the flowq for this packet */
	fq = fq_if_hash_pkt(fqs, pkt->pkt_flowid, m_get_service_class(m),
	    now, TRUE);
	if (fq == NULL) {
		/* drop the packet if we could not allocate a flow queue */
		fq_cl->fcl_stat.fcl_drop_memfailure++;
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		m_freem(m);
		return (CLASSQEQ_DROPPED);
	}

	VERIFY(fq_cl->fcl_service_class ==
	    (u_int32_t)mbuf_get_service_class(m));

	fq_detect_dequeue_stall(fqs, fq, fq_cl, &now);
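	/*
	 * When the flow is marked delay-high, prefer flow control for
	 * flow-advisory capable TCP flows; otherwise drop from the head
	 * of the queue, or drop this packet if the queue is empty.
	 */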
	if (FQ_IS_DELAYHIGH(fq)) {
		if ((fq->fq_flags & FQF_FLOWCTL_CAPABLE) &&
		    (pkt->pkt_flags & PKTF_FLOW_ADV)) {
			fc_adv = 1;
			/*
			 * If the flow is suspended or it is not
			 * TCP, drop the packet
			 */
			if (pkt->pkt_proto != IPPROTO_TCP) {
				droptype = DTYPE_EARLY;
				fq_cl->fcl_stat.fcl_drop_early++;
			}
		} else {
			/*
			 * Need to drop a packet, instead of dropping this
			 * one, try to drop from the head of the queue
			 */
			if (!MBUFQ_EMPTY(&fq->fq_mbufq)) {
				fq_head_drop(fqs, fq);
				droptype = DTYPE_NODROP;
			} else {
				droptype = DTYPE_EARLY;
			}
			fq_cl->fcl_stat.fcl_drop_early++;
		}
	}
	/*
	 * check if this packet is a retransmission of another pkt already
	 * in the queue
	 */
	if ((pkt->pkt_flags & (PKTF_TCP_REXMT|PKTF_START_SEQ)) ==
	    (PKTF_TCP_REXMT|PKTF_START_SEQ) && fq->fq_dequeue_seq != 0) {
		if (FQ_SEQ_GT(pkt->tx_start_seq, fq->fq_dequeue_seq)) {
			fq_cl->fcl_stat.fcl_dup_rexmts++;
			droptype = DTYPE_FORCED;
		}
	}
	/* Set the return code correctly */
	if (fc_adv == 1 && droptype != DTYPE_FORCED) {
		if (fq_if_add_fcentry(fqs, pkt, fq_cl)) {
			fq->fq_flags |= FQF_FLOWCTL_ON;
			/* deliver flow control advisory error */
			if (droptype == DTYPE_NODROP) {
				ret = CLASSQEQ_SUCCESS_FC;
			} else {
				/* dropped due to flow control */
				ret = CLASSQEQ_DROPPED_FC;
			}
		} else {
			/*
			 * if we could not flow control the flow, it is
			 * better to drop
			 */
			droptype = DTYPE_FORCED;
			ret = CLASSQEQ_DROPPED_FC;
			fq_cl->fcl_stat.fcl_flow_control_fail++;
		}
	}
	/*
	 * If the queue length hits the queue limit, drop a packet from the
	 * front of the queue for a flow with maximum number of bytes. This
	 * will penalize heavy and unresponsive flows. It will also avoid a
	 * tail drop.
	 */
	if (droptype == DTYPE_NODROP && fq_if_at_drop_limit(fqs)) {
		fq_if_drop_packet(fqs);
	}
	if (droptype == DTYPE_NODROP) {
		MBUFQ_ENQUEUE(&fq->fq_mbufq, m);
		fq->fq_bytes += m_length(m);
		fq_cl->fcl_stat.fcl_byte_cnt += m_length(m);
		fq_cl->fcl_stat.fcl_pkt_cnt++;

		/*
		 * check if this queue will qualify to be the next
		 * victim queue
		 */
		fq_if_is_flow_heavy(fqs, fq);
	} else {
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		m_freem(m);
		return ((ret != CLASSQEQ_SUCCESS) ? ret : CLASSQEQ_DROPPED);
	}
	/*
	 * If the queue is not currently active, add it to the end of new
	 * flows list for that service class.
	 */
	if ((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) == 0) {
		VERIFY(STAILQ_NEXT(fq, fq_actlink) == NULL);
		STAILQ_INSERT_TAIL(&fq_cl->fcl_new_flows, fq, fq_actlink);
		fq->fq_flags |= FQF_NEW_FLOW;

		fq_cl->fcl_stat.fcl_newflows_cnt++;

		fq->fq_deficit = fq_cl->fcl_quantum;
	}
	return (ret);
}
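/*
 * Dequeue path: remove the packet at the head of the flow queue, update
 * byte/packet accounting, and track the minimum queueing delay observed
 * over the current update interval to drive the delay-high state and
 * flow-control feedback.
 */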
struct mbuf *
fq_getq_flow(fq_if_t *fqs, fq_t *fq)
{
	struct mbuf *m = NULL;
	struct ifclassq *ifq = fqs->fqs_ifq;
	fq_if_classq_t *fq_cl;
	u_int64_t now;
	int64_t qdelay;
	struct pkthdr *pkt;
	u_int32_t mlen;

	MBUFQ_DEQUEUE(&fq->fq_mbufq, m);
	if (m == NULL)
		return (NULL);

	mlen = m_length(m);

	VERIFY(fq->fq_bytes >= mlen);
	fq->fq_bytes -= mlen;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	fq_cl->fcl_stat.fcl_byte_cnt -= mlen;
	fq_cl->fcl_stat.fcl_pkt_cnt--;
	IFCQ_DEC_LEN(ifq);
	IFCQ_DEC_BYTES(ifq, mlen);
	pkt = &m->m_pkthdr;
	now = mach_absolute_time();

	/* this will compute qdelay in nanoseconds */
	qdelay = now - pkt->pkt_timestamp;

	if (fq->fq_min_qdelay == 0 ||
	    (qdelay > 0 && (u_int64_t)qdelay < fq->fq_min_qdelay))
		fq->fq_min_qdelay = qdelay;
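	/*
	 * At the end of every update interval (or when the queue drains),
	 * compare the smallest queueing delay seen in that interval against
	 * the target delay: mark the flow delay-high if it stayed above the
	 * target, otherwise clear the state and release any flow control.
	 */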
	if (now >= fq->fq_updatetime || MBUFQ_EMPTY(&fq->fq_mbufq)) {
		if (fq->fq_min_qdelay >= fqs->fqs_target_qdelay) {
			if (!FQ_IS_DELAYHIGH(fq))
				FQ_SET_DELAY_HIGH(fq);
		}

		if (!FQ_IS_DELAYHIGH(fq) || MBUFQ_EMPTY(&fq->fq_mbufq)) {
			FQ_CLEAR_DELAY_HIGH(fq);
			if (fq->fq_flags & FQF_FLOWCTL_ON) {
				fq_if_flow_feedback(fqs, fq, fq_cl);
			}
		}

		/* Reset measured queue delay and update time */
		fq->fq_updatetime = now + fqs->fqs_update_interval;
		fq->fq_min_qdelay = 0;
	}
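	/*
	 * Remember the highest TCP start sequence dequeued so far; fq_addq()
	 * uses it to detect retransmissions of segments that are still
	 * sitting in the queue.
	 */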
	if ((pkt->pkt_flags & PKTF_START_SEQ) && (fq->fq_dequeue_seq == 0 ||
	    (FQ_SEQ_LT(fq->fq_dequeue_seq, pkt->tx_start_seq))))
		fq->fq_dequeue_seq = pkt->tx_start_seq;

	pkt->pkt_timestamp = 0;
	pkt->pkt_flags &= ~PKTF_PRIV_GUARDED;
	if (MBUFQ_EMPTY(&fq->fq_mbufq)) {
		/*
		 * Remove from large_flow field, if this happened to be
		 * the one that is tagged.
		 */
		if (fqs->fqs_large_flow == fq)
			fqs->fqs_large_flow = NULL;

		/* Reset getqtime so that we don't count idle times */
		fq->fq_getqtime = 0;
	} else {
		fq->fq_getqtime = now;
	}

	return (m);
}