/*
 * Copyright (c) 2016-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <kern/zalloc.h>
#include <netinet/in.h>

#include <net/classq/classq.h>
#include <net/classq/if_classq.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_fq_codel.h>
#include <net/classq/classq_fq_codel.h>
static uint32_t flowq_size;			/* size of flowq */
static struct mcache *flowq_cache = NULL;	/* mcache for flowq */
#define	FQ_ZONE_MAX	(32 * 1024)	/* across all interfaces */

#define	DTYPE_NODROP	0	/* no drop */
#define	DTYPE_FORCED	1	/* a "forced" drop */
#define	DTYPE_EARLY	2	/* an "unforced" (early) drop */
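/*
 * This file implements the flow-queue half of the FQ-CoDel discipline:
 * packets are hashed to per-flow queues and timestamped at enqueue, and
 * the minimum queueing delay observed over each update interval is
 * tracked so that flows whose delay stays above the target can be marked
 * delay-high and either flow-controlled or subjected to early drops.
 * The scheduler half (the fq_if_* calls) lives with the pktsched_fq_codel
 * code.
 */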
void
fq_codel_init(void)
{
	if (flowq_cache != NULL)
		return;

	flowq_size = sizeof (fq_t);
	flowq_cache = mcache_create("fq.flowq", flowq_size, sizeof (uint64_t),
	    0, MCR_SLEEP);
	if (flowq_cache == NULL) {
		panic("%s: failed to allocate flowq_cache", __func__);
		/* NOTREACHED */
	}
}
void
fq_codel_reap_caches(boolean_t purge)
{
	mcache_reap_now(flowq_cache, purge);
}
fq_t *
fq_alloc(classq_pkt_type_t ptype)
{
	fq_t *fq = NULL;

	fq = mcache_alloc(flowq_cache, MCR_SLEEP);
	if (fq == NULL) {
		log(LOG_ERR, "%s: unable to allocate from flowq_cache\n",
		    __func__);
		return (NULL);
	}

	bzero(fq, flowq_size);
	fq->fq_ptype = ptype;
	if (ptype == QP_MBUF) {
		MBUFQ_INIT(&fq->fq_mbufq);
	}
	return (fq);
}
void
fq_destroy(fq_t *fq)
{
	VERIFY(fq_empty(fq));
	VERIFY(!(fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)));
	VERIFY(fq->fq_bytes == 0);
	mcache_free(flowq_cache, fq);
}
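/*
 * A flow that holds at least FQ_MIN_FC_THRESHOLD_BYTES but has not been
 * dequeued from for a full update interval is considered stalled: mark
 * it delay-high right away instead of waiting for the delay measurement
 * in fq_getq_flow(), which never runs for a queue that is not being
 * drained.
 */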
static void
fq_detect_dequeue_stall(fq_if_t *fqs, fq_t *flowq, fq_if_classq_t *fq_cl,
    u_int64_t *now)
{
	u_int64_t maxgetqtime;

	if (FQ_IS_DELAYHIGH(flowq) || flowq->fq_getqtime == 0 ||
	    fq_empty(flowq) ||
	    flowq->fq_bytes < FQ_MIN_FC_THRESHOLD_BYTES)
		return;
	maxgetqtime = flowq->fq_getqtime + fqs->fqs_update_interval;
	if ((*now) > maxgetqtime) {
		/*
		 * there was no dequeue in an update interval worth of
		 * time. It means that the queue is stalled.
		 */
		FQ_SET_DELAY_HIGH(flowq);
		fq_cl->fcl_stat.fcl_dequeue_stall++;
	}
}
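/*
 * Drop one packet from the head of the given flow queue. Dropping from
 * the head rather than the tail delivers the loss signal to a responsive
 * sender sooner, since the head packet has already waited the longest.
 */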
void
fq_head_drop(fq_if_t *fqs, fq_t *fq)
{
	pktsched_pkt_t pkt;
	uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;
	struct ifclassq *ifq = fqs->fqs_ifq;

	_PKTSCHED_PKT_INIT(&pkt);
	if (fq_getq_flow_internal(fqs, fq, &pkt) == NULL)
		return;

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	*pkt_timestamp = 0;
	if (pkt.pktsched_ptype == QP_MBUF)
		*pkt_flags &= ~PKTF_PRIV_GUARDED;

	IFCQ_DROP_ADD(ifq, 1, pktsched_get_pkt_len(&pkt));
	IFCQ_CONVERT_LOCK(ifq);
	pktsched_free_pkt(&pkt);
}
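/*
 * Enqueue path. Each packet is stamped with its arrival time, hashed to
 * a flow queue, and checked against that queue's delay-high state:
 * delay-high flows that support flow-control advisories are suspended,
 * others see early drops. When the interface queue is at its drop limit,
 * a packet is dropped from the head of the largest flow instead of
 * tail-dropping the newcomer, penalizing heavy and unresponsive flows.
 */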
int
fq_addq(fq_if_t *fqs, pktsched_pkt_t *pkt, fq_if_classq_t *fq_cl)
{
	int droptype = DTYPE_NODROP, fc_adv = 0, ret = CLASSQEQ_SUCCESS;
	u_int64_t now;
	fq_t *fq = NULL;
	uint64_t *pkt_timestamp;
	uint32_t *pkt_flags;
	uint32_t pkt_flowid, pkt_tx_start_seq;
	uint8_t pkt_proto, pkt_flowsrc;

	pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, &pkt_flowid,
	    &pkt_flowsrc, &pkt_proto, &pkt_tx_start_seq);

	if (pkt->pktsched_ptype == QP_MBUF) {
		/* See comments in <rdar://problem/14040693> */
		VERIFY(!(*pkt_flags & PKTF_PRIV_GUARDED));
		*pkt_flags |= PKTF_PRIV_GUARDED;
	}

	if (*pkt_timestamp > 0) {
		now = *pkt_timestamp;
	} else {
		struct timespec now_ts;
		nanouptime(&now_ts);
		now = (now_ts.tv_sec * NSEC_PER_SEC) + now_ts.tv_nsec;
		*pkt_timestamp = now;
	}

	/* find the flowq for this packet */
	fq = fq_if_hash_pkt(fqs, pkt_flowid, pktsched_get_pkt_svc(pkt),
	    now, TRUE, pkt->pktsched_ptype);
	if (fq == NULL) {
		/* drop the packet if we could not allocate a flow queue */
		fq_cl->fcl_stat.fcl_drop_memfailure++;
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		return (CLASSQEQ_DROP);
	}
	VERIFY(fq->fq_ptype == pkt->pktsched_ptype);

	fq_detect_dequeue_stall(fqs, fq, fq_cl, &now);

	if (FQ_IS_DELAYHIGH(fq)) {
		if ((fq->fq_flags & FQF_FLOWCTL_CAPABLE) &&
		    (*pkt_flags & PKTF_FLOW_ADV)) {
			fc_adv = 1;
			/*
			 * If the flow is suspended or it is not
			 * TCP, drop the packet
			 */
			if (pkt_proto != IPPROTO_TCP) {
				droptype = DTYPE_EARLY;
				fq_cl->fcl_stat.fcl_drop_early++;
			}
		} else {
			/*
			 * Need to drop a packet, instead of dropping this
			 * one, try to drop from the head of the queue
			 */
			if (!fq_empty(fq)) {
				fq_head_drop(fqs, fq);
				droptype = DTYPE_NODROP;
			} else {
				droptype = DTYPE_EARLY;
			}
			fq_cl->fcl_stat.fcl_drop_early++;
		}
	}

	/* Set the return code correctly */
	if (fc_adv == 1 && droptype != DTYPE_FORCED) {
		if (fq_if_add_fcentry(fqs, pkt, pkt_flowid, pkt_flowsrc,
		    fq_cl)) {
			fq->fq_flags |= FQF_FLOWCTL_ON;
			/* deliver flow control advisory error */
			if (droptype == DTYPE_NODROP) {
				ret = CLASSQEQ_SUCCESS_FC;
			} else {
				/* dropped due to flow control */
				ret = CLASSQEQ_DROP_FC;
			}
		} else {
			/*
			 * if we could not flow control the flow, it is
			 * better to drop
			 */
			droptype = DTYPE_FORCED;
			ret = CLASSQEQ_DROP_FC;
			fq_cl->fcl_stat.fcl_flow_control_fail++;
		}
	}

	/*
	 * If the queue length hits the queue limit, drop a packet from the
	 * front of the queue for a flow with maximum number of bytes. This
	 * will penalize heavy and unresponsive flows. It will also avoid a
	 * tail drop.
	 */
	if (droptype == DTYPE_NODROP && fq_if_at_drop_limit(fqs)) {
		if (fqs->fqs_large_flow == fq) {
			/*
			 * Drop from the head of the current fq. Since a
			 * new packet will be added to the tail, it is ok
			 * to leave fq in place.
			 */
			fq_head_drop(fqs, fq);
		} else {
			if (fqs->fqs_large_flow == NULL) {
				droptype = DTYPE_FORCED;
				fq_cl->fcl_stat.fcl_drop_overflow++;

				/*
				 * if this fq was freshly created and there
				 * is nothing to enqueue, free it
				 */
				if (fq_empty(fq) && !(fq->fq_flags &
				    (FQF_NEW_FLOW | FQF_OLD_FLOW))) {
					fq_if_destroy_flow(fqs, fq_cl, fq);
					fq = NULL;
				}
			} else {
				fq_if_drop_packet(fqs);
			}
		}
	}

	if (droptype == DTYPE_NODROP) {
		uint32_t pkt_len = pktsched_get_pkt_len(pkt);
		fq_enqueue(fq, pkt->pktsched_pkt);
		fq->fq_bytes += pkt_len;
		fq_cl->fcl_stat.fcl_byte_cnt += pkt_len;
		fq_cl->fcl_stat.fcl_pkt_cnt++;

		/*
		 * check if this queue will qualify to be the next
		 * victim queue
		 */
		fq_if_is_flow_heavy(fqs, fq);
	} else {
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		return ((ret != CLASSQEQ_SUCCESS) ? ret : CLASSQEQ_DROP);
	}

	/*
	 * If the queue is not currently active, add it to the end of new
	 * flows list for that service class.
	 */
	if ((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) == 0) {
		VERIFY(STAILQ_NEXT(fq, fq_actlink) == NULL);
		STAILQ_INSERT_TAIL(&fq_cl->fcl_new_flows, fq, fq_actlink);
		fq->fq_flags |= FQF_NEW_FLOW;

		fq_cl->fcl_stat.fcl_newflows_cnt++;

		fq->fq_deficit = fq_cl->fcl_quantum;
	}
	return (ret);
}
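/*
 * Dequeue one packet from the given flow queue and settle the byte and
 * packet accounting on the flow, its class queue and the interface
 * queue. This is the bookkeeping-only path; the delay measurement is
 * layered on top of it in fq_getq_flow().
 */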
void *
fq_getq_flow_internal(fq_if_t *fqs, fq_t *fq, pktsched_pkt_t *pkt)
{
	void *p;
	uint32_t plen;
	fq_if_classq_t *fq_cl;
	struct ifclassq *ifq = fqs->fqs_ifq;

	fq_dequeue(fq, p);
	if (p == NULL)
		return (NULL);

	pktsched_pkt_encap(pkt, fq->fq_ptype, p);
	plen = pktsched_get_pkt_len(pkt);

	VERIFY(fq->fq_bytes >= plen);
	fq->fq_bytes -= plen;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	fq_cl->fcl_stat.fcl_byte_cnt -= plen;
	fq_cl->fcl_stat.fcl_pkt_cnt--;
	IFCQ_DEC_LEN(ifq);
	IFCQ_DEC_BYTES(ifq, plen);

	/* Reset getqtime so that we don't count idle times */
	if (fq_empty(fq))
		fq->fq_getqtime = 0;

	return (p);
}
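/*
 * Full dequeue path. The per-packet queueing delay is computed from the
 * enqueue timestamp, and the minimum delay seen over the current update
 * interval is tracked; at the end of each interval the flow is marked
 * (or cleared) delay-high depending on whether that minimum exceeded
 * the target delay. Using the interval minimum rather than an
 * instantaneous sample keeps transient bursts from triggering the
 * delay-high state, in the spirit of CoDel.
 */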
void *
fq_getq_flow(fq_if_t *fqs, fq_t *fq, pktsched_pkt_t *pkt)
{
	void *p;
	fq_if_classq_t *fq_cl;
	u_int64_t now;
	int64_t qdelay = 0;
	struct timespec now_ts;
	uint32_t *pkt_flags, pkt_tx_start_seq;
	uint64_t *pkt_timestamp;

	p = fq_getq_flow_internal(fqs, fq, pkt);
	if (p == NULL)
		return (NULL);

	pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, &pkt_tx_start_seq);

	nanouptime(&now_ts);
	now = (now_ts.tv_sec * NSEC_PER_SEC) + now_ts.tv_nsec;

	/* this will compute qdelay in nanoseconds */
	if (now > *pkt_timestamp)
		qdelay = now - *pkt_timestamp;
	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];

	if (fq->fq_min_qdelay == 0 ||
	    (qdelay > 0 && (u_int64_t)qdelay < fq->fq_min_qdelay))
		fq->fq_min_qdelay = qdelay;
	if (now >= fq->fq_updatetime) {
		if (fq->fq_min_qdelay > fqs->fqs_target_qdelay) {
			if (!FQ_IS_DELAYHIGH(fq))
				FQ_SET_DELAY_HIGH(fq);
		} else {
			FQ_CLEAR_DELAY_HIGH(fq);
		}

		/* Reset measured queue delay and update time */
		fq->fq_updatetime = now + fqs->fqs_update_interval;
		fq->fq_min_qdelay = 0;
	}
	if (!FQ_IS_DELAYHIGH(fq) || fq_empty(fq)) {
		FQ_CLEAR_DELAY_HIGH(fq);
		if (fq->fq_flags & FQF_FLOWCTL_ON) {
			fq_if_flow_feedback(fqs, fq, fq_cl);
		}
	}

	if (fq_empty(fq)) {
		/* Reset getqtime so that we don't count idle times */
		fq->fq_getqtime = 0;
	} else {
		fq->fq_getqtime = now;
	}
	fq_if_is_flow_heavy(fqs, fq);

	*pkt_timestamp = 0;
	if (pkt->pktsched_ptype == QP_MBUF)
		*pkt_flags &= ~PKTF_PRIV_GUARDED;

	return (p);
}