]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/pktsched/pktsched_fq_codel.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / net / pktsched / pktsched_fq_codel.c
CommitLineData
39037602 1/*
f427ee49 2 * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
39037602
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/types.h>
30#include <sys/param.h>
31#include <kern/zalloc.h>
c3c9b80d 32#include <net/ethernet.h>
39037602
A
33#include <net/if_var.h>
34#include <net/if.h>
35#include <net/classq/classq.h>
36#include <net/classq/classq_fq_codel.h>
37#include <net/pktsched/pktsched_fq_codel.h>
c3c9b80d
A
38#include <os/log.h>
39
/*
 * Quantum used when the interface family gives no better value; it is
 * also the lower bound applied to every computed quantum.
 */
#define FQ_CODEL_DEFAULT_QUANTUM        1500

/*
 * Per service class quantum, derived from the base quantum (_q).
 * The argument is parenthesized so that an expression argument
 * (e.g. "mtu + hdr") is scaled as a whole.
 */
#define FQ_CODEL_QUANTUM_BK_SYS(_q)     (_q)
#define FQ_CODEL_QUANTUM_BK(_q)         (_q)
#define FQ_CODEL_QUANTUM_BE(_q)         (_q)
#define FQ_CODEL_QUANTUM_RD(_q)         (_q)
#define FQ_CODEL_QUANTUM_OAM(_q)        (_q)
#define FQ_CODEL_QUANTUM_AV(_q)         ((_q) * 2)
#define FQ_CODEL_QUANTUM_RV(_q)         ((_q) * 2)
#define FQ_CODEL_QUANTUM_VI(_q)         ((_q) * 2)
#define FQ_CODEL_QUANTUM_VO(_q)         (((_q) * 2) / 5)
#define FQ_CODEL_QUANTUM_CTL(_q)        (((_q) * 2) / 5)

/*
 * Per service class DRR maximum; the dequeue path multiplies
 * min(drr_max, number of flows) by the class quantum to refill a
 * class budget.
 */
#define FQ_CODEL_DRR_MAX_BK_SYS         2
#define FQ_CODEL_DRR_MAX_BK             2
#define FQ_CODEL_DRR_MAX_BE             4
#define FQ_CODEL_DRR_MAX_RD             4
#define FQ_CODEL_DRR_MAX_OAM            4
#define FQ_CODEL_DRR_MAX_AV             6
#define FQ_CODEL_DRR_MAX_RV             6
#define FQ_CODEL_DRR_MAX_VI             6
#define FQ_CODEL_DRR_MAX_VO             8
#define FQ_CODEL_DRR_MAX_CTL            8
39037602 63
f427ee49 64static ZONE_DECLARE(fq_if_zone, "pktsched_fq_if", sizeof(fq_if_t), ZC_ZFREE_CLEARMEM);
39037602 65
c3c9b80d
A
66typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;
67
5ba3f43e 68static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t);
39037602 69static void fq_if_destroy(fq_if_t *fqs);
f427ee49
A
70static void fq_if_classq_init(fq_if_t *fqs, uint32_t priority,
71 uint16_t quantum, uint32_t drr_max, uint32_t svc_class);
72static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
73 int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
c3c9b80d 74 uint32_t *, flowq_dqlist_t *, boolean_t drvmgmt);
39037602
A
75void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
76static void fq_if_purge(fq_if_t *);
77static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
78static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
79static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
80 bool add_to_old);
81static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
c3c9b80d 82 fq_t *fq, bool remove_hash, bool destroy);
39037602 83
0a7de745 84#define FQ_IF_FLOW_HASH_ID(_flowid_) \
39037602
A
85 (((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)
86
0a7de745 87#define FQ_IF_CLASSQ_IDLE(_fcl_) \
39037602
A
88 (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
89 STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
90
cb323159 91typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
5ba3f43e 92typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
f427ee49 93 int64_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
cb323159 94 u_int32_t *, boolean_t *, u_int32_t);
5ba3f43e
A
95
96static void
cb323159 97fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
5ba3f43e 98{
cb323159 99 pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
5ba3f43e
A
100}
101
102
103
104static boolean_t
105fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
c3c9b80d
A
106 int64_t byte_limit, u_int32_t pkt_limit, classq_pkt_t *head,
107 classq_pkt_t *tail, u_int32_t *byte_cnt, u_int32_t *pkt_cnt,
cb323159 108 boolean_t *qempty, u_int32_t pflags)
5ba3f43e 109{
5ba3f43e
A
110 u_int32_t plen;
111 pktsched_pkt_t pkt;
112 boolean_t limit_reached = FALSE;
113 struct ifclassq *ifq = fqs->fqs_ifq;
114 struct ifnet *ifp = ifq->ifcq_ifp;
115
116 while (fq->fq_deficit > 0 && limit_reached == FALSE &&
117 !MBUFQ_EMPTY(&fq->fq_mbufq)) {
5ba3f43e 118 _PKTSCHED_PKT_INIT(&pkt);
cb323159 119 fq_getq_flow(fqs, fq, &pkt);
5ba3f43e
A
120 ASSERT(pkt.pktsched_ptype == QP_MBUF);
121
122 plen = pktsched_get_pkt_len(&pkt);
123 fq->fq_deficit -= plen;
cb323159 124 pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= pflags;
5ba3f43e 125
c3c9b80d
A
126 if (head->cp_mbuf == NULL) {
127 *head = pkt.pktsched_pkt;
5ba3f43e 128 } else {
c3c9b80d
A
129 ASSERT(tail->cp_mbuf != NULL);
130 ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
131 tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
5ba3f43e 132 }
c3c9b80d
A
133 *tail = pkt.pktsched_pkt;
134 tail->cp_mbuf->m_nextpkt = NULL;
5ba3f43e
A
135 fq_cl->fcl_stat.fcl_dequeue++;
136 fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
137 *pkt_cnt += 1;
138 *byte_cnt += plen;
139
cb323159 140 ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);
5ba3f43e
A
141
142 /* Check if the limit is reached */
0a7de745 143 if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
5ba3f43e 144 limit_reached = TRUE;
0a7de745 145 }
5ba3f43e
A
146 }
147
148 *qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
0a7de745 149 return limit_reached;
5ba3f43e
A
150}
151
39037602 152fq_if_t *
5ba3f43e 153fq_if_alloc(struct ifnet *ifp, classq_pkt_type_t ptype)
39037602
A
154{
155 fq_if_t *fqs;
39037602 156
f427ee49 157 fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
39037602 158 fqs->fqs_ifq = &ifp->if_snd;
5ba3f43e 159 fqs->fqs_ptype = ptype;
39037602
A
160
161 /* Calculate target queue delay */
162 ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);
163
164 /* Calculate update interval */
165 ifclassq_calc_update_interval(&fqs->fqs_update_interval);
5ba3f43e
A
166
167 /* Configure packet drop limit across all queues */
168 fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(&ifp->if_snd);
39037602 169 STAILQ_INIT(&fqs->fqs_fclist);
0a7de745 170 return fqs;
39037602
A
171}
172
173void
174fq_if_destroy(fq_if_t *fqs)
175{
39037602
A
176 fq_if_purge(fqs);
177 fqs->fqs_ifq = NULL;
178 zfree(fq_if_zone, fqs);
179}
180
f427ee49 181static inline uint8_t
5ba3f43e 182fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
39037602 183{
f427ee49 184 uint8_t pri;
39037602 185
5ba3f43e
A
186 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
187 switch (svc) {
188 case MBUF_SC_BK_SYS:
189 case MBUF_SC_BK:
190 pri = FQ_IF_BK_INDEX;
191 break;
192 case MBUF_SC_BE:
193 case MBUF_SC_RD:
194 case MBUF_SC_OAM:
195 pri = FQ_IF_BE_INDEX;
196 break;
197 case MBUF_SC_AV:
198 case MBUF_SC_RV:
199 case MBUF_SC_VI:
d9a64523 200 case MBUF_SC_SIG:
5ba3f43e
A
201 pri = FQ_IF_VI_INDEX;
202 break;
203 case MBUF_SC_VO:
204 case MBUF_SC_CTL:
205 pri = FQ_IF_VO_INDEX;
206 break;
207 default:
208 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
209 break;
210 }
0a7de745 211 return pri;
5ba3f43e
A
212 }
213
214 /* scheduler is not managed by the driver */
39037602
A
215 switch (svc) {
216 case MBUF_SC_BK_SYS:
217 pri = FQ_IF_BK_SYS_INDEX;
218 break;
219 case MBUF_SC_BK:
220 pri = FQ_IF_BK_INDEX;
221 break;
222 case MBUF_SC_BE:
223 pri = FQ_IF_BE_INDEX;
224 break;
225 case MBUF_SC_RD:
226 pri = FQ_IF_RD_INDEX;
227 break;
228 case MBUF_SC_OAM:
229 pri = FQ_IF_OAM_INDEX;
230 break;
231 case MBUF_SC_AV:
232 pri = FQ_IF_AV_INDEX;
233 break;
234 case MBUF_SC_RV:
235 pri = FQ_IF_RV_INDEX;
236 break;
237 case MBUF_SC_VI:
238 pri = FQ_IF_VI_INDEX;
239 break;
d9a64523
A
240 case MBUF_SC_SIG:
241 pri = FQ_IF_SIG_INDEX;
242 break;
39037602
A
243 case MBUF_SC_VO:
244 pri = FQ_IF_VO_INDEX;
245 break;
246 case MBUF_SC_CTL:
247 pri = FQ_IF_CTL_INDEX;
248 break;
249 default:
250 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
251 break;
252 }
0a7de745 253 return pri;
39037602
A
254}
255
f427ee49
A
256static void
257fq_if_classq_init(fq_if_t *fqs, uint32_t pri, uint16_t quantum,
258 uint32_t drr_max, uint32_t svc_class)
39037602
A
259{
260 fq_if_classq_t *fq_cl;
cb323159 261 VERIFY(pri < FQ_IF_MAX_CLASSES);
39037602
A
262 fq_cl = &fqs->fqs_classq[pri];
263
cb323159 264 VERIFY(fq_cl->fcl_quantum == 0);
39037602
A
265 fq_cl->fcl_quantum = quantum;
266 fq_cl->fcl_pri = pri;
267 fq_cl->fcl_drr_max = drr_max;
268 fq_cl->fcl_service_class = svc_class;
269 STAILQ_INIT(&fq_cl->fcl_new_flows);
270 STAILQ_INIT(&fq_cl->fcl_old_flows);
271}
272
273int
f427ee49
A
274fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
275 classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
39037602 276{
f427ee49 277 uint8_t pri;
39037602
A
278 fq_if_t *fqs;
279 fq_if_classq_t *fq_cl;
f427ee49 280 int ret;
39037602 281 mbuf_svc_class_t svc;
5ba3f43e 282 pktsched_pkt_t pkt;
39037602 283
f427ee49 284 pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);
39037602
A
285
286 fqs = (fq_if_t *)ifq->ifcq_disc;
5ba3f43e
A
287 svc = pktsched_get_pkt_svc(&pkt);
288 pri = fq_if_service_to_priority(fqs, svc);
cb323159 289 VERIFY(pri < FQ_IF_MAX_CLASSES);
39037602
A
290 fq_cl = &fqs->fqs_classq[pri];
291
f427ee49 292 if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
39037602 293 /* BK_SYS is currently throttled */
f427ee49 294 atomic_add_32(&fq_cl->fcl_stat.fcl_throttle_drops, 1);
5ba3f43e
A
295 pktsched_free_pkt(&pkt);
296 *pdrop = TRUE;
f427ee49
A
297 ret = EQSUSPENDED;
298 goto done;
39037602
A
299 }
300
f427ee49 301 IFCQ_LOCK_SPIN(ifq);
5ba3f43e
A
302 ret = fq_addq(fqs, &pkt, fq_cl);
303 if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) &&
304 !FQ_IF_CLASSQ_IDLE(fq_cl)) {
39037602
A
305 if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
306 (1 << pri)) == 0) {
307 /*
308 * this group is not in ER or EB groups,
309 * mark it as IB
310 */
311 pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
312 }
313 }
314
f427ee49 315 if (__improbable(ret != 0)) {
39037602
A
316 if (ret == CLASSQEQ_SUCCESS_FC) {
317 /* packet enqueued, return advisory feedback */
318 ret = EQFULL;
5ba3f43e 319 *pdrop = FALSE;
f427ee49
A
320 } else if (ret == CLASSQEQ_COMPRESSED) {
321 ret = 0;
322 *pdrop = FALSE;
39037602 323 } else {
f427ee49 324 IFCQ_UNLOCK(ifq);
5ba3f43e 325 *pdrop = TRUE;
5ba3f43e 326 pktsched_free_pkt(&pkt);
39037602 327 switch (ret) {
5ba3f43e 328 case CLASSQEQ_DROP:
f427ee49
A
329 ret = ENOBUFS;
330 goto done;
5ba3f43e 331 case CLASSQEQ_DROP_FC:
f427ee49
A
332 ret = EQFULL;
333 goto done;
5ba3f43e 334 case CLASSQEQ_DROP_SP:
f427ee49
A
335 ret = EQSUSPENDED;
336 goto done;
337 default:
338 VERIFY(0);
339 /* NOTREACHED */
340 __builtin_unreachable();
39037602 341 }
f427ee49
A
342 /* NOTREACHED */
343 __builtin_unreachable();
39037602 344 }
5ba3f43e
A
345 } else {
346 *pdrop = FALSE;
39037602 347 }
f427ee49
A
348 IFCQ_ADD_LEN(ifq, cnt);
349 IFCQ_INC_BYTES(ifq, bytes);
350 IFCQ_UNLOCK(ifq);
351done:
c3c9b80d
A
352#if DEBUG || DEVELOPMENT
353 if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
354 ret = 0;
355 }
356#endif /* DEBUG || DEVELOPMENT */
0a7de745 357 return ret;
39037602
A
358}
359
f427ee49 360void
cb323159 361fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt)
39037602 362{
5ba3f43e 363 (void) fq_if_dequeue_classq_multi(ifq, 1,
cb323159 364 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL);
5ba3f43e 365}
39037602 366
f427ee49 367void
5ba3f43e 368fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
cb323159 369 classq_pkt_t *pkt)
5ba3f43e 370{
5ba3f43e 371 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
f427ee49 372 uint32_t total_pktcnt = 0, total_bytecnt = 0;
5ba3f43e 373 fq_if_classq_t *fq_cl;
f427ee49 374 uint8_t pri;
5ba3f43e
A
375
376 pri = fq_if_service_to_priority(fqs, svc);
377 fq_cl = &fqs->fqs_classq[pri];
378
379 fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
c3c9b80d 380 pkt, NULL, &total_pktcnt, &total_bytecnt, NULL, TRUE);
f427ee49
A
381
382 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
39037602
A
383}
384
c3c9b80d
A
385static inline void
386fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
387{
388 ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
389 ASSERT(!fq->fq_in_dqlist);
390 STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
391 fq->fq_in_dqlist = true;
392}
393
394static inline void
395fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
396 classq_pkt_t *tail)
397{
398 ASSERT(fq->fq_in_dqlist);
399 if (fq->fq_dq_head.cp_mbuf == NULL) {
400 goto done;
401 }
402
403 if (head->cp_mbuf == NULL) {
404 *head = fq->fq_dq_head;
405 } else {
406 ASSERT(tail->cp_mbuf != NULL);
407
408 switch (fq->fq_ptype) {
409 case QP_MBUF:
410 ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
411 tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
412 ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
413 break;
414 default:
415 VERIFY(0);
416 /* NOTREACHED */
417 __builtin_unreachable();
418 }
419 }
420 *tail = fq->fq_dq_tail;
421done:
422 STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
423 CLASSQ_PKT_INIT(&fq->fq_dq_head);
424 CLASSQ_PKT_INIT(&fq->fq_dq_tail);
425 fq->fq_in_dqlist = false;
426 if (fq->fq_flags & FQF_DESTROYED) {
427 fq_destroy(fq);
428 }
429}
430
431static inline void
432fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
433 classq_pkt_t *tail)
434{
435 fq_t *fq, *tfq;
436
437 STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
438 fq_dqlist_remove(fq_dqlist_head, fq, head, tail);
439 }
440}
441
39037602 442int
5ba3f43e 443fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
cb323159
A
444 u_int32_t maxbytecnt, classq_pkt_t *first_packet,
445 classq_pkt_t *last_packet, u_int32_t *retpktcnt,
446 u_int32_t *retbytecnt)
39037602 447{
c3c9b80d 448 uint32_t total_pktcnt = 0, total_bytecnt = 0;
cb323159
A
449 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
450 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
451 classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
452 fq_if_append_pkt_t append_pkt;
c3c9b80d 453 flowq_dqlist_t fq_dqlist_head;
39037602 454 fq_if_classq_t *fq_cl;
cb323159 455 fq_if_t *fqs;
39037602
A
456 int pri;
457
458 IFCQ_LOCK_ASSERT_HELD(ifq);
459
460 fqs = (fq_if_t *)ifq->ifcq_disc;
c3c9b80d 461 STAILQ_INIT(&fq_dqlist_head);
39037602 462
5ba3f43e
A
463 switch (fqs->fqs_ptype) {
464 case QP_MBUF:
465 append_pkt = fq_if_append_mbuf;
466 break;
467
468
469 default:
470 VERIFY(0);
471 /* NOTREACHED */
cb323159 472 __builtin_unreachable();
5ba3f43e
A
473 }
474
39037602 475 for (;;) {
c3c9b80d
A
476 uint32_t pktcnt = 0, bytecnt = 0;
477 classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
cb323159
A
478 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
479
39037602
A
480 if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
481 fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
482 fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
483 fqs->fqs_bitmaps[FQ_IF_IB] = 0;
0a7de745 484 if (fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
39037602 485 break;
0a7de745 486 }
39037602
A
487 }
488 pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
489 if (pri == 0) {
490 /*
491 * There are no ER flows, move the highest
492 * priority one from EB if there are any in that
493 * category
494 */
495 pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
496 VERIFY(pri > 0);
497 pktsched_bit_clr((pri - 1),
498 &fqs->fqs_bitmaps[FQ_IF_EB]);
499 pktsched_bit_set((pri - 1),
500 &fqs->fqs_bitmaps[FQ_IF_ER]);
501 }
502 pri--; /* index starts at 0 */
503 fq_cl = &fqs->fqs_classq[pri];
504
505 if (fq_cl->fcl_budget <= 0) {
506 /* Update the budget */
507 fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
508 fq_cl->fcl_stat.fcl_flows_cnt) *
509 fq_cl->fcl_quantum);
0a7de745 510 if (fq_cl->fcl_budget <= 0) {
39037602 511 goto state_change;
0a7de745 512 }
39037602
A
513 }
514 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
c3c9b80d
A
515 (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
516 &bytecnt, &fq_dqlist_head, FALSE);
517 if (head.cp_mbuf != NULL) {
518 ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
cb323159 519 if (first.cp_mbuf == NULL) {
c3c9b80d 520 first = head;
39037602 521 } else {
cb323159 522 ASSERT(last.cp_mbuf != NULL);
c3c9b80d 523 append_pkt(&last, &head);
39037602 524 }
cb323159
A
525 last = tail;
526 append_pkt(&last, &tmp);
39037602 527 }
c3c9b80d
A
528 fq_cl->fcl_budget -= bytecnt;
529 total_pktcnt += pktcnt;
530 total_bytecnt += bytecnt;
39037602
A
531
532 /*
533 * If the class has exceeded the budget but still has data
534 * to send, move it to IB
535 */
536state_change:
537 if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
538 if (fq_cl->fcl_budget <= 0) {
539 pktsched_bit_set(pri,
540 &fqs->fqs_bitmaps[FQ_IF_IB]);
541 pktsched_bit_clr(pri,
542 &fqs->fqs_bitmaps[FQ_IF_ER]);
543 }
544 } else {
545 pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
546 VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
547 fqs->fqs_bitmaps[FQ_IF_EB] |
0a7de745 548 fqs->fqs_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
39037602
A
549 fq_cl->fcl_budget = 0;
550 }
0a7de745 551 if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
39037602 552 break;
0a7de745 553 }
39037602 554 }
cb323159 555
c3c9b80d
A
556 fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last);
557
cb323159
A
558 if (__probable(first_packet != NULL)) {
559 *first_packet = first;
560 }
561 if (last_packet != NULL) {
562 *last_packet = last;
39037602 563 }
cb323159
A
564 if (retpktcnt != NULL) {
565 *retpktcnt = total_pktcnt;
566 }
567 if (retbytecnt != NULL) {
568 *retbytecnt = total_bytecnt;
569 }
570
571 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
0a7de745 572 return 0;
39037602
A
573}
574
5ba3f43e
A
575int
576fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
cb323159
A
577 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
578 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt)
5ba3f43e 579{
5ba3f43e 580 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
f427ee49 581 uint8_t pri;
5ba3f43e
A
582 u_int32_t total_pktcnt = 0, total_bytecnt = 0;
583 fq_if_classq_t *fq_cl;
cb323159
A
584 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
585 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
5ba3f43e 586 fq_if_append_pkt_t append_pkt;
c3c9b80d 587 flowq_dqlist_t fq_dqlist_head;
5ba3f43e
A
588
589 switch (fqs->fqs_ptype) {
590 case QP_MBUF:
591 append_pkt = fq_if_append_mbuf;
592 break;
593
594
595 default:
596 VERIFY(0);
597 /* NOTREACHED */
cb323159 598 __builtin_unreachable();
5ba3f43e
A
599 }
600
c3c9b80d 601 STAILQ_INIT(&fq_dqlist_head);
5ba3f43e
A
602 pri = fq_if_service_to_priority(fqs, svc);
603 fq_cl = &fqs->fqs_classq[pri];
5ba3f43e
A
604 /*
605 * Now we have the queue for a particular service class. We need
606 * to dequeue as many packets as needed, first from the new flows
607 * and then from the old flows.
608 */
609 while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
610 fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
c3c9b80d 611 classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
cb323159 612 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
5ba3f43e 613 u_int32_t pktcnt = 0, bytecnt = 0;
cb323159 614
5ba3f43e 615 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
c3c9b80d
A
616 (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
617 &bytecnt, &fq_dqlist_head, TRUE);
618 if (head.cp_mbuf != NULL) {
cb323159 619 if (first.cp_mbuf == NULL) {
c3c9b80d 620 first = head;
cb323159
A
621 } else {
622 ASSERT(last.cp_mbuf != NULL);
c3c9b80d 623 append_pkt(&last, &head);
cb323159
A
624 }
625 last = tail;
5ba3f43e 626 }
c3c9b80d
A
627 total_pktcnt += pktcnt;
628 total_bytecnt += bytecnt;
5ba3f43e 629 }
cb323159 630
c3c9b80d
A
631 fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last);
632
cb323159
A
633 if (__probable(first_packet != NULL)) {
634 *first_packet = first;
635 }
636 if (last_packet != NULL) {
637 *last_packet = last;
5ba3f43e 638 }
cb323159
A
639 if (retpktcnt != NULL) {
640 *retpktcnt = total_pktcnt;
641 }
642 if (retbytecnt != NULL) {
643 *retbytecnt = total_bytecnt;
644 }
645
f427ee49
A
646 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
647
0a7de745 648 return 0;
5ba3f43e
A
649}
650
39037602
A
651static void
652fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
653 u_int32_t *bytesp)
654{
655 fq_if_classq_t *fq_cl;
656 u_int32_t pkts, bytes;
5ba3f43e 657 pktsched_pkt_t pkt;
39037602
A
658
659 fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
660 pkts = bytes = 0;
5ba3f43e 661 _PKTSCHED_PKT_INIT(&pkt);
cb323159
A
662 for (;;) {
663 fq_getq_flow(fqs, fq, &pkt);
664 if (pkt.pktsched_pkt_mbuf == NULL) {
665 VERIFY(pkt.pktsched_ptype == QP_INVALID);
666 break;
667 }
39037602 668 pkts++;
5ba3f43e
A
669 bytes += pktsched_get_pkt_len(&pkt);
670 pktsched_free_pkt(&pkt);
39037602
A
671 }
672 IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);
673
674 if (fq->fq_flags & FQF_NEW_FLOW) {
675 fq_if_empty_new_flow(fq, fq_cl, false);
676 } else if (fq->fq_flags & FQF_OLD_FLOW) {
c3c9b80d 677 fq_if_empty_old_flow(fqs, fq_cl, fq, false, true);
39037602
A
678 }
679
c3c9b80d 680 fq_if_destroy_flow(fqs, fq_cl, fq, true);
39037602
A
681
682 if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
683 int i;
684 for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
685 pktsched_bit_clr(fq_cl->fcl_pri,
686 &fqs->fqs_bitmaps[i]);
687 }
688 }
0a7de745 689 if (pktsp != NULL) {
39037602 690 *pktsp = pkts;
0a7de745
A
691 }
692 if (bytesp != NULL) {
39037602 693 *bytesp = bytes;
0a7de745 694 }
39037602
A
695}
696
697static void
698fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
699{
700 fq_t *fq, *tfq;
701 /*
702 * Take each flow from new/old flow list and flush mbufs
703 * in that flow
704 */
705 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
706 fq_if_purge_flow(fqs, fq, NULL, NULL);
707 }
708 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
709 fq_if_purge_flow(fqs, fq, NULL, NULL);
710 }
711 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
712 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));
713
714 STAILQ_INIT(&fq_cl->fcl_new_flows);
715 STAILQ_INIT(&fq_cl->fcl_old_flows);
716 fq_cl->fcl_budget = 0;
717}
718
719static void
720fq_if_purge(fq_if_t *fqs)
721{
722 int i;
723
724 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
725 for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
726 fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
727 }
728
729 VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
730
731 fqs->fqs_large_flow = NULL;
732 for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
733 VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
734 }
735
0a7de745 736 bzero(&fqs->fqs_bitmaps, sizeof(fqs->fqs_bitmaps));
39037602
A
737
738 IFCQ_LEN(fqs->fqs_ifq) = 0;
739 IFCQ_BYTES(fqs->fqs_ifq) = 0;
740}
741
742static void
743fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
744{
745 fq_t *fq;
746
747 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
748 req->packets = req->bytes = 0;
749 VERIFY(req->flow != 0);
750
5ba3f43e
A
751 /* packet type is needed only if we want to create a flow queue */
752 fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID);
39037602 753
0a7de745 754 if (fq != NULL) {
39037602 755 fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
0a7de745 756 }
39037602
A
757}
758
c3c9b80d
A
759static uint16_t
760fq_if_calc_quantum(struct ifnet *ifp)
761{
762 uint16_t quantum;
763
764 switch (ifp->if_family) {
765 case IFNET_FAMILY_ETHERNET:
766 VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
767 quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
768 break;
769
770 case IFNET_FAMILY_CELLULAR:
771 case IFNET_FAMILY_IPSEC:
772 case IFNET_FAMILY_UTUN:
773 VERIFY(ifp->if_mtu <= UINT16_MAX);
774 quantum = (uint16_t)ifp->if_mtu;
775 break;
776
777 default:
778 quantum = FQ_CODEL_DEFAULT_QUANTUM;
779 break;
780 }
781
782 /*
783 * XXX: Skywalk native interface doesn't support HW TSO offload.
784 */
785 if (((ifp->if_eflags & IFEF_SKYWALK_NATIVE) == 0) &&
786 ((ifp->if_hwassist & IFNET_TSOF) != 0)) {
787 VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
788 VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
789 quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
790 quantum = (quantum != 0) ? quantum : IF_MAXMTU;
791 }
792
793 quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
794#if DEBUG || DEVELOPMENT
795 quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
796#endif /* DEBUG || DEVELOPMENT */
797 return quantum;
798}
799
800static void
801fq_if_mtu_update(fq_if_t *fqs)
802{
803#define _FQ_CLASSQ_UPDATE_QUANTUM(_fqs, _s, _q) \
804 (_fqs)->fqs_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum = \
805 FQ_CODEL_QUANTUM_ ## _s(_q)
806
807 uint16_t quantum;
808
809 quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);
810
811 if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
812 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK, quantum);
813 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BE, quantum);
814 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, VI, quantum);
815 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, VO, quantum);
816 } else {
817 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK_SYS, quantum);
818 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK, quantum);
819 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BE, quantum);
820 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, RD, quantum);
821 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, OAM, quantum);
822 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, AV, quantum);
823 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, RV, quantum);
824 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, VI, quantum);
825 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, VO, quantum);
826 _FQ_CLASSQ_UPDATE_QUANTUM(fqs, CTL, quantum);
827 }
828#undef _FQ_CLASSQ_UPDATE_QUANTUM
829}
830
39037602
A
831static void
832fq_if_event(fq_if_t *fqs, cqev_t ev)
833{
834 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
835
836 switch (ev) {
837 case CLASSQ_EV_LINK_UP:
838 case CLASSQ_EV_LINK_DOWN:
839 fq_if_purge(fqs);
840 break;
c3c9b80d
A
841 case CLASSQ_EV_LINK_MTU:
842 fq_if_mtu_update(fqs);
843 break;
39037602
A
844 default:
845 break;
846 }
847}
848
849static void
850fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
851{
852 fq_if_purge_classq(fqs, fq_cl);
853 fqs->fqs_throttle = 1;
854 fq_cl->fcl_stat.fcl_throttle_on++;
855}
856
857static void
858fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
859{
860 VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
861 fqs->fqs_throttle = 0;
862 fq_cl->fcl_stat.fcl_throttle_off++;
863}
864
865
866static int
867fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
868{
869 struct ifclassq *ifq = fqs->fqs_ifq;
f427ee49 870 uint8_t index;
5ba3f43e
A
871#if !MACH_ASSERT
872#pragma unused(ifq)
873#endif
39037602
A
874 IFCQ_LOCK_ASSERT_HELD(ifq);
875
876 if (!tr->set) {
877 tr->level = fqs->fqs_throttle;
0a7de745 878 return 0;
39037602
A
879 }
880
0a7de745
A
881 if (tr->level == fqs->fqs_throttle) {
882 return EALREADY;
883 }
39037602
A
884
885 /* Throttling is allowed on BK_SYS class only */
5ba3f43e 886 index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
39037602
A
887 switch (tr->level) {
888 case IFNET_THROTTLE_OFF:
889 fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
890 break;
891 case IFNET_THROTTLE_OPPORTUNISTIC:
892 fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
893 break;
894 default:
895 break;
896 }
0a7de745 897 return 0;
39037602
A
898}
899
900void
901fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
902{
f427ee49 903 uint8_t pri;
39037602
A
904 fq_if_classq_t *fq_cl;
905
0a7de745 906 if (stat == NULL) {
39037602 907 return;
0a7de745 908 }
39037602 909
5ba3f43e 910 pri = fq_if_service_to_priority(fqs, stat->sc);
39037602 911 fq_cl = &fqs->fqs_classq[pri];
f427ee49
A
912 stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
913 stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;
39037602
A
914}
915
916int
917fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
918{
919 int err = 0;
920 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
921
922 IFCQ_LOCK_ASSERT_HELD(ifq);
923
924 /*
925 * These are usually slow operations, convert the lock ahead of time
926 */
927 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
928 switch (rq) {
929 case CLASSQRQ_PURGE:
930 fq_if_purge(fqs);
931 break;
932 case CLASSQRQ_PURGE_SC:
933 fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
934 break;
935 case CLASSQRQ_EVENT:
936 fq_if_event(fqs, (cqev_t)arg);
937 break;
938 case CLASSQRQ_THROTTLE:
939 fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
940 break;
941 case CLASSQRQ_STAT_SC:
942 fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
943 break;
944 }
0a7de745 945 return err;
39037602
A
946}
947
948int
5ba3f43e
A
949fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
950 classq_pkt_type_t ptype)
39037602
A
951{
952#pragma unused(flags)
c3c9b80d
A
953#define _FQ_CLASSQ_INIT(_fqs, _s, _q) \
954 fq_if_classq_init((_fqs), FQ_IF_ ## _s ## _INDEX, \
955 FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX_ ## _s, \
956 MBUF_SC_ ## _s )
957
39037602
A
958 struct ifnet *ifp = ifq->ifcq_ifp;
959 fq_if_t *fqs = NULL;
c3c9b80d 960 uint16_t quantum;
39037602
A
961 int err = 0;
962
963 IFCQ_LOCK_ASSERT_HELD(ifq);
964 VERIFY(ifq->ifcq_disc == NULL);
965 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
966
5ba3f43e 967 fqs = fq_if_alloc(ifp, ptype);
0a7de745
A
968 if (fqs == NULL) {
969 return ENOMEM;
970 }
39037602 971
c3c9b80d
A
972 quantum = fq_if_calc_quantum(ifp);
973
5ba3f43e
A
974 if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
975 fqs->fqs_flags |= FQS_DRIVER_MANAGED;
c3c9b80d
A
976 _FQ_CLASSQ_INIT(fqs, BK, quantum);
977 _FQ_CLASSQ_INIT(fqs, BE, quantum);
978 _FQ_CLASSQ_INIT(fqs, VI, quantum);
979 _FQ_CLASSQ_INIT(fqs, VO, quantum);
5ba3f43e 980 } else {
d9a64523
A
981 /* SIG shares same INDEX with VI */
982 _CASSERT(SCIDX_SIG == SCIDX_VI);
983 _CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);
984
c3c9b80d
A
985 _FQ_CLASSQ_INIT(fqs, BK_SYS, quantum);
986 _FQ_CLASSQ_INIT(fqs, BK, quantum);
987 _FQ_CLASSQ_INIT(fqs, BE, quantum);
988 _FQ_CLASSQ_INIT(fqs, RD, quantum);
989 _FQ_CLASSQ_INIT(fqs, OAM, quantum);
990 _FQ_CLASSQ_INIT(fqs, AV, quantum);
991 _FQ_CLASSQ_INIT(fqs, RV, quantum);
992 _FQ_CLASSQ_INIT(fqs, VI, quantum);
993 _FQ_CLASSQ_INIT(fqs, VO, quantum);
994 _FQ_CLASSQ_INIT(fqs, CTL, quantum);
5ba3f43e 995 }
39037602 996
f427ee49 997 err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
39037602 998 if (err != 0) {
c3c9b80d 999 os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
39037602
A
1000 "failed to attach fq_if: %d\n", __func__, err);
1001 fq_if_destroy(fqs);
1002 }
0a7de745 1003 return err;
c3c9b80d 1004#undef _FQ_CLASSQ_INIT
39037602
A
1005}
1006
1007fq_t *
1008fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
5ba3f43e 1009 u_int64_t now, boolean_t create, classq_pkt_type_t ptype)
39037602
A
1010{
1011 fq_t *fq = NULL;
1012 flowq_list_t *fq_list;
1013 fq_if_classq_t *fq_cl;
1014 u_int8_t fqs_hash_id;
1015 u_int8_t scidx;
1016
5ba3f43e 1017 scidx = fq_if_service_to_priority(fqs, svc_class);
39037602
A
1018
1019 fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);
1020
1021 fq_list = &fqs->fqs_flows[fqs_hash_id];
1022
1023 SLIST_FOREACH(fq, fq_list, fq_hashlink) {
1024 if (fq->fq_flowhash == flowid &&
0a7de745 1025 fq->fq_sc_index == scidx) {
39037602 1026 break;
0a7de745 1027 }
39037602
A
1028 }
1029 if (fq == NULL && create == TRUE) {
5ba3f43e
A
1030 ASSERT(ptype == QP_MBUF);
1031
39037602
A
1032 /* If the flow is not already on the list, allocate it */
1033 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
5ba3f43e 1034 fq = fq_alloc(ptype);
39037602
A
1035 if (fq != NULL) {
1036 fq->fq_flowhash = flowid;
1037 fq->fq_sc_index = scidx;
1038 fq->fq_updatetime = now + fqs->fqs_update_interval;
1039 fq_cl = &fqs->fqs_classq[scidx];
39037602
A
1040 fq->fq_flags = FQF_FLOWCTL_CAPABLE;
1041 SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
1042 fq_cl->fcl_stat.fcl_flows_cnt++;
1043 }
1044 }
1045
1046 /*
1047 * If getq time is not set because this is the first packet or after
1048 * idle time, set it now so that we can detect a stall.
1049 */
0a7de745 1050 if (fq != NULL && fq->fq_getqtime == 0) {
39037602 1051 fq->fq_getqtime = now;
0a7de745 1052 }
39037602 1053
0a7de745 1054 return fq;
39037602
A
1055}
1056
5ba3f43e 1057void
c3c9b80d
A
1058fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
1059 bool destroy_now)
39037602
A
1060{
1061 u_int8_t hash_id;
1062 hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
1063 SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
1064 fq_hashlink);
1065 fq_cl->fcl_stat.fcl_flows_cnt--;
1066 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
c3c9b80d
A
1067 fq->fq_flags |= FQF_DESTROYED;
1068 if (destroy_now) {
1069 fq_destroy(fq);
1070 }
39037602
A
1071}
1072
1073inline boolean_t
1074fq_if_at_drop_limit(fq_if_t *fqs)
1075{
0a7de745
A
1076 return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
1077 TRUE : FALSE;
39037602
A
1078}
1079
1080static void
1081fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
c3c9b80d 1082 bool remove_hash, bool destroy)
39037602
A
1083{
1084 /*
1085 * Remove the flow queue if it is empty
1086 * and delete it
1087 */
1088 STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
1089 fq_actlink);
1090 fq->fq_flags &= ~FQF_OLD_FLOW;
1091 fq_cl->fcl_stat.fcl_oldflows_cnt--;
1092 VERIFY(fq->fq_bytes == 0);
1093
1094 if (remove_hash) {
1095 /* Remove from the hash list */
c3c9b80d 1096 fq_if_destroy_flow(fqs, fq_cl, fq, destroy);
39037602
A
1097 }
1098}
1099
1100static void
1101fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
1102{
1103 /* Move to the end of old queue list */
1104 STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
1105 flowq, fq_actlink);
1106 fq->fq_flags &= ~FQF_NEW_FLOW;
1107 fq_cl->fcl_stat.fcl_newflows_cnt--;
1108
1109 if (add_to_old) {
1110 STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
1111 fq_actlink);
1112 fq->fq_flags |= FQF_OLD_FLOW;
1113 fq_cl->fcl_stat.fcl_oldflows_cnt++;
1114 }
1115}
1116
1117inline void
1118fq_if_drop_packet(fq_if_t *fqs)
1119{
1120 fq_t *fq = fqs->fqs_large_flow;
39037602 1121 fq_if_classq_t *fq_cl;
5ba3f43e 1122 pktsched_pkt_t pkt;
cb323159 1123 volatile uint32_t *pkt_flags;
5ba3f43e 1124 uint64_t *pkt_timestamp;
39037602 1125
0a7de745 1126 if (fq == NULL) {
39037602 1127 return;
0a7de745 1128 }
5ba3f43e
A
1129 /* queue can not be empty on the largest flow */
1130 VERIFY(!fq_empty(fq));
39037602
A
1131
1132 fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
5ba3f43e 1133 _PKTSCHED_PKT_INIT(&pkt);
cb323159
A
1134 fq_getq_flow_internal(fqs, fq, &pkt);
1135 ASSERT(pkt.pktsched_ptype != QP_INVALID);
39037602 1136
5ba3f43e
A
1137 pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
1138 NULL, NULL);
39037602
A
1139
1140 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
5ba3f43e 1141 *pkt_timestamp = 0;
cb323159
A
1142 switch (pkt.pktsched_ptype) {
1143 case QP_MBUF:
5ba3f43e 1144 *pkt_flags &= ~PKTF_PRIV_GUARDED;
cb323159
A
1145 break;
1146 default:
1147 VERIFY(0);
1148 /* NOTREACHED */
1149 __builtin_unreachable();
0a7de745 1150 }
5ba3f43e
A
1151
1152 if (fq_empty(fq)) {
1153 fqs->fqs_large_flow = NULL;
39037602 1154 if (fq->fq_flags & FQF_OLD_FLOW) {
c3c9b80d 1155 fq_if_empty_old_flow(fqs, fq_cl, fq, true, true);
39037602
A
1156 } else {
1157 VERIFY(fq->fq_flags & FQF_NEW_FLOW);
1158 fq_if_empty_new_flow(fq, fq_cl, true);
1159 }
1160 }
5ba3f43e 1161 IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));
39037602 1162
5ba3f43e 1163 pktsched_free_pkt(&pkt);
39037602
A
1164 fq_cl->fcl_stat.fcl_drop_overflow++;
1165}
1166
1167inline void
1168fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
1169{
5ba3f43e
A
1170 fq_t *prev_fq;
1171
1172 if (fqs->fqs_large_flow != NULL &&
0a7de745 1173 fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
5ba3f43e 1174 fqs->fqs_large_flow = NULL;
0a7de745 1175 }
5ba3f43e 1176
0a7de745 1177 if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
5ba3f43e 1178 return;
0a7de745 1179 }
5ba3f43e
A
1180
1181 prev_fq = fqs->fqs_large_flow;
1182 if (prev_fq == NULL) {
0a7de745 1183 if (!fq_empty(fq)) {
5ba3f43e 1184 fqs->fqs_large_flow = fq;
0a7de745 1185 }
39037602
A
1186 return;
1187 } else if (fq->fq_bytes > prev_fq->fq_bytes) {
1188 fqs->fqs_large_flow = fq;
1189 }
1190}
1191
1192boolean_t
c3c9b80d
A
1193fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
1194 fq_t *fq, fq_if_classq_t *fq_cl)
39037602
A
1195{
1196 struct flowadv_fcentry *fce;
39037602 1197
c3c9b80d
A
1198#if DEBUG || DEVELOPMENT
1199 if (__improbable(ifclassq_flow_control_adv == 0)) {
1200 os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
1201 return TRUE;
1202 }
1203#endif /* DEBUG || DEVELOPMENT */
1204
39037602 1205 STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
5ba3f43e 1206 if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
c3c9b80d 1207 fce->fce_flowid == fq->fq_flowhash) {
39037602 1208 /* Already on flowcontrol list */
0a7de745 1209 return TRUE;
39037602
A
1210 }
1211 }
39037602 1212 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
5ba3f43e 1213 fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
39037602 1214 if (fce != NULL) {
39037602
A
1215 /* XXX Add number of bytes in the queue */
1216 STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
1217 fq_cl->fcl_stat.fcl_flow_control++;
c3c9b80d
A
1218 os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
1219 "flow: 0x%x, iface: %s\n", __func__,
1220 fq_cl->fcl_stat.fcl_flow_control,
1221 fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
1222 if_name(fqs->fqs_ifq->ifcq_ifp));
39037602 1223 }
0a7de745 1224 return (fce != NULL) ? TRUE : FALSE;
39037602
A
1225}
1226
1227void
1228fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
1229{
1230 struct flowadv_fcentry *fce = NULL;
1231
1232 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1233 STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
0a7de745 1234 if (fce->fce_flowid == fq->fq_flowhash) {
39037602 1235 break;
0a7de745 1236 }
39037602
A
1237 }
1238 if (fce != NULL) {
1239 STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
1240 fce_link);
1241 STAILQ_NEXT(fce, fce_link) = NULL;
39037602 1242 fq_cl->fcl_stat.fcl_flow_feedback++;
c3c9b80d
A
1243 os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
1244 "flow: 0x%x, iface: %s\n", __func__,
1245 fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
1246 fce->fce_flowsrc_type, fce->fce_flowid,
1247 if_name(fqs->fqs_ifq->ifcq_ifp));
1248 flowadv_add_entry(fce);
39037602
A
1249 }
1250 fq->fq_flags &= ~FQF_FLOWCTL_ON;
1251}
1252
1253void
f427ee49 1254fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
c3c9b80d
A
1255 int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
1256 uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
1257 boolean_t drvmgmt)
39037602
A
1258{
1259 fq_t *fq = NULL, *tfq = NULL;
39037602 1260 flowq_stailq_t temp_stailq;
c3c9b80d 1261 uint32_t pktcnt, bytecnt;
5ba3f43e 1262 boolean_t qempty, limit_reached = FALSE;
cb323159 1263 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
5ba3f43e 1264 fq_getq_flow_t fq_getq_flow_fn;
c3c9b80d 1265 classq_pkt_t *head, *tail;
5ba3f43e
A
1266
1267 switch (fqs->fqs_ptype) {
1268 case QP_MBUF:
1269 fq_getq_flow_fn = fq_getq_flow_mbuf;
1270 break;
1271
1272
1273 default:
1274 VERIFY(0);
1275 /* NOTREACHED */
cb323159 1276 __builtin_unreachable();
5ba3f43e 1277 }
39037602
A
1278
1279 /*
1280 * maximum byte limit should not be greater than the budget for
1281 * this class
1282 */
f427ee49 1283 if (bytelimit > fq_cl->fcl_budget && !drvmgmt) {
39037602 1284 bytelimit = fq_cl->fcl_budget;
0a7de745 1285 }
39037602
A
1286
1287 VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
39037602
A
1288 pktcnt = bytecnt = 0;
1289 STAILQ_INIT(&temp_stailq);
1290
1291 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
0a7de745 1292 ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
39037602 1293 FQF_NEW_FLOW);
39037602 1294
c3c9b80d
A
1295 if (fq_dqlist != NULL) {
1296 if (!fq->fq_in_dqlist) {
1297 fq_dqlist_add(fq_dqlist, fq);
1298 }
1299 head = &fq->fq_dq_head;
1300 tail = &fq->fq_dq_tail;
1301 } else {
1302 ASSERT(!fq->fq_in_dqlist);
1303 head = top;
1304 tail = &last;
1305 }
1306
5ba3f43e 1307 limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
c3c9b80d 1308 pktlimit, head, tail, &bytecnt, &pktcnt, &qempty,
5ba3f43e 1309 PKTF_NEW_FLOW);
39037602 1310
0a7de745 1311 if (fq->fq_deficit <= 0 || qempty) {
39037602 1312 fq_if_empty_new_flow(fq, fq_cl, true);
0a7de745 1313 }
5ba3f43e 1314 fq->fq_deficit += fq_cl->fcl_quantum;
0a7de745 1315 if (limit_reached) {
39037602 1316 goto done;
0a7de745 1317 }
39037602
A
1318 }
1319
1320 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
0a7de745 1321 VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
39037602 1322 FQF_OLD_FLOW);
c3c9b80d
A
1323 bool destroy = true;
1324
1325 if (fq_dqlist != NULL) {
1326 if (!fq->fq_in_dqlist) {
1327 fq_dqlist_add(fq_dqlist, fq);
1328 }
1329 head = &fq->fq_dq_head;
1330 tail = &fq->fq_dq_tail;
1331 destroy = false;
1332 } else {
1333 ASSERT(!fq->fq_in_dqlist);
1334 head = top;
1335 tail = &last;
1336 }
39037602 1337
5ba3f43e 1338 limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
c3c9b80d 1339 pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, 0);
39037602 1340
5ba3f43e 1341 if (qempty) {
c3c9b80d 1342 fq_if_empty_old_flow(fqs, fq_cl, fq, true, destroy);
39037602
A
1343 } else if (fq->fq_deficit <= 0) {
1344 STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
1345 flowq, fq_actlink);
1346 /*
1347 * Move to the end of the old queues list. We do not
1348 * need to update the flow count since this flow
1349 * will be added to the tail again
1350 */
1351 STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
1352 fq->fq_deficit += fq_cl->fcl_quantum;
1353 }
0a7de745 1354 if (limit_reached) {
39037602 1355 break;
0a7de745 1356 }
39037602
A
1357 }
1358
1359done:
1360 if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
1361 STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
1362 } else if (!STAILQ_EMPTY(&temp_stailq)) {
1363 fq_cl->fcl_old_flows = temp_stailq;
1364 }
cb323159
A
1365 if (last.cp_mbuf != NULL) {
1366 VERIFY(top->cp_mbuf != NULL);
c3c9b80d
A
1367 if (bottom != NULL) {
1368 *bottom = last;
0a7de745 1369 }
39037602 1370 }
c3c9b80d
A
1371 if (retpktcnt != NULL) {
1372 *retpktcnt = pktcnt;
1373 }
1374 if (retbytecnt != NULL) {
1375 *retbytecnt = bytecnt;
1376 }
39037602
A
1377}
1378
f427ee49 1379void
39037602
A
1380fq_if_teardown_ifclassq(struct ifclassq *ifq)
1381{
1382 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1383
1384 IFCQ_LOCK_ASSERT_HELD(ifq);
1385 VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
1386
1387 fq_if_destroy(fqs);
1388 ifq->ifcq_disc = NULL;
f427ee49 1389 ifclassq_detach(ifq);
39037602
A
1390}
1391
5ba3f43e
A
1392static void
1393fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
1394 struct fq_codel_flowstats *flowstat)
1395{
0a7de745 1396 bzero(flowstat, sizeof(*flowstat));
f427ee49 1397 flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
5ba3f43e
A
1398 flowstat->fqst_bytes = fq->fq_bytes;
1399 flowstat->fqst_flowhash = fq->fq_flowhash;
0a7de745 1400 if (fq->fq_flags & FQF_NEW_FLOW) {
5ba3f43e 1401 flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
0a7de745
A
1402 }
1403 if (fq->fq_flags & FQF_OLD_FLOW) {
5ba3f43e 1404 flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
0a7de745
A
1405 }
1406 if (fq->fq_flags & FQF_DELAY_HIGH) {
5ba3f43e 1407 flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
0a7de745
A
1408 }
1409 if (fq->fq_flags & FQF_FLOWCTL_ON) {
5ba3f43e 1410 flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
0a7de745
A
1411 }
1412 if (fqs->fqs_large_flow == fq) {
5ba3f43e 1413 flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
0a7de745 1414 }
5ba3f43e
A
1415}
1416
39037602
A
1417int
1418fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
1419 struct if_ifclassq_stats *ifqs)
1420{
1421 struct fq_codel_classstats *fcls;
1422 fq_if_classq_t *fq_cl;
1423 fq_if_t *fqs;
5ba3f43e
A
1424 fq_t *fq = NULL;
1425 u_int32_t i, flowstat_cnt;
39037602 1426
0a7de745
A
1427 if (qid >= FQ_IF_MAX_CLASSES) {
1428 return EINVAL;
1429 }
39037602
A
1430
1431 fqs = (fq_if_t *)ifq->ifcq_disc;
1432 fcls = &ifqs->ifqs_fq_codel_stats;
1433
1434 fq_cl = &fqs->fqs_classq[qid];
1435
1436 fcls->fcls_pri = fq_cl->fcl_pri;
1437 fcls->fcls_service_class = fq_cl->fcl_service_class;
1438 fcls->fcls_quantum = fq_cl->fcl_quantum;
1439 fcls->fcls_drr_max = fq_cl->fcl_drr_max;
1440 fcls->fcls_budget = fq_cl->fcl_budget;
1441 fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
1442 fcls->fcls_update_interval = fqs->fqs_update_interval;
1443 fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
1444 fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
1445 fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
1446 fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
1447 fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
1448 fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
1449 fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
1450 fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
1451 fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
1452 fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
1453 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
1454 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
1455 fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
1456 fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
1457 fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
1458 fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
1459 fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
1460 fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
1461 fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
f427ee49
A
1462 fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
1463 fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
39037602 1464
5ba3f43e
A
1465 /* Gather per flow stats */
1466 flowstat_cnt = min((fcls->fcls_newflows_cnt +
1467 fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
1468 i = 0;
1469 STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
0a7de745 1470 if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
5ba3f43e 1471 break;
0a7de745 1472 }
5ba3f43e
A
1473
1474 /* leave space for a few old flows */
1475 if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
0a7de745 1476 i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
5ba3f43e 1477 break;
0a7de745 1478 }
5ba3f43e
A
1479 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
1480 i++;
1481 }
1482 STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
0a7de745 1483 if (i >= flowstat_cnt) {
5ba3f43e 1484 break;
0a7de745 1485 }
5ba3f43e
A
1486 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
1487 i++;
1488 }
1489 VERIFY(i <= flowstat_cnt);
1490 fcls->fcls_flowstats_cnt = i;
0a7de745 1491 return 0;
39037602 1492}