]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/pktsched/pktsched_fq_codel.c
xnu-7195.50.7.100.1.tar.gz
[apple/xnu.git] / bsd / net / pktsched / pktsched_fq_codel.c
CommitLineData
39037602 1/*
f427ee49 2 * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
39037602
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/types.h>
30#include <sys/param.h>
31#include <kern/zalloc.h>
32#include <net/if_var.h>
33#include <net/if.h>
34#include <net/classq/classq.h>
35#include <net/classq/classq_fq_codel.h>
36#include <net/pktsched/pktsched_fq_codel.h>
37
f427ee49 38static ZONE_DECLARE(fq_if_zone, "pktsched_fq_if", sizeof(fq_if_t), ZC_ZFREE_CLEARMEM);
39037602 39
5ba3f43e 40static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t);
39037602 41static void fq_if_destroy(fq_if_t *fqs);
f427ee49
A
42static void fq_if_classq_init(fq_if_t *fqs, uint32_t priority,
43 uint16_t quantum, uint32_t drr_max, uint32_t svc_class);
44static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
45 int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
46 uint32_t *, boolean_t drvmgmt);
39037602
A
47void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
48static void fq_if_purge(fq_if_t *);
49static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
50static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
51static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
52 bool add_to_old);
53static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
54 fq_t *fq, bool remove_hash);
39037602 55
0a7de745 56#define FQ_IF_FLOW_HASH_ID(_flowid_) \
39037602
A
57 (((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)
58
0a7de745 59#define FQ_IF_CLASSQ_IDLE(_fcl_) \
39037602
A
60 (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
61 STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
62
cb323159 63typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
5ba3f43e 64typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
f427ee49 65 int64_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
cb323159 66 u_int32_t *, boolean_t *, u_int32_t);
5ba3f43e
A
67
68static void
cb323159 69fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
5ba3f43e 70{
cb323159 71 pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
5ba3f43e
A
72}
73
74
75
76static boolean_t
77fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
f427ee49 78 int64_t byte_limit, u_int32_t pkt_limit, classq_pkt_t *top,
cb323159
A
79 classq_pkt_t *last, u_int32_t *byte_cnt, u_int32_t *pkt_cnt,
80 boolean_t *qempty, u_int32_t pflags)
5ba3f43e 81{
5ba3f43e
A
82 u_int32_t plen;
83 pktsched_pkt_t pkt;
84 boolean_t limit_reached = FALSE;
85 struct ifclassq *ifq = fqs->fqs_ifq;
86 struct ifnet *ifp = ifq->ifcq_ifp;
87
88 while (fq->fq_deficit > 0 && limit_reached == FALSE &&
89 !MBUFQ_EMPTY(&fq->fq_mbufq)) {
5ba3f43e 90 _PKTSCHED_PKT_INIT(&pkt);
cb323159 91 fq_getq_flow(fqs, fq, &pkt);
5ba3f43e
A
92 ASSERT(pkt.pktsched_ptype == QP_MBUF);
93
94 plen = pktsched_get_pkt_len(&pkt);
95 fq->fq_deficit -= plen;
cb323159 96 pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= pflags;
5ba3f43e 97
cb323159
A
98 if (top->cp_mbuf == NULL) {
99 *top = pkt.pktsched_pkt;
5ba3f43e 100 } else {
cb323159
A
101 ASSERT(last->cp_mbuf != NULL);
102 ASSERT(last->cp_mbuf->m_nextpkt == NULL);
103 last->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
5ba3f43e 104 }
cb323159
A
105 *last = pkt.pktsched_pkt;
106 last->cp_mbuf->m_nextpkt = NULL;
5ba3f43e
A
107 fq_cl->fcl_stat.fcl_dequeue++;
108 fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
109 *pkt_cnt += 1;
110 *byte_cnt += plen;
111
cb323159 112 ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);
5ba3f43e
A
113
114 /* Check if the limit is reached */
0a7de745 115 if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
5ba3f43e 116 limit_reached = TRUE;
0a7de745 117 }
5ba3f43e
A
118 }
119
120 *qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
0a7de745 121 return limit_reached;
5ba3f43e
A
122}
123
39037602 124fq_if_t *
5ba3f43e 125fq_if_alloc(struct ifnet *ifp, classq_pkt_type_t ptype)
39037602
A
126{
127 fq_if_t *fqs;
39037602 128
f427ee49 129 fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
39037602 130 fqs->fqs_ifq = &ifp->if_snd;
5ba3f43e 131 fqs->fqs_ptype = ptype;
39037602
A
132
133 /* Calculate target queue delay */
134 ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);
135
136 /* Calculate update interval */
137 ifclassq_calc_update_interval(&fqs->fqs_update_interval);
5ba3f43e
A
138
139 /* Configure packet drop limit across all queues */
140 fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(&ifp->if_snd);
39037602 141 STAILQ_INIT(&fqs->fqs_fclist);
0a7de745 142 return fqs;
39037602
A
143}
144
145void
146fq_if_destroy(fq_if_t *fqs)
147{
39037602
A
148 fq_if_purge(fqs);
149 fqs->fqs_ifq = NULL;
150 zfree(fq_if_zone, fqs);
151}
152
f427ee49 153static inline uint8_t
5ba3f43e 154fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
39037602 155{
f427ee49 156 uint8_t pri;
39037602 157
5ba3f43e
A
158 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
159 switch (svc) {
160 case MBUF_SC_BK_SYS:
161 case MBUF_SC_BK:
162 pri = FQ_IF_BK_INDEX;
163 break;
164 case MBUF_SC_BE:
165 case MBUF_SC_RD:
166 case MBUF_SC_OAM:
167 pri = FQ_IF_BE_INDEX;
168 break;
169 case MBUF_SC_AV:
170 case MBUF_SC_RV:
171 case MBUF_SC_VI:
d9a64523 172 case MBUF_SC_SIG:
5ba3f43e
A
173 pri = FQ_IF_VI_INDEX;
174 break;
175 case MBUF_SC_VO:
176 case MBUF_SC_CTL:
177 pri = FQ_IF_VO_INDEX;
178 break;
179 default:
180 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
181 break;
182 }
0a7de745 183 return pri;
5ba3f43e
A
184 }
185
186 /* scheduler is not managed by the driver */
39037602
A
187 switch (svc) {
188 case MBUF_SC_BK_SYS:
189 pri = FQ_IF_BK_SYS_INDEX;
190 break;
191 case MBUF_SC_BK:
192 pri = FQ_IF_BK_INDEX;
193 break;
194 case MBUF_SC_BE:
195 pri = FQ_IF_BE_INDEX;
196 break;
197 case MBUF_SC_RD:
198 pri = FQ_IF_RD_INDEX;
199 break;
200 case MBUF_SC_OAM:
201 pri = FQ_IF_OAM_INDEX;
202 break;
203 case MBUF_SC_AV:
204 pri = FQ_IF_AV_INDEX;
205 break;
206 case MBUF_SC_RV:
207 pri = FQ_IF_RV_INDEX;
208 break;
209 case MBUF_SC_VI:
210 pri = FQ_IF_VI_INDEX;
211 break;
d9a64523
A
212 case MBUF_SC_SIG:
213 pri = FQ_IF_SIG_INDEX;
214 break;
39037602
A
215 case MBUF_SC_VO:
216 pri = FQ_IF_VO_INDEX;
217 break;
218 case MBUF_SC_CTL:
219 pri = FQ_IF_CTL_INDEX;
220 break;
221 default:
222 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
223 break;
224 }
0a7de745 225 return pri;
39037602
A
226}
227
f427ee49
A
228static void
229fq_if_classq_init(fq_if_t *fqs, uint32_t pri, uint16_t quantum,
230 uint32_t drr_max, uint32_t svc_class)
39037602
A
231{
232 fq_if_classq_t *fq_cl;
cb323159 233 VERIFY(pri < FQ_IF_MAX_CLASSES);
39037602
A
234 fq_cl = &fqs->fqs_classq[pri];
235
cb323159 236 VERIFY(fq_cl->fcl_quantum == 0);
39037602
A
237 fq_cl->fcl_quantum = quantum;
238 fq_cl->fcl_pri = pri;
239 fq_cl->fcl_drr_max = drr_max;
240 fq_cl->fcl_service_class = svc_class;
241 STAILQ_INIT(&fq_cl->fcl_new_flows);
242 STAILQ_INIT(&fq_cl->fcl_old_flows);
243}
244
245int
f427ee49
A
246fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
247 classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
39037602 248{
f427ee49 249 uint8_t pri;
39037602
A
250 fq_if_t *fqs;
251 fq_if_classq_t *fq_cl;
f427ee49 252 int ret;
39037602 253 mbuf_svc_class_t svc;
5ba3f43e 254 pktsched_pkt_t pkt;
39037602 255
f427ee49 256 pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);
39037602
A
257
258 fqs = (fq_if_t *)ifq->ifcq_disc;
5ba3f43e
A
259 svc = pktsched_get_pkt_svc(&pkt);
260 pri = fq_if_service_to_priority(fqs, svc);
cb323159 261 VERIFY(pri < FQ_IF_MAX_CLASSES);
39037602
A
262 fq_cl = &fqs->fqs_classq[pri];
263
f427ee49 264 if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
39037602 265 /* BK_SYS is currently throttled */
f427ee49 266 atomic_add_32(&fq_cl->fcl_stat.fcl_throttle_drops, 1);
5ba3f43e
A
267 pktsched_free_pkt(&pkt);
268 *pdrop = TRUE;
f427ee49
A
269 ret = EQSUSPENDED;
270 goto done;
39037602
A
271 }
272
f427ee49 273 IFCQ_LOCK_SPIN(ifq);
5ba3f43e
A
274 ret = fq_addq(fqs, &pkt, fq_cl);
275 if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) &&
276 !FQ_IF_CLASSQ_IDLE(fq_cl)) {
39037602
A
277 if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
278 (1 << pri)) == 0) {
279 /*
280 * this group is not in ER or EB groups,
281 * mark it as IB
282 */
283 pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
284 }
285 }
286
f427ee49 287 if (__improbable(ret != 0)) {
39037602
A
288 if (ret == CLASSQEQ_SUCCESS_FC) {
289 /* packet enqueued, return advisory feedback */
290 ret = EQFULL;
5ba3f43e 291 *pdrop = FALSE;
f427ee49
A
292 } else if (ret == CLASSQEQ_COMPRESSED) {
293 ret = 0;
294 *pdrop = FALSE;
39037602 295 } else {
f427ee49 296 IFCQ_UNLOCK(ifq);
5ba3f43e 297 *pdrop = TRUE;
5ba3f43e 298 pktsched_free_pkt(&pkt);
39037602 299 switch (ret) {
5ba3f43e 300 case CLASSQEQ_DROP:
f427ee49
A
301 ret = ENOBUFS;
302 goto done;
5ba3f43e 303 case CLASSQEQ_DROP_FC:
f427ee49
A
304 ret = EQFULL;
305 goto done;
5ba3f43e 306 case CLASSQEQ_DROP_SP:
f427ee49
A
307 ret = EQSUSPENDED;
308 goto done;
309 default:
310 VERIFY(0);
311 /* NOTREACHED */
312 __builtin_unreachable();
39037602 313 }
f427ee49
A
314 /* NOTREACHED */
315 __builtin_unreachable();
39037602 316 }
5ba3f43e
A
317 } else {
318 *pdrop = FALSE;
39037602 319 }
f427ee49
A
320 IFCQ_ADD_LEN(ifq, cnt);
321 IFCQ_INC_BYTES(ifq, bytes);
322 IFCQ_UNLOCK(ifq);
323done:
0a7de745 324 return ret;
39037602
A
325}
326
f427ee49 327void
cb323159 328fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt)
39037602 329{
5ba3f43e 330 (void) fq_if_dequeue_classq_multi(ifq, 1,
cb323159 331 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL);
5ba3f43e 332}
39037602 333
f427ee49 334void
5ba3f43e 335fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
cb323159 336 classq_pkt_t *pkt)
5ba3f43e 337{
5ba3f43e 338 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
f427ee49 339 uint32_t total_pktcnt = 0, total_bytecnt = 0;
5ba3f43e 340 fq_if_classq_t *fq_cl;
f427ee49 341 uint8_t pri;
5ba3f43e
A
342
343 pri = fq_if_service_to_priority(fqs, svc);
344 fq_cl = &fqs->fqs_classq[pri];
345
346 fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
f427ee49
A
347 pkt, NULL, &total_pktcnt, &total_bytecnt, TRUE);
348
349 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
39037602
A
350}
351
352int
5ba3f43e 353fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
cb323159
A
354 u_int32_t maxbytecnt, classq_pkt_t *first_packet,
355 classq_pkt_t *last_packet, u_int32_t *retpktcnt,
356 u_int32_t *retbytecnt)
39037602 357{
cb323159
A
358 u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt = 0, total_bytecnt = 0;
359 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
360 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
361 classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
362 fq_if_append_pkt_t append_pkt;
39037602 363 fq_if_classq_t *fq_cl;
cb323159 364 fq_if_t *fqs;
39037602
A
365 int pri;
366
367 IFCQ_LOCK_ASSERT_HELD(ifq);
368
369 fqs = (fq_if_t *)ifq->ifcq_disc;
370
5ba3f43e
A
371 switch (fqs->fqs_ptype) {
372 case QP_MBUF:
373 append_pkt = fq_if_append_mbuf;
374 break;
375
376
377 default:
378 VERIFY(0);
379 /* NOTREACHED */
cb323159 380 __builtin_unreachable();
5ba3f43e
A
381 }
382
39037602 383 for (;;) {
cb323159
A
384 classq_pkt_t top = CLASSQ_PKT_INITIALIZER(top);
385 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
386
39037602
A
387 if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
388 fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
389 fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
390 fqs->fqs_bitmaps[FQ_IF_IB] = 0;
0a7de745 391 if (fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
39037602 392 break;
0a7de745 393 }
39037602
A
394 }
395 pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
396 if (pri == 0) {
397 /*
398 * There are no ER flows, move the highest
399 * priority one from EB if there are any in that
400 * category
401 */
402 pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
403 VERIFY(pri > 0);
404 pktsched_bit_clr((pri - 1),
405 &fqs->fqs_bitmaps[FQ_IF_EB]);
406 pktsched_bit_set((pri - 1),
407 &fqs->fqs_bitmaps[FQ_IF_ER]);
408 }
409 pri--; /* index starts at 0 */
410 fq_cl = &fqs->fqs_classq[pri];
411
412 if (fq_cl->fcl_budget <= 0) {
413 /* Update the budget */
414 fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
415 fq_cl->fcl_stat.fcl_flows_cnt) *
416 fq_cl->fcl_quantum);
0a7de745 417 if (fq_cl->fcl_budget <= 0) {
39037602 418 goto state_change;
0a7de745 419 }
39037602
A
420 }
421 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
422 (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
cb323159
A
423 &bytecnt, FALSE);
424 if (top.cp_mbuf != NULL) {
5ba3f43e 425 ASSERT(pktcnt > 0 && bytecnt > 0);
cb323159 426 if (first.cp_mbuf == NULL) {
39037602 427 first = top;
39037602
A
428 total_pktcnt = pktcnt;
429 total_bytecnt = bytecnt;
430 } else {
cb323159
A
431 ASSERT(last.cp_mbuf != NULL);
432 append_pkt(&last, &top);
39037602
A
433 total_pktcnt += pktcnt;
434 total_bytecnt += bytecnt;
435 }
cb323159
A
436 last = tail;
437 append_pkt(&last, &tmp);
39037602
A
438 fq_cl->fcl_budget -= bytecnt;
439 pktcnt = 0;
440 bytecnt = 0;
441 }
442
443 /*
444 * If the class has exceeded the budget but still has data
445 * to send, move it to IB
446 */
447state_change:
448 if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
449 if (fq_cl->fcl_budget <= 0) {
450 pktsched_bit_set(pri,
451 &fqs->fqs_bitmaps[FQ_IF_IB]);
452 pktsched_bit_clr(pri,
453 &fqs->fqs_bitmaps[FQ_IF_ER]);
454 }
455 } else {
456 pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
457 VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
458 fqs->fqs_bitmaps[FQ_IF_EB] |
0a7de745 459 fqs->fqs_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
39037602
A
460 fq_cl->fcl_budget = 0;
461 }
0a7de745 462 if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
39037602 463 break;
0a7de745 464 }
39037602 465 }
cb323159
A
466
467 if (__probable(first_packet != NULL)) {
468 *first_packet = first;
469 }
470 if (last_packet != NULL) {
471 *last_packet = last;
39037602 472 }
cb323159
A
473 if (retpktcnt != NULL) {
474 *retpktcnt = total_pktcnt;
475 }
476 if (retbytecnt != NULL) {
477 *retbytecnt = total_bytecnt;
478 }
479
480 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
0a7de745 481 return 0;
39037602
A
482}
483
5ba3f43e
A
484int
485fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
cb323159
A
486 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
487 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt)
5ba3f43e 488{
5ba3f43e 489 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
f427ee49 490 uint8_t pri;
5ba3f43e
A
491 u_int32_t total_pktcnt = 0, total_bytecnt = 0;
492 fq_if_classq_t *fq_cl;
cb323159
A
493 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
494 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
5ba3f43e
A
495 fq_if_append_pkt_t append_pkt;
496
497 switch (fqs->fqs_ptype) {
498 case QP_MBUF:
499 append_pkt = fq_if_append_mbuf;
500 break;
501
502
503 default:
504 VERIFY(0);
505 /* NOTREACHED */
cb323159 506 __builtin_unreachable();
5ba3f43e
A
507 }
508
509 pri = fq_if_service_to_priority(fqs, svc);
510 fq_cl = &fqs->fqs_classq[pri];
5ba3f43e
A
511 /*
512 * Now we have the queue for a particular service class. We need
513 * to dequeue as many packets as needed, first from the new flows
514 * and then from the old flows.
515 */
516 while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
517 fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
cb323159
A
518 classq_pkt_t top = CLASSQ_PKT_INITIALIZER(top);
519 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
5ba3f43e 520 u_int32_t pktcnt = 0, bytecnt = 0;
cb323159 521
5ba3f43e
A
522 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
523 (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
cb323159
A
524 &bytecnt, TRUE);
525 if (top.cp_mbuf != NULL) {
526 if (first.cp_mbuf == NULL) {
527 first = top;
528 total_pktcnt = pktcnt;
529 total_bytecnt = bytecnt;
530 } else {
531 ASSERT(last.cp_mbuf != NULL);
532 append_pkt(&last, &top);
533 total_pktcnt += pktcnt;
534 total_bytecnt += bytecnt;
535 }
536 last = tail;
5ba3f43e 537 }
5ba3f43e 538 }
cb323159
A
539
540 if (__probable(first_packet != NULL)) {
541 *first_packet = first;
542 }
543 if (last_packet != NULL) {
544 *last_packet = last;
5ba3f43e 545 }
cb323159
A
546 if (retpktcnt != NULL) {
547 *retpktcnt = total_pktcnt;
548 }
549 if (retbytecnt != NULL) {
550 *retbytecnt = total_bytecnt;
551 }
552
f427ee49
A
553 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
554
0a7de745 555 return 0;
5ba3f43e
A
556}
557
39037602
A
558static void
559fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
560 u_int32_t *bytesp)
561{
562 fq_if_classq_t *fq_cl;
563 u_int32_t pkts, bytes;
5ba3f43e 564 pktsched_pkt_t pkt;
39037602
A
565
566 fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
567 pkts = bytes = 0;
5ba3f43e 568 _PKTSCHED_PKT_INIT(&pkt);
cb323159
A
569 for (;;) {
570 fq_getq_flow(fqs, fq, &pkt);
571 if (pkt.pktsched_pkt_mbuf == NULL) {
572 VERIFY(pkt.pktsched_ptype == QP_INVALID);
573 break;
574 }
39037602 575 pkts++;
5ba3f43e
A
576 bytes += pktsched_get_pkt_len(&pkt);
577 pktsched_free_pkt(&pkt);
39037602
A
578 }
579 IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);
580
581 if (fq->fq_flags & FQF_NEW_FLOW) {
582 fq_if_empty_new_flow(fq, fq_cl, false);
583 } else if (fq->fq_flags & FQF_OLD_FLOW) {
584 fq_if_empty_old_flow(fqs, fq_cl, fq, false);
585 }
586
587 fq_if_destroy_flow(fqs, fq_cl, fq);
588
589 if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
590 int i;
591 for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
592 pktsched_bit_clr(fq_cl->fcl_pri,
593 &fqs->fqs_bitmaps[i]);
594 }
595 }
0a7de745 596 if (pktsp != NULL) {
39037602 597 *pktsp = pkts;
0a7de745
A
598 }
599 if (bytesp != NULL) {
39037602 600 *bytesp = bytes;
0a7de745 601 }
39037602
A
602}
603
604static void
605fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
606{
607 fq_t *fq, *tfq;
608 /*
609 * Take each flow from new/old flow list and flush mbufs
610 * in that flow
611 */
612 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
613 fq_if_purge_flow(fqs, fq, NULL, NULL);
614 }
615 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
616 fq_if_purge_flow(fqs, fq, NULL, NULL);
617 }
618 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
619 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));
620
621 STAILQ_INIT(&fq_cl->fcl_new_flows);
622 STAILQ_INIT(&fq_cl->fcl_old_flows);
623 fq_cl->fcl_budget = 0;
624}
625
626static void
627fq_if_purge(fq_if_t *fqs)
628{
629 int i;
630
631 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
632 for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
633 fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
634 }
635
636 VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
637
638 fqs->fqs_large_flow = NULL;
639 for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
640 VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
641 }
642
0a7de745 643 bzero(&fqs->fqs_bitmaps, sizeof(fqs->fqs_bitmaps));
39037602
A
644
645 IFCQ_LEN(fqs->fqs_ifq) = 0;
646 IFCQ_BYTES(fqs->fqs_ifq) = 0;
647}
648
649static void
650fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
651{
652 fq_t *fq;
653
654 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
655 req->packets = req->bytes = 0;
656 VERIFY(req->flow != 0);
657
5ba3f43e
A
658 /* packet type is needed only if we want to create a flow queue */
659 fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID);
39037602 660
0a7de745 661 if (fq != NULL) {
39037602 662 fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
0a7de745 663 }
39037602
A
664}
665
666static void
667fq_if_event(fq_if_t *fqs, cqev_t ev)
668{
669 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
670
671 switch (ev) {
672 case CLASSQ_EV_LINK_UP:
673 case CLASSQ_EV_LINK_DOWN:
674 fq_if_purge(fqs);
675 break;
676 default:
677 break;
678 }
679}
680
681static void
682fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
683{
684 fq_if_purge_classq(fqs, fq_cl);
685 fqs->fqs_throttle = 1;
686 fq_cl->fcl_stat.fcl_throttle_on++;
687}
688
689static void
690fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
691{
692 VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
693 fqs->fqs_throttle = 0;
694 fq_cl->fcl_stat.fcl_throttle_off++;
695}
696
697
698static int
699fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
700{
701 struct ifclassq *ifq = fqs->fqs_ifq;
f427ee49 702 uint8_t index;
5ba3f43e
A
703#if !MACH_ASSERT
704#pragma unused(ifq)
705#endif
39037602
A
706 IFCQ_LOCK_ASSERT_HELD(ifq);
707
708 if (!tr->set) {
709 tr->level = fqs->fqs_throttle;
0a7de745 710 return 0;
39037602
A
711 }
712
0a7de745
A
713 if (tr->level == fqs->fqs_throttle) {
714 return EALREADY;
715 }
39037602
A
716
717 /* Throttling is allowed on BK_SYS class only */
5ba3f43e 718 index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
39037602
A
719 switch (tr->level) {
720 case IFNET_THROTTLE_OFF:
721 fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
722 break;
723 case IFNET_THROTTLE_OPPORTUNISTIC:
724 fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
725 break;
726 default:
727 break;
728 }
0a7de745 729 return 0;
39037602
A
730}
731
732void
733fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
734{
f427ee49 735 uint8_t pri;
39037602
A
736 fq_if_classq_t *fq_cl;
737
0a7de745 738 if (stat == NULL) {
39037602 739 return;
0a7de745 740 }
39037602 741
5ba3f43e 742 pri = fq_if_service_to_priority(fqs, stat->sc);
39037602 743 fq_cl = &fqs->fqs_classq[pri];
f427ee49
A
744 stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
745 stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;
39037602
A
746}
747
748int
749fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
750{
751 int err = 0;
752 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
753
754 IFCQ_LOCK_ASSERT_HELD(ifq);
755
756 /*
757 * These are usually slow operations, convert the lock ahead of time
758 */
759 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
760 switch (rq) {
761 case CLASSQRQ_PURGE:
762 fq_if_purge(fqs);
763 break;
764 case CLASSQRQ_PURGE_SC:
765 fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
766 break;
767 case CLASSQRQ_EVENT:
768 fq_if_event(fqs, (cqev_t)arg);
769 break;
770 case CLASSQRQ_THROTTLE:
771 fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
772 break;
773 case CLASSQRQ_STAT_SC:
774 fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
775 break;
776 }
0a7de745 777 return err;
39037602
A
778}
779
780int
5ba3f43e
A
781fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
782 classq_pkt_type_t ptype)
39037602
A
783{
784#pragma unused(flags)
785 struct ifnet *ifp = ifq->ifcq_ifp;
786 fq_if_t *fqs = NULL;
787 int err = 0;
788
789 IFCQ_LOCK_ASSERT_HELD(ifq);
790 VERIFY(ifq->ifcq_disc == NULL);
791 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
792
5ba3f43e 793 fqs = fq_if_alloc(ifp, ptype);
0a7de745
A
794 if (fqs == NULL) {
795 return ENOMEM;
796 }
39037602 797
5ba3f43e
A
798 if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
799 fqs->fqs_flags |= FQS_DRIVER_MANAGED;
800 fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
801 2, MBUF_SC_BK);
802 fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
803 4, MBUF_SC_BE);
804 fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
805 6, MBUF_SC_VI);
806 fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
807 8, MBUF_SC_VO);
808 } else {
d9a64523
A
809 /* SIG shares same INDEX with VI */
810 _CASSERT(SCIDX_SIG == SCIDX_VI);
811 _CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);
812
5ba3f43e
A
813 fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500,
814 2, MBUF_SC_BK_SYS);
815 fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
816 2, MBUF_SC_BK);
817 fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
818 4, MBUF_SC_BE);
819 fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500,
820 4, MBUF_SC_RD);
821 fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500,
822 4, MBUF_SC_OAM);
823 fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000,
824 6, MBUF_SC_AV);
825 fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000,
826 6, MBUF_SC_RV);
827 fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
828 6, MBUF_SC_VI);
829 fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
830 8, MBUF_SC_VO);
831 fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600,
832 8, MBUF_SC_CTL);
833 }
39037602 834
f427ee49 835 err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
39037602
A
836
837 if (err != 0) {
838 printf("%s: error from ifclassq_attach, "
839 "failed to attach fq_if: %d\n", __func__, err);
840 fq_if_destroy(fqs);
841 }
0a7de745 842 return err;
39037602
A
843}
844
845fq_t *
846fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
5ba3f43e 847 u_int64_t now, boolean_t create, classq_pkt_type_t ptype)
39037602
A
848{
849 fq_t *fq = NULL;
850 flowq_list_t *fq_list;
851 fq_if_classq_t *fq_cl;
852 u_int8_t fqs_hash_id;
853 u_int8_t scidx;
854
5ba3f43e 855 scidx = fq_if_service_to_priority(fqs, svc_class);
39037602
A
856
857 fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);
858
859 fq_list = &fqs->fqs_flows[fqs_hash_id];
860
861 SLIST_FOREACH(fq, fq_list, fq_hashlink) {
862 if (fq->fq_flowhash == flowid &&
0a7de745 863 fq->fq_sc_index == scidx) {
39037602 864 break;
0a7de745 865 }
39037602
A
866 }
867 if (fq == NULL && create == TRUE) {
5ba3f43e
A
868 ASSERT(ptype == QP_MBUF);
869
39037602
A
870 /* If the flow is not already on the list, allocate it */
871 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
5ba3f43e 872 fq = fq_alloc(ptype);
39037602
A
873 if (fq != NULL) {
874 fq->fq_flowhash = flowid;
875 fq->fq_sc_index = scidx;
876 fq->fq_updatetime = now + fqs->fqs_update_interval;
877 fq_cl = &fqs->fqs_classq[scidx];
39037602
A
878 fq->fq_flags = FQF_FLOWCTL_CAPABLE;
879 SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
880 fq_cl->fcl_stat.fcl_flows_cnt++;
881 }
882 }
883
884 /*
885 * If getq time is not set because this is the first packet or after
886 * idle time, set it now so that we can detect a stall.
887 */
0a7de745 888 if (fq != NULL && fq->fq_getqtime == 0) {
39037602 889 fq->fq_getqtime = now;
0a7de745 890 }
39037602 891
0a7de745 892 return fq;
39037602
A
893}
894
5ba3f43e 895void
39037602
A
896fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
897{
898 u_int8_t hash_id;
899 hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
900 SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
901 fq_hashlink);
902 fq_cl->fcl_stat.fcl_flows_cnt--;
903 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
904 fq_destroy(fq);
39037602
A
905}
906
907inline boolean_t
908fq_if_at_drop_limit(fq_if_t *fqs)
909{
0a7de745
A
910 return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
911 TRUE : FALSE;
39037602
A
912}
913
914static void
915fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
916 bool remove_hash)
917{
918 /*
919 * Remove the flow queue if it is empty
920 * and delete it
921 */
922 STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
923 fq_actlink);
924 fq->fq_flags &= ~FQF_OLD_FLOW;
925 fq_cl->fcl_stat.fcl_oldflows_cnt--;
926 VERIFY(fq->fq_bytes == 0);
927
928 if (remove_hash) {
929 /* Remove from the hash list */
930 fq_if_destroy_flow(fqs, fq_cl, fq);
931 }
932}
933
934static void
935fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
936{
937 /* Move to the end of old queue list */
938 STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
939 flowq, fq_actlink);
940 fq->fq_flags &= ~FQF_NEW_FLOW;
941 fq_cl->fcl_stat.fcl_newflows_cnt--;
942
943 if (add_to_old) {
944 STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
945 fq_actlink);
946 fq->fq_flags |= FQF_OLD_FLOW;
947 fq_cl->fcl_stat.fcl_oldflows_cnt++;
948 }
949}
950
951inline void
952fq_if_drop_packet(fq_if_t *fqs)
953{
954 fq_t *fq = fqs->fqs_large_flow;
39037602 955 fq_if_classq_t *fq_cl;
5ba3f43e 956 pktsched_pkt_t pkt;
cb323159 957 volatile uint32_t *pkt_flags;
5ba3f43e 958 uint64_t *pkt_timestamp;
39037602 959
0a7de745 960 if (fq == NULL) {
39037602 961 return;
0a7de745 962 }
5ba3f43e
A
963 /* queue can not be empty on the largest flow */
964 VERIFY(!fq_empty(fq));
39037602
A
965
966 fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
5ba3f43e 967 _PKTSCHED_PKT_INIT(&pkt);
cb323159
A
968 fq_getq_flow_internal(fqs, fq, &pkt);
969 ASSERT(pkt.pktsched_ptype != QP_INVALID);
39037602 970
5ba3f43e
A
971 pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
972 NULL, NULL);
39037602
A
973
974 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
5ba3f43e 975 *pkt_timestamp = 0;
cb323159
A
976 switch (pkt.pktsched_ptype) {
977 case QP_MBUF:
5ba3f43e 978 *pkt_flags &= ~PKTF_PRIV_GUARDED;
cb323159
A
979 break;
980 default:
981 VERIFY(0);
982 /* NOTREACHED */
983 __builtin_unreachable();
0a7de745 984 }
5ba3f43e
A
985
986 if (fq_empty(fq)) {
987 fqs->fqs_large_flow = NULL;
39037602
A
988 if (fq->fq_flags & FQF_OLD_FLOW) {
989 fq_if_empty_old_flow(fqs, fq_cl, fq, true);
990 } else {
991 VERIFY(fq->fq_flags & FQF_NEW_FLOW);
992 fq_if_empty_new_flow(fq, fq_cl, true);
993 }
994 }
5ba3f43e 995 IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));
39037602 996
5ba3f43e 997 pktsched_free_pkt(&pkt);
39037602
A
998 fq_cl->fcl_stat.fcl_drop_overflow++;
999}
1000
1001inline void
1002fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
1003{
5ba3f43e
A
1004 fq_t *prev_fq;
1005
1006 if (fqs->fqs_large_flow != NULL &&
0a7de745 1007 fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
5ba3f43e 1008 fqs->fqs_large_flow = NULL;
0a7de745 1009 }
5ba3f43e 1010
0a7de745 1011 if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
5ba3f43e 1012 return;
0a7de745 1013 }
5ba3f43e
A
1014
1015 prev_fq = fqs->fqs_large_flow;
1016 if (prev_fq == NULL) {
0a7de745 1017 if (!fq_empty(fq)) {
5ba3f43e 1018 fqs->fqs_large_flow = fq;
0a7de745 1019 }
39037602
A
1020 return;
1021 } else if (fq->fq_bytes > prev_fq->fq_bytes) {
1022 fqs->fqs_large_flow = fq;
1023 }
1024}
1025
1026boolean_t
5ba3f43e
A
1027fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t flowid,
1028 uint8_t flowsrc, fq_if_classq_t *fq_cl)
39037602
A
1029{
1030 struct flowadv_fcentry *fce;
39037602
A
1031
1032 STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
5ba3f43e 1033 if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
39037602
A
1034 fce->fce_flowid == flowid) {
1035 /* Already on flowcontrol list */
0a7de745 1036 return TRUE;
39037602
A
1037 }
1038 }
39037602 1039 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
5ba3f43e 1040 fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
39037602 1041 if (fce != NULL) {
39037602
A
1042 /* XXX Add number of bytes in the queue */
1043 STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
1044 fq_cl->fcl_stat.fcl_flow_control++;
1045 }
0a7de745 1046 return (fce != NULL) ? TRUE : FALSE;
39037602
A
1047}
1048
1049void
1050fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
1051{
1052 struct flowadv_fcentry *fce = NULL;
1053
1054 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1055 STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
0a7de745 1056 if (fce->fce_flowid == fq->fq_flowhash) {
39037602 1057 break;
0a7de745 1058 }
39037602
A
1059 }
1060 if (fce != NULL) {
1061 STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
1062 fce_link);
1063 STAILQ_NEXT(fce, fce_link) = NULL;
1064 flowadv_add_entry(fce);
1065 fq_cl->fcl_stat.fcl_flow_feedback++;
1066 }
1067 fq->fq_flags &= ~FQF_FLOWCTL_ON;
1068}
1069
1070void
f427ee49
A
1071fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
1072 int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *tail,
1073 uint32_t *retpktcnt, uint32_t *retbytecnt, boolean_t drvmgmt)
39037602
A
1074{
1075 fq_t *fq = NULL, *tfq = NULL;
39037602 1076 flowq_stailq_t temp_stailq;
5ba3f43e
A
1077 u_int32_t pktcnt, bytecnt;
1078 boolean_t qempty, limit_reached = FALSE;
cb323159 1079 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
5ba3f43e
A
1080 fq_getq_flow_t fq_getq_flow_fn;
1081
1082 switch (fqs->fqs_ptype) {
1083 case QP_MBUF:
1084 fq_getq_flow_fn = fq_getq_flow_mbuf;
1085 break;
1086
1087
1088 default:
1089 VERIFY(0);
1090 /* NOTREACHED */
cb323159 1091 __builtin_unreachable();
5ba3f43e 1092 }
39037602
A
1093
1094 /*
1095 * maximum byte limit should not be greater than the budget for
1096 * this class
1097 */
f427ee49 1098 if (bytelimit > fq_cl->fcl_budget && !drvmgmt) {
39037602 1099 bytelimit = fq_cl->fcl_budget;
0a7de745 1100 }
39037602
A
1101
1102 VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
39037602
A
1103 pktcnt = bytecnt = 0;
1104 STAILQ_INIT(&temp_stailq);
1105
1106 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
0a7de745 1107 ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
39037602 1108 FQF_NEW_FLOW);
39037602 1109
5ba3f43e
A
1110 limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
1111 pktlimit, top, &last, &bytecnt, &pktcnt, &qempty,
1112 PKTF_NEW_FLOW);
39037602 1113
0a7de745 1114 if (fq->fq_deficit <= 0 || qempty) {
39037602 1115 fq_if_empty_new_flow(fq, fq_cl, true);
0a7de745 1116 }
5ba3f43e 1117 fq->fq_deficit += fq_cl->fcl_quantum;
0a7de745 1118 if (limit_reached) {
39037602 1119 goto done;
0a7de745 1120 }
39037602
A
1121 }
1122
1123 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
0a7de745 1124 VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
39037602 1125 FQF_OLD_FLOW);
39037602 1126
5ba3f43e
A
1127 limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
1128 pktlimit, top, &last, &bytecnt, &pktcnt, &qempty, 0);
39037602 1129
5ba3f43e 1130 if (qempty) {
39037602
A
1131 fq_if_empty_old_flow(fqs, fq_cl, fq, true);
1132 } else if (fq->fq_deficit <= 0) {
1133 STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
1134 flowq, fq_actlink);
1135 /*
1136 * Move to the end of the old queues list. We do not
1137 * need to update the flow count since this flow
1138 * will be added to the tail again
1139 */
1140 STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
1141 fq->fq_deficit += fq_cl->fcl_quantum;
1142 }
0a7de745 1143 if (limit_reached) {
39037602 1144 break;
0a7de745 1145 }
39037602
A
1146 }
1147
1148done:
1149 if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
1150 STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
1151 } else if (!STAILQ_EMPTY(&temp_stailq)) {
1152 fq_cl->fcl_old_flows = temp_stailq;
1153 }
1154
cb323159
A
1155 if (last.cp_mbuf != NULL) {
1156 VERIFY(top->cp_mbuf != NULL);
0a7de745 1157 if (tail != NULL) {
39037602 1158 *tail = last;
0a7de745
A
1159 }
1160 if (retpktcnt != NULL) {
39037602 1161 *retpktcnt = pktcnt;
0a7de745
A
1162 }
1163 if (retbytecnt != NULL) {
39037602 1164 *retbytecnt = bytecnt;
0a7de745 1165 }
39037602
A
1166 }
1167}
1168
f427ee49 1169void
39037602
A
1170fq_if_teardown_ifclassq(struct ifclassq *ifq)
1171{
1172 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1173
1174 IFCQ_LOCK_ASSERT_HELD(ifq);
1175 VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
1176
1177 fq_if_destroy(fqs);
1178 ifq->ifcq_disc = NULL;
f427ee49 1179 ifclassq_detach(ifq);
39037602
A
1180}
1181
5ba3f43e
A
1182static void
1183fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
1184 struct fq_codel_flowstats *flowstat)
1185{
0a7de745 1186 bzero(flowstat, sizeof(*flowstat));
f427ee49 1187 flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
5ba3f43e
A
1188 flowstat->fqst_bytes = fq->fq_bytes;
1189 flowstat->fqst_flowhash = fq->fq_flowhash;
0a7de745 1190 if (fq->fq_flags & FQF_NEW_FLOW) {
5ba3f43e 1191 flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
0a7de745
A
1192 }
1193 if (fq->fq_flags & FQF_OLD_FLOW) {
5ba3f43e 1194 flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
0a7de745
A
1195 }
1196 if (fq->fq_flags & FQF_DELAY_HIGH) {
5ba3f43e 1197 flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
0a7de745
A
1198 }
1199 if (fq->fq_flags & FQF_FLOWCTL_ON) {
5ba3f43e 1200 flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
0a7de745
A
1201 }
1202 if (fqs->fqs_large_flow == fq) {
5ba3f43e 1203 flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
0a7de745 1204 }
5ba3f43e
A
1205}
1206
39037602
A
1207int
1208fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
1209 struct if_ifclassq_stats *ifqs)
1210{
1211 struct fq_codel_classstats *fcls;
1212 fq_if_classq_t *fq_cl;
1213 fq_if_t *fqs;
5ba3f43e
A
1214 fq_t *fq = NULL;
1215 u_int32_t i, flowstat_cnt;
39037602 1216
0a7de745
A
1217 if (qid >= FQ_IF_MAX_CLASSES) {
1218 return EINVAL;
1219 }
39037602
A
1220
1221 fqs = (fq_if_t *)ifq->ifcq_disc;
1222 fcls = &ifqs->ifqs_fq_codel_stats;
1223
1224 fq_cl = &fqs->fqs_classq[qid];
1225
1226 fcls->fcls_pri = fq_cl->fcl_pri;
1227 fcls->fcls_service_class = fq_cl->fcl_service_class;
1228 fcls->fcls_quantum = fq_cl->fcl_quantum;
1229 fcls->fcls_drr_max = fq_cl->fcl_drr_max;
1230 fcls->fcls_budget = fq_cl->fcl_budget;
1231 fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
1232 fcls->fcls_update_interval = fqs->fqs_update_interval;
1233 fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
1234 fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
1235 fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
1236 fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
1237 fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
1238 fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
1239 fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
1240 fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
1241 fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
1242 fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
1243 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
1244 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
1245 fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
1246 fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
1247 fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
1248 fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
1249 fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
1250 fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
1251 fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
f427ee49
A
1252 fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
1253 fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
39037602 1254
5ba3f43e
A
1255 /* Gather per flow stats */
1256 flowstat_cnt = min((fcls->fcls_newflows_cnt +
1257 fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
1258 i = 0;
1259 STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
0a7de745 1260 if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
5ba3f43e 1261 break;
0a7de745 1262 }
5ba3f43e
A
1263
1264 /* leave space for a few old flows */
1265 if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
0a7de745 1266 i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
5ba3f43e 1267 break;
0a7de745 1268 }
5ba3f43e
A
1269 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
1270 i++;
1271 }
1272 STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
0a7de745 1273 if (i >= flowstat_cnt) {
5ba3f43e 1274 break;
0a7de745 1275 }
5ba3f43e
A
1276 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
1277 i++;
1278 }
1279 VERIFY(i <= flowstat_cnt);
1280 fcls->fcls_flowstats_cnt = i;
0a7de745 1281 return 0;
39037602 1282}