]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/pktsched/pktsched_fq_codel.c
xnu-6153.121.1.tar.gz
[apple/xnu.git] / bsd / net / pktsched / pktsched_fq_codel.c
1 /*
2 * Copyright (c) 2016-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <kern/zalloc.h>
32 #include <net/if_var.h>
33 #include <net/if.h>
34 #include <net/classq/classq.h>
35 #include <net/classq/classq_fq_codel.h>
36 #include <net/pktsched/pktsched_fq_codel.h>
37
/* Size (in bytes) of an fq_if scheduler instance and its backing zone */
static size_t fq_if_size;
static struct zone *fq_if_zone;

/* Scheduler instance life cycle */
static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t);
static void fq_if_destroy(fq_if_t *fqs);
static void fq_if_classq_init(fq_if_t *fqs, u_int32_t priority,
    u_int32_t quantum, u_int32_t drr_max, u_int32_t svc_class);
/* ifclassq discipline entry points (enqueue / dequeue / request) */
static int fq_if_enqueue_classq(struct ifclassq *, classq_pkt_t *, boolean_t *);
static void fq_if_dequeue_classq(struct ifclassq *, classq_pkt_t *);
static int fq_if_dequeue_classq_multi(struct ifclassq *, u_int32_t,
    u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *, u_int32_t *);
static void fq_if_dequeue_sc_classq(struct ifclassq *, mbuf_svc_class_t,
    classq_pkt_t *);
static int fq_if_dequeue_sc_classq_multi(struct ifclassq *,
    mbuf_svc_class_t, u_int32_t, u_int32_t, classq_pkt_t *,
    classq_pkt_t *, u_int32_t *, u_int32_t *);
/* Core DRR dequeue over one class's new/old flow lists */
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, u_int32_t,
    u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t drvmgmt);
static int fq_if_request_classq(struct ifclassq *ifq, cqrq_t op, void *arg);
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
/* Purge helpers and flow-list maintenance */
static void fq_if_purge(fq_if_t *);
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
    bool add_to_old);
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq, bool remove_hash);

#define FQ_IF_ZONE_MAX  32              /* Maximum elements in zone */
#define FQ_IF_ZONE_NAME "pktsched_fq_if" /* zone for fq_if class */

/* Map a flow id to its bucket in the fqs_flows hash table */
#define FQ_IF_FLOW_HASH_ID(_flowid_) \
	(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)

/* A class is idle when it has neither new nor old active flows */
#define FQ_IF_CLASSQ_IDLE(_fcl_) \
	(STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
	STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))

/* Per-packet-type callbacks used by the dequeue paths */
typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
    u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t *, u_int32_t);
81
82 static void
83 fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
84 {
85 pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
86 }
87
88
89
/*
 * Dequeue mbuf packets from a single flow queue until the flow's DRR
 * deficit is exhausted, the caller's packet/byte limits are reached, or
 * the flow runs empty.  Dequeued packets are chained onto top/last;
 * *pkt_cnt and *byte_cnt are incremented for each packet.  pflags is
 * OR'ed into each packet's pkt_flags (e.g. PKTF_NEW_FLOW).
 *
 * Returns TRUE when a limit was hit; *qempty reports whether the flow
 * queue is empty on return.
 */
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    u_int32_t byte_limit, u_int32_t pkt_limit, classq_pkt_t *top,
    classq_pkt_t *last, u_int32_t *byte_cnt, u_int32_t *pkt_cnt,
    boolean_t *qempty, u_int32_t pflags)
{
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		/* charge the packet length against the flow's deficit */
		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= pflags;

		/* append to the caller's chain, keeping it NULL-terminated */
		if (top->cp_mbuf == NULL) {
			*top = pkt.pktsched_pkt;
		} else {
			ASSERT(last->cp_mbuf != NULL);
			ASSERT(last->cp_mbuf->m_nextpkt == NULL);
			last->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
		}
		*last = pkt.pktsched_pkt;
		last->cp_mbuf->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return limit_reached;
}
137
/*
 * One-time boot initialization of the FQ-CoDel scheduler: set up the
 * flow-queue zone (via fq_codel_init) and the zone that backs fq_if
 * scheduler instances.
 */
void
fq_codel_scheduler_init(void)
{
	/* Initialize the zone for flow queue structures */
	fq_codel_init();

	fq_if_size = sizeof(fq_if_t);
	fq_if_zone = zinit(fq_if_size, (FQ_IF_ZONE_MAX * fq_if_size), 0,
	    FQ_IF_ZONE_NAME);
	if (fq_if_zone == NULL) {
		/* zone creation failure at boot is unrecoverable */
		panic("%s: failed allocating from %s", __func__,
		    (FQ_IF_ZONE_NAME));
	}
	zone_change(fq_if_zone, Z_EXPAND, TRUE);
	zone_change(fq_if_zone, Z_CALLERACCT, TRUE);
}
154
155 fq_if_t *
156 fq_if_alloc(struct ifnet *ifp, classq_pkt_type_t ptype)
157 {
158 fq_if_t *fqs;
159 fqs = zalloc(fq_if_zone);
160 if (fqs == NULL) {
161 return NULL;
162 }
163
164 bzero(fqs, fq_if_size);
165 fqs->fqs_ifq = &ifp->if_snd;
166 fqs->fqs_ptype = ptype;
167
168 /* Calculate target queue delay */
169 ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);
170
171 /* Calculate update interval */
172 ifclassq_calc_update_interval(&fqs->fqs_update_interval);
173
174 /* Configure packet drop limit across all queues */
175 fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(&ifp->if_snd);
176 STAILQ_INIT(&fqs->fqs_fclist);
177 return fqs;
178 }
179
/*
 * Tear down a scheduler instance: drain all queued packets and flow
 * state, then return the instance to its zone.
 */
void
fq_if_destroy(fq_if_t *fqs)
{
	fq_if_purge(fqs);
	fqs->fqs_ifq = NULL;	/* detach from the interface send queue */
	zfree(fq_if_zone, fqs);
}
187
188 static inline u_int32_t
189 fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
190 {
191 u_int32_t pri;
192
193 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
194 switch (svc) {
195 case MBUF_SC_BK_SYS:
196 case MBUF_SC_BK:
197 pri = FQ_IF_BK_INDEX;
198 break;
199 case MBUF_SC_BE:
200 case MBUF_SC_RD:
201 case MBUF_SC_OAM:
202 pri = FQ_IF_BE_INDEX;
203 break;
204 case MBUF_SC_AV:
205 case MBUF_SC_RV:
206 case MBUF_SC_VI:
207 case MBUF_SC_SIG:
208 pri = FQ_IF_VI_INDEX;
209 break;
210 case MBUF_SC_VO:
211 case MBUF_SC_CTL:
212 pri = FQ_IF_VO_INDEX;
213 break;
214 default:
215 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
216 break;
217 }
218 return pri;
219 }
220
221 /* scheduler is not managed by the driver */
222 switch (svc) {
223 case MBUF_SC_BK_SYS:
224 pri = FQ_IF_BK_SYS_INDEX;
225 break;
226 case MBUF_SC_BK:
227 pri = FQ_IF_BK_INDEX;
228 break;
229 case MBUF_SC_BE:
230 pri = FQ_IF_BE_INDEX;
231 break;
232 case MBUF_SC_RD:
233 pri = FQ_IF_RD_INDEX;
234 break;
235 case MBUF_SC_OAM:
236 pri = FQ_IF_OAM_INDEX;
237 break;
238 case MBUF_SC_AV:
239 pri = FQ_IF_AV_INDEX;
240 break;
241 case MBUF_SC_RV:
242 pri = FQ_IF_RV_INDEX;
243 break;
244 case MBUF_SC_VI:
245 pri = FQ_IF_VI_INDEX;
246 break;
247 case MBUF_SC_SIG:
248 pri = FQ_IF_SIG_INDEX;
249 break;
250 case MBUF_SC_VO:
251 pri = FQ_IF_VO_INDEX;
252 break;
253 case MBUF_SC_CTL:
254 pri = FQ_IF_CTL_INDEX;
255 break;
256 default:
257 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
258 break;
259 }
260 return pri;
261 }
262
263 void
264 fq_if_classq_init(fq_if_t *fqs, u_int32_t pri, u_int32_t quantum,
265 u_int32_t drr_max, u_int32_t svc_class)
266 {
267 fq_if_classq_t *fq_cl;
268 VERIFY(pri < FQ_IF_MAX_CLASSES);
269 fq_cl = &fqs->fqs_classq[pri];
270
271 VERIFY(fq_cl->fcl_quantum == 0);
272 fq_cl->fcl_quantum = quantum;
273 fq_cl->fcl_pri = pri;
274 fq_cl->fcl_drr_max = drr_max;
275 fq_cl->fcl_service_class = svc_class;
276 STAILQ_INIT(&fq_cl->fcl_new_flows);
277 STAILQ_INIT(&fq_cl->fcl_old_flows);
278 }
279
/*
 * Enqueue entry point.  Classifies the packet by service class, rejects
 * it while BK_SYS is throttled, hands it to fq_addq(), and keeps the
 * scheduler's ER/EB/IB bitmaps and ifclassq counters in sync.  *pdrop
 * tells the caller whether the packet was freed rather than queued.
 * Returns 0, or EQFULL/EQSUSPENDED/ENOBUFS mapped from the classq
 * enqueue result.
 */
int
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *p, boolean_t *pdrop)
{
	u_int32_t pri;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int ret, len;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	/* an mbuf without a packet header cannot be classified; drop it */
	if ((p->cp_ptype == QP_MBUF) && !(p->cp_mbuf->m_flags & M_PKTHDR)) {
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(p->cp_mbuf);
		*p = CLASSQ_PKT_INITIALIZER(*p);
		*pdrop = TRUE;
		return ENOBUFS;
	}
	pktsched_pkt_encap(&pkt, p);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqs->fqs_classq[pri];

	if (svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1) {
		/* BK_SYS is currently throttled */
		fq_cl->fcl_stat.fcl_throttle_drops++;
		IFCQ_CONVERT_LOCK(ifq);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		return EQSUSPENDED;
	}

	len = pktsched_get_pkt_len(&pkt);
	ret = fq_addq(fqs, &pkt, fq_cl);
	if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) &&
	    !FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it as IB
			 */
			pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
		}
	}

	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else {
			/* packet was dropped by fq_addq; map to an errno */
			*pdrop = TRUE;
			VERIFY(ret == CLASSQEQ_DROP ||
			    ret == CLASSQEQ_DROP_FC ||
			    ret == CLASSQEQ_DROP_SP);
			pktsched_free_pkt(&pkt);
			switch (ret) {
			case CLASSQEQ_DROP:
				return ENOBUFS;
			case CLASSQEQ_DROP_FC:
				return EQFULL;
			case CLASSQEQ_DROP_SP:
				return EQSUSPENDED;
			}
		}
	} else {
		*pdrop = FALSE;
	}
	/* packet is queued; account for it on the interface classq */
	IFCQ_INC_LEN(ifq);
	IFCQ_INC_BYTES(ifq, len);
	return ret;
}
356
/*
 * Single-packet dequeue: delegate to the batched path with a packet
 * limit of one and an effectively unbounded byte limit.
 */
static void
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt)
{
	(void) fq_if_dequeue_classq_multi(ifq, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL);
}
363
364 static void
365 fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
366 classq_pkt_t *pkt)
367 {
368 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
369 fq_if_classq_t *fq_cl;
370 u_int32_t pri;
371
372 pri = fq_if_service_to_priority(fqs, svc);
373 fq_cl = &fqs->fqs_classq[pri];
374
375 fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
376 pkt, NULL, NULL, NULL, TRUE);
377 }
378
379 int
380 fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
381 u_int32_t maxbytecnt, classq_pkt_t *first_packet,
382 classq_pkt_t *last_packet, u_int32_t *retpktcnt,
383 u_int32_t *retbytecnt)
384 {
385 u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt = 0, total_bytecnt = 0;
386 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
387 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
388 classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
389 fq_if_append_pkt_t append_pkt;
390 fq_if_classq_t *fq_cl;
391 fq_if_t *fqs;
392 int pri;
393
394 IFCQ_LOCK_ASSERT_HELD(ifq);
395
396 fqs = (fq_if_t *)ifq->ifcq_disc;
397
398 switch (fqs->fqs_ptype) {
399 case QP_MBUF:
400 append_pkt = fq_if_append_mbuf;
401 break;
402
403
404 default:
405 VERIFY(0);
406 /* NOTREACHED */
407 __builtin_unreachable();
408 }
409
410 for (;;) {
411 classq_pkt_t top = CLASSQ_PKT_INITIALIZER(top);
412 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
413
414 if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
415 fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
416 fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
417 fqs->fqs_bitmaps[FQ_IF_IB] = 0;
418 if (fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
419 break;
420 }
421 }
422 pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
423 if (pri == 0) {
424 /*
425 * There are no ER flows, move the highest
426 * priority one from EB if there are any in that
427 * category
428 */
429 pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
430 VERIFY(pri > 0);
431 pktsched_bit_clr((pri - 1),
432 &fqs->fqs_bitmaps[FQ_IF_EB]);
433 pktsched_bit_set((pri - 1),
434 &fqs->fqs_bitmaps[FQ_IF_ER]);
435 }
436 pri--; /* index starts at 0 */
437 fq_cl = &fqs->fqs_classq[pri];
438
439 if (fq_cl->fcl_budget <= 0) {
440 /* Update the budget */
441 fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
442 fq_cl->fcl_stat.fcl_flows_cnt) *
443 fq_cl->fcl_quantum);
444 if (fq_cl->fcl_budget <= 0) {
445 goto state_change;
446 }
447 }
448 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
449 (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
450 &bytecnt, FALSE);
451 if (top.cp_mbuf != NULL) {
452 ASSERT(pktcnt > 0 && bytecnt > 0);
453 if (first.cp_mbuf == NULL) {
454 first = top;
455 total_pktcnt = pktcnt;
456 total_bytecnt = bytecnt;
457 } else {
458 ASSERT(last.cp_mbuf != NULL);
459 append_pkt(&last, &top);
460 total_pktcnt += pktcnt;
461 total_bytecnt += bytecnt;
462 }
463 last = tail;
464 append_pkt(&last, &tmp);
465 fq_cl->fcl_budget -= bytecnt;
466 pktcnt = 0;
467 bytecnt = 0;
468 }
469
470 /*
471 * If the class has exceeded the budget but still has data
472 * to send, move it to IB
473 */
474 state_change:
475 if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
476 if (fq_cl->fcl_budget <= 0) {
477 pktsched_bit_set(pri,
478 &fqs->fqs_bitmaps[FQ_IF_IB]);
479 pktsched_bit_clr(pri,
480 &fqs->fqs_bitmaps[FQ_IF_ER]);
481 }
482 } else {
483 pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
484 VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
485 fqs->fqs_bitmaps[FQ_IF_EB] |
486 fqs->fqs_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
487 fq_cl->fcl_budget = 0;
488 }
489 if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
490 break;
491 }
492 }
493
494 if (__probable(first_packet != NULL)) {
495 *first_packet = first;
496 }
497 if (last_packet != NULL) {
498 *last_packet = last;
499 }
500 if (retpktcnt != NULL) {
501 *retpktcnt = total_pktcnt;
502 }
503 if (retbytecnt != NULL) {
504 *retbytecnt = total_bytecnt;
505 }
506
507 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
508 return 0;
509 }
510
511 int
512 fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
513 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
514 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt)
515 {
516 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
517 u_int32_t pri;
518 u_int32_t total_pktcnt = 0, total_bytecnt = 0;
519 fq_if_classq_t *fq_cl;
520 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
521 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
522 fq_if_append_pkt_t append_pkt;
523
524 switch (fqs->fqs_ptype) {
525 case QP_MBUF:
526 append_pkt = fq_if_append_mbuf;
527 break;
528
529
530 default:
531 VERIFY(0);
532 /* NOTREACHED */
533 __builtin_unreachable();
534 }
535
536 pri = fq_if_service_to_priority(fqs, svc);
537 fq_cl = &fqs->fqs_classq[pri];
538 /*
539 * Now we have the queue for a particular service class. We need
540 * to dequeue as many packets as needed, first from the new flows
541 * and then from the old flows.
542 */
543 while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
544 fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
545 classq_pkt_t top = CLASSQ_PKT_INITIALIZER(top);
546 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
547 u_int32_t pktcnt = 0, bytecnt = 0;
548
549 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
550 (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
551 &bytecnt, TRUE);
552 if (top.cp_mbuf != NULL) {
553 if (first.cp_mbuf == NULL) {
554 first = top;
555 total_pktcnt = pktcnt;
556 total_bytecnt = bytecnt;
557 } else {
558 ASSERT(last.cp_mbuf != NULL);
559 append_pkt(&last, &top);
560 total_pktcnt += pktcnt;
561 total_bytecnt += bytecnt;
562 }
563 last = tail;
564 }
565 }
566
567 if (__probable(first_packet != NULL)) {
568 *first_packet = first;
569 }
570 if (last_packet != NULL) {
571 *last_packet = last;
572 }
573 if (retpktcnt != NULL) {
574 *retpktcnt = total_pktcnt;
575 }
576 if (retbytecnt != NULL) {
577 *retbytecnt = total_bytecnt;
578 }
579
580 return 0;
581 }
582
/*
 * Drop every packet queued on one flow, unlink the flow from its class
 * lists and the hash table, and free it.  The counts of purged packets
 * and bytes are returned through pktsp/bytesp when non-NULL.
 */
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
    u_int32_t *bytesp)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	/* drain and free every packet on the flow */
	for (;;) {
		fq_getq_flow(fqs, fq, &pkt);
		if (pkt.pktsched_pkt_mbuf == NULL) {
			VERIFY(pkt.pktsched_ptype == QP_INVALID);
			break;
		}
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	/* unlink from whichever active list the flow is on */
	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl, false);
	} else if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, false);
	}

	fq_if_destroy_flow(fqs, fq_cl, fq);

	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		/* class has no flows left; clear it from all state bitmaps */
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri,
			    &fqs->fqs_bitmaps[i]);
		}
	}
	if (pktsp != NULL) {
		*pktsp = pkts;
	}
	if (bytesp != NULL) {
		*bytesp = bytes;
	}
}
628
/*
 * Purge every flow (new and old) belonging to one service class and
 * reset its DRR budget.
 */
static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_t *fq, *tfq;
	/*
	 * Take each flow from new/old flow list and flush mbufs
	 * in that flow
	 */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

	/* defensive re-initialization of the (now empty) lists */
	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
	fq_cl->fcl_budget = 0;
}
650
651 static void
652 fq_if_purge(fq_if_t *fqs)
653 {
654 int i;
655
656 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
657 for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
658 fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
659 }
660
661 VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
662
663 fqs->fqs_large_flow = NULL;
664 for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
665 VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
666 }
667
668 bzero(&fqs->fqs_bitmaps, sizeof(fqs->fqs_bitmaps));
669
670 IFCQ_LEN(fqs->fqs_ifq) = 0;
671 IFCQ_BYTES(fqs->fqs_ifq) = 0;
672 }
673
674 static void
675 fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
676 {
677 fq_t *fq;
678
679 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
680 req->packets = req->bytes = 0;
681 VERIFY(req->flow != 0);
682
683 /* packet type is needed only if we want to create a flow queue */
684 fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID);
685
686 if (fq != NULL) {
687 fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
688 }
689 }
690
691 static void
692 fq_if_event(fq_if_t *fqs, cqev_t ev)
693 {
694 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
695
696 switch (ev) {
697 case CLASSQ_EV_LINK_UP:
698 case CLASSQ_EV_LINK_DOWN:
699 fq_if_purge(fqs);
700 break;
701 default:
702 break;
703 }
704 }
705
706 static void
707 fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
708 {
709 fq_if_purge_classq(fqs, fq_cl);
710 fqs->fqs_throttle = 1;
711 fq_cl->fcl_stat.fcl_throttle_on++;
712 }
713
714 static void
715 fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
716 {
717 VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
718 fqs->fqs_throttle = 0;
719 fq_cl->fcl_stat.fcl_throttle_off++;
720 }
721
722
723 static int
724 fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
725 {
726 struct ifclassq *ifq = fqs->fqs_ifq;
727 int index;
728 #if !MACH_ASSERT
729 #pragma unused(ifq)
730 #endif
731 IFCQ_LOCK_ASSERT_HELD(ifq);
732
733 if (!tr->set) {
734 tr->level = fqs->fqs_throttle;
735 return 0;
736 }
737
738 if (tr->level == fqs->fqs_throttle) {
739 return EALREADY;
740 }
741
742 /* Throttling is allowed on BK_SYS class only */
743 index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
744 switch (tr->level) {
745 case IFNET_THROTTLE_OFF:
746 fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
747 break;
748 case IFNET_THROTTLE_OPPORTUNISTIC:
749 fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
750 break;
751 default:
752 break;
753 }
754 return 0;
755 }
756
757 void
758 fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
759 {
760 u_int32_t pri;
761 fq_if_classq_t *fq_cl;
762
763 if (stat == NULL) {
764 return;
765 }
766
767 pri = fq_if_service_to_priority(fqs, stat->sc);
768 fq_cl = &fqs->fqs_classq[pri];
769 stat->packets = fq_cl->fcl_stat.fcl_pkt_cnt;
770 stat->bytes = fq_cl->fcl_stat.fcl_byte_cnt;
771 }
772
/*
 * Dispatch a classq control request (purge, purge-by-sc, link event,
 * throttle, per-sc stats).  Unknown requests are silently accepted.
 * Always returns 0.
 */
int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
	int err = 0;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * These are usually slow operations, convert the lock ahead of time
	 */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	switch (rq) {
	case CLASSQRQ_PURGE:
		fq_if_purge(fqs);
		break;
	case CLASSQRQ_PURGE_SC:
		fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
		break;
	case CLASSQRQ_EVENT:
		fq_if_event(fqs, (cqev_t)arg);
		break;
	case CLASSQRQ_THROTTLE:
		fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
		break;
	case CLASSQRQ_STAT_SC:
		fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
		break;
	}
	return err;
}
804
805 int
806 fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
807 classq_pkt_type_t ptype)
808 {
809 #pragma unused(flags)
810 struct ifnet *ifp = ifq->ifcq_ifp;
811 fq_if_t *fqs = NULL;
812 int err = 0;
813
814 IFCQ_LOCK_ASSERT_HELD(ifq);
815 VERIFY(ifq->ifcq_disc == NULL);
816 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
817
818 fqs = fq_if_alloc(ifp, ptype);
819 if (fqs == NULL) {
820 return ENOMEM;
821 }
822
823 if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
824 fqs->fqs_flags |= FQS_DRIVER_MANAGED;
825 fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
826 2, MBUF_SC_BK);
827 fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
828 4, MBUF_SC_BE);
829 fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
830 6, MBUF_SC_VI);
831 fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
832 8, MBUF_SC_VO);
833 } else {
834 /* SIG shares same INDEX with VI */
835 _CASSERT(SCIDX_SIG == SCIDX_VI);
836 _CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);
837
838 fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500,
839 2, MBUF_SC_BK_SYS);
840 fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
841 2, MBUF_SC_BK);
842 fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
843 4, MBUF_SC_BE);
844 fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500,
845 4, MBUF_SC_RD);
846 fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500,
847 4, MBUF_SC_OAM);
848 fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000,
849 6, MBUF_SC_AV);
850 fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000,
851 6, MBUF_SC_RV);
852 fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
853 6, MBUF_SC_VI);
854 fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
855 8, MBUF_SC_VO);
856 fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600,
857 8, MBUF_SC_CTL);
858 }
859
860 err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs,
861 fq_if_enqueue_classq, fq_if_dequeue_classq,
862 fq_if_dequeue_sc_classq, fq_if_dequeue_classq_multi,
863 fq_if_dequeue_sc_classq_multi, fq_if_request_classq);
864
865 if (err != 0) {
866 printf("%s: error from ifclassq_attach, "
867 "failed to attach fq_if: %d\n", __func__, err);
868 fq_if_destroy(fqs);
869 }
870 return err;
871 }
872
/*
 * Look up the flow queue for (flowid, service class) in the flow hash
 * table, optionally creating it when absent (create == TRUE).  On
 * creation the flow's update deadline is set from now; for every hit
 * the getq timestamp is primed so stalls can be detected.  Returns the
 * flow, or NULL when not found/allocation failed.
 */
fq_t *
fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
    u_int64_t now, boolean_t create, classq_pkt_type_t ptype)
{
	fq_t *fq = NULL;
	flowq_list_t *fq_list;
	fq_if_classq_t *fq_cl;
	u_int8_t fqs_hash_id;
	u_int8_t scidx;

	scidx = fq_if_service_to_priority(fqs, svc_class);

	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);

	fq_list = &fqs->fqs_flows[fqs_hash_id];

	/* a flow is keyed by both flow id and service class index */
	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
		if (fq->fq_flowhash == flowid &&
		    fq->fq_sc_index == scidx) {
			break;
		}
	}
	if (fq == NULL && create == TRUE) {
		ASSERT(ptype == QP_MBUF);

		/* If the flow is not already on the list, allocate it */
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		fq = fq_alloc(ptype);
		if (fq != NULL) {
			fq->fq_flowhash = flowid;
			fq->fq_sc_index = scidx;
			fq->fq_updatetime = now + fqs->fqs_update_interval;
			fq_cl = &fqs->fqs_classq[scidx];
			fq->fq_flags = FQF_FLOWCTL_CAPABLE;
			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
			fq_cl->fcl_stat.fcl_flows_cnt++;
		}
	}

	/*
	 * If getq time is not set because this is the first packet or after
	 * idle time, set it now so that we can detect a stall.
	 */
	if (fq != NULL && fq->fq_getqtime == 0) {
		fq->fq_getqtime = now;
	}

	return fq;
}
922
923 void
924 fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
925 {
926 u_int8_t hash_id;
927 hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
928 SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
929 fq_hashlink);
930 fq_cl->fcl_stat.fcl_flows_cnt--;
931 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
932 fq_destroy(fq);
933 }
934
935 inline boolean_t
936 fq_if_at_drop_limit(fq_if_t *fqs)
937 {
938 return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
939 TRUE : FALSE;
940 }
941
942 static void
943 fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
944 bool remove_hash)
945 {
946 /*
947 * Remove the flow queue if it is empty
948 * and delete it
949 */
950 STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
951 fq_actlink);
952 fq->fq_flags &= ~FQF_OLD_FLOW;
953 fq_cl->fcl_stat.fcl_oldflows_cnt--;
954 VERIFY(fq->fq_bytes == 0);
955
956 if (remove_hash) {
957 /* Remove from the hash list */
958 fq_if_destroy_flow(fqs, fq_cl, fq);
959 }
960 }
961
962 static void
963 fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
964 {
965 /* Move to the end of old queue list */
966 STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
967 flowq, fq_actlink);
968 fq->fq_flags &= ~FQF_NEW_FLOW;
969 fq_cl->fcl_stat.fcl_newflows_cnt--;
970
971 if (add_to_old) {
972 STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
973 fq_actlink);
974 fq->fq_flags |= FQF_OLD_FLOW;
975 fq_cl->fcl_stat.fcl_oldflows_cnt++;
976 }
977 }
978
/*
 * Overflow handling: drop one packet from the currently-largest flow
 * (tracked by fq_if_is_flow_heavy).  Clears per-packet scheduler state
 * before freeing, retires the flow if this emptied it, and accounts the
 * drop on the interface classq and the class stats.
 */
inline void
fq_if_drop_packet(fq_if_t *fqs)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL) {
		return;
	}
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq));

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	_PKTSCHED_PKT_INIT(&pkt);
	fq_getq_flow_internal(fqs, fq, &pkt);
	ASSERT(pkt.pktsched_ptype != QP_INVALID);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* scrub scheduler-private per-packet state before freeing */
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (fq_empty(fq)) {
		/* the large flow is gone; retire it from the active lists */
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
1028
1029 inline void
1030 fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
1031 {
1032 fq_t *prev_fq;
1033
1034 if (fqs->fqs_large_flow != NULL &&
1035 fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1036 fqs->fqs_large_flow = NULL;
1037 }
1038
1039 if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1040 return;
1041 }
1042
1043 prev_fq = fqs->fqs_large_flow;
1044 if (prev_fq == NULL) {
1045 if (!fq_empty(fq)) {
1046 fqs->fqs_large_flow = fq;
1047 }
1048 return;
1049 } else if (fq->fq_bytes > prev_fq->fq_bytes) {
1050 fqs->fqs_large_flow = fq;
1051 }
1052 }
1053
/*
 * Put a flow on the flow-advisory (flow control) list, unless an entry
 * for the same (source type, flow id) already exists.  Returns TRUE if
 * the flow is on the list on return, FALSE if allocation failed.
 */
boolean_t
fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t flowid,
    uint8_t flowsrc, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce;

	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
		    fce->fce_flowid == flowid) {
			/* Already on flowcontrol list */
			return TRUE;
		}
	}
	/* allocation may block (M_WAITOK); convert the lock first */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		/* XXX Add number of bytes in the queue */
		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
		fq_cl->fcl_stat.fcl_flow_control++;
	}
	return (fce != NULL) ? TRUE : FALSE;
}
1076
/*
 * Release flow control for a flow: remove its advisory entry (if any)
 * from the list, hand it to flowadv for delivery to the flow's owner,
 * and clear the flow's FLOWCTL_ON flag.
 */
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash) {
			break;
		}
	}
	if (fce != NULL) {
		STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
		    fce_link);
		STAILQ_NEXT(fce, fce_link) = NULL;
		flowadv_add_entry(fce);
		fq_cl->fcl_stat.fcl_flow_feedback++;
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
1097
/*
 * Core DRR dequeue for one class.  Services the new-flows list first,
 * then the old-flows list, honoring each flow's deficit; flows that
 * exhaust their deficit are rotated (new -> old, old -> tail of old),
 * and empty flows are retired.  The dequeued chain is returned via
 * top/tail with counts in retpktcnt/retbytecnt.  When drvmgmt is FALSE
 * the byte limit is additionally clamped to the class's DRR budget.
 */
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, u_int32_t pktlimit,
    u_int32_t bytelimit, classq_pkt_t *top, classq_pkt_t *tail,
    u_int32_t *retpktcnt, u_int32_t *retbytecnt, boolean_t drvmgmt)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	u_int32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_getq_flow_t fq_getq_flow_fn;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if ((int32_t)bytelimit > fq_cl->fcl_budget && !drvmgmt) {
		bytelimit = fq_cl->fcl_budget;
	}

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	/* first pass: new flows (these get PKTF_NEW_FLOW set) */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, top, &last, &bytecnt, &pktcnt, &qempty,
		    PKTF_NEW_FLOW);

		/* deficit exhausted or drained: demote to the old list */
		if (fq->fq_deficit <= 0 || qempty) {
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
		fq->fq_deficit += fq_cl->fcl_quantum;
		if (limit_reached) {
			goto done;
		}
	}

	/* second pass: old flows */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, top, &last, &bytecnt, &pktcnt, &qempty, 0);

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached) {
			break;
		}
	}

done:
	/* splice rotated flows back onto the tail of the old list */
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}

	if (last.cp_mbuf != NULL) {
		VERIFY(top->cp_mbuf != NULL);
		if (tail != NULL) {
			*tail = last;
		}
		if (retpktcnt != NULL) {
			*retpktcnt = pktcnt;
		}
		if (retbytecnt != NULL) {
			*retbytecnt = bytecnt;
		}
	}
}
1196
1197 int
1198 fq_if_teardown_ifclassq(struct ifclassq *ifq)
1199 {
1200 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1201
1202 IFCQ_LOCK_ASSERT_HELD(ifq);
1203 VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
1204
1205 fq_if_destroy(fqs);
1206 ifq->ifcq_disc = NULL;
1207 return ifclassq_detach(ifq);
1208 }
1209
1210 static void
1211 fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
1212 struct fq_codel_flowstats *flowstat)
1213 {
1214 bzero(flowstat, sizeof(*flowstat));
1215 flowstat->fqst_min_qdelay = fq->fq_min_qdelay;
1216 flowstat->fqst_bytes = fq->fq_bytes;
1217 flowstat->fqst_flowhash = fq->fq_flowhash;
1218 if (fq->fq_flags & FQF_NEW_FLOW) {
1219 flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
1220 }
1221 if (fq->fq_flags & FQF_OLD_FLOW) {
1222 flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
1223 }
1224 if (fq->fq_flags & FQF_DELAY_HIGH) {
1225 flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
1226 }
1227 if (fq->fq_flags & FQF_FLOWCTL_ON) {
1228 flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
1229 }
1230 if (fqs->fqs_large_flow == fq) {
1231 flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
1232 }
1233 }
1234
1235 int
1236 fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
1237 struct if_ifclassq_stats *ifqs)
1238 {
1239 struct fq_codel_classstats *fcls;
1240 fq_if_classq_t *fq_cl;
1241 fq_if_t *fqs;
1242 fq_t *fq = NULL;
1243 u_int32_t i, flowstat_cnt;
1244
1245 if (qid >= FQ_IF_MAX_CLASSES) {
1246 return EINVAL;
1247 }
1248
1249 fqs = (fq_if_t *)ifq->ifcq_disc;
1250 fcls = &ifqs->ifqs_fq_codel_stats;
1251
1252 fq_cl = &fqs->fqs_classq[qid];
1253
1254 fcls->fcls_pri = fq_cl->fcl_pri;
1255 fcls->fcls_service_class = fq_cl->fcl_service_class;
1256 fcls->fcls_quantum = fq_cl->fcl_quantum;
1257 fcls->fcls_drr_max = fq_cl->fcl_drr_max;
1258 fcls->fcls_budget = fq_cl->fcl_budget;
1259 fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
1260 fcls->fcls_update_interval = fqs->fqs_update_interval;
1261 fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
1262 fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
1263 fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
1264 fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
1265 fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
1266 fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
1267 fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
1268 fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
1269 fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
1270 fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
1271 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
1272 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
1273 fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
1274 fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
1275 fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
1276 fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
1277 fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
1278 fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
1279 fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
1280
1281 /* Gather per flow stats */
1282 flowstat_cnt = min((fcls->fcls_newflows_cnt +
1283 fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
1284 i = 0;
1285 STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
1286 if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
1287 break;
1288 }
1289
1290 /* leave space for a few old flows */
1291 if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
1292 i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
1293 break;
1294 }
1295 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
1296 i++;
1297 }
1298 STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
1299 if (i >= flowstat_cnt) {
1300 break;
1301 }
1302 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
1303 i++;
1304 }
1305 VERIFY(i <= flowstat_cnt);
1306 fcls->fcls_flowstats_cnt = i;
1307 return 0;
1308 }