/*
 * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/types.h>
#include <sys/param.h>
#include <kern/zalloc.h>
#include <net/ethernet.h>
#include <net/if_var.h>
#include <net/if.h>
#include <net/classq/classq.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched_fq_codel.h>
#include <os/log.h>

#define FQ_CODEL_DEFAULT_QUANTUM 1500

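/*
 * Per-class DRR quantum, derived from the base quantum (roughly one
 * link-layer frame): the AV/RV/VI media classes get twice the base so
 * they can send larger bursts per round, while the latency-sensitive
 * VO/CTL classes get two-fifths to keep their bursts short; all other
 * classes use the base quantum as-is.
 */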
#define FQ_CODEL_QUANTUM_BK_SYS(_q)	(_q)
#define FQ_CODEL_QUANTUM_BK(_q)		(_q)
#define FQ_CODEL_QUANTUM_BE(_q)		(_q)
#define FQ_CODEL_QUANTUM_RD(_q)		(_q)
#define FQ_CODEL_QUANTUM_OAM(_q)	(_q)
#define FQ_CODEL_QUANTUM_AV(_q)		(_q * 2)
#define FQ_CODEL_QUANTUM_RV(_q)		(_q * 2)
#define FQ_CODEL_QUANTUM_VI(_q)		(_q * 2)
#define FQ_CODEL_QUANTUM_VO(_q)		((_q * 2) / 5)
#define FQ_CODEL_QUANTUM_CTL(_q)	((_q * 2) / 5)

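/*
 * Per-class cap on how many flow quanta feed the class budget in one
 * scheduler round (the budget refill in fq_if_dequeue_classq_multi adds
 * min(fcl_drr_max, flow count) * quantum); higher-priority classes are
 * allowed a larger share per round.
 */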
#define FQ_CODEL_DRR_MAX_BK_SYS	2
#define FQ_CODEL_DRR_MAX_BK	2
#define FQ_CODEL_DRR_MAX_BE	4
#define FQ_CODEL_DRR_MAX_RD	4
#define FQ_CODEL_DRR_MAX_OAM	4
#define FQ_CODEL_DRR_MAX_AV	6
#define FQ_CODEL_DRR_MAX_RV	6
#define FQ_CODEL_DRR_MAX_VI	6
#define FQ_CODEL_DRR_MAX_VO	8
#define FQ_CODEL_DRR_MAX_CTL	8

static ZONE_DECLARE(fq_if_zone, "pktsched_fq_if", sizeof(fq_if_t), ZC_ZFREE_CLEARMEM);

typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;

static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t);
static void fq_if_destroy(fq_if_t *fqs);
static void fq_if_classq_init(fq_if_t *fqs, uint32_t priority,
    uint16_t quantum, uint32_t drr_max, uint32_t svc_class);
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
    int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
    uint32_t *, flowq_dqlist_t *, boolean_t drvmgmt);
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
static void fq_if_purge(fq_if_t *);
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
    bool add_to_old);
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq, bool remove_hash, bool destroy);

#define FQ_IF_FLOW_HASH_ID(_flowid_) \
	(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)

#define FQ_IF_CLASSQ_IDLE(_fcl_) \
	(STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
	STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))

typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
    int64_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t *, u_int32_t);

static void
fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
}

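/*
 * Pull packets from a single flow queue while the flow still has DRR
 * deficit and the caller's packet/byte limits have not been reached,
 * chaining the dequeued mbufs onto head/tail and updating the class
 * dequeue statistics.
 */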
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, u_int32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *byte_cnt, u_int32_t *pkt_cnt,
    boolean_t *qempty, u_int32_t pflags)
{
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= pflags;

		if (head->cp_mbuf == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_mbuf != NULL);
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_mbuf->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return limit_reached;
}

fq_if_t *
fq_if_alloc(struct ifnet *ifp, classq_pkt_type_t ptype)
{
	fq_if_t *fqs;

	fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
	fqs->fqs_ifq = &ifp->if_snd;
	fqs->fqs_ptype = ptype;

	/* Calculate target queue delay */
	ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);

	/* Calculate update interval */
	ifclassq_calc_update_interval(&fqs->fqs_update_interval);

	/* Configure packet drop limit across all queues */
	fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(&ifp->if_snd);
	STAILQ_INIT(&fqs->fqs_fclist);
	return fqs;
}

void
fq_if_destroy(fq_if_t *fqs)
{
	fq_if_purge(fqs);
	fqs->fqs_ifq = NULL;
	zfree(fq_if_zone, fqs);
}

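/*
 * Map an mbuf service class to a class queue index. A driver-managed
 * scheduler exposes only four queues (BK, BE, VI, VO), so several
 * service classes share an index; otherwise each service class gets its
 * own queue, with SIG sharing the VI index.
 */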
static inline uint8_t
fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
{
	uint8_t pri;

	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
		switch (svc) {
		case MBUF_SC_BK_SYS:
		case MBUF_SC_BK:
			pri = FQ_IF_BK_INDEX;
			break;
		case MBUF_SC_BE:
		case MBUF_SC_RD:
		case MBUF_SC_OAM:
			pri = FQ_IF_BE_INDEX;
			break;
		case MBUF_SC_AV:
		case MBUF_SC_RV:
		case MBUF_SC_VI:
		case MBUF_SC_SIG:
			pri = FQ_IF_VI_INDEX;
			break;
		case MBUF_SC_VO:
		case MBUF_SC_CTL:
			pri = FQ_IF_VO_INDEX;
			break;
		default:
			pri = FQ_IF_BE_INDEX; /* Use best effort by default */
			break;
		}
		return pri;
	}

	/* scheduler is not managed by the driver */
	switch (svc) {
	case MBUF_SC_BK_SYS:
		pri = FQ_IF_BK_SYS_INDEX;
		break;
	case MBUF_SC_BK:
		pri = FQ_IF_BK_INDEX;
		break;
	case MBUF_SC_BE:
		pri = FQ_IF_BE_INDEX;
		break;
	case MBUF_SC_RD:
		pri = FQ_IF_RD_INDEX;
		break;
	case MBUF_SC_OAM:
		pri = FQ_IF_OAM_INDEX;
		break;
	case MBUF_SC_AV:
		pri = FQ_IF_AV_INDEX;
		break;
	case MBUF_SC_RV:
		pri = FQ_IF_RV_INDEX;
		break;
	case MBUF_SC_VI:
		pri = FQ_IF_VI_INDEX;
		break;
	case MBUF_SC_SIG:
		pri = FQ_IF_SIG_INDEX;
		break;
	case MBUF_SC_VO:
		pri = FQ_IF_VO_INDEX;
		break;
	case MBUF_SC_CTL:
		pri = FQ_IF_CTL_INDEX;
		break;
	default:
		pri = FQ_IF_BE_INDEX; /* Use best effort by default */
		break;
	}
	return pri;
}

static void
fq_if_classq_init(fq_if_t *fqs, uint32_t pri, uint16_t quantum,
    uint32_t drr_max, uint32_t svc_class)
{
	fq_if_classq_t *fq_cl;
	VERIFY(pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqs->fqs_classq[pri];

	VERIFY(fq_cl->fcl_quantum == 0);
	fq_cl->fcl_quantum = quantum;
	fq_cl->fcl_pri = pri;
	fq_cl->fcl_drr_max = drr_max;
	fq_cl->fcl_service_class = svc_class;
	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
}

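/*
 * Enqueue a chain of packets for one service class: classify to a class
 * queue, hand the chain to fq_addq(), and, if the class is not already
 * in the ER or EB scheduler bitmaps, mark it in IB so the dequeue path
 * will pick it up. Drops and flow control are reported through the
 * return value and *pdrop.
 */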
int
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
{
	uint8_t pri;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int ret;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqs->fqs_classq[pri];

	if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
		/* BK_SYS is currently throttled */
		atomic_add_32(&fq_cl->fcl_stat.fcl_throttle_drops, 1);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		ret = EQSUSPENDED;
		goto done;
	}

	IFCQ_LOCK_SPIN(ifq);
	ret = fq_addq(fqs, &pkt, fq_cl);
	if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) &&
	    !FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * This class is not in the ER or EB bitmaps;
			 * mark it as IB.
			 */
			pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
		}
	}

	if (__improbable(ret != 0)) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else if (ret == CLASSQEQ_COMPRESSED) {
			ret = 0;
			*pdrop = FALSE;
		} else {
			IFCQ_UNLOCK(ifq);
			*pdrop = TRUE;
			pktsched_free_pkt(&pkt);
			switch (ret) {
			case CLASSQEQ_DROP:
				ret = ENOBUFS;
				goto done;
			case CLASSQEQ_DROP_FC:
				ret = EQFULL;
				goto done;
			case CLASSQEQ_DROP_SP:
				ret = EQSUSPENDED;
				goto done;
			default:
				VERIFY(0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			/* NOTREACHED */
			__builtin_unreachable();
		}
	} else {
		*pdrop = FALSE;
	}
	IFCQ_ADD_LEN(ifq, cnt);
	IFCQ_INC_BYTES(ifq, bytes);
	IFCQ_UNLOCK(ifq);
done:
#if DEBUG || DEVELOPMENT
	if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
		ret = 0;
	}
#endif /* DEBUG || DEVELOPMENT */
	return ret;
}

void
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt)
{
	(void) fq_if_dequeue_classq_multi(ifq, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL);
}

void
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_t *pkt)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
	uint32_t total_pktcnt = 0, total_bytecnt = 0;
	fq_if_classq_t *fq_cl;
	uint8_t pri;

	pri = fq_if_service_to_priority(fqs, svc);
	fq_cl = &fqs->fqs_classq[pri];

	fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    pkt, NULL, &total_pktcnt, &total_bytecnt, NULL, TRUE);

	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
}

static inline void
fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
{
	ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
	ASSERT(!fq->fq_in_dqlist);
	STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
	fq->fq_in_dqlist = true;
}

static inline void
fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
    classq_pkt_t *tail)
{
	ASSERT(fq->fq_in_dqlist);
	if (fq->fq_dq_head.cp_mbuf == NULL) {
		goto done;
	}

	if (head->cp_mbuf == NULL) {
		*head = fq->fq_dq_head;
	} else {
		ASSERT(tail->cp_mbuf != NULL);

		switch (fq->fq_ptype) {
		case QP_MBUF:
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
			ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
			break;
		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
	*tail = fq->fq_dq_tail;
done:
	STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
	CLASSQ_PKT_INIT(&fq->fq_dq_head);
	CLASSQ_PKT_INIT(&fq->fq_dq_tail);
	fq->fq_in_dqlist = false;
	if (fq->fq_flags & FQF_DESTROYED) {
		fq_destroy(fq);
	}
}

static inline void
fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
    classq_pkt_t *tail)
{
	fq_t *fq, *tfq;

	STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
		fq_dqlist_remove(fq_dqlist_head, fq, head, tail);
	}
}

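/*
 * Main dequeue loop. Classes rotate through three scheduler bitmaps
 * (roughly: ER is the set being serviced, EB is eligible and waiting,
 * and IB holds newly active or budget-exhausted classes parked for the
 * next round; once ER and EB drain, IB is promoted to EB). Each pass
 * picks the highest-priority ER class, refills its byte budget from its
 * DRR quantum if needed, and dequeues up to the remaining packet/byte
 * limits.
 */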
int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
    u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt,
    u_int32_t *retbytecnt)
{
	uint32_t total_pktcnt = 0, total_bytecnt = 0;
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
	fq_if_append_pkt_t append_pkt;
	flowq_dqlist_t fq_dqlist_head;
	fq_if_classq_t *fq_cl;
	fq_if_t *fqs;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	STAILQ_INIT(&fq_dqlist_head);

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		append_pkt = fq_if_append_mbuf;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	for (;;) {
		uint32_t pktcnt = 0, bytecnt = 0;
		classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
		classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);

		if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
		    fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
			fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
			fqs->fqs_bitmaps[FQ_IF_IB] = 0;
			if (fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
				break;
			}
		}
		pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
		if (pri == 0) {
			/*
			 * No class is in ER; promote the highest
			 * priority class from EB, if there is any.
			 */
			pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
			VERIFY(pri > 0);
			pktsched_bit_clr((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_EB]);
			pktsched_bit_set((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_ER]);
		}
		pri--; /* index starts at 0 */
		fq_cl = &fqs->fqs_classq[pri];

		if (fq_cl->fcl_budget <= 0) {
			/* Update the budget */
			fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
			    fq_cl->fcl_stat.fcl_flows_cnt) *
			    fq_cl->fcl_quantum);
			if (fq_cl->fcl_budget <= 0) {
				goto state_change;
			}
		}
		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
		    &bytecnt, &fq_dqlist_head, FALSE);
		if (head.cp_mbuf != NULL) {
			ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
			if (first.cp_mbuf == NULL) {
				first = head;
			} else {
				ASSERT(last.cp_mbuf != NULL);
				append_pkt(&last, &head);
			}
			last = tail;
			append_pkt(&last, &tmp);
		}
		fq_cl->fcl_budget -= bytecnt;
		total_pktcnt += pktcnt;
		total_bytecnt += bytecnt;

		/*
		 * If the class has exceeded the budget but still has data
		 * to send, move it to IB
		 */
state_change:
		if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
			if (fq_cl->fcl_budget <= 0) {
				pktsched_bit_set(pri,
				    &fqs->fqs_bitmaps[FQ_IF_IB]);
				pktsched_bit_clr(pri,
				    &fqs->fqs_bitmaps[FQ_IF_ER]);
			}
		} else {
			pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
			VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
			    fqs->fqs_bitmaps[FQ_IF_EB] |
			    fqs->fqs_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
			fq_cl->fcl_budget = 0;
		}
		if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
			break;
		}
	}

	fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last);

	if (__probable(first_packet != NULL)) {
		*first_packet = first;
	}
	if (last_packet != NULL) {
		*last_packet = last;
	}
	if (retpktcnt != NULL) {
		*retpktcnt = total_pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = total_bytecnt;
	}

	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
	return 0;
}

int
fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
	uint8_t pri;
	u_int32_t total_pktcnt = 0, total_bytecnt = 0;
	fq_if_classq_t *fq_cl;
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_if_append_pkt_t append_pkt;
	flowq_dqlist_t fq_dqlist_head;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		append_pkt = fq_if_append_mbuf;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	STAILQ_INIT(&fq_dqlist_head);
	pri = fq_if_service_to_priority(fqs, svc);
	fq_cl = &fqs->fqs_classq[pri];
	/*
	 * Now we have the queue for a particular service class. We need
	 * to dequeue as many packets as needed, first from the new flows
	 * and then from the old flows.
	 */
	while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
	    fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
		classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
		classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
		u_int32_t pktcnt = 0, bytecnt = 0;

		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
		    &bytecnt, &fq_dqlist_head, TRUE);
		if (head.cp_mbuf != NULL) {
			if (first.cp_mbuf == NULL) {
				first = head;
			} else {
				ASSERT(last.cp_mbuf != NULL);
				append_pkt(&last, &head);
			}
			last = tail;
		}
		total_pktcnt += pktcnt;
		total_bytecnt += bytecnt;
	}

	fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last);

	if (__probable(first_packet != NULL)) {
		*first_packet = first;
	}
	if (last_packet != NULL) {
		*last_packet = last;
	}
	if (retpktcnt != NULL) {
		*retpktcnt = total_pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = total_bytecnt;
	}

	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);

	return 0;
}

static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
    u_int32_t *bytesp)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	for (;;) {
		fq_getq_flow(fqs, fq, &pkt);
		if (pkt.pktsched_pkt_mbuf == NULL) {
			VERIFY(pkt.pktsched_ptype == QP_INVALID);
			break;
		}
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl, false);
	} else if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, false, true);
	}

	fq_if_destroy_flow(fqs, fq_cl, fq, true);

	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri,
			    &fqs->fqs_bitmaps[i]);
		}
	}
	if (pktsp != NULL) {
		*pktsp = pkts;
	}
	if (bytesp != NULL) {
		*bytesp = bytes;
	}
}

static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_t *fq, *tfq;
	/*
	 * Take each flow from new/old flow list and flush mbufs
	 * in that flow
	 */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
	fq_cl->fcl_budget = 0;
}

static void
fq_if_purge(fq_if_t *fqs)
{
	int i;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
		fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
	}

	VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));

	fqs->fqs_large_flow = NULL;
	for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
		VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
	}

	bzero(&fqs->fqs_bitmaps, sizeof(fqs->fqs_bitmaps));

	IFCQ_LEN(fqs->fqs_ifq) = 0;
	IFCQ_BYTES(fqs->fqs_ifq) = 0;
}

static void
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
{
	fq_t *fq;

	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	req->packets = req->bytes = 0;
	VERIFY(req->flow != 0);

	/* packet type is needed only if we want to create a flow queue */
	fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID);

	if (fq != NULL) {
		fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
	}
}

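/*
 * Compute the scheduler's base quantum: roughly one link-layer frame
 * (MTU plus Ethernet header where applicable), bumped up to the largest
 * TSO burst when hardware TSO is in use, and never below the default.
 */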
static uint16_t
fq_if_calc_quantum(struct ifnet *ifp)
{
	uint16_t quantum;

	switch (ifp->if_family) {
	case IFNET_FAMILY_ETHERNET:
		VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
		quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
		break;

	case IFNET_FAMILY_CELLULAR:
	case IFNET_FAMILY_IPSEC:
	case IFNET_FAMILY_UTUN:
		VERIFY(ifp->if_mtu <= UINT16_MAX);
		quantum = (uint16_t)ifp->if_mtu;
		break;

	default:
		quantum = FQ_CODEL_DEFAULT_QUANTUM;
		break;
	}

	/*
	 * XXX: Skywalk native interface doesn't support HW TSO offload.
	 */
	if (((ifp->if_eflags & IFEF_SKYWALK_NATIVE) == 0) &&
	    ((ifp->if_hwassist & IFNET_TSOF) != 0)) {
		VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
		VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
		quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
		quantum = (quantum != 0) ? quantum : IF_MAXMTU;
	}

	quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
#if DEBUG || DEVELOPMENT
	quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
#endif /* DEBUG || DEVELOPMENT */
	return quantum;
}

static void
fq_if_mtu_update(fq_if_t *fqs)
{
#define _FQ_CLASSQ_UPDATE_QUANTUM(_fqs, _s, _q)                  \
	(_fqs)->fqs_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum = \
	FQ_CODEL_QUANTUM_ ## _s(_q)

	uint16_t quantum;

	quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);

	if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BE, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, VI, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, VO, quantum);
	} else {
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK_SYS, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BE, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, RD, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, OAM, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, AV, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, RV, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, VI, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, VO, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, CTL, quantum);
	}
#undef _FQ_CLASSQ_UPDATE_QUANTUM
}

static void
fq_if_event(fq_if_t *fqs, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);

	switch (ev) {
	case CLASSQ_EV_LINK_UP:
	case CLASSQ_EV_LINK_DOWN:
		fq_if_purge(fqs);
		break;
	case CLASSQ_EV_LINK_MTU:
		fq_if_mtu_update(fqs);
		break;
	default:
		break;
	}
}

static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_if_purge_classq(fqs, fq_cl);
	fqs->fqs_throttle = 1;
	fq_cl->fcl_stat.fcl_throttle_on++;
}

static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
	fqs->fqs_throttle = 0;
	fq_cl->fcl_stat.fcl_throttle_off++;
}

static int
fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = fqs->fqs_ifq;
	uint8_t index;
#if !MACH_ASSERT
#pragma unused(ifq)
#endif
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!tr->set) {
		tr->level = fqs->fqs_throttle;
		return 0;
	}

	if (tr->level == fqs->fqs_throttle) {
		return EALREADY;
	}

	/* Throttling is allowed on BK_SYS class only */
	index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
		break;
	case IFNET_THROTTLE_OPPORTUNISTIC:
		fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
		break;
	default:
		break;
	}
	return 0;
}

void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
	uint8_t pri;
	fq_if_classq_t *fq_cl;

	if (stat == NULL) {
		return;
	}

	pri = fq_if_service_to_priority(fqs, stat->sc);
	fq_cl = &fqs->fqs_classq[pri];
	stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
	stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;
}

int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
	int err = 0;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * These are usually slow operations, convert the lock ahead of time
	 */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	switch (rq) {
	case CLASSQRQ_PURGE:
		fq_if_purge(fqs);
		break;
	case CLASSQRQ_PURGE_SC:
		fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
		break;
	case CLASSQRQ_EVENT:
		fq_if_event(fqs, (cqev_t)arg);
		break;
	case CLASSQRQ_THROTTLE:
		fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
		break;
	case CLASSQRQ_STAT_SC:
		fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
		break;
	}
	return err;
}

int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
#pragma unused(flags)
#define _FQ_CLASSQ_INIT(_fqs, _s, _q)                         \
	fq_if_classq_init((_fqs), FQ_IF_ ## _s ## _INDEX,     \
	FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX_ ## _s, \
	MBUF_SC_ ## _s )

	struct ifnet *ifp = ifq->ifcq_ifp;
	fq_if_t *fqs = NULL;
	uint16_t quantum;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifp, ptype);
	if (fqs == NULL) {
		return ENOMEM;
	}

	quantum = fq_if_calc_quantum(ifp);

	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
		_FQ_CLASSQ_INIT(fqs, BK, quantum);
		_FQ_CLASSQ_INIT(fqs, BE, quantum);
		_FQ_CLASSQ_INIT(fqs, VI, quantum);
		_FQ_CLASSQ_INIT(fqs, VO, quantum);
	} else {
		/* SIG shares same INDEX with VI */
		_CASSERT(SCIDX_SIG == SCIDX_VI);
		_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);

		_FQ_CLASSQ_INIT(fqs, BK_SYS, quantum);
		_FQ_CLASSQ_INIT(fqs, BK, quantum);
		_FQ_CLASSQ_INIT(fqs, BE, quantum);
		_FQ_CLASSQ_INIT(fqs, RD, quantum);
		_FQ_CLASSQ_INIT(fqs, OAM, quantum);
		_FQ_CLASSQ_INIT(fqs, AV, quantum);
		_FQ_CLASSQ_INIT(fqs, RV, quantum);
		_FQ_CLASSQ_INIT(fqs, VI, quantum);
		_FQ_CLASSQ_INIT(fqs, VO, quantum);
		_FQ_CLASSQ_INIT(fqs, CTL, quantum);
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
	}
	return err;
#undef _FQ_CLASSQ_INIT
}

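/*
 * Look up the flow queue for a flow hash within a service class,
 * optionally allocating and hashing in a new one. The getq timestamp is
 * (re)armed on the first packet after idle so that dequeue stalls can
 * be detected later.
 */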
fq_t *
fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
    u_int64_t now, boolean_t create, classq_pkt_type_t ptype)
{
	fq_t *fq = NULL;
	flowq_list_t *fq_list;
	fq_if_classq_t *fq_cl;
	u_int8_t fqs_hash_id;
	u_int8_t scidx;

	scidx = fq_if_service_to_priority(fqs, svc_class);

	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);

	fq_list = &fqs->fqs_flows[fqs_hash_id];

	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
		if (fq->fq_flowhash == flowid &&
		    fq->fq_sc_index == scidx) {
			break;
		}
	}
	if (fq == NULL && create == TRUE) {
		ASSERT(ptype == QP_MBUF);

		/* If the flow is not already on the list, allocate it */
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		fq = fq_alloc(ptype);
		if (fq != NULL) {
			fq->fq_flowhash = flowid;
			fq->fq_sc_index = scidx;
			fq->fq_updatetime = now + fqs->fqs_update_interval;
			fq_cl = &fqs->fqs_classq[scidx];
			fq->fq_flags = FQF_FLOWCTL_CAPABLE;
			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
			fq_cl->fcl_stat.fcl_flows_cnt++;
		}
	}

	/*
	 * If getq time is not set because this is the first packet or after
	 * idle time, set it now so that we can detect a stall.
	 */
	if (fq != NULL && fq->fq_getqtime == 0) {
		fq->fq_getqtime = now;
	}

	return fq;
}

void
fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    bool destroy_now)
{
	u_int8_t hash_id;
	hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
	SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
	    fq_hashlink);
	fq_cl->fcl_stat.fcl_flows_cnt--;
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fq->fq_flags |= FQF_DESTROYED;
	if (destroy_now) {
		fq_destroy(fq);
	}
}

inline boolean_t
fq_if_at_drop_limit(fq_if_t *fqs)
{
	return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
	       TRUE : FALSE;
}

static void
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    bool remove_hash, bool destroy)
{
	/*
	 * Remove the empty flow from the old flows list; optionally
	 * remove it from the hash table and destroy it as well.
	 */
	STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
	    fq_actlink);
	fq->fq_flags &= ~FQF_OLD_FLOW;
	fq_cl->fcl_stat.fcl_oldflows_cnt--;
	VERIFY(fq->fq_bytes == 0);

	if (remove_hash) {
		/* Remove from the hash list */
		fq_if_destroy_flow(fqs, fq_cl, fq, destroy);
	}
}

static void
fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
{
	/* Move to the end of old queue list */
	STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
	    flowq, fq_actlink);
	fq->fq_flags &= ~FQF_NEW_FLOW;
	fq_cl->fcl_stat.fcl_newflows_cnt--;

	if (add_to_old) {
		STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
		    fq_actlink);
		fq->fq_flags |= FQF_OLD_FLOW;
		fq_cl->fcl_stat.fcl_oldflows_cnt++;
	}
}

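/*
 * When the scheduler hits its packet drop limit, shed one packet from
 * the head of the largest flow so that the heaviest sender bears the
 * overflow loss.
 */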
inline void
fq_if_drop_packet(fq_if_t *fqs)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL) {
		return;
	}
	/* the largest flow's queue cannot be empty */
	VERIFY(!fq_empty(fq));

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	_PKTSCHED_PKT_INIT(&pkt);
	fq_getq_flow_internal(fqs, fq, &pkt);
	ASSERT(pkt.pktsched_ptype != QP_INVALID);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (fq_empty(fq)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true, true);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}

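/*
 * Track the largest flow in the scheduler: invalidate the cached flow
 * once it falls below FQ_IF_LARGE_FLOW_BYTE_LIMIT, and replace it when
 * the given flow is above the limit and larger than the current holder.
 */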
inline void
fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
{
	fq_t *prev_fq;

	if (fqs->fqs_large_flow != NULL &&
	    fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
		fqs->fqs_large_flow = NULL;
	}

	if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
		return;
	}

	prev_fq = fqs->fqs_large_flow;
	if (prev_fq == NULL) {
		if (!fq_empty(fq)) {
			fqs->fqs_large_flow = fq;
		}
		return;
	} else if (fq->fq_bytes > prev_fq->fq_bytes) {
		fqs->fqs_large_flow = fq;
	}
}

boolean_t
fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
    fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce;

#if DEBUG || DEVELOPMENT
	if (__improbable(ifclassq_flow_control_adv == 0)) {
		os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
		return TRUE;
	}
#endif /* DEBUG || DEVELOPMENT */

	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
		    fce->fce_flowid == fq->fq_flowhash) {
			/* Already on flowcontrol list */
			return TRUE;
		}
	}
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		/* XXX Add number of bytes in the queue */
		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
		fq_cl->fcl_stat.fcl_flow_control++;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_control,
		    fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
		    if_name(fqs->fqs_ifq->ifcq_ifp));
	}
	return (fce != NULL) ? TRUE : FALSE;
}

void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash) {
			break;
		}
	}
	if (fce != NULL) {
		STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
		    fce_link);
		STAILQ_NEXT(fce, fce_link) = NULL;
		fq_cl->fcl_stat.fcl_flow_feedback++;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
		    fce->fce_flowsrc_type, fce->fce_flowid,
		    if_name(fqs->fqs_ifq->ifcq_ifp));
		flowadv_add_entry(fce);
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}

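/*
 * Dequeue from one class using the FQ-CoDel two-list discipline: new
 * flows are drained first (and demoted to the old list once they use up
 * their deficit or go empty), then old flows, each receiving a quantum
 * of deficit per round. Empty old flows are removed; old flows that ran
 * out of deficit are rotated to the tail of the list.
 */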
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
    int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
    uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
    boolean_t drvmgmt)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	uint32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_getq_flow_t fq_getq_flow_fn;
	classq_pkt_t *head, *tail;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if (bytelimit > fq_cl->fcl_budget && !drvmgmt) {
		bytelimit = fq_cl->fcl_budget;
	}

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty,
		    PKTF_NEW_FLOW);

		if (fq->fq_deficit <= 0 || qempty) {
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
		fq->fq_deficit += fq_cl->fcl_quantum;
		if (limit_reached) {
			goto done;
		}
	}

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);
		bool destroy = true;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
			destroy = false;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, 0);

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true, destroy);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached) {
			break;
		}
	}

done:
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}
	if (last.cp_mbuf != NULL) {
		VERIFY(top->cp_mbuf != NULL);
		if (bottom != NULL) {
			*bottom = last;
		}
	}
	if (retpktcnt != NULL) {
		*retpktcnt = pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = bytecnt;
	}
}

void
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_destroy(fqs);
	ifq->ifcq_disc = NULL;
	ifclassq_detach(ifq);
}

static void
fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
    struct fq_codel_flowstats *flowstat)
{
	bzero(flowstat, sizeof(*flowstat));
	flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
	flowstat->fqst_bytes = fq->fq_bytes;
	flowstat->fqst_flowhash = fq->fq_flowhash;
	if (fq->fq_flags & FQF_NEW_FLOW) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
	}
	if (fq->fq_flags & FQF_OLD_FLOW) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
	}
	if (fq->fq_flags & FQF_DELAY_HIGH) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
	}
	if (fq->fq_flags & FQF_FLOWCTL_ON) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
	}
	if (fqs->fqs_large_flow == fq) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
	}
}

int
fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
    struct if_ifclassq_stats *ifqs)
{
	struct fq_codel_classstats *fcls;
	fq_if_classq_t *fq_cl;
	fq_if_t *fqs;
	fq_t *fq = NULL;
	u_int32_t i, flowstat_cnt;

	if (qid >= FQ_IF_MAX_CLASSES) {
		return EINVAL;
	}

	fqs = (fq_if_t *)ifq->ifcq_disc;
	fcls = &ifqs->ifqs_fq_codel_stats;

	fq_cl = &fqs->fqs_classq[qid];

	fcls->fcls_pri = fq_cl->fcl_pri;
	fcls->fcls_service_class = fq_cl->fcl_service_class;
	fcls->fcls_quantum = fq_cl->fcl_quantum;
	fcls->fcls_drr_max = fq_cl->fcl_drr_max;
	fcls->fcls_budget = fq_cl->fcl_budget;
	fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
	fcls->fcls_update_interval = fqs->fqs_update_interval;
	fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
	fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
	fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
	fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
	fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
	fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
	fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
	fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
	fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
	fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
	fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
	fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
	fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
	fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
	fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
	fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
	fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
	fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
	fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;

	/* Gather per flow stats */
	flowstat_cnt = min((fcls->fcls_newflows_cnt +
	    fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
	i = 0;
	STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
		if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
			break;
		}

		/* leave space for a few old flows */
		if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
		    i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
			break;
		}
		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
		i++;
	}
	STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
		if (i >= flowstat_cnt) {
			break;
		}
		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
		i++;
	}
	VERIFY(i <= flowstat_cnt);
	fcls->fcls_flowstats_cnt = i;
	return 0;
}