/*
 * Copyright (c) 2016-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/types.h>
#include <sys/param.h>
#include <kern/zalloc.h>
#include <net/if_var.h>

#include <net/classq/classq.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched_fq_codel.h>
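/*
 * Overview: fq_if is the FQ-CoDel packet scheduler glue for an interface
 * send queue (ifclassq).  Each fq_if_t keeps one fq_if_classq_t per
 * service-class priority; each class keeps per-flow queues (fq_t) on a
 * "new" and an "old" flow list and serves them with deficit round robin,
 * while the classes themselves are picked through the ER/EB/IB bitmaps
 * maintained in the enqueue and dequeue paths below.
 */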
static size_t fq_if_size;
static struct zone *fq_if_zone;

static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t);
static void fq_if_destroy(fq_if_t *fqs);
static void fq_if_classq_init(fq_if_t *fqs, u_int32_t priority,
    u_int32_t quantum, u_int32_t drr_max, u_int32_t svc_class);
static int fq_if_enqueue_classq(struct ifclassq *ifq, void *p,
    classq_pkt_type_t ptype, boolean_t *pdrop);
static void *fq_if_dequeue_classq(struct ifclassq *, classq_pkt_type_t *);
static int fq_if_dequeue_classq_multi(struct ifclassq *, u_int32_t,
    u_int32_t, void **, void **, u_int32_t *, u_int32_t *,
    classq_pkt_type_t *);
static void *fq_if_dequeue_sc_classq(struct ifclassq *, mbuf_svc_class_t,
    classq_pkt_type_t *);
static int fq_if_dequeue_sc_classq_multi(struct ifclassq *,
    mbuf_svc_class_t, u_int32_t, u_int32_t, void **,
    void **, u_int32_t *, u_int32_t *, classq_pkt_type_t *);
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, u_int32_t,
    u_int32_t, void **, void **, u_int32_t *, u_int32_t *,
    boolean_t drvmgmt, classq_pkt_type_t *);
static int fq_if_request_classq(struct ifclassq *ifq, cqrq_t op, void *arg);
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
static void fq_if_purge(fq_if_t *);
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
    bool add_to_old);
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq, bool remove_hash);
#define	FQ_IF_ZONE_MAX	32		/* Maximum elements in zone */
#define	FQ_IF_ZONE_NAME	"pktsched_fq_if"	/* zone for fq_if class */

#define	FQ_IF_FLOW_HASH_ID(_flowid_) \
	(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)

#define	FQ_IF_CLASSQ_IDLE(_fcl_) \
	(STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
	STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
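/*
 * Illustrative example (the actual FQ_IF_HASH_TAG_SHIFT/MASK values live in
 * the classq headers): with an 8-bit tag taken from the top byte, a flow ID
 * of 0x5a93bf01 would map to bucket FQ_IF_FLOW_HASH_ID(0x5a93bf01) == 0x5a
 * of the fqs_flows[] table used by fq_if_hash_pkt() below.
 */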
typedef void (*fq_if_append_pkt_t)(void *, void *);
typedef boolean_t (*fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
    u_int32_t, u_int32_t, void **, void **, u_int32_t *, u_int32_t *,
    boolean_t *, u_int32_t);
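/*
 * The function-pointer typedefs above keep the scheduler logic independent
 * of the packet representation: the dequeue paths pick an append routine
 * and a per-flow getq routine based on fqs_ptype.  Only QP_MBUF is wired
 * up here (fq_if_append_mbuf / fq_getq_flow_mbuf).
 */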
static void
fq_if_append_mbuf(void *pkt, void *next_pkt)
{
	((mbuf_t)pkt)->m_nextpkt = (mbuf_t)next_pkt;
}
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    u_int32_t byte_limit, u_int32_t pkt_limit, void **top, void **last,
    u_int32_t *byte_cnt, u_int32_t *pkt_cnt, boolean_t *qempty,
    u_int32_t pflags)
{
	struct mbuf *m;
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq)) {

		_PKTSCHED_PKT_INIT(&pkt);
		m = fq_getq_flow(fqs, fq, &pkt);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		m->m_pkthdr.pkt_flags |= pflags;

		if (*top == NULL) {
			*top = m;
		} else {
			ASSERT(*last != NULL);
			ASSERT((*(struct mbuf **)last)->m_nextpkt == NULL);
			(*(struct mbuf **)last)->m_nextpkt = m;
		}
		*last = m;
		(*(mbuf_t *)last)->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, m, QP_MBUF);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit)
			limit_reached = TRUE;
	}

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return (limit_reached);
}
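/*
 * Note on the loop above: a flow is drained only while its DRR deficit
 * (fq_deficit) lasts and the caller's packet/byte limits are not hit; the
 * caller (fq_if_dequeue) replenishes the deficit by fcl_quantum per round.
 */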
void
fq_codel_scheduler_init(void)
{
	/* Initialize the zone for flow queue structures */
	fq_if_size = sizeof (fq_if_t);
	fq_if_zone = zinit(fq_if_size, (FQ_IF_ZONE_MAX * fq_if_size), 0,
	    FQ_IF_ZONE_NAME);
	if (fq_if_zone == NULL) {
		panic("%s: failed allocating from %s", __func__,
		    FQ_IF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(fq_if_zone, Z_EXPAND, TRUE);
	zone_change(fq_if_zone, Z_CALLERACCT, TRUE);
}
static fq_if_t *
fq_if_alloc(struct ifnet *ifp, classq_pkt_type_t ptype)
{
	fq_if_t *fqs;

	fqs = zalloc(fq_if_zone);
	if (fqs == NULL)
		return (NULL);

	bzero(fqs, fq_if_size);
	fqs->fqs_ifq = &ifp->if_snd;
	fqs->fqs_ptype = ptype;

	/* Calculate target queue delay */
	ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);

	/* Calculate update interval */
	ifclassq_calc_update_interval(&fqs->fqs_update_interval);

	/* Configure packet drop limit across all queues */
	fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(&ifp->if_snd);
	STAILQ_INIT(&fqs->fqs_fclist);
	return (fqs);
}
static void
fq_if_destroy(fq_if_t *fqs)
{
	zfree(fq_if_zone, fqs);
}
static inline u_int32_t
fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
{
	u_int32_t pri;

	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
		switch (svc) {
		case MBUF_SC_BK_SYS:
		case MBUF_SC_BK:
			pri = FQ_IF_BK_INDEX;
			break;
		case MBUF_SC_BE:
		case MBUF_SC_RD:
		case MBUF_SC_OAM:
			pri = FQ_IF_BE_INDEX;
			break;
		case MBUF_SC_AV:
		case MBUF_SC_RV:
		case MBUF_SC_VI:
		case MBUF_SC_SIG:
			pri = FQ_IF_VI_INDEX;
			break;
		case MBUF_SC_VO:
		case MBUF_SC_CTL:
			pri = FQ_IF_VO_INDEX;
			break;
		default:
			pri = FQ_IF_BE_INDEX; /* Use best effort by default */
			break;
		}
		return (pri);
	}

	/* scheduler is not managed by the driver */
	switch (svc) {
	case MBUF_SC_BK_SYS:
		pri = FQ_IF_BK_SYS_INDEX;
		break;
	case MBUF_SC_BK:
		pri = FQ_IF_BK_INDEX;
		break;
	case MBUF_SC_BE:
		pri = FQ_IF_BE_INDEX;
		break;
	case MBUF_SC_RD:
		pri = FQ_IF_RD_INDEX;
		break;
	case MBUF_SC_OAM:
		pri = FQ_IF_OAM_INDEX;
		break;
	case MBUF_SC_AV:
		pri = FQ_IF_AV_INDEX;
		break;
	case MBUF_SC_RV:
		pri = FQ_IF_RV_INDEX;
		break;
	case MBUF_SC_VI:
		pri = FQ_IF_VI_INDEX;
		break;
	case MBUF_SC_SIG:
		pri = FQ_IF_SIG_INDEX;
		break;
	case MBUF_SC_VO:
		pri = FQ_IF_VO_INDEX;
		break;
	case MBUF_SC_CTL:
		pri = FQ_IF_CTL_INDEX;
		break;
	default:
		pri = FQ_IF_BE_INDEX; /* Use best effort by default */
		break;
	}
	return (pri);
}
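/*
 * Summary: in driver-managed mode the mbuf service classes collapse onto
 * the four driver-visible queues (BK/BE/VI/VO); otherwise each service
 * class gets its own fq_if class index, with SIG sharing VI's index (see
 * the _CASSERTs in fq_if_setup_ifclassq()).
 */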
static void
fq_if_classq_init(fq_if_t *fqs, u_int32_t pri, u_int32_t quantum,
    u_int32_t drr_max, u_int32_t svc_class)
{
	fq_if_classq_t *fq_cl;

	fq_cl = &fqs->fqs_classq[pri];

	VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES &&
	    fq_cl->fcl_quantum == 0);
	fq_cl->fcl_quantum = quantum;
	fq_cl->fcl_pri = pri;
	fq_cl->fcl_drr_max = drr_max;
	fq_cl->fcl_service_class = svc_class;
	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
}
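/*
 * fcl_quantum is the DRR byte allowance each active flow earns per round,
 * and fcl_drr_max caps how many flows' worth of quantum are added to the
 * class budget in a single refill (see the budget update in
 * fq_if_dequeue_classq_multi()).
 */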
static int
fq_if_enqueue_classq(struct ifclassq *ifq, void *p, classq_pkt_type_t ptype,
    boolean_t *pdrop)
{
	u_int32_t pri;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int ret, len;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	if ((ptype == QP_MBUF) && !(((mbuf_t)p)->m_flags & M_PKTHDR)) {
		IFCQ_CONVERT_LOCK(ifq);
		m_freem((mbuf_t)p);
		*pdrop = TRUE;
		return (ENOBUFS);
	}
	pktsched_pkt_encap(&pkt, ptype, p);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqs->fqs_classq[pri];

	if (svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1) {
		/* BK_SYS is currently throttled */
		fq_cl->fcl_stat.fcl_throttle_drops++;
		IFCQ_CONVERT_LOCK(ifq);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		return (EQSUSPENDED);
	}

	len = pktsched_get_pkt_len(&pkt);
	ret = fq_addq(fqs, &pkt, fq_cl);
	if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) &&
	    !FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it in the IB group
			 */
			pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
		}
	}

	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else {
			*pdrop = TRUE;
			VERIFY(ret == CLASSQEQ_DROP ||
			    ret == CLASSQEQ_DROP_FC ||
			    ret == CLASSQEQ_DROP_SP);
			pktsched_free_pkt(&pkt);
			switch (ret) {
			case CLASSQEQ_DROP:
				return (ENOBUFS);
			case CLASSQEQ_DROP_FC:
				return (EQFULL);
			case CLASSQEQ_DROP_SP:
				return (EQSUSPENDED);
			}
		}
	} else {
		*pdrop = FALSE;
	}
	IFCQ_INC_LEN(ifq);
	IFCQ_INC_BYTES(ifq, len);
	return (ret);
}
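/*
 * The fqs_bitmaps[] words drive class selection on dequeue: a class that
 * becomes backlogged while outside the ER and EB sets is parked in IB here;
 * fq_if_dequeue_classq_multi() folds IB into EB when both ER and EB run
 * empty and promotes the highest-priority eligible class into ER for
 * service.
 */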
static void *
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_type_t *ptype)
{
	void *top;

	(void) fq_if_dequeue_classq_multi(ifq, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &top, NULL, NULL, NULL, ptype);
	return (top);
}
static void *
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_type_t *ptype)
{
	void *top = NULL;
	u_int32_t pri;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
	fq_if_classq_t *fq_cl;

	pri = fq_if_service_to_priority(fqs, svc);
	fq_cl = &fqs->fqs_classq[pri];

	fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    &top, NULL, NULL, NULL, TRUE, ptype);
	return (top);
}
static int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
    u_int32_t maxbytecnt, void **first_packet,
    void **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    classq_pkt_type_t *ptype)
{
	void *top = NULL, *tail = NULL, *first, *last;
	u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt, total_bytecnt;
	u_int32_t pri;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	fq_if_append_pkt_t append_pkt;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	fqs = (fq_if_t *)ifq->ifcq_disc;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		append_pkt = fq_if_append_mbuf;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	first = last = NULL;
	total_pktcnt = total_bytecnt = 0;
	*ptype = fqs->fqs_ptype;

	for (;;) {
		classq_pkt_type_t tmp_ptype;
		if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
		    fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
			fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
			fqs->fqs_bitmaps[FQ_IF_IB] = 0;
			if (fqs->fqs_bitmaps[FQ_IF_EB] == 0)
				break;
		}
		pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
		if (pri == 0) {
			/*
			 * There are no ER flows, move the highest
			 * priority one from EB if there are any in that
			 * category
			 */
			pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
			VERIFY(pri > 0);
			pktsched_bit_clr((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_EB]);
			pktsched_bit_set((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_ER]);
		}
		pri--; /* index starts at 0 */
		fq_cl = &fqs->fqs_classq[pri];

		if (fq_cl->fcl_budget <= 0) {
			/* Update the budget */
			fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
			    fq_cl->fcl_stat.fcl_flows_cnt) *
			    fq_cl->fcl_quantum);
			if (fq_cl->fcl_budget <= 0)
				goto state_change;
		}
		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
		    &bytecnt, FALSE, &tmp_ptype);
		if (top != NULL) {
			ASSERT(tmp_ptype == *ptype);
			ASSERT(pktcnt > 0 && bytecnt > 0);
			if (first == NULL) {
				first = top;
				last = tail;
				total_pktcnt = pktcnt;
				total_bytecnt = bytecnt;
			} else {
				append_pkt(last, top);
				last = tail;
				total_pktcnt += pktcnt;
				total_bytecnt += bytecnt;
			}
			append_pkt(last, NULL);
			fq_cl->fcl_budget -= bytecnt;
			pktcnt = 0;
			bytecnt = 0;
		}

state_change:
		/*
		 * If the class has exceeded the budget but still has data
		 * to send, move it to IB
		 */
		if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
			if (fq_cl->fcl_budget <= 0) {
				pktsched_bit_set(pri,
				    &fqs->fqs_bitmaps[FQ_IF_IB]);
				pktsched_bit_clr(pri,
				    &fqs->fqs_bitmaps[FQ_IF_ER]);
			}
		} else {
			pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
			VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
			    fqs->fqs_bitmaps[FQ_IF_EB] |
			    fqs->fqs_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
			fq_cl->fcl_budget = 0;
		}
		if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt)
			break;
	}
	if (first != NULL) {
		if (first_packet != NULL)
			*first_packet = first;
		if (last_packet != NULL)
			*last_packet = last;
		if (retpktcnt != NULL)
			*retpktcnt = total_pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = total_bytecnt;
		IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
	} else {
		if (first_packet != NULL)
			*first_packet = NULL;
		if (last_packet != NULL)
			*last_packet = NULL;
		if (retpktcnt != NULL)
			*retpktcnt = 0;
		if (retbytecnt != NULL)
			*retbytecnt = 0;
	}
	return (0);
}
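/*
 * Budget arithmetic example (illustrative values): a class with
 * fcl_quantum 1500 bytes, fcl_drr_max 4 and 10 active flows gets
 * min(4, 10) * 1500 = 6000 bytes added to fcl_budget before it is served;
 * once the budget goes non-positive while traffic is still queued, the
 * class is demoted from ER to IB until it is refilled again.
 */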
static int
fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, void **first_packet,
    void **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    classq_pkt_type_t *ptype)
{
#pragma unused(maxpktcnt, maxbytecnt, first_packet, last_packet, retpktcnt, retbytecnt)
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
	u_int32_t pri;
	u_int32_t total_pktcnt = 0, total_bytecnt = 0;
	fq_if_classq_t *fq_cl;
	void *first = NULL, *last = NULL;
	fq_if_append_pkt_t append_pkt;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		append_pkt = fq_if_append_mbuf;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	pri = fq_if_service_to_priority(fqs, svc);
	fq_cl = &fqs->fqs_classq[pri];

	/*
	 * Now we have the queue for a particular service class. We need
	 * to dequeue as many packets as needed, first from the new flows
	 * and then from the old flows.
	 */
	while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
	    fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
		void *top = NULL, *tail = NULL;
		u_int32_t pktcnt = 0, bytecnt = 0;
		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
		    &bytecnt, TRUE, ptype);
		if (top == NULL)
			break;
		if (first == NULL) {
			first = top;
			total_pktcnt = pktcnt;
			total_bytecnt = bytecnt;
		} else {
			append_pkt(last, top);
			total_pktcnt += pktcnt;
			total_bytecnt += bytecnt;
		}
		last = tail;
	}
	if (first != NULL) {
		if (first_packet != NULL)
			*first_packet = first;
		if (last_packet != NULL)
			*last_packet = last;
		if (retpktcnt != NULL)
			*retpktcnt = total_pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = total_bytecnt;
	} else {
		if (first_packet != NULL)
			*first_packet = NULL;
		if (last_packet != NULL)
			*last_packet = NULL;
		if (retpktcnt != NULL)
			*retpktcnt = 0;
		if (retbytecnt != NULL)
			*retbytecnt = 0;
	}
	return (0);
}
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
    u_int32_t *bytesp)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	while (fq_getq_flow(fqs, fq, &pkt) != NULL) {
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl, false);
	} else if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, false);
	}

	fq_if_destroy_flow(fqs, fq_cl, fq);

	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri,
			    &fqs->fqs_bitmaps[i]);
		}
	}
	if (pktsp != NULL)
		*pktsp = pkts;
	if (bytesp != NULL)
		*bytesp = bytes;
}
static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_t *fq, *tfq;

	/*
	 * Take each flow from new/old flow list and flush mbufs
	 */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
	fq_cl->fcl_budget = 0;
}
static void
fq_if_purge(fq_if_t *fqs)
{
	int i;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
		fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
	}

	VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));

	fqs->fqs_large_flow = NULL;
	for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
		VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
	}

	bzero(&fqs->fqs_bitmaps, sizeof (fqs->fqs_bitmaps));

	IFCQ_LEN(fqs->fqs_ifq) = 0;
	IFCQ_BYTES(fqs->fqs_ifq) = 0;
}
static void
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
{
	fq_t *fq;

	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	req->packets = req->bytes = 0;
	VERIFY(req->flow != 0);

	/* packet type is needed only if we want to create a flow queue */
	fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID);

	if (fq != NULL)
		fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
}
static void
fq_if_event(fq_if_t *fqs, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);

	switch (ev) {
	case CLASSQ_EV_LINK_UP:
	case CLASSQ_EV_LINK_DOWN:
		fq_if_purge(fqs);
		break;
	default:
		break;
	}
}
static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_if_purge_classq(fqs, fq_cl);
	fqs->fqs_throttle = 1;
	fq_cl->fcl_stat.fcl_throttle_on++;
}

static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
	fqs->fqs_throttle = 0;
	fq_cl->fcl_stat.fcl_throttle_off++;
}
static int
fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = fqs->fqs_ifq;
	int index;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!tr->set) {
		tr->level = fqs->fqs_throttle;
		return (0);
	}

	if (tr->level == fqs->fqs_throttle)
		return (EALREADY);

	/* Throttling is allowed on BK_SYS class only */
	index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
		break;
	case IFNET_THROTTLE_OPPORTUNISTIC:
		fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
		break;
	default:
		break;
	}
	return (0);
}
void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
	u_int32_t pri;
	fq_if_classq_t *fq_cl;

	if (stat == NULL)
		return;

	pri = fq_if_service_to_priority(fqs, stat->sc);
	fq_cl = &fqs->fqs_classq[pri];
	stat->packets = fq_cl->fcl_stat.fcl_pkt_cnt;
	stat->bytes = fq_cl->fcl_stat.fcl_byte_cnt;
}
static int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
	int err = 0;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * These are usually slow operations, convert the lock ahead of time
	 */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	switch (rq) {
	case CLASSQRQ_PURGE:
		fq_if_purge(fqs);
		break;
	case CLASSQRQ_PURGE_SC:
		fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
		break;
	case CLASSQRQ_EVENT:
		fq_if_event(fqs, (cqev_t)arg);
		break;
	case CLASSQRQ_THROTTLE:
		fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
		break;
	case CLASSQRQ_STAT_SC:
		fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
		break;
	}
	return (err);
}
int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
#pragma unused(flags)
	struct ifnet *ifp = ifq->ifcq_ifp;
	fq_if_t *fqs = NULL;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifp, ptype);
	if (fqs == NULL)
		return (ENOMEM);

	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
		fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
		    2, MBUF_SC_BK);
		fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
		    4, MBUF_SC_BE);
		fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
		    6, MBUF_SC_VI);
		fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
		    8, MBUF_SC_VO);
	} else {
		/* SIG shares same INDEX with VI */
		_CASSERT(SCIDX_SIG == SCIDX_VI);
		_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);

		fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500,
		    2, MBUF_SC_BK_SYS);
		fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
		    2, MBUF_SC_BK);
		fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
		    4, MBUF_SC_BE);
		fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500,
		    4, MBUF_SC_RD);
		fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500,
		    4, MBUF_SC_OAM);
		fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000,
		    6, MBUF_SC_AV);
		fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000,
		    6, MBUF_SC_RV);
		fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
		    6, MBUF_SC_VI);
		fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
		    8, MBUF_SC_VO);
		fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600,
		    8, MBUF_SC_CTL);
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs,
	    fq_if_enqueue_classq, fq_if_dequeue_classq,
	    fq_if_dequeue_sc_classq, fq_if_dequeue_classq_multi,
	    fq_if_dequeue_sc_classq_multi, fq_if_request_classq);

	if (err != 0) {
		printf("%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
	}
	return (err);
}
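/*
 * The per-class quanta above are expressed in bytes and are sized around a
 * ~1500 byte MTU: bulk and best-effort classes get roughly one MTU of DRR
 * credit per round, the video classes two, and the voice/control classes a
 * smaller 600 byte quantum.
 */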
fq_t *
fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
    u_int64_t now, boolean_t create, classq_pkt_type_t ptype)
{
	fq_t *fq = NULL;
	flowq_list_t *fq_list;
	fq_if_classq_t *fq_cl;
	u_int8_t fqs_hash_id;
	u_int8_t scidx;

	scidx = fq_if_service_to_priority(fqs, svc_class);

	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);

	fq_list = &fqs->fqs_flows[fqs_hash_id];

	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
		if (fq->fq_flowhash == flowid &&
		    fq->fq_sc_index == scidx)
			break;
	}
	if (fq == NULL && create == TRUE) {
		ASSERT(ptype == QP_MBUF);

		/* If the flow is not already on the list, allocate it */
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		fq = fq_alloc(ptype);
		if (fq != NULL) {
			fq->fq_flowhash = flowid;
			fq->fq_sc_index = scidx;
			fq->fq_updatetime = now + fqs->fqs_update_interval;
			fq_cl = &fqs->fqs_classq[scidx];
			fq->fq_flags = FQF_FLOWCTL_CAPABLE;
			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
			fq_cl->fcl_stat.fcl_flows_cnt++;
		}
	}

	/*
	 * If getq time is not set because this is the first packet or after
	 * idle time, set it now so that we can detect a stall.
	 */
	if (fq != NULL && fq->fq_getqtime == 0)
		fq->fq_getqtime = now;

	return (fq);
}
static void
fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
{
	u_int8_t hash_id;

	hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
	SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
	    fq_hashlink);
	fq_cl->fcl_stat.fcl_flows_cnt--;
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fq_destroy(fq);
}
boolean_t
fq_if_at_drop_limit(fq_if_t *fqs)
{
	return (((IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
	    TRUE : FALSE));
}
static void
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    bool remove_hash)
{
	/*
	 * Remove the flow queue if it is empty
	 */
	STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
	    fq_actlink);
	fq->fq_flags &= ~FQF_OLD_FLOW;
	fq_cl->fcl_stat.fcl_oldflows_cnt--;
	VERIFY(fq->fq_bytes == 0);

	if (remove_hash) {
		/* Remove from the hash list */
		fq_if_destroy_flow(fqs, fq_cl, fq);
	}
}
static void
fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
{
	/* Move to the end of old queue list */
	STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
	    flowq, fq_actlink);
	fq->fq_flags &= ~FQF_NEW_FLOW;
	fq_cl->fcl_stat.fcl_newflows_cnt--;

	if (add_to_old) {
		STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
		    fq_actlink);
		fq->fq_flags |= FQF_OLD_FLOW;
		fq_cl->fcl_stat.fcl_oldflows_cnt++;
	}
}
void
fq_if_drop_packet(fq_if_t *fqs)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL)
		return;
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq));

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	_PKTSCHED_PKT_INIT(&pkt);
	(void)fq_getq_flow_internal(fqs, fq, &pkt);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	*pkt_timestamp = 0;
	if (pkt.pktsched_ptype == QP_MBUF)
		*pkt_flags &= ~PKTF_PRIV_GUARDED;

	if (fq_empty(fq)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
void
fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
{
	fq_t *prev_fq;

	if (fqs->fqs_large_flow != NULL &&
	    fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT)
		fqs->fqs_large_flow = NULL;

	if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT)
		return;

	prev_fq = fqs->fqs_large_flow;
	if (prev_fq == NULL) {
		fqs->fqs_large_flow = fq;
		return;
	} else if (fq->fq_bytes > prev_fq->fq_bytes) {
		fqs->fqs_large_flow = fq;
	}
}
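/*
 * fqs_large_flow caches the flow with the most queued bytes once it
 * crosses FQ_IF_LARGE_FLOW_BYTE_LIMIT; when the interface queue hits its
 * drop limit (fq_if_at_drop_limit()), fq_if_drop_packet() sheds a packet
 * from that flow rather than from a well-behaved one.
 */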
boolean_t
fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t flowid,
    uint8_t flowsrc, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce;

	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
		    fce->fce_flowid == flowid) {
			/* Already on flowcontrol list */
			return (TRUE);
		}
	}
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		/* XXX Add number of bytes in the queue */
		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
		fq_cl->fcl_stat.fcl_flow_control++;
	}
	return ((fce != NULL) ? TRUE : FALSE);
}
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash)
			break;
	}
	if (fce != NULL) {
		STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
		    fce_link);
		STAILQ_NEXT(fce, fce_link) = NULL;
		flowadv_add_entry(fce);
		fq_cl->fcl_stat.fcl_flow_feedback++;
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
static void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, u_int32_t pktlimit,
    u_int32_t bytelimit, void **top, void **tail,
    u_int32_t *retpktcnt, u_int32_t *retbytecnt, boolean_t drvmgmt,
    classq_pkt_type_t *ptype)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	u_int32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	void *last = NULL;
	fq_getq_flow_t fq_getq_flow_fn;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if ((int32_t)bytelimit > fq_cl->fcl_budget && !drvmgmt)
		bytelimit = fq_cl->fcl_budget;

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);

	*top = NULL;
	*ptype = fqs->fqs_ptype;
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, top, &last, &bytecnt, &pktcnt, &qempty,
		    PKTF_NEW_FLOW);

		if (fq->fq_deficit <= 0 || qempty)
			fq_if_empty_new_flow(fq, fq_cl, true);
		fq->fq_deficit += fq_cl->fcl_quantum;
		if (limit_reached)
			goto done;
	}

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, top, &last, &bytecnt, &pktcnt, &qempty, 0);

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached)
			break;
	}

done:
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}

	if (last != NULL) {
		VERIFY(*top != NULL);
		if (tail != NULL)
			*tail = last;
		if (retpktcnt != NULL)
			*retpktcnt = pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = bytecnt;
	}
}
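/*
 * Scheduling note for fq_if_dequeue(): new flows are visited ahead of old
 * flows; a new flow that empties or exhausts its deficit is demoted to the
 * old list, and an old flow that merely runs out of deficit is rotated to
 * the tail (via temp_stailq) with a fresh quantum instead of being removed.
 */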
int
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_destroy(fqs);
	ifq->ifcq_disc = NULL;
	return (ifclassq_detach(ifq));
}
static void
fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
    struct fq_codel_flowstats *flowstat)
{
	bzero(flowstat, sizeof (*flowstat));
	flowstat->fqst_min_qdelay = fq->fq_min_qdelay;
	flowstat->fqst_bytes = fq->fq_bytes;
	flowstat->fqst_flowhash = fq->fq_flowhash;
	if (fq->fq_flags & FQF_NEW_FLOW)
		flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
	if (fq->fq_flags & FQF_OLD_FLOW)
		flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
	if (fq->fq_flags & FQF_DELAY_HIGH)
		flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
	if (fq->fq_flags & FQF_FLOWCTL_ON)
		flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
	if (fqs->fqs_large_flow == fq)
		flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
}
int
fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
    struct if_ifclassq_stats *ifqs)
{
	struct fq_codel_classstats *fcls;
	fq_if_classq_t *fq_cl;
	fq_if_t *fqs;
	fq_t *fq = NULL;
	u_int32_t i, flowstat_cnt;

	if (qid >= FQ_IF_MAX_CLASSES)
		return (EINVAL);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	fcls = &ifqs->ifqs_fq_codel_stats;

	fq_cl = &fqs->fqs_classq[qid];

	fcls->fcls_pri = fq_cl->fcl_pri;
	fcls->fcls_service_class = fq_cl->fcl_service_class;
	fcls->fcls_quantum = fq_cl->fcl_quantum;
	fcls->fcls_drr_max = fq_cl->fcl_drr_max;
	fcls->fcls_budget = fq_cl->fcl_budget;
	fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
	fcls->fcls_update_interval = fqs->fqs_update_interval;
	fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
	fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
	fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
	fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
	fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
	fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
	fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
	fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
	fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
	fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
	fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
	fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
	fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
	fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
	fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
	fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
	fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;

	/* Gather per flow stats */
	flowstat_cnt = min((fcls->fcls_newflows_cnt +
	    fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
	i = 0;
	STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
		if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt)
			break;

		/* leave space for a few old flows */
		if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
		    i >= (FQ_IF_MAX_FLOWSTATS >> 1))
			break;

		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
		i++;
	}
	STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
		if (i >= flowstat_cnt)
			break;
		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
		i++;
	}
	VERIFY(i <= flowstat_cnt);
	fcls->fcls_flowstats_cnt = i;
	return (0);
}