/*
 * Copyright (c) 2016-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/types.h>
#include <sys/param.h>
#include <kern/zalloc.h>
#include <net/if_var.h>

#include <net/classq/classq.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched_fq_codel.h>
static size_t fq_if_size;
static struct zone *fq_if_zone;
static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t);
static void fq_if_destroy(fq_if_t *fqs);
static void fq_if_classq_init(fq_if_t *fqs, u_int32_t priority,
    u_int32_t quantum, u_int32_t drr_max, u_int32_t svc_class);
static int fq_if_enqueue_classq(struct ifclassq *ifq, void *p,
    classq_pkt_type_t ptype, boolean_t *pdrop);
static void *fq_if_dequeue_classq(struct ifclassq *, classq_pkt_type_t *);
static int fq_if_dequeue_classq_multi(struct ifclassq *, u_int32_t,
    u_int32_t, void **, void **, u_int32_t *, u_int32_t *,
    classq_pkt_type_t *);
static void *fq_if_dequeue_sc_classq(struct ifclassq *, mbuf_svc_class_t,
    classq_pkt_type_t *);
static int fq_if_dequeue_sc_classq_multi(struct ifclassq *,
    mbuf_svc_class_t, u_int32_t, u_int32_t, void **,
    void **, u_int32_t *, u_int32_t *, classq_pkt_type_t *);
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, u_int32_t,
    u_int32_t, void **, void **, u_int32_t *, u_int32_t *,
    boolean_t drvmgmt, classq_pkt_type_t *);
static int fq_if_request_classq(struct ifclassq *ifq, cqrq_t op, void *arg);
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
static void fq_if_purge(fq_if_t *);
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
    bool add_to_old);
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq, bool remove_hash);
#define	FQ_IF_ZONE_MAX	32		/* Maximum elements in zone */
#define	FQ_IF_ZONE_NAME	"pktsched_fq_if"	/* zone for fq_if class */
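/*
 * A flow queue's hash bucket comes from the upper bits of its flow hash:
 * the tag is shifted down by FQ_IF_HASH_TAG_SHIFT and masked with
 * FQ_IF_HASH_TAG_MASK (concrete values live in pktsched_fq_codel.h).
 * With an 8-bit tag taken from the top of a 32-bit hash, for instance,
 * flow hash 0xdeadbeef would select bucket 0xde.
 */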
#define	FQ_IF_FLOW_HASH_ID(_flowid_) \
	(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)

#define	FQ_IF_CLASSQ_IDLE(_fcl_) \
	(STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
	STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
typedef void (* fq_if_append_pkt_t)(void *, void *);
typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
    u_int32_t, u_int32_t, void **, void **, u_int32_t *, u_int32_t *,
    boolean_t *, u_int32_t);
static void
fq_if_append_mbuf(void *pkt, void *next_pkt)
{
	((mbuf_t)pkt)->m_nextpkt = (mbuf_t)next_pkt;
}
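/*
 * DRR drain of a single flow queue for the mbuf path: every dequeued
 * packet is charged against the flow's byte deficit, and the loop ends
 * when the deficit runs out, the caller's packet/byte limits are hit,
 * or the flow empties.  Dequeued mbufs are chained through m_nextpkt
 * onto the caller's top/last list.
 */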
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    u_int32_t byte_limit, u_int32_t pkt_limit, void **top, void **last,
    u_int32_t *byte_cnt, u_int32_t *pkt_cnt, boolean_t *qempty,
    u_int32_t pflags)
{
	struct mbuf *m;
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq)) {

		_PKTSCHED_PKT_INIT(&pkt);
		m = fq_getq_flow(fqs, fq, &pkt);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		m->m_pkthdr.pkt_flags |= pflags;

		if (*top == NULL) {
			*top = m;
		} else {
			ASSERT(*last != NULL);
			ASSERT((*(struct mbuf **)last)->m_nextpkt == NULL);
			(*(struct mbuf **)last)->m_nextpkt = m;
		}
		*last = m;
		(*(mbuf_t *)last)->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, m, QP_MBUF);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit)
			limit_reached = TRUE;
	}

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return (limit_reached);
}
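/*
 * One-time setup, invoked from the packet scheduler initialization path:
 * initializes the flow queue module and carves out the zalloc zone that
 * backs fq_if_t instances.
 */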
void
fq_codel_scheduler_init(void)
{
	/* Initialize the zone for flow queue structures */
	fq_codel_init();

	fq_if_size = sizeof (fq_if_t);
	fq_if_zone = zinit(fq_if_size, (FQ_IF_ZONE_MAX * fq_if_size), 0,
	    FQ_IF_ZONE_NAME);
	if (fq_if_zone == NULL) {
		panic("%s: failed allocating from %s", __func__,
		    (FQ_IF_ZONE_NAME));
		/* NOTREACHED */
	}
	zone_change(fq_if_zone, Z_EXPAND, TRUE);
	zone_change(fq_if_zone, Z_CALLERACCT, TRUE);
}
static fq_if_t *
fq_if_alloc(struct ifnet *ifp, classq_pkt_type_t ptype)
{
	fq_if_t *fqs;

	fqs = zalloc(fq_if_zone);
	if (fqs == NULL)
		return (NULL);

	bzero(fqs, fq_if_size);
	fqs->fqs_ifq = &ifp->if_snd;
	fqs->fqs_ptype = ptype;

	/* Calculate target queue delay */
	ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);

	/* Calculate update interval */
	ifclassq_calc_update_interval(&fqs->fqs_update_interval);

	/* Configure packet drop limit across all queues */
	fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(&ifp->if_snd);
	STAILQ_INIT(&fqs->fqs_fclist);
	return (fqs);
}

static void
fq_if_destroy(fq_if_t *fqs)
{
	fq_if_purge(fqs);
	fqs->fqs_ifq = NULL;
	zfree(fq_if_zone, fqs);
}
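/*
 * Map an mbuf service class to a class queue index.  A driver-managed
 * scheduler exposes only four classes (BK/BE/VI/VO), so the ten service
 * classes are collapsed onto them; otherwise each service class gets its
 * own index, with best effort as the fallback.
 */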
static inline u_int32_t
fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
{
	u_int32_t pri;

	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
		switch (svc) {
		case MBUF_SC_BK_SYS:
		case MBUF_SC_BK:
			pri = FQ_IF_BK_INDEX;
			break;
		case MBUF_SC_BE:
		case MBUF_SC_RD:
		case MBUF_SC_OAM:
			pri = FQ_IF_BE_INDEX;
			break;
		case MBUF_SC_AV:
		case MBUF_SC_RV:
		case MBUF_SC_VI:
			pri = FQ_IF_VI_INDEX;
			break;
		case MBUF_SC_VO:
		case MBUF_SC_CTL:
			pri = FQ_IF_VO_INDEX;
			break;
		default:
			pri = FQ_IF_BE_INDEX; /* Use best effort by default */
			break;
		}
		return (pri);
	}

	/* scheduler is not managed by the driver */
	switch (svc) {
	case MBUF_SC_BK_SYS:
		pri = FQ_IF_BK_SYS_INDEX;
		break;
	case MBUF_SC_BK:
		pri = FQ_IF_BK_INDEX;
		break;
	case MBUF_SC_BE:
		pri = FQ_IF_BE_INDEX;
		break;
	case MBUF_SC_RD:
		pri = FQ_IF_RD_INDEX;
		break;
	case MBUF_SC_OAM:
		pri = FQ_IF_OAM_INDEX;
		break;
	case MBUF_SC_AV:
		pri = FQ_IF_AV_INDEX;
		break;
	case MBUF_SC_RV:
		pri = FQ_IF_RV_INDEX;
		break;
	case MBUF_SC_VI:
		pri = FQ_IF_VI_INDEX;
		break;
	case MBUF_SC_VO:
		pri = FQ_IF_VO_INDEX;
		break;
	case MBUF_SC_CTL:
		pri = FQ_IF_CTL_INDEX;
		break;
	default:
		pri = FQ_IF_BE_INDEX; /* Use best effort by default */
		break;
	}
	return (pri);
}
static void
fq_if_classq_init(fq_if_t *fqs, u_int32_t pri, u_int32_t quantum,
    u_int32_t drr_max, u_int32_t svc_class)
{
	fq_if_classq_t *fq_cl;

	fq_cl = &fqs->fqs_classq[pri];

	VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES &&
	    fq_cl->fcl_quantum == 0);
	fq_cl->fcl_quantum = quantum;
	fq_cl->fcl_pri = pri;
	fq_cl->fcl_drr_max = drr_max;
	fq_cl->fcl_service_class = svc_class;
	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
}
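/*
 * For the non-driver-managed scheduler each class is tracked in one of
 * three bitmaps: FQ_IF_ER (eligible, within budget), FQ_IF_EB (eligible,
 * backlogged) and FQ_IF_IB (idle with backlog).  Enqueue marks a newly
 * backlogged class as IB; the dequeue path promotes IB to EB and EB to
 * ER as it scans for work, and demotes a class back to IB once its
 * budget is spent.
 */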
static int
fq_if_enqueue_classq(struct ifclassq *ifq, void *p, classq_pkt_type_t ptype,
    boolean_t *pdrop)
{
	u_int32_t pri;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int ret, len;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	if ((ptype == QP_MBUF) && !(((mbuf_t)p)->m_flags & M_PKTHDR)) {
		IFCQ_CONVERT_LOCK(ifq);
		m_freem((mbuf_t)p);
		*pdrop = TRUE;
		return (ENOBUFS);
	}
	pktsched_pkt_encap(&pkt, ptype, p);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqs->fqs_classq[pri];

	if (svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1) {
		/* BK_SYS is currently throttled */
		fq_cl->fcl_stat.fcl_throttle_drops++;
		IFCQ_CONVERT_LOCK(ifq);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		return (EQSUSPENDED);
	}

	len = pktsched_get_pkt_len(&pkt);
	ret = fq_addq(fqs, &pkt, fq_cl);
	if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) &&
	    !FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it as IB (idle with backlog)
			 */
			pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
		}
	}

	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else {
			*pdrop = TRUE;
			VERIFY(ret == CLASSQEQ_DROP ||
			    ret == CLASSQEQ_DROP_FC ||
			    ret == CLASSQEQ_DROP_SP);
			pktsched_free_pkt(&pkt);
			switch (ret) {
			case CLASSQEQ_DROP:
				return (ENOBUFS);
			case CLASSQEQ_DROP_FC:
				return (EQFULL);
			case CLASSQEQ_DROP_SP:
				return (EQSUSPENDED);
			}
		}
	} else {
		*pdrop = FALSE;
	}
	IFCQ_INC_LEN(ifq);
	IFCQ_INC_BYTES(ifq, len);
	return (ret);
}
static void *
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_type_t *ptype)
{
	void *top;

	(void) fq_if_dequeue_classq_multi(ifq, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &top, NULL, NULL, NULL, ptype);

	return (top);
}
static void *
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_type_t *ptype)
{
	void *top;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
	fq_if_classq_t *fq_cl;
	u_int32_t pri;

	pri = fq_if_service_to_priority(fqs, svc);
	fq_cl = &fqs->fqs_classq[pri];

	fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    &top, NULL, NULL, NULL, TRUE, ptype);
	return (top);
}
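/*
 * Multi-packet dequeue across all classes.  Each pass services the
 * highest-priority class found via the ER/EB bitmaps; an exhausted
 * budget is refilled with min(fcl_drr_max, active flow count) quanta.
 * For example, a class with a 1500-byte quantum, drr_max 4 and 3 active
 * flows is granted a 4500-byte budget.
 */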
static int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
    u_int32_t maxbytecnt, void **first_packet,
    void **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    classq_pkt_type_t *ptype)
{
	void *top = NULL, *tail = NULL, *first, *last;
	u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt, total_bytecnt;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int pri;
	fq_if_append_pkt_t append_pkt;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	fqs = (fq_if_t *)ifq->ifcq_disc;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		append_pkt = fq_if_append_mbuf;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	first = last = NULL;
	total_pktcnt = total_bytecnt = 0;
	*ptype = fqs->fqs_ptype;

	for (;;) {
		classq_pkt_type_t tmp_ptype;
		if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
		    fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
			fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
			fqs->fqs_bitmaps[FQ_IF_IB] = 0;
			if (fqs->fqs_bitmaps[FQ_IF_EB] == 0)
				break;
		}
		pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
		if (pri == 0) {
			/*
			 * There are no ER flows, move the highest
			 * priority one from EB if there are any in that
			 * category
			 */
			pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
			VERIFY(pri > 0);
			pktsched_bit_clr((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_EB]);
			pktsched_bit_set((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_ER]);
		}
		pri--; /* index starts at 0 */
		fq_cl = &fqs->fqs_classq[pri];

		if (fq_cl->fcl_budget <= 0) {
			/* Update the budget */
			fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
			    fq_cl->fcl_stat.fcl_flows_cnt) *
			    fq_cl->fcl_quantum);
			if (fq_cl->fcl_budget <= 0)
				goto state_change;
		}
		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
		    &bytecnt, FALSE, &tmp_ptype);
		if (top != NULL) {
			ASSERT(tmp_ptype == *ptype);
			ASSERT(pktcnt > 0 && bytecnt > 0);
			if (first == NULL) {
				first = top;
				last = tail;
				total_pktcnt = pktcnt;
				total_bytecnt = bytecnt;
			} else {
				append_pkt(last, top);
				last = tail;
				total_pktcnt += pktcnt;
				total_bytecnt += bytecnt;
			}
			append_pkt(last, NULL);
			fq_cl->fcl_budget -= bytecnt;
			pktcnt = 0;
			bytecnt = 0;
		}

		/*
		 * If the class has exceeded the budget but still has data
		 * to send, move it to IB
		 */
state_change:
		if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
			if (fq_cl->fcl_budget <= 0) {
				pktsched_bit_set(pri,
				    &fqs->fqs_bitmaps[FQ_IF_IB]);
				pktsched_bit_clr(pri,
				    &fqs->fqs_bitmaps[FQ_IF_ER]);
			}
		} else {
			pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
			VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
			    fqs->fqs_bitmaps[FQ_IF_EB] |
			    fqs->fqs_bitmaps[FQ_IF_IB])&(1 << pri)) == 0);
			fq_cl->fcl_budget = 0;
		}
		if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt)
			break;
	}
	if (first != NULL) {
		if (first_packet != NULL)
			*first_packet = first;
		if (last_packet != NULL)
			*last_packet = last;
		if (retpktcnt != NULL)
			*retpktcnt = total_pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = total_bytecnt;
		IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
	} else {
		if (first_packet != NULL)
			*first_packet = NULL;
		if (last_packet != NULL)
			*last_packet = NULL;
		if (retpktcnt != NULL)
			*retpktcnt = 0;
		if (retbytecnt != NULL)
			*retbytecnt = 0;
	}
	return (0);
}
static int
fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, void **first_packet,
    void **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    classq_pkt_type_t *ptype)
{
#pragma unused(maxpktcnt, maxbytecnt, first_packet, last_packet, retpktcnt, retbytecnt)
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
	u_int32_t pri;
	u_int32_t total_pktcnt = 0, total_bytecnt = 0;
	fq_if_classq_t *fq_cl;
	void *first = NULL, *last = NULL;
	fq_if_append_pkt_t append_pkt;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		append_pkt = fq_if_append_mbuf;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	pri = fq_if_service_to_priority(fqs, svc);
	fq_cl = &fqs->fqs_classq[pri];

	/*
	 * Now we have the queue for a particular service class. We need
	 * to dequeue as many packets as needed, first from the new flows
	 * and then from the old flows.
	 */
	while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
	    fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
		void *top, *tail;
		u_int32_t pktcnt = 0, bytecnt = 0;
		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
		    &bytecnt, TRUE, ptype);
		if (first == NULL) {
			first = top;
			total_pktcnt = pktcnt;
			total_bytecnt = bytecnt;
		} else {
			append_pkt(last, top);
			total_pktcnt += pktcnt;
			total_bytecnt += bytecnt;
		}
		last = tail;
	}
	if (first != NULL) {
		if (first_packet != NULL)
			*first_packet = first;
		if (last_packet != NULL)
			*last_packet = last;
		if (retpktcnt != NULL)
			*retpktcnt = total_pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = total_bytecnt;
	} else {
		if (first_packet != NULL)
			*first_packet = NULL;
		if (last_packet != NULL)
			*last_packet = NULL;
		if (retpktcnt != NULL)
			*retpktcnt = 0;
		if (retbytecnt != NULL)
			*retbytecnt = 0;
	}
	return (0);
}
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
    u_int32_t *bytesp)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	while (fq_getq_flow(fqs, fq, &pkt) != NULL) {
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl, false);
	} else if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, false);
	}

	fq_if_destroy_flow(fqs, fq_cl, fq);

	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri,
			    &fqs->fqs_bitmaps[i]);
		}
	}
	if (pktsp != NULL)
		*pktsp = pkts;
	if (bytesp != NULL)
		*bytesp = bytes;
}
static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_t *fq, *tfq;
	/*
	 * Take each flow from new/old flow list and flush mbufs
	 * in that flow
	 */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
	fq_cl->fcl_budget = 0;
}
static void
fq_if_purge(fq_if_t *fqs)
{
	int i;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
		fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
	}

	VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));

	fqs->fqs_large_flow = NULL;
	for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
		VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
	}

	bzero(&fqs->fqs_bitmaps, sizeof (fqs->fqs_bitmaps));

	IFCQ_LEN(fqs->fqs_ifq) = 0;
	IFCQ_BYTES(fqs->fqs_ifq) = 0;
}
static void
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
{
	fq_t *fq;

	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	req->packets = req->bytes = 0;
	VERIFY(req->flow != 0);

	/* packet type is needed only if we want to create a flow queue */
	fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID);

	if (fq != NULL)
		fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
}
static void
fq_if_event(fq_if_t *fqs, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);

	switch (ev) {
	case CLASSQ_EV_LINK_UP:
	case CLASSQ_EV_LINK_DOWN:
		fq_if_purge(fqs);
		break;
	default:
		break;
	}
}
static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_if_purge_classq(fqs, fq_cl);
	fqs->fqs_throttle = 1;
	fq_cl->fcl_stat.fcl_throttle_on++;
}
static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
	fqs->fqs_throttle = 0;
	fq_cl->fcl_stat.fcl_throttle_off++;
}
static int
fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = fqs->fqs_ifq;
	int index;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!tr->set) {
		tr->level = fqs->fqs_throttle;
		return (0);
	}

	if (tr->level == fqs->fqs_throttle)
		return (EALREADY);

	/* Throttling is allowed on BK_SYS class only */
	index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
		break;
	case IFNET_THROTTLE_OPPORTUNISTIC:
		fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
		break;
	default:
		break;
	}
	fqs->fqs_throttle = tr->level;
	return (0);
}
void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
	u_int32_t pri;
	fq_if_classq_t *fq_cl;

	if (stat == NULL)
		return;

	pri = fq_if_service_to_priority(fqs, stat->sc);
	fq_cl = &fqs->fqs_classq[pri];
	stat->packets = fq_cl->fcl_stat.fcl_pkt_cnt;
	stat->bytes = fq_cl->fcl_stat.fcl_byte_cnt;
}
static int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
	int err = 0;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * These are usually slow operations, convert the lock ahead of time
	 */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	switch (rq) {
	case CLASSQRQ_PURGE:
		fq_if_purge(fqs);
		break;
	case CLASSQRQ_PURGE_SC:
		fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
		break;
	case CLASSQRQ_EVENT:
		fq_if_event(fqs, (cqev_t)arg);
		break;
	case CLASSQRQ_THROTTLE:
		fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
		break;
	case CLASSQRQ_STAT_SC:
		fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
		break;
	}
	return (err);
}
int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
#pragma unused(flags)
	struct ifnet *ifp = ifq->ifcq_ifp;
	fq_if_t *fqs = NULL;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifp, ptype);
	if (fqs == NULL)
		return (ENOMEM);

	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
		fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
		    2, MBUF_SC_BK);
		fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
		    4, MBUF_SC_BE);
		fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
		    6, MBUF_SC_VI);
		fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
		    8, MBUF_SC_VO);
	} else {
		fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500,
		    2, MBUF_SC_BK_SYS);
		fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
		    2, MBUF_SC_BK);
		fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
		    4, MBUF_SC_BE);
		fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500,
		    4, MBUF_SC_RD);
		fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500,
		    4, MBUF_SC_OAM);
		fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000,
		    6, MBUF_SC_AV);
		fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000,
		    6, MBUF_SC_RV);
		fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
		    6, MBUF_SC_VI);
		fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
		    8, MBUF_SC_VO);
		fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600,
		    8, MBUF_SC_CTL);
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs,
	    fq_if_enqueue_classq, fq_if_dequeue_classq,
	    fq_if_dequeue_sc_classq, fq_if_dequeue_classq_multi,
	    fq_if_dequeue_sc_classq_multi, fq_if_request_classq);

	if (err != 0) {
		printf("%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
	}
	return (err);
}
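/*
 * Look up (and optionally create) the flow queue for a flow hash within
 * a service class.  A hash bucket can hold flows from several service
 * classes, so both fq_flowhash and fq_sc_index must match.
 */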
fq_t *
fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
    u_int64_t now, boolean_t create, classq_pkt_type_t ptype)
{
	fq_t *fq = NULL;
	flowq_list_t *fq_list;
	fq_if_classq_t *fq_cl;
	u_int8_t fqs_hash_id;
	u_int8_t scidx;

	scidx = fq_if_service_to_priority(fqs, svc_class);

	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);

	fq_list = &fqs->fqs_flows[fqs_hash_id];

	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
		if (fq->fq_flowhash == flowid &&
		    fq->fq_sc_index == scidx)
			break;
	}
	if (fq == NULL && create == TRUE) {
		ASSERT(ptype == QP_MBUF);

		/* If the flow is not already on the list, allocate it */
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		fq = fq_alloc(ptype);
		if (fq != NULL) {
			fq->fq_flowhash = flowid;
			fq->fq_sc_index = scidx;
			fq->fq_updatetime = now + fqs->fqs_update_interval;
			fq_cl = &fqs->fqs_classq[scidx];
			fq->fq_flags = FQF_FLOWCTL_CAPABLE;
			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
			fq_cl->fcl_stat.fcl_flows_cnt++;
		}
	}

	/*
	 * If getq time is not set because this is the first packet or after
	 * idle time, set it now so that we can detect a stall.
	 */
	if (fq != NULL && fq->fq_getqtime == 0)
		fq->fq_getqtime = now;

	return (fq);
}
void
fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
{
	u_int8_t hash_id;

	hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
	SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
	    fq_hashlink);
	fq_cl->fcl_stat.fcl_flows_cnt--;
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fq_destroy(fq);
}
boolean_t
fq_if_at_drop_limit(fq_if_t *fqs)
{
	return (((IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
	    TRUE : FALSE));
}
static void
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    bool remove_hash)
{
	/*
	 * Remove the flow queue if it is empty
	 * and delete it
	 */
	STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
	    fq_actlink);
	fq->fq_flags &= ~FQF_OLD_FLOW;
	fq_cl->fcl_stat.fcl_oldflows_cnt--;
	VERIFY(fq->fq_bytes == 0);

	if (remove_hash) {
		/* Remove from the hash list */
		fq_if_destroy_flow(fqs, fq_cl, fq);
	}
}
static void
fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
{
	/* Move to the end of old queue list */
	STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
	    flowq, fq_actlink);
	fq->fq_flags &= ~FQF_NEW_FLOW;
	fq_cl->fcl_stat.fcl_newflows_cnt--;

	if (add_to_old) {
		STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
		    fq_actlink);
		fq->fq_flags |= FQF_OLD_FLOW;
		fq_cl->fcl_stat.fcl_oldflows_cnt++;
	}
}
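/*
 * Overflow handling: rather than dropping the arriving packet, drop from
 * the head of the largest flow (fqs_large_flow), penalizing the flow
 * that contributes most to the backlog.
 */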
void
fq_if_drop_packet(fq_if_t *fqs)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL)
		return;
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq));

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	_PKTSCHED_PKT_INIT(&pkt);
	(void)fq_getq_flow_internal(fqs, fq, &pkt);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	*pkt_timestamp = 0;
	if (pkt.pktsched_ptype == QP_MBUF)
		*pkt_flags &= ~PKTF_PRIV_GUARDED;

	if (fq_empty(fq)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
void
fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
{
	fq_t *prev_fq;

	if (fqs->fqs_large_flow != NULL &&
	    fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT)
		fqs->fqs_large_flow = NULL;

	if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT)
		return;

	prev_fq = fqs->fqs_large_flow;
	if (prev_fq == NULL) {
		if (!fq_empty(fq))
			fqs->fqs_large_flow = fq;
		return;
	} else if (fq->fq_bytes > prev_fq->fq_bytes) {
		fqs->fqs_large_flow = fq;
	}
}
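/*
 * Flow advisory bookkeeping: a flow-controlled flow gets an fcentry on
 * fqs_fclist; fq_if_flow_feedback below hands the entry back to the flow
 * advisory mechanism once the flow queue has drained.
 */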
boolean_t
fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t flowid,
    uint8_t flowsrc, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce;

	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
		    fce->fce_flowid == flowid) {
			/* Already on flowcontrol list */
			return (TRUE);
		}
	}
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		/* XXX Add number of bytes in the queue */
		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
		fq_cl->fcl_stat.fcl_flow_control++;
	}
	return ((fce != NULL) ? TRUE : FALSE);
}
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash)
			break;
	}
	if (fce != NULL) {
		STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
		    fce_link);
		STAILQ_NEXT(fce, fce_link) = NULL;
		flowadv_add_entry(fce);
		fq_cl->fcl_stat.fcl_flow_feedback++;
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
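/*
 * Per-class FQ-CoDel dequeue: new flows are serviced before old flows.
 * A new flow that empties or exhausts its deficit is demoted to the old
 * list; an old flow that empties is removed, while one that only runs
 * out of deficit is rotated to the tail with a fresh quantum added.
 */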
static void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, u_int32_t pktlimit,
    u_int32_t bytelimit, void **top, void **tail,
    u_int32_t *retpktcnt, u_int32_t *retbytecnt, boolean_t drvmgmt,
    classq_pkt_type_t *ptype)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	u_int32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	void *last = NULL;
	fq_getq_flow_t fq_getq_flow_fn;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if ((int32_t)bytelimit > fq_cl->fcl_budget && !drvmgmt)
		bytelimit = fq_cl->fcl_budget;

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);

	*top = NULL;
	*ptype = fqs->fqs_ptype;
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, top, &last, &bytecnt, &pktcnt, &qempty,
		    PKTF_NEW_FLOW);

		if (fq->fq_deficit <= 0 || qempty)
			fq_if_empty_new_flow(fq, fq_cl, true);
		fq->fq_deficit += fq_cl->fcl_quantum;
		if (limit_reached)
			goto done;
	}

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, top, &last, &bytecnt, &pktcnt, &qempty, 0);

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached)
			break;
	}

done:
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}

	if (*top != NULL) {
		VERIFY(*top != NULL);
		if (tail != NULL)
			*tail = last;
		if (retpktcnt != NULL)
			*retpktcnt = pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = bytecnt;
	}
}
int
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_destroy(fqs);
	ifq->ifcq_disc = NULL;
	return (ifclassq_detach(ifq));
}
static void
fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
    struct fq_codel_flowstats *flowstat)
{
	bzero(flowstat, sizeof (*flowstat));
	flowstat->fqst_min_qdelay = fq->fq_min_qdelay;
	flowstat->fqst_bytes = fq->fq_bytes;
	flowstat->fqst_flowhash = fq->fq_flowhash;
	if (fq->fq_flags & FQF_NEW_FLOW)
		flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
	if (fq->fq_flags & FQF_OLD_FLOW)
		flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
	if (fq->fq_flags & FQF_DELAY_HIGH)
		flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
	if (fq->fq_flags & FQF_FLOWCTL_ON)
		flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
	if (fqs->fqs_large_flow == fq)
		flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
}
int
fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
    struct if_ifclassq_stats *ifqs)
{
	struct fq_codel_classstats *fcls;
	fq_if_classq_t *fq_cl;
	fq_if_t *fqs;
	fq_t *fq = NULL;
	u_int32_t i, flowstat_cnt;

	if (qid >= FQ_IF_MAX_CLASSES)
		return (EINVAL);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	fcls = &ifqs->ifqs_fq_codel_stats;

	fq_cl = &fqs->fqs_classq[qid];

	fcls->fcls_pri = fq_cl->fcl_pri;
	fcls->fcls_service_class = fq_cl->fcl_service_class;
	fcls->fcls_quantum = fq_cl->fcl_quantum;
	fcls->fcls_drr_max = fq_cl->fcl_drr_max;
	fcls->fcls_budget = fq_cl->fcl_budget;
	fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
	fcls->fcls_update_interval = fqs->fqs_update_interval;
	fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
	fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
	fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
	fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
	fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
	fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
	fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
	fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
	fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
	fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
	fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
	fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
	fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
	fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
	fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
	fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
	fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;

	/* Gather per flow stats */
	flowstat_cnt = min((fcls->fcls_newflows_cnt +
	    fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
	i = 0;
	STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
		if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt)
			break;

		/* leave space for a few old flows */
		if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
		    i >= (FQ_IF_MAX_FLOWSTATS >> 1))
			break;

		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
		i++;
	}
	STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
		if (i >= flowstat_cnt)
			break;
		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
		i++;
	}
	VERIFY(i <= flowstat_cnt);
	fcls->fcls_flowstats_cnt = i;
	return (0);
}