/*
 * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Quick Fair Queueing is described in
 * "QFQ: Efficient Packet Scheduling with Tight Bandwidth Distribution
 * Guarantees" by Fabio Checconi, Paolo Valente, and Luigi Rizzo.
 *
 * This code is ported from the dummynet(4) QFQ implementation.
 * See also http://info.iet.unipi.it/~luigi/qfq/
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>

#include <net/pktsched/pktsched_qfq.h>
#include <netinet/in.h>
/*
 * function prototypes
 */
static int qfq_enqueue_ifclassq(struct ifclassq *, classq_pkt_t *,
    boolean_t *);
static void qfq_dequeue_ifclassq(struct ifclassq *, classq_pkt_t *);
static int qfq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
static int qfq_clear_interface(struct qfq_if *);
static struct qfq_class *qfq_class_create(struct qfq_if *, u_int32_t,
    u_int32_t, u_int32_t, u_int32_t, u_int32_t, classq_pkt_type_t);
static int qfq_class_destroy(struct qfq_if *, struct qfq_class *);
static int qfq_destroy_locked(struct qfq_if *);
static inline int qfq_addq(struct qfq_class *, pktsched_pkt_t *,
    struct pf_mtag *);
static inline void qfq_getq(struct qfq_class *, pktsched_pkt_t *);
static void qfq_purgeq(struct qfq_if *, struct qfq_class *, u_int32_t,
    u_int32_t *, u_int32_t *);
static void qfq_purge_sc(struct qfq_if *, cqrq_purge_sc_t *);
static void qfq_updateq(struct qfq_if *, struct qfq_class *, cqev_t);
static int qfq_throttle(struct qfq_if *, cqrq_throttle_t *);
static int qfq_resumeq(struct qfq_if *, struct qfq_class *);
static int qfq_suspendq(struct qfq_if *, struct qfq_class *);
static int qfq_stat_sc(struct qfq_if *, cqrq_stat_sc_t *);
static inline struct qfq_class *qfq_clh_to_clp(struct qfq_if *, u_int32_t);
static const char *qfq_style(struct qfq_if *);

static inline int qfq_gt(u_int64_t, u_int64_t);
static inline u_int64_t qfq_round_down(u_int64_t, u_int32_t);
static inline struct qfq_group *qfq_ffs(struct qfq_if *, pktsched_bitmap_t);
static int qfq_calc_index(struct qfq_class *, u_int32_t, u_int32_t);
static inline pktsched_bitmap_t mask_from(pktsched_bitmap_t, int);
static inline u_int32_t qfq_calc_state(struct qfq_if *, struct qfq_group *);
static inline void qfq_move_groups(struct qfq_if *, pktsched_bitmap_t,
    int, int);
static inline void qfq_unblock_groups(struct qfq_if *, int, u_int64_t);
static inline void qfq_make_eligible(struct qfq_if *, u_int64_t);
static inline void qfq_slot_insert(struct qfq_if *, struct qfq_group *,
    struct qfq_class *, u_int64_t);
static inline void qfq_front_slot_remove(struct qfq_group *);
static inline struct qfq_class *qfq_slot_scan(struct qfq_if *,
    struct qfq_group *);
static inline void qfq_slot_rotate(struct qfq_if *, struct qfq_group *,
    u_int64_t);
static inline void qfq_update_eligible(struct qfq_if *, u_int64_t);
static inline int qfq_update_class(struct qfq_if *, struct qfq_group *,
    struct qfq_class *);
static inline void qfq_update_start(struct qfq_if *, struct qfq_class *);
static inline void qfq_slot_remove(struct qfq_if *, struct qfq_group *,
    struct qfq_class *);
static void qfq_deactivate_class(struct qfq_if *, struct qfq_class *);
static const char *qfq_state2str(int);
#if QFQ_DEBUG
static void qfq_dump_groups(struct qfq_if *, u_int32_t);
static void qfq_dump_sched(struct qfq_if *, const char *);
#endif /* QFQ_DEBUG */
#define QFQ_ZONE_MAX    32              /* maximum elements in zone */
#define QFQ_ZONE_NAME   "pktsched_qfq"  /* zone name */

static unsigned int qfq_size;           /* size of zone element */
static struct zone *qfq_zone;           /* zone for qfq */

#define QFQ_CL_ZONE_MAX  32                 /* maximum elements in zone */
#define QFQ_CL_ZONE_NAME "pktsched_qfq_cl"  /* zone name */

static unsigned int qfq_cl_size;        /* size of zone element */
static struct zone *qfq_cl_zone;        /* zone for qfq_class */

/*
 * Maximum number of consecutive slots occupied by backlogged classes
 * inside a group.  This is approx lmax/lmin + 5.  Used when ALTQ is
 * available.
 *
 * XXX check because it poses constraints on MAX_INDEX
 */
#define QFQ_MAX_SLOTS   32      /* default when ALTQ is available */
void
qfq_init(void)
{
    qfq_size = sizeof(struct qfq_if);
    qfq_zone = zinit(qfq_size, QFQ_ZONE_MAX * qfq_size,
        0, QFQ_ZONE_NAME);
    if (qfq_zone == NULL) {
        panic("%s: failed allocating %s", __func__, QFQ_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(qfq_zone, Z_EXPAND, TRUE);
    zone_change(qfq_zone, Z_CALLERACCT, TRUE);

    qfq_cl_size = sizeof(struct qfq_class);
    qfq_cl_zone = zinit(qfq_cl_size, QFQ_CL_ZONE_MAX * qfq_cl_size,
        0, QFQ_CL_ZONE_NAME);
    if (qfq_cl_zone == NULL) {
        panic("%s: failed allocating %s", __func__, QFQ_CL_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(qfq_cl_zone, Z_EXPAND, TRUE);
    zone_change(qfq_cl_zone, Z_CALLERACCT, TRUE);
}
struct qfq_if *
qfq_alloc(struct ifnet *ifp, int how)
{
    struct qfq_if *qif;

    qif = (how == M_WAITOK) ? zalloc(qfq_zone) : zalloc_noblock(qfq_zone);
    if (qif == NULL) {
        return (NULL);
    }

    bzero(qif, qfq_size);
    qif->qif_ifq = &ifp->if_snd;

    qif->qif_maxclasses = IFCQ_SC_MAX;
    /*
     * TODO: adi@apple.com
     *
     * Ideally I would like to have the following
     * but QFQ needs further modifications.
     *
     *	qif->qif_maxslots = IFCQ_SC_MAX;
     */
    qif->qif_maxslots = QFQ_MAX_SLOTS;

    if ((qif->qif_class_tbl = _MALLOC(sizeof(struct qfq_class *) *
        qif->qif_maxclasses, M_DEVBUF, M_WAITOK | M_ZERO)) == NULL) {
        log(LOG_ERR, "%s: %s unable to allocate class table array\n",
            if_name(ifp), qfq_style(qif));
        goto error;
    }

    if ((qif->qif_groups = _MALLOC(sizeof(struct qfq_group *) *
        (QFQ_MAX_INDEX + 1), M_DEVBUF, M_WAITOK | M_ZERO)) == NULL) {
        log(LOG_ERR, "%s: %s unable to allocate group array\n",
            if_name(ifp), qfq_style(qif));
        goto error;
    }

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s scheduler allocated\n",
            if_name(ifp), qfq_style(qif));
    }

    return (qif);

error:
    if (qif->qif_class_tbl != NULL) {
        _FREE(qif->qif_class_tbl, M_DEVBUF);
        qif->qif_class_tbl = NULL;
    }
    if (qif->qif_groups != NULL) {
        _FREE(qif->qif_groups, M_DEVBUF);
        qif->qif_groups = NULL;
    }
    zfree(qfq_zone, qif);

    return (NULL);
}
int
qfq_destroy(struct qfq_if *qif)
{
    struct ifclassq *ifq = qif->qif_ifq;
    int err;

    IFCQ_LOCK(ifq);
    err = qfq_destroy_locked(qif);
    IFCQ_UNLOCK(ifq);

    return (err);
}
static int
qfq_destroy_locked(struct qfq_if *qif)
{
    int i;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    (void) qfq_clear_interface(qif);

    VERIFY(qif->qif_class_tbl != NULL);
    _FREE(qif->qif_class_tbl, M_DEVBUF);
    qif->qif_class_tbl = NULL;

    VERIFY(qif->qif_groups != NULL);
    for (i = 0; i <= QFQ_MAX_INDEX; i++) {
        struct qfq_group *grp = qif->qif_groups[i];

        if (grp != NULL) {
            VERIFY(grp->qfg_slots != NULL);
            _FREE(grp->qfg_slots, M_DEVBUF);
            grp->qfg_slots = NULL;
            _FREE(grp, M_DEVBUF);
            qif->qif_groups[i] = NULL;
        }
    }
    _FREE(qif->qif_groups, M_DEVBUF);
    qif->qif_groups = NULL;

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
            if_name(QFQIF_IFP(qif)), qfq_style(qif));
    }

    zfree(qfq_zone, qif);

    return (0);
}
/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
qfq_clear_interface(struct qfq_if *qif)
{
    struct qfq_class *cl;
    int i;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    /* clear out the classes */
    for (i = 0; i < qif->qif_maxclasses; i++) {
        if ((cl = qif->qif_class_tbl[i]) != NULL) {
            qfq_class_destroy(qif, cl);
        }
    }

    return (0);
}
/* discard all the queued packets on the interface */
void
qfq_purge(struct qfq_if *qif)
{
    struct qfq_class *cl;
    int i;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    for (i = 0; i < qif->qif_maxclasses; i++) {
        if ((cl = qif->qif_class_tbl[i]) != NULL) {
            qfq_purgeq(qif, cl, 0, NULL, NULL);
        }
    }
    VERIFY(IFCQ_LEN(qif->qif_ifq) == 0);
}
static void
qfq_purge_sc(struct qfq_if *qif, cqrq_purge_sc_t *pr)
{
    struct ifclassq *ifq = qif->qif_ifq;
    u_int32_t i;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
    VERIFY(pr->flow != 0);

    if (pr->sc != MBUF_SC_UNSPEC) {
        i = MBUF_SCIDX(pr->sc);
        VERIFY(i < IFCQ_SC_MAX);

        qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
            pr->flow, &pr->packets, &pr->bytes);
    } else {
        u_int32_t cnt, len;

        pr->packets = 0;
        pr->bytes = 0;

        for (i = 0; i < IFCQ_SC_MAX; i++) {
            qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
                pr->flow, &cnt, &len);
            pr->packets += cnt;
            pr->bytes += len;
        }
    }
}
void
qfq_event(struct qfq_if *qif, cqev_t ev)
{
    struct qfq_class *cl;
    int i;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    for (i = 0; i < qif->qif_maxclasses; i++) {
        if ((cl = qif->qif_class_tbl[i]) != NULL) {
            qfq_updateq(qif, cl, ev);
        }
    }
}
int
qfq_add_queue(struct qfq_if *qif, u_int32_t qlimit, u_int32_t weight,
    u_int32_t maxsz, u_int32_t flags, u_int32_t qid, struct qfq_class **clp,
    classq_pkt_type_t ptype)
{
    struct qfq_class *cl;
    u_int32_t w;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    if (qfq_clh_to_clp(qif, qid) != NULL) {
        return (EBUSY);
    }

    /* check parameters */
    if (weight == 0 || weight > QFQ_MAX_WEIGHT) {
        return (EINVAL);
    }

    w = (QFQ_ONE_FP / (QFQ_ONE_FP / weight));
    if (qif->qif_wsum + w > QFQ_MAX_WSUM) {
        return (EINVAL);
    }

    if (maxsz == 0 || maxsz > (1 << QFQ_MTU_SHIFT)) {
        return (EINVAL);
    }

    cl = qfq_class_create(qif, weight, qlimit, flags, maxsz, qid, ptype);
    if (cl == NULL) {
        return (ENOMEM);
    }

    if (clp != NULL) {
        *clp = cl;
    }

    return (0);
}
static struct qfq_class *
qfq_class_create(struct qfq_if *qif, u_int32_t weight, u_int32_t qlimit,
    u_int32_t flags, u_int32_t maxsz, u_int32_t qid, classq_pkt_type_t ptype)
{
    struct ifnet *ifp;
    struct ifclassq *ifq;
    struct qfq_group *grp;
    struct qfq_class *cl;
    u_int32_t w;            /* approximated weight */
    int i;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    if (qif->qif_classes >= qif->qif_maxclasses) {
        log(LOG_ERR, "%s: %s out of classes! (max %d)\n",
            if_name(QFQIF_IFP(qif)), qfq_style(qif),
            qif->qif_maxclasses);
        return (NULL);
    }

    ifq = qif->qif_ifq;
    ifp = QFQIF_IFP(qif);

    cl = zalloc(qfq_cl_zone);
    if (cl == NULL) {
        return (NULL);
    }

    bzero(cl, qfq_cl_size);

    if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
        qlimit = IFCQ_MAXLEN(ifq);
        if (qlimit == 0) {
            qlimit = DEFAULT_QLIMIT;    /* use default */
        }
    }
    _qinit(&cl->cl_q, Q_DROPTAIL, qlimit, ptype);
    cl->cl_qif = qif;
    cl->cl_flags = flags;
    cl->cl_handle = qid;

    /*
     * Find a free slot in the class table.  If the slot matching
     * the lower bits of qid is free, use this slot.  Otherwise,
     * use the first free slot.
     */
    i = qid % qif->qif_maxclasses;
    if (qif->qif_class_tbl[i] == NULL) {
        qif->qif_class_tbl[i] = cl;
    } else {
        for (i = 0; i < qif->qif_maxclasses; i++) {
            if (qif->qif_class_tbl[i] == NULL) {
                qif->qif_class_tbl[i] = cl;
                break;
            }
        }
        if (i == qif->qif_maxclasses) {
            zfree(qfq_cl_zone, cl);
            return (NULL);
        }
    }

    w = weight;
    VERIFY(w > 0 && w <= QFQ_MAX_WEIGHT);
    cl->cl_lmax = maxsz;
    cl->cl_inv_w = (QFQ_ONE_FP / w);
    w = (QFQ_ONE_FP / cl->cl_inv_w);
    VERIFY(qif->qif_wsum + w <= QFQ_MAX_WSUM);

    i = qfq_calc_index(cl, cl->cl_inv_w, cl->cl_lmax);
    VERIFY(i <= QFQ_MAX_INDEX);
    grp = qif->qif_groups[i];
    if (grp == NULL) {
        grp = _MALLOC(sizeof(*grp), M_DEVBUF, M_WAITOK | M_ZERO);
        if (grp != NULL) {
            grp->qfg_index = i;
            grp->qfg_slot_shift =
                QFQ_MTU_SHIFT + QFQ_FRAC_BITS - (QFQ_MAX_INDEX - i);
            grp->qfg_slots = _MALLOC(sizeof(struct qfq_class *) *
                qif->qif_maxslots, M_DEVBUF, M_WAITOK | M_ZERO);
            if (grp->qfg_slots == NULL) {
                log(LOG_ERR, "%s: %s unable to allocate group "
                    "slots for index %d\n", if_name(ifp),
                    qfq_style(qif), i);
            }
        } else {
            log(LOG_ERR, "%s: %s unable to allocate group for "
                "qid=%d\n", if_name(ifp), qfq_style(qif), qid);
        }
        if (grp == NULL || grp->qfg_slots == NULL) {
            qif->qif_class_tbl[qid % qif->qif_maxclasses] = NULL;
            if (grp != NULL) {
                _FREE(grp, M_DEVBUF);
            }
            zfree(qfq_cl_zone, cl);
            return (NULL);
        }
        qif->qif_groups[i] = grp;
    }
    cl->cl_grp = grp;
    qif->qif_wsum += w;
    /* XXX cl->cl_S = qif->qif_V; ? */
    /* XXX compute qif->qif_i_wsum */

    qif->qif_classes++;

    if (flags & QFCF_DEFAULTCLASS) {
        qif->qif_default = cl;
    }

    if (flags & QFCF_SFB) {
        cl->cl_qflags = 0;
        if (flags & QFCF_ECN) {
            cl->cl_qflags |= SFBF_ECN;
        }
        if (flags & QFCF_FLOWCTL) {
            cl->cl_qflags |= SFBF_FLOWCTL;
        }
        if (flags & QFCF_DELAYBASED) {
            cl->cl_qflags |= SFBF_DELAYBASED;
        }
        if (!(cl->cl_flags & QFCF_LAZY)) {
            cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
                qlimit(&cl->cl_q), cl->cl_qflags);
        }
        if (cl->cl_sfb != NULL || (cl->cl_flags & QFCF_LAZY)) {
            qtype(&cl->cl_q) = Q_SFB;
        }
    }

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s created qid=%d grp=%d weight=%d "
            "qlimit=%d flags=%b\n", if_name(ifp), qfq_style(qif),
            cl->cl_handle, cl->cl_grp->qfg_index, weight, qlimit,
            flags, QFCF_BITS);
    }

    return (cl);
}
int
qfq_remove_queue(struct qfq_if *qif, u_int32_t qid)
{
    struct qfq_class *cl;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    if ((cl = qfq_clh_to_clp(qif, qid)) == NULL) {
        return (EINVAL);
    }

    return (qfq_class_destroy(qif, cl));
}
static int
qfq_class_destroy(struct qfq_if *qif, struct qfq_class *cl)
{
    struct ifclassq *ifq = qif->qif_ifq;
    int i;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    qfq_purgeq(qif, cl, 0, NULL, NULL);

    if (cl->cl_inv_w != 0) {
        qif->qif_wsum -= (QFQ_ONE_FP / cl->cl_inv_w);
        cl->cl_inv_w = 0;       /* reset weight to avoid run twice */
    }

    for (i = 0; i < qif->qif_maxclasses; i++) {
        if (qif->qif_class_tbl[i] == cl) {
            qif->qif_class_tbl[i] = NULL;
            break;
        }
    }
    qif->qif_classes--;

    if (cl->cl_qalg.ptr != NULL) {
        if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
            sfb_destroy(cl->cl_sfb);
        }
        cl->cl_qalg.ptr = NULL;
        qtype(&cl->cl_q) = Q_DROPTAIL;
        qstate(&cl->cl_q) = QS_RUNNING;
    }

    if (qif->qif_default == cl) {
        qif->qif_default = NULL;
    }

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s destroyed qid=%d\n",
            if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle);
    }

    zfree(qfq_cl_zone, cl);

    return (0);
}
/*
 * Calculate a mask to mimic what would be ffs_from()
 */
static inline pktsched_bitmap_t
mask_from(pktsched_bitmap_t bitmap, int from)
{
    return (bitmap & ~((1UL << from) - 1));
}
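
/*
 * Illustrative example (added, not in the original source): mask_from()
 * just clears the bits below "from", e.g. mask_from(0x2d, 2) == 0x2c
 * (0b101101 -> 0b101100), so a subsequent ffs-style scan effectively
 * starts at bit position "from" instead of bit 0.
 */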
/*
 * The state computation relies on ER=0, IR=1, EB=2, IB=3
 * First compute eligibility comparing grp->qfg_S, qif->qif_V,
 * then check if someone is blocking us and possibly add EB
 */
static inline u_int32_t
qfq_calc_state(struct qfq_if *qif, struct qfq_group *grp)
{
    /* if S > V we are not eligible */
    u_int32_t state = qfq_gt(grp->qfg_S, qif->qif_V);
    pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER],
        grp->qfg_index);
    struct qfq_group *next;

    if (mask) {
        next = qfq_ffs(qif, mask);
        if (qfq_gt(grp->qfg_F, next->qfg_F)) {
            state |= EB;
        }
    }

    return (state);
}
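
/*
 * Illustrative note (added): with ER=0, IR=1, EB=2, IB=3 the two tests
 * above simply OR together; e.g. a group whose start time is ahead of the
 * virtual clock (S > V) and whose finish time exceeds that of a
 * lower-index ER group ends up in IB (1 | 2 == 3).
 */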
/*
 *	qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
 *	qif->qif_bitmaps[src] &= ~mask;
 * but we should make sure that src != dst
 */
static inline void
qfq_move_groups(struct qfq_if *qif, pktsched_bitmap_t mask, int src, int dst)
{
    qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
    qif->qif_bitmaps[src] &= ~mask;
}
static inline void
qfq_unblock_groups(struct qfq_if *qif, int index, u_int64_t old_finish)
{
    pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER], index + 1);
    struct qfq_group *next;

    if (mask) {
        next = qfq_ffs(qif, mask);
        if (!qfq_gt(next->qfg_F, old_finish)) {
            return;
        }
    }

    mask = (1UL << index) - 1;
    qfq_move_groups(qif, mask, EB, ER);
    qfq_move_groups(qif, mask, IB, IR);
}
/*
 *	old_V ^= qif->qif_V;
 *	old_V >>= QFQ_MIN_SLOT_SHIFT;
 */
static inline void
qfq_make_eligible(struct qfq_if *qif, u_int64_t old_V)
{
    pktsched_bitmap_t mask, vslot, old_vslot;

    vslot = qif->qif_V >> QFQ_MIN_SLOT_SHIFT;
    old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;

    if (vslot != old_vslot) {
        mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
        qfq_move_groups(qif, mask, IR, ER);
        qfq_move_groups(qif, mask, IB, EB);
    }
}
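
/*
 * Illustrative note (added, assuming __fls() returns the zero-based index
 * of the highest set bit as used elsewhere in this file): if the highest
 * bit that differs between the old and new virtual-time slots is bit k,
 * the mask has bits 0..k set, so groups 0 through k move from the
 * ineligible sets (IR/IB) to the corresponding eligible sets (ER/EB).
 */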
/*
 * XXX we should make sure that slot becomes less than 32.
 * This is guaranteed by the input values.
 * roundedS is always cl->qfg_S rounded on grp->qfg_slot_shift bits.
 */
static inline void
qfq_slot_insert(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl, u_int64_t roundedS)
{
    u_int64_t slot = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
    u_int32_t i = (grp->qfg_front + slot) % qif->qif_maxslots;

    cl->cl_next = grp->qfg_slots[i];
    grp->qfg_slots[i] = cl;
    pktsched_bit_set(slot, &grp->qfg_full_slots);
}
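
/*
 * Illustrative example (added): a class whose rounded start time lies two
 * slot widths past the group's start (roundedS - qfg_S == 2 << qfg_slot_shift)
 * is linked into bucket (qfg_front + 2) % qif_maxslots, and bit 2 of
 * qfg_full_slots is set to mark that bucket as occupied.
 */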
/*
 * remove the entry from the slot
 */
static inline void
qfq_front_slot_remove(struct qfq_group *grp)
{
    struct qfq_class **h = &grp->qfg_slots[grp->qfg_front];

    *h = (*h)->cl_next;
    if (!*h) {
        pktsched_bit_clr(0, &grp->qfg_full_slots);
    }
}
/*
 * Returns the first full queue in a group. As a side effect,
 * adjust the bucket list so the first non-empty bucket is at
 * position 0 in qfg_full_slots.
 */
static inline struct qfq_class *
qfq_slot_scan(struct qfq_if *qif, struct qfq_group *grp)
{
    int i;

    if (pktsched_verbose > 2) {
        log(LOG_DEBUG, "%s: %s grp=%d full_slots=0x%x\n",
            if_name(QFQIF_IFP(qif)), qfq_style(qif), grp->qfg_index,
            grp->qfg_full_slots);
    }

    if (grp->qfg_full_slots == 0) {
        return (NULL);
    }

    i = pktsched_ffs(grp->qfg_full_slots) - 1;  /* zero-based */
    if (i > 0) {
        grp->qfg_front = (grp->qfg_front + i) % qif->qif_maxslots;
        grp->qfg_full_slots >>= i;
    }

    return (grp->qfg_slots[grp->qfg_front]);
}
/*
 * adjust the bucket list. When the start time of a group decreases,
 * we move the index down (modulo qif->qif_maxslots) so we don't need to
 * move the objects. The mask of occupied slots must be shifted
 * because we use ffs() to find the first non-empty slot.
 * This covers decreases in the group's start time, but what about
 * increases of the start time?
 * Here too we should make sure that i is less than 32
 */
static inline void
qfq_slot_rotate(struct qfq_if *qif, struct qfq_group *grp, u_int64_t roundedS)
{
    u_int32_t i = (grp->qfg_S - roundedS) >> grp->qfg_slot_shift;

    grp->qfg_full_slots <<= i;
    grp->qfg_front = (grp->qfg_front - i) % qif->qif_maxslots;
}
static inline void
qfq_update_eligible(struct qfq_if *qif, u_int64_t old_V)
{
    pktsched_bitmap_t ineligible;

    ineligible = qif->qif_bitmaps[IR] | qif->qif_bitmaps[IB];
    if (ineligible) {
        if (!qif->qif_bitmaps[ER]) {
            struct qfq_group *grp;
            grp = qfq_ffs(qif, ineligible);
            if (qfq_gt(grp->qfg_S, qif->qif_V)) {
                qif->qif_V = grp->qfg_S;
            }
        }
        qfq_make_eligible(qif, old_V);
    }
}
/*
 * Updates the class, returns true if also the group needs to be updated.
 */
static inline int
qfq_update_class(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl)
{
    cl->cl_S = cl->cl_F;
    if (qempty(&cl->cl_q)) {
        qfq_front_slot_remove(grp);
    } else {
        u_int32_t len;
        u_int64_t roundedS;

        len = m_pktlen((struct mbuf *)qhead(&cl->cl_q));
        cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
        roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
        if (roundedS == grp->qfg_S) {
            return (0);
        }

        qfq_front_slot_remove(grp);
        qfq_slot_insert(qif, grp, cl, roundedS);
    }
    return (1);
}
/*
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
void
qfq_dequeue(struct qfq_if *qif, pktsched_pkt_t *pkt)
{
    pktsched_bitmap_t er_bits = qif->qif_bitmaps[ER];
    struct ifclassq *ifq = qif->qif_ifq;
    struct qfq_group *grp;
    struct qfq_class *cl;
    u_int64_t old_V;
    u_int32_t len;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    _PKTSCHED_PKT_INIT(pkt);

    for (;;) {
        if (er_bits == 0) {
#if QFQ_DEBUG
            if (qif->qif_queued && pktsched_verbose > 1) {
                qfq_dump_sched(qif, "start dequeue");
            }
#endif /* QFQ_DEBUG */
            /* no eligible and ready packet */
            return;
        }
        grp = qfq_ffs(qif, er_bits);
        /* if group is non-empty, use it */
        if (grp->qfg_full_slots != 0) {
            break;
        }
        /* else move to the next eligible group */
        pktsched_bit_clr(grp->qfg_index, &er_bits);
#if QFQ_DEBUG
        qif->qif_emptygrp++;
#endif /* QFQ_DEBUG */
    }
    VERIFY(!IFCQ_IS_EMPTY(ifq));

    cl = grp->qfg_slots[grp->qfg_front];
    VERIFY(cl != NULL && !qempty(&cl->cl_q));

    qfq_getq(cl, pkt);
    /* qalg must be work conserving */
    VERIFY(pkt->pktsched_ptype != QP_INVALID);
    len = pktsched_get_pkt_len(pkt);

#if QFQ_DEBUG
    qif->qif_queued--;
#endif /* QFQ_DEBUG */

    IFCQ_DEC_LEN(ifq);
    IFCQ_DEC_BYTES(ifq, len);
    if (qempty(&cl->cl_q)) {
        cl->cl_period++;
    }
    PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
    IFCQ_XMIT_ADD(ifq, 1, len);

    old_V = qif->qif_V;
    qif->qif_V += (u_int64_t)len * QFQ_IWSUM;

    if (pktsched_verbose > 2) {
        log(LOG_DEBUG, "%s: %s qid=%d dequeue pkt=0x%llx F=0x%llx "
            "V=0x%llx", if_name(QFQIF_IFP(qif)), qfq_style(qif),
            cl->cl_handle,
            (uint64_t)VM_KERNEL_ADDRPERM(pkt->pktsched_pkt_mbuf),
            cl->cl_F, qif->qif_V);
    }

    if (qfq_update_class(qif, grp, cl)) {
        u_int64_t old_F = grp->qfg_F;

        cl = qfq_slot_scan(qif, grp);
        if (!cl) { /* group gone, remove from ER */
            pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
        } else {
            u_int32_t s;
            u_int64_t roundedS =
                qfq_round_down(cl->cl_S, grp->qfg_slot_shift);

            if (grp->qfg_S == roundedS) {
                goto skip_unblock;
            }

            grp->qfg_S = roundedS;
            grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);

            /* remove from ER and put in the new set */
            pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
            s = qfq_calc_state(qif, grp);
            pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
        }

        /* we need to unblock even if the group has gone away */
        qfq_unblock_groups(qif, grp->qfg_index, old_F);
    }

skip_unblock:
    qfq_update_eligible(qif, old_V);

#if QFQ_DEBUG
    if (!qif->qif_bitmaps[ER] && qif->qif_queued && pktsched_verbose > 1) {
        qfq_dump_sched(qif, "end dequeue");
    }
#endif /* QFQ_DEBUG */
}
/*
 * Assign a reasonable start time for a new flow k in group i.
 * Admissible values for hat(F) are multiples of sigma_i
 * no greater than V+sigma_i . Larger values mean that
 * we had a wraparound so we consider the timestamp to be stale.
 *
 * If F is not stale and F >= V then we set S = F.
 * Otherwise we should assign S = V, but this may violate
 * the ordering in ER. So, if we have groups in ER, set S to
 * the F_j of the first group j which would be blocking us.
 * We are guaranteed not to move S backward because
 * otherwise our group i would still be blocked.
 */
static inline void
qfq_update_start(struct qfq_if *qif, struct qfq_class *cl)
{
    pktsched_bitmap_t mask;
    u_int64_t limit, roundedF;
    int slot_shift = cl->cl_grp->qfg_slot_shift;

    roundedF = qfq_round_down(cl->cl_F, slot_shift);
    limit = qfq_round_down(qif->qif_V, slot_shift) + (1UL << slot_shift);

    if (!qfq_gt(cl->cl_F, qif->qif_V) || qfq_gt(roundedF, limit)) {
        /* timestamp was stale */
        mask = mask_from(qif->qif_bitmaps[ER], cl->cl_grp->qfg_index);
        if (mask) {
            struct qfq_group *next = qfq_ffs(qif, mask);
            if (qfq_gt(roundedF, next->qfg_F)) {
                cl->cl_S = next->qfg_F;
                return;
            }
        }
        cl->cl_S = qif->qif_V;
    } else { /* timestamp is not stale */
        cl->cl_S = cl->cl_F;
    }
}
int
qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, pktsched_pkt_t *pkt,
    struct pf_mtag *t)
{
    struct ifclassq *ifq = qif->qif_ifq;
    struct qfq_group *grp;
    u_int64_t roundedS;
    u_int32_t len, s;
    int ret;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(cl == NULL || cl->cl_qif == qif);

    if (cl == NULL) {
        cl = qfq_clh_to_clp(qif, 0);
        if (cl == NULL) {
            cl = qif->qif_default;
            if (cl == NULL) {
                IFCQ_CONVERT_LOCK(ifq);
                return CLASSQEQ_DROP;
            }
        }
    }

    VERIFY(pkt->pktsched_ptype == qptype(&cl->cl_q));
    len = pktsched_get_pkt_len(pkt);

    ret = qfq_addq(cl, pkt, t);
    if ((ret != 0) && (ret != CLASSQEQ_SUCCESS_FC)) {
        VERIFY(ret == CLASSQEQ_DROP ||
            ret == CLASSQEQ_DROP_FC ||
            ret == CLASSQEQ_DROP_SP);
        PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
        IFCQ_DROP_ADD(ifq, 1, len);
        return (ret);
    }
    IFCQ_INC_LEN(ifq);
    IFCQ_INC_BYTES(ifq, len);

#if QFQ_DEBUG
    qif->qif_queued++;
#endif /* QFQ_DEBUG */

    /* queue was not idle, we're done */
    if (qlen(&cl->cl_q) > 1) {
        return (ret);
    }

    /* queue was idle */
    grp = cl->cl_grp;
    qfq_update_start(qif, cl);  /* adjust start time */

    /* compute new finish time and rounded start */
    cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
    roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);

    /*
     * Insert cl in the correct bucket.
     *
     * If cl->cl_S >= grp->qfg_S we don't need to adjust the bucket list
     * and simply go to the insertion phase.  Otherwise grp->qfg_S is
     * decreasing, we must make room in the bucket list, and also
     * recompute the group state.  Finally, if there were no flows
     * in this group and nobody was in ER make sure to adjust V.
     */
    if (grp->qfg_full_slots != 0) {
        if (!qfq_gt(grp->qfg_S, cl->cl_S)) {
            goto skip_update;
        }

        /* create a slot for this cl->cl_S */
        qfq_slot_rotate(qif, grp, roundedS);

        /* group was surely ineligible, remove */
        pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
        pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
    } else if (!qif->qif_bitmaps[ER] && qfq_gt(roundedS, qif->qif_V)) {
        qif->qif_V = roundedS;
    }

    grp->qfg_S = roundedS;
    grp->qfg_F =
        roundedS + (2ULL << grp->qfg_slot_shift); /* i.e. 2 sigma_i */
    s = qfq_calc_state(qif, grp);
    pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);

    if (pktsched_verbose > 2) {
        log(LOG_DEBUG, "%s: %s qid=%d enqueue m=0x%llx state=%s 0x%x "
            "S=0x%llx F=0x%llx V=0x%llx\n", if_name(QFQIF_IFP(qif)),
            qfq_style(qif), cl->cl_handle,
            (uint64_t)VM_KERNEL_ADDRPERM(pkt->pktsched_pkt_mbuf),
            qfq_state2str(s),
            qif->qif_bitmaps[s], cl->cl_S, cl->cl_F, qif->qif_V);
    }

skip_update:
    qfq_slot_insert(qif, grp, cl, roundedS);

    /* successfully queued. */
    return (ret);
}
static inline void
qfq_slot_remove(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl)
{
    struct qfq_class **pprev;
    u_int32_t i, offset;
    u_int64_t roundedS;

    roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
    offset = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
    i = (grp->qfg_front + offset) % qif->qif_maxslots;

    pprev = &grp->qfg_slots[i];
    while (*pprev && *pprev != cl) {
        pprev = &(*pprev)->cl_next;
    }

    *pprev = cl->cl_next;
    if (!grp->qfg_slots[i]) {
        pktsched_bit_clr(offset, &grp->qfg_full_slots);
    }
}
/*
 * Called to forcibly destroy a queue.
 * If the queue is not in the front bucket, or if it has
 * other queues in the front bucket, we can simply remove
 * the queue with no other side effects.
 * Otherwise we must propagate the event up.
 * XXX description to be completed.
 */
static void
qfq_deactivate_class(struct qfq_if *qif, struct qfq_class *cl)
{
    struct qfq_group *grp = cl->cl_grp;
    pktsched_bitmap_t mask;
    u_int64_t roundedS;
    u_int32_t s;

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s deactivate qid=%d grp=%d "
            "full_slots=0x%x front=%d bitmaps={ER=0x%x,EB=0x%x,"
            "IR=0x%x,IB=0x%x}\n",
            if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
            cl->cl_handle, grp->qfg_index, grp->qfg_full_slots,
            grp->qfg_front, qif->qif_bitmaps[ER], qif->qif_bitmaps[EB],
            qif->qif_bitmaps[IR], qif->qif_bitmaps[IB]);
#if QFQ_DEBUG
        if (pktsched_verbose > 1) {
            qfq_dump_sched(qif, "start deactivate");
        }
#endif /* QFQ_DEBUG */
    }

    cl->cl_F = cl->cl_S;    /* not needed if the class goes away */
    qfq_slot_remove(qif, grp, cl);

    if (grp->qfg_full_slots == 0) {
        /*
         * Nothing left in the group, remove from all sets.
         * Do ER last because if we were blocking other groups
         * we must unblock them.
         */
        pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
        pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
        pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);

        if (pktsched_bit_tst(grp->qfg_index, &qif->qif_bitmaps[ER]) &&
            !(qif->qif_bitmaps[ER] & ~((1UL << grp->qfg_index) - 1))) {
            mask = qif->qif_bitmaps[ER] &
                ((1UL << grp->qfg_index) - 1);
            if (mask) {
                mask = ~((1UL << __fls(mask)) - 1);
            } else {
                mask = (pktsched_bitmap_t)~0UL;
            }
            qfq_move_groups(qif, mask, EB, ER);
            qfq_move_groups(qif, mask, IB, IR);
        }
        pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
    } else if (!grp->qfg_slots[grp->qfg_front]) {
        cl = qfq_slot_scan(qif, grp);
        roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
        if (grp->qfg_S != roundedS) {
            pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
            pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
            pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
            pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
            grp->qfg_S = roundedS;
            grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
            s = qfq_calc_state(qif, grp);
            pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
        }
    }
    qfq_update_eligible(qif, qif->qif_V);

#if QFQ_DEBUG
    if (pktsched_verbose > 1) {
        qfq_dump_sched(qif, "end deactivate");
    }
#endif /* QFQ_DEBUG */
}
static const char *
qfq_state2str(int s)
{
    const char *c;

    switch (s) {
    case ER:
        c = "ER";
        break;
    case IR:
        c = "IR";
        break;
    case EB:
        c = "EB";
        break;
    case IB:
        c = "IB";
        break;
    default:
        c = "UNKNOWN";
        break;
    }
    return (c);
}
static inline int
qfq_addq(struct qfq_class *cl, pktsched_pkt_t *pkt, struct pf_mtag *t)
{
    struct qfq_if *qif = cl->cl_qif;
    struct ifclassq *ifq = qif->qif_ifq;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (q_is_sfb(&cl->cl_q)) {
        if (cl->cl_sfb == NULL) {
            struct ifnet *ifp = QFQIF_IFP(qif);

            VERIFY(cl->cl_flags & QFCF_LAZY);
            cl->cl_flags &= ~QFCF_LAZY;

            IFCQ_CONVERT_LOCK(ifq);
            cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
                qlimit(&cl->cl_q), cl->cl_qflags);
            if (cl->cl_sfb == NULL) {
                /* fall back to droptail */
                qtype(&cl->cl_q) = Q_DROPTAIL;
                cl->cl_flags &= ~QFCF_SFB;
                cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

                log(LOG_ERR, "%s: %s SFB lazy allocation "
                    "failed for qid=%d grp=%d, falling back "
                    "to DROPTAIL\n", if_name(ifp),
                    qfq_style(qif), cl->cl_handle,
                    cl->cl_grp->qfg_index);
            } else if (qif->qif_throttle != IFNET_THROTTLE_OFF) {
                /* if there's pending throttling, set it */
                cqrq_throttle_t tr = { 1, qif->qif_throttle };
                int err = qfq_throttle(qif, &tr);

                if (err == EALREADY) {
                    err = 0;
                }
                if (err != 0) {
                    tr.level = IFNET_THROTTLE_OFF;
                    (void) qfq_throttle(qif, &tr);
                }
            }
        }
        if (cl->cl_sfb != NULL) {
            return sfb_addq(cl->cl_sfb, &cl->cl_q, pkt, t);
        }
    } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
        IFCQ_CONVERT_LOCK(ifq);
        return CLASSQEQ_DROP;
    }

#if PF_ECN
    if (cl->cl_flags & QFCF_CLEARDSCP) {
        /* not supported for non-mbuf type packets */
        VERIFY(pkt->pktsched_ptype == QP_MBUF);
        write_dsfield(m, t, 0);
    }
#endif /* PF_ECN */

    VERIFY(pkt->pktsched_ptype == qptype(&cl->cl_q));
    _addq(&cl->cl_q, &pkt->pktsched_pkt);

    return (0);
}
static inline void
qfq_getq(struct qfq_class *cl, pktsched_pkt_t *pkt)
{
    classq_pkt_t p = CLASSQ_PKT_INITIALIZER(p);

    IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);

    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
        return sfb_getq(cl->cl_sfb, &cl->cl_q, pkt);
    }

    _getq(&cl->cl_q, &p);
    return pktsched_pkt_encap(pkt, &p);
}
static void
qfq_purgeq(struct qfq_if *qif, struct qfq_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
    struct ifclassq *ifq = qif->qif_ifq;
    u_int32_t cnt = 0, len = 0, qlen;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if ((qlen = qlen(&cl->cl_q)) == 0) {
        goto done;
    }

    IFCQ_CONVERT_LOCK(ifq);
    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
        sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
    } else {
        _flushq_flow(&cl->cl_q, flow, &cnt, &len);
    }

    if (cnt > 0) {
        VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
#if QFQ_DEBUG
        VERIFY(qif->qif_queued >= cnt);
        qif->qif_queued -= cnt;
#endif /* QFQ_DEBUG */

        PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
        IFCQ_DROP_ADD(ifq, cnt, len);

        VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
        IFCQ_LEN(ifq) -= cnt;

        if (qempty(&cl->cl_q)) {
            qfq_deactivate_class(qif, cl);
        }

        if (pktsched_verbose) {
            log(LOG_DEBUG, "%s: %s purge qid=%d weight=%d "
                "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
                if_name(QFQIF_IFP(qif)),
                qfq_style(qif), cl->cl_handle,
                (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w), qlen,
                qlen(&cl->cl_q), cnt, len, flow);
        }
    }

done:
    if (packets != NULL) {
        *packets = cnt;
    }
    if (bytes != NULL) {
        *bytes = len;
    }
}
static void
qfq_updateq(struct qfq_if *qif, struct qfq_class *cl, cqev_t ev)
{
    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s update qid=%d weight=%d event=%s\n",
            if_name(QFQIF_IFP(qif)), qfq_style(qif),
            cl->cl_handle, (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w),
            ifclassq_ev2str(ev));
    }

    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
        return sfb_updateq(cl->cl_sfb, ev);
    }
}
int
qfq_get_class_stats(struct qfq_if *qif, u_int32_t qid,
    struct qfq_classstats *sp)
{
    struct qfq_class *cl;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    if ((cl = qfq_clh_to_clp(qif, qid)) == NULL) {
        return (EINVAL);
    }

    sp->class_handle = cl->cl_handle;
    sp->index = cl->cl_grp->qfg_index;
    sp->weight = (QFQ_ONE_FP / cl->cl_inv_w);
    sp->lmax = cl->cl_lmax;
    sp->qlength = qlen(&cl->cl_q);
    sp->qlimit = qlimit(&cl->cl_q);
    sp->period = cl->cl_period;
    sp->xmitcnt = cl->cl_xmitcnt;
    sp->dropcnt = cl->cl_dropcnt;

    sp->qtype = qtype(&cl->cl_q);
    sp->qstate = qstate(&cl->cl_q);

    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
        sfb_getstats(cl->cl_sfb, &sp->sfb);
    }

    return (0);
}
static int
qfq_stat_sc(struct qfq_if *qif, cqrq_stat_sc_t *sr)
{
    struct ifclassq *ifq = qif->qif_ifq;
    struct qfq_class *cl;
    u_int32_t i;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc));

    i = MBUF_SCIDX(sr->sc);
    VERIFY(i < IFCQ_SC_MAX);

    cl = ifq->ifcq_disc_slots[i].cl;
    sr->packets = qlen(&cl->cl_q);
    sr->bytes = qsize(&cl->cl_q);

    return (0);
}
/* convert a class handle to the corresponding class pointer */
static inline struct qfq_class *
qfq_clh_to_clp(struct qfq_if *qif, u_int32_t chandle)
{
    struct qfq_class *cl;
    u_int32_t i;

    IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

    /*
     * First, try optimistically the slot matching the lower bits of
     * the handle.  If it fails, do the linear table search.
     */
    i = chandle % qif->qif_maxclasses;
    if ((cl = qif->qif_class_tbl[i]) != NULL && cl->cl_handle == chandle) {
        return (cl);
    }
    for (i = 0; i < qif->qif_maxclasses; i++) {
        if ((cl = qif->qif_class_tbl[i]) != NULL &&
            cl->cl_handle == chandle) {
            return (cl);
        }
    }

    return (NULL);
}
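
/*
 * Illustrative note (added): since qif_maxclasses is set to IFCQ_SC_MAX in
 * qfq_alloc() and the built-in queues use SCIDX_* handles smaller than
 * IFCQ_SC_MAX, the optimistic (chandle % qif_maxclasses) probe normally
 * hits; the linear scan is only needed when that slot was taken by a
 * different class at creation time.
 */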
static const char *
qfq_style(struct qfq_if *qif)
{
#pragma unused(qif)
    return ("QFQ");
}
/*
 * Generic comparison function, handling wraparound
 */
static inline int
qfq_gt(u_int64_t a, u_int64_t b)
{
    return ((int64_t)(a - b) > 0);
}
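
/*
 * Illustrative example (added): the signed difference tolerates wraparound
 * of the 64-bit virtual clock, e.g. qfq_gt(1ULL, ~0ULL) is true because
 * (int64_t)(1 - 0xffffffffffffffff) == 2 > 0, i.e. "1" is treated as being
 * ahead of a timestamp that has just wrapped.
 */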
/*
 * Round a precise timestamp to its slotted value
 */
static inline u_int64_t
qfq_round_down(u_int64_t ts, u_int32_t shift)
{
    return (ts & ~((1ULL << shift) - 1));
}
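
/*
 * Illustrative example (added): qfq_round_down(0x1234, 8) == 0x1200;
 * only the low "shift" bits of the timestamp are cleared.
 */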
/*
 * Return the pointer to the group with lowest index in the bitmap
 */
static inline struct qfq_group *
qfq_ffs(struct qfq_if *qif, pktsched_bitmap_t bitmap)
{
    int index = pktsched_ffs(bitmap) - 1;   /* zero-based */
    VERIFY(index >= 0 && index <= QFQ_MAX_INDEX &&
        qif->qif_groups[index] != NULL);
    return (qif->qif_groups[index]);
}
/*
 * Calculate a flow index, given its weight and maximum packet length.
 * index = log_2(maxlen/weight) but we need to apply the scaling.
 * This is used only once at flow creation.
 */
static int
qfq_calc_index(struct qfq_class *cl, u_int32_t inv_w, u_int32_t maxlen)
{
    u_int64_t slot_size = (u_int64_t)maxlen * inv_w;
    pktsched_bitmap_t size_map;
    int index = 0;

    size_map = (pktsched_bitmap_t)(slot_size >> QFQ_MIN_SLOT_SHIFT);
    if (!size_map) {
        goto out;
    }

    index = __fls(size_map) + 1;    /* basically a log_2() */
    index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));

    if (index < 0) {
        index = 0;
    }
out:
    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s qid=%d grp=%d W=%u, L=%u, I=%d\n",
            if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
            cl->cl_handle, index, (u_int32_t)(QFQ_ONE_FP / inv_w),
            maxlen, index);
    }
    return (index);
}
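
/*
 * Illustrative note (added): since slot_size is maxlen * inv_w and
 * inv_w == QFQ_ONE_FP / weight, doubling a class weight at the same maxlen
 * halves slot_size, so the computed group index drops by roughly one;
 * heavier classes therefore land in groups with finer slot granularity.
 */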
#if QFQ_DEBUG
static void
qfq_dump_groups(struct qfq_if *qif, u_int32_t mask)
{
    int i, j;

    for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
        struct qfq_group *g = qif->qif_groups[i];

        if (0 == (mask & (1 << i))) {
            continue;
        }
        if (g == NULL) {
            continue;
        }

        log(LOG_DEBUG, "%s: %s [%2d] full_slots 0x%x\n",
            if_name(QFQIF_IFP(qif)), qfq_style(qif), i,
            g->qfg_full_slots);
        log(LOG_DEBUG, "%s: %s S 0x%20llx F 0x%llx %c\n",
            if_name(QFQIF_IFP(qif)), qfq_style(qif),
            g->qfg_S, g->qfg_F, mask & (1 << i) ? '1' : '0');

        for (j = 0; j < qif->qif_maxslots; j++) {
            if (g->qfg_slots[j]) {
                log(LOG_DEBUG, "%s: %s bucket %d 0x%llx "
                    "qid %d\n", if_name(QFQIF_IFP(qif)),
                    qfq_style(qif), j,
                    (uint64_t)VM_KERNEL_ADDRPERM(
                    g->qfg_slots[j]),
                    g->qfg_slots[j]->cl_handle);
            }
        }
    }
}
static void
qfq_dump_sched(struct qfq_if *qif, const char *msg)
{
    log(LOG_DEBUG, "%s: %s --- in %s: ---\n",
        if_name(QFQIF_IFP(qif)), qfq_style(qif), msg);
    log(LOG_DEBUG, "%s: %s emptygrp %d queued %d V 0x%llx\n",
        if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_emptygrp,
        qif->qif_queued, qif->qif_V);
    log(LOG_DEBUG, "%s: %s ER 0x%08x\n",
        if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[ER]);
    log(LOG_DEBUG, "%s: %s EB 0x%08x\n",
        if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[EB]);
    log(LOG_DEBUG, "%s: %s IR 0x%08x\n",
        if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IR]);
    log(LOG_DEBUG, "%s: %s IB 0x%08x\n",
        if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IB]);
    qfq_dump_groups(qif, 0xffffffff);
}
#endif /* QFQ_DEBUG */
/*
 * qfq_enqueue_ifclassq is an enqueue function to be registered to
 * (*ifcq_enqueue) in struct ifclassq.
 */
static int
qfq_enqueue_ifclassq(struct ifclassq *ifq, classq_pkt_t *p, boolean_t *pdrop)
{
    u_int32_t i = 0;
    int ret;
    pktsched_pkt_t pkt;
    struct pf_mtag *t = NULL;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    switch (p->cp_ptype) {
    case QP_MBUF: {
        struct mbuf *m = p->cp_mbuf;
        if (!(m->m_flags & M_PKTHDR)) {
            /* should not happen */
            log(LOG_ERR, "%s: packet does not have pkthdr\n",
                if_name(ifq->ifcq_ifp));
            IFCQ_CONVERT_LOCK(ifq);
            m_freem(m);
            *p = CLASSQ_PKT_INITIALIZER(*p);
            *pdrop = TRUE;
            return (ENOBUFS);
        }
        t = m_pftag(m);
        i = MBUF_SCIDX(mbuf_get_service_class(m));
        break;
    }

    default:
        VERIFY(0);
        __builtin_unreachable();
        /* NOTREACHED */
    }

    VERIFY((u_int32_t)i < IFCQ_SC_MAX);

    pktsched_pkt_encap(&pkt, p);

    ret = qfq_enqueue(ifq->ifcq_disc,
        ifq->ifcq_disc_slots[i].cl, &pkt, t);

    if ((ret != 0) && (ret != CLASSQEQ_SUCCESS_FC)) {
        pktsched_free_pkt(&pkt);
        *pdrop = TRUE;
    } else {
        *pdrop = FALSE;
    }

    switch (ret) {
    case CLASSQEQ_DROP:
        ret = ENOBUFS;
        break;
    case CLASSQEQ_DROP_FC:
        ret = EQFULL;
        break;
    case CLASSQEQ_DROP_SP:
        ret = EQSUSPENDED;
        break;
    case CLASSQEQ_SUCCESS_FC:
        ret = EQFULL;
        break;
    case CLASSQEQ_SUCCESS:
        ret = 0;
        break;
    default:
        VERIFY(0);
        /* NOTREACHED */
    }
    return (ret);
}
/*
 * qfq_dequeue_ifclassq is a dequeue function to be registered to
 * (*ifcq_dequeue) in struct ifclassq.
 *
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
static void
qfq_dequeue_ifclassq(struct ifclassq *ifq, classq_pkt_t *cpkt)
{
    pktsched_pkt_t pkt;

    _PKTSCHED_PKT_INIT(&pkt);
    qfq_dequeue(ifq->ifcq_disc, &pkt);
    *cpkt = pkt.pktsched_pkt;
}
static int
qfq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
{
    struct qfq_if *qif = (struct qfq_if *)ifq->ifcq_disc;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    switch (req) {
    case CLASSQRQ_PURGE:
        qfq_purge(qif);
        break;

    case CLASSQRQ_PURGE_SC:
        qfq_purge_sc(qif, (cqrq_purge_sc_t *)arg);
        break;

    case CLASSQRQ_EVENT:
        qfq_event(qif, (cqev_t)arg);
        break;

    case CLASSQRQ_THROTTLE:
        err = qfq_throttle(qif, (cqrq_throttle_t *)arg);
        break;

    case CLASSQRQ_STAT_SC:
        err = qfq_stat_sc(qif, (cqrq_stat_sc_t *)arg);
        break;
    }
    return (err);
}
int
qfq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
    struct ifnet *ifp = ifq->ifcq_ifp;
    struct qfq_class *cl0, *cl1, *cl2, *cl3, *cl4;
    struct qfq_class *cl5, *cl6, *cl7, *cl8, *cl9;
    struct qfq_if *qif;
    u_int32_t maxlen = 0, qflags = 0;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(ifq->ifcq_disc == NULL);
    VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

    if (flags & PKTSCHEDF_QALG_SFB) {
        qflags |= QFCF_SFB;
    }
    if (flags & PKTSCHEDF_QALG_ECN) {
        qflags |= QFCF_ECN;
    }
    if (flags & PKTSCHEDF_QALG_FLOWCTL) {
        qflags |= QFCF_FLOWCTL;
    }
    if (flags & PKTSCHEDF_QALG_DELAYBASED) {
        qflags |= QFCF_DELAYBASED;
    }

    qif = qfq_alloc(ifp, M_WAITOK);
    if (qif == NULL) {
        return (ENOMEM);
    }

    if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
        maxlen = if_sndq_maxlen;
    }

    if ((err = qfq_add_queue(qif, maxlen, 300, 1200,
        qflags | QFCF_LAZY, SCIDX_BK_SYS, &cl0, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 600, 1400,
        qflags | QFCF_LAZY, SCIDX_BK, &cl1, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 2400, 600,
        qflags | QFCF_DEFAULTCLASS, SCIDX_BE, &cl2, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 2700, 600,
        qflags | QFCF_LAZY, SCIDX_RD, &cl3, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 3000, 400,
        qflags | QFCF_LAZY, SCIDX_OAM, &cl4, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 8000, 1000,
        qflags | QFCF_LAZY, SCIDX_AV, &cl5, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 15000, 1200,
        qflags | QFCF_LAZY, SCIDX_RV, &cl6, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 20000, 1400,
        qflags | QFCF_LAZY, SCIDX_VI, &cl7, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 23000, 200,
        qflags | QFCF_LAZY, SCIDX_VO, &cl8, ptype)) != 0) {
        goto cleanup;
    }

    if ((err = qfq_add_queue(qif, maxlen, 25000, 200,
        qflags, SCIDX_CTL, &cl9, ptype)) != 0) {
        goto cleanup;
    }

    err = ifclassq_attach(ifq, PKTSCHEDT_QFQ, qif,
        qfq_enqueue_ifclassq, qfq_dequeue_ifclassq, NULL,
        NULL, NULL, qfq_request_ifclassq);

    /* cache these for faster lookup */
    if (err == 0) {
        ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
        ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;

        ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
        ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;

        ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
        ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;

        ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
        ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;

        ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
        ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;

        ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
        ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;

        ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
        ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;

        ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
        ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;

        ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
        ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;

        ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
        ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
    }

cleanup:
    if (err != 0) {
        (void) qfq_destroy_locked(qif);
    }

    return (err);
}
int
qfq_teardown_ifclassq(struct ifclassq *ifq)
{
    struct qfq_if *qif = ifq->ifcq_disc;
    int i;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(qif != NULL && ifq->ifcq_type == PKTSCHEDT_QFQ);

    (void) qfq_destroy_locked(qif);

    ifq->ifcq_disc = NULL;
    for (i = 0; i < IFCQ_SC_MAX; i++) {
        ifq->ifcq_disc_slots[i].qid = 0;
        ifq->ifcq_disc_slots[i].cl = NULL;
    }

    return (ifclassq_detach(ifq));
}
int
qfq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
    struct if_ifclassq_stats *ifqs)
{
    struct qfq_if *qif = ifq->ifcq_disc;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(ifq->ifcq_type == PKTSCHEDT_QFQ);

    if (slot >= IFCQ_SC_MAX) {
        return (EINVAL);
    }

    return (qfq_get_class_stats(qif, ifq->ifcq_disc_slots[slot].qid,
        &ifqs->ifqs_qfq_stats));
}
static int
qfq_throttle(struct qfq_if *qif, cqrq_throttle_t *tr)
{
    struct ifclassq *ifq = qif->qif_ifq;
    struct qfq_class *cl;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (!tr->set) {
        tr->level = qif->qif_throttle;
        return (0);
    }

    if (tr->level == qif->qif_throttle) {
        return (EALREADY);
    }

    /* Current throttling levels only involve BK_SYS class */
    cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;

    switch (tr->level) {
    case IFNET_THROTTLE_OFF:
        err = qfq_resumeq(qif, cl);
        break;

    case IFNET_THROTTLE_OPPORTUNISTIC:
        err = qfq_suspendq(qif, cl);
        break;
    }

    if (err == 0 || err == ENXIO) {
        if (pktsched_verbose) {
            log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
                if_name(QFQIF_IFP(qif)), qfq_style(qif),
                (err == 0) ? "" : "lazy ", qif->qif_throttle,
                tr->level);
        }
        qif->qif_throttle = tr->level;
        if (err != 0) {
            err = 0;
        } else {
            qfq_purgeq(qif, cl, 0, NULL, NULL);
        }
    } else {
        log(LOG_ERR, "%s: %s unable to set throttling level "
            "%d->%d [error=%d]\n", if_name(QFQIF_IFP(qif)),
            qfq_style(qif), qif->qif_throttle, tr->level, err);
    }

    return (err);
}
static int
qfq_resumeq(struct qfq_if *qif, struct qfq_class *cl)
{
    struct ifclassq *ifq = qif->qif_ifq;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
        err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);
    }

    if (err == 0) {
        qstate(&cl->cl_q) = QS_RUNNING;
    }

    return (err);
}
static int
qfq_suspendq(struct qfq_if *qif, struct qfq_class *cl)
{
    struct ifclassq *ifq = qif->qif_ifq;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (q_is_sfb(&cl->cl_q)) {
        if (cl->cl_sfb != NULL) {
            err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
        } else {
            VERIFY(cl->cl_flags & QFCF_LAZY);
            err = ENXIO;    /* delayed throttling */
        }
    }

    if (err == 0 || err == ENXIO) {
        qstate(&cl->cl_q) = QS_SUSPENDED;
    }

    return (err);
}