/*
 * Copyright (c) 2011-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Quick Fair Queueing is described in
 * "QFQ: Efficient Packet Scheduling with Tight Bandwidth Distribution
 * Guarantees" by Fabio Checconi, Paolo Valente, and Luigi Rizzo.
 *
 * This code is ported from the dummynet(4) QFQ implementation.
 * See also http://info.iet.unipi.it/~luigi/qfq/
 */
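
/*
 * Summary of the moving parts below (drawn from the code in this file):
 * each class owns one queue and is placed, at creation time, into the
 * group whose index approximates log2(maxlen / weight); see
 * qfq_calc_index().  All classes in group i share the same slot
 * granularity sigma_i = 2^qfg_slot_shift.  Every group is in exactly
 * one of four scheduler states -- ER (eligible, ready), IR (ineligible,
 * ready), EB (eligible, blocked) and IB (ineligible, blocked) -- kept
 * as per-state bitmaps indexed by group, and the global virtual time
 * qif_V advances on every dequeue.
 */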

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>

#include <net/pktsched/pktsched_qfq.h>
#include <netinet/in.h>

/*
 * function prototypes
 */
static int qfq_enqueue_ifclassq(struct ifclassq *, void *, classq_pkt_type_t,
    boolean_t *);
static void *qfq_dequeue_ifclassq(struct ifclassq *, classq_pkt_type_t *);
static int qfq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
static int qfq_clear_interface(struct qfq_if *);
static struct qfq_class *qfq_class_create(struct qfq_if *, u_int32_t,
    u_int32_t, u_int32_t, u_int32_t, u_int32_t, classq_pkt_type_t);
static int qfq_class_destroy(struct qfq_if *, struct qfq_class *);
static int qfq_destroy_locked(struct qfq_if *);
static inline int qfq_addq(struct qfq_class *, pktsched_pkt_t *,
    struct pf_mtag *);
static inline void qfq_getq(struct qfq_class *, pktsched_pkt_t *);
static void qfq_purgeq(struct qfq_if *, struct qfq_class *, u_int32_t,
    u_int32_t *, u_int32_t *);
static void qfq_purge_sc(struct qfq_if *, cqrq_purge_sc_t *);
static void qfq_updateq(struct qfq_if *, struct qfq_class *, cqev_t);
static int qfq_throttle(struct qfq_if *, cqrq_throttle_t *);
static int qfq_resumeq(struct qfq_if *, struct qfq_class *);
static int qfq_suspendq(struct qfq_if *, struct qfq_class *);
static int qfq_stat_sc(struct qfq_if *, cqrq_stat_sc_t *);
static inline struct qfq_class *qfq_clh_to_clp(struct qfq_if *, u_int32_t);
static const char *qfq_style(struct qfq_if *);
static inline int qfq_gt(u_int64_t, u_int64_t);
static inline u_int64_t qfq_round_down(u_int64_t, u_int32_t);
static inline struct qfq_group *qfq_ffs(struct qfq_if *, pktsched_bitmap_t);
static int qfq_calc_index(struct qfq_class *, u_int32_t, u_int32_t);
static inline pktsched_bitmap_t mask_from(pktsched_bitmap_t, int);
static inline u_int32_t qfq_calc_state(struct qfq_if *, struct qfq_group *);
static inline void qfq_move_groups(struct qfq_if *, pktsched_bitmap_t,
    int, int);
static inline void qfq_unblock_groups(struct qfq_if *, int, u_int64_t);
static inline void qfq_make_eligible(struct qfq_if *, u_int64_t);
static inline void qfq_slot_insert(struct qfq_if *, struct qfq_group *,
    struct qfq_class *, u_int64_t);
static inline void qfq_front_slot_remove(struct qfq_group *);
static inline struct qfq_class *qfq_slot_scan(struct qfq_if *,
    struct qfq_group *);
static inline void qfq_slot_rotate(struct qfq_if *, struct qfq_group *,
    u_int64_t);
static inline void qfq_update_eligible(struct qfq_if *, u_int64_t);
static inline int qfq_update_class(struct qfq_if *, struct qfq_group *,
    struct qfq_class *);
static inline void qfq_update_start(struct qfq_if *, struct qfq_class *);
static inline void qfq_slot_remove(struct qfq_if *, struct qfq_group *,
    struct qfq_class *);
static void qfq_deactivate_class(struct qfq_if *, struct qfq_class *);
static const char *qfq_state2str(int);
#if QFQ_DEBUG
static void qfq_dump_groups(struct qfq_if *, u_int32_t);
static void qfq_dump_sched(struct qfq_if *, const char *);
#endif /* QFQ_DEBUG */

#define QFQ_ZONE_MAX	32		/* maximum elements in zone */
#define QFQ_ZONE_NAME	"pktsched_qfq"	/* zone name */

static unsigned int qfq_size;		/* size of zone element */
static struct zone *qfq_zone;		/* zone for qfq */

#define QFQ_CL_ZONE_MAX		32			/* maximum elements in zone */
#define QFQ_CL_ZONE_NAME	"pktsched_qfq_cl"	/* zone name */

static unsigned int qfq_cl_size;	/* size of zone element */
static struct zone *qfq_cl_zone;	/* zone for qfq_class */

/*
 * Maximum number of consecutive slots occupied by backlogged classes
 * inside a group.  This is approx lmax/lmin + 5.  Used when ALTQ is
 * available.
 *
 * XXX check because it poses constraints on MAX_INDEX
 */
#define QFQ_MAX_SLOTS	32	/* default when ALTQ is available */

void
qfq_init(void)
{
	qfq_size = sizeof(struct qfq_if);
	qfq_zone = zinit(qfq_size, QFQ_ZONE_MAX * qfq_size,
	    0, QFQ_ZONE_NAME);
	if (qfq_zone == NULL) {
		panic("%s: failed allocating %s", __func__, QFQ_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(qfq_zone, Z_EXPAND, TRUE);
	zone_change(qfq_zone, Z_CALLERACCT, TRUE);

	qfq_cl_size = sizeof(struct qfq_class);
	qfq_cl_zone = zinit(qfq_cl_size, QFQ_CL_ZONE_MAX * qfq_cl_size,
	    0, QFQ_CL_ZONE_NAME);
	if (qfq_cl_zone == NULL) {
		panic("%s: failed allocating %s", __func__, QFQ_CL_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(qfq_cl_zone, Z_EXPAND, TRUE);
	zone_change(qfq_cl_zone, Z_CALLERACCT, TRUE);
}

struct qfq_if *
qfq_alloc(struct ifnet *ifp, int how)
{
	struct qfq_if *qif;

	qif = (how == M_WAITOK) ? zalloc(qfq_zone) : zalloc_noblock(qfq_zone);
	if (qif == NULL) {
		return NULL;
	}

	bzero(qif, qfq_size);
	qif->qif_ifq = &ifp->if_snd;

	qif->qif_maxclasses = IFCQ_SC_MAX;
	/*
	 * TODO: adi@apple.com
	 *
	 * Ideally I would like to have the following
	 * but QFQ needs further modifications.
	 *
	 *	qif->qif_maxslots = IFCQ_SC_MAX;
	 */
	qif->qif_maxslots = QFQ_MAX_SLOTS;

	if ((qif->qif_class_tbl = _MALLOC(sizeof(struct qfq_class *) *
	    qif->qif_maxclasses, M_DEVBUF, M_WAITOK | M_ZERO)) == NULL) {
		log(LOG_ERR, "%s: %s unable to allocate class table array\n",
		    if_name(ifp), qfq_style(qif));
		goto error;
	}

	if ((qif->qif_groups = _MALLOC(sizeof(struct qfq_group *) *
	    (QFQ_MAX_INDEX + 1), M_DEVBUF, M_WAITOK | M_ZERO)) == NULL) {
		log(LOG_ERR, "%s: %s unable to allocate group array\n",
		    if_name(ifp), qfq_style(qif));
		goto error;
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler allocated\n",
		    if_name(ifp), qfq_style(qif));
	}

	return qif;

error:
	if (qif->qif_class_tbl != NULL) {
		_FREE(qif->qif_class_tbl, M_DEVBUF);
		qif->qif_class_tbl = NULL;
	}
	if (qif->qif_groups != NULL) {
		_FREE(qif->qif_groups, M_DEVBUF);
		qif->qif_groups = NULL;
	}
	zfree(qfq_zone, qif);

	return NULL;
}

int
qfq_destroy(struct qfq_if *qif)
{
	struct ifclassq *ifq = qif->qif_ifq;
	int err;

	IFCQ_LOCK(ifq);
	err = qfq_destroy_locked(qif);
	IFCQ_UNLOCK(ifq);

	return err;
}

static int
qfq_destroy_locked(struct qfq_if *qif)
{
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	(void) qfq_clear_interface(qif);

	VERIFY(qif->qif_class_tbl != NULL);
	_FREE(qif->qif_class_tbl, M_DEVBUF);
	qif->qif_class_tbl = NULL;

	VERIFY(qif->qif_groups != NULL);
	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		struct qfq_group *grp = qif->qif_groups[i];

		if (grp != NULL) {
			VERIFY(grp->qfg_slots != NULL);
			_FREE(grp->qfg_slots, M_DEVBUF);
			grp->qfg_slots = NULL;
			_FREE(grp, M_DEVBUF);
			qif->qif_groups[i] = NULL;
		}
	}
	_FREE(qif->qif_groups, M_DEVBUF);
	qif->qif_groups = NULL;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif));
	}

	zfree(qfq_zone, qif);

	return 0;
}

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
qfq_clear_interface(struct qfq_if *qif)
{
	struct qfq_class *cl;
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	/* clear out the classes */
	for (i = 0; i < qif->qif_maxclasses; i++) {
		if ((cl = qif->qif_class_tbl[i]) != NULL) {
			qfq_class_destroy(qif, cl);
		}
	}

	return 0;
}

/* discard all the queued packets on the interface */
void
qfq_purge(struct qfq_if *qif)
{
	struct qfq_class *cl;
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	for (i = 0; i < qif->qif_maxclasses; i++) {
		if ((cl = qif->qif_class_tbl[i]) != NULL) {
			qfq_purgeq(qif, cl, 0, NULL, NULL);
		}
	}
	VERIFY(IFCQ_LEN(qif->qif_ifq) == 0);
}

static void
qfq_purge_sc(struct qfq_if *qif, cqrq_purge_sc_t *pr)
{
	struct ifclassq *ifq = qif->qif_ifq;
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
	VERIFY(pr->flow != 0);

	if (pr->sc != MBUF_SC_UNSPEC) {
		i = MBUF_SCIDX(pr->sc);
		VERIFY(i < IFCQ_SC_MAX);

		qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
		    pr->flow, &pr->packets, &pr->bytes);
	} else {
		u_int32_t cnt, len;

		pr->packets = 0;
		pr->bytes = 0;

		for (i = 0; i < IFCQ_SC_MAX; i++) {
			qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
			    pr->flow, &cnt, &len);
			pr->packets += cnt;
			pr->bytes += len;
		}
	}
}

void
qfq_event(struct qfq_if *qif, cqev_t ev)
{
	struct qfq_class *cl;
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	for (i = 0; i < qif->qif_maxclasses; i++) {
		if ((cl = qif->qif_class_tbl[i]) != NULL) {
			qfq_updateq(qif, cl, ev);
		}
	}
}

int
qfq_add_queue(struct qfq_if *qif, u_int32_t qlimit, u_int32_t weight,
    u_int32_t maxsz, u_int32_t flags, u_int32_t qid, struct qfq_class **clp,
    classq_pkt_type_t ptype)
{
	struct qfq_class *cl;
	u_int32_t w;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if (qfq_clh_to_clp(qif, qid) != NULL) {
		return EBUSY;
	}

	/* check parameters */
	if (weight == 0 || weight > QFQ_MAX_WEIGHT) {
		return EINVAL;
	}

	w = (QFQ_ONE_FP / (QFQ_ONE_FP / weight));
	if (qif->qif_wsum + w > QFQ_MAX_WSUM) {
		return EINVAL;
	}

	if (maxsz == 0 || maxsz > (1 << QFQ_MTU_SHIFT)) {
		return EINVAL;
	}

	cl = qfq_class_create(qif, weight, qlimit, flags, maxsz, qid, ptype);
	if (cl == NULL) {
		return ENOMEM;
	}

	if (clp != NULL) {
		*clp = cl;
	}

	return 0;
}

static struct qfq_class *
qfq_class_create(struct qfq_if *qif, u_int32_t weight, u_int32_t qlimit,
    u_int32_t flags, u_int32_t maxsz, u_int32_t qid, classq_pkt_type_t ptype)
{
	struct ifnet *ifp;
	struct ifclassq *ifq;
	struct qfq_group *grp;
	struct qfq_class *cl;
	u_int32_t w;	/* approximated weight */
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if (qif->qif_classes >= qif->qif_maxclasses) {
		log(LOG_ERR, "%s: %s out of classes! (max %d)\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif),
		    qif->qif_maxclasses);
		return NULL;
	}

	ifq = qif->qif_ifq;
	ifp = QFQIF_IFP(qif);

	cl = zalloc(qfq_cl_zone);
	if (cl == NULL) {
		return NULL;
	}

	bzero(cl, qfq_cl_size);

	if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
		qlimit = IFCQ_MAXLEN(ifq);
		if (qlimit == 0) {
			qlimit = DEFAULT_QLIMIT;	/* use default */
		}
	}
	_qinit(&cl->cl_q, Q_DROPTAIL, qlimit, ptype);
	cl->cl_qif = qif;
	cl->cl_flags = flags;
	cl->cl_handle = qid;

	/*
	 * Find a free slot in the class table.  If the slot matching
	 * the lower bits of qid is free, use this slot.  Otherwise,
	 * use the first free slot.
	 */
	i = qid % qif->qif_maxclasses;
	if (qif->qif_class_tbl[i] == NULL) {
		qif->qif_class_tbl[i] = cl;
	} else {
		for (i = 0; i < qif->qif_maxclasses; i++) {
			if (qif->qif_class_tbl[i] == NULL) {
				qif->qif_class_tbl[i] = cl;
				break;
			}
		}
		if (i == qif->qif_maxclasses) {
			zfree(qfq_cl_zone, cl);
			return NULL;
		}
	}

	w = weight;
	VERIFY(w > 0 && w <= QFQ_MAX_WEIGHT);
	cl->cl_lmax = maxsz;
	cl->cl_inv_w = (QFQ_ONE_FP / w);
	w = (QFQ_ONE_FP / cl->cl_inv_w);
	VERIFY(qif->qif_wsum + w <= QFQ_MAX_WSUM);

	i = qfq_calc_index(cl, cl->cl_inv_w, cl->cl_lmax);
	VERIFY(i <= QFQ_MAX_INDEX);
	grp = qif->qif_groups[i];
	if (grp == NULL) {
		grp = _MALLOC(sizeof(*grp), M_DEVBUF, M_WAITOK | M_ZERO);
		if (grp != NULL) {
			grp->qfg_index = i;
			grp->qfg_slot_shift =
			    QFQ_MTU_SHIFT + QFQ_FRAC_BITS - (QFQ_MAX_INDEX - i);
			grp->qfg_slots = _MALLOC(sizeof(struct qfq_class *) *
			    qif->qif_maxslots, M_DEVBUF, M_WAITOK | M_ZERO);
			if (grp->qfg_slots == NULL) {
				log(LOG_ERR, "%s: %s unable to allocate group "
				    "slots for index %d\n", if_name(ifp),
				    qfq_style(qif), i);
			}
		} else {
			log(LOG_ERR, "%s: %s unable to allocate group for "
			    "qid=%d\n", if_name(ifp), qfq_style(qif),
			    cl->cl_handle);
		}
		if (grp == NULL || grp->qfg_slots == NULL) {
			qif->qif_class_tbl[qid % qif->qif_maxclasses] = NULL;
			if (grp != NULL) {
				_FREE(grp, M_DEVBUF);
			}
			zfree(qfq_cl_zone, cl);
			return NULL;
		} else {
			qif->qif_groups[i] = grp;
		}
	}
	cl->cl_grp = grp;
	qif->qif_wsum += w;
	/* XXX cl->cl_S = qif->qif_V; ? */
	/* XXX compute qif->qif_i_wsum */

	qif->qif_classes++;

	if (flags & QFCF_DEFAULTCLASS) {
		qif->qif_default = cl;
	}

	if (flags & QFCF_SFB) {
		cl->cl_qflags = 0;
		if (flags & QFCF_ECN) {
			cl->cl_qflags |= SFBF_ECN;
		}
		if (flags & QFCF_FLOWCTL) {
			cl->cl_qflags |= SFBF_FLOWCTL;
		}
		if (flags & QFCF_DELAYBASED) {
			cl->cl_qflags |= SFBF_DELAYBASED;
		}
		if (!(cl->cl_flags & QFCF_LAZY)) {
			cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
			    qlimit(&cl->cl_q), cl->cl_qflags);
		}
		if (cl->cl_sfb != NULL || (cl->cl_flags & QFCF_LAZY)) {
			qtype(&cl->cl_q) = Q_SFB;
		}
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s created qid=%d grp=%d weight=%d "
		    "qlimit=%d flags=%b\n", if_name(ifp), qfq_style(qif),
		    cl->cl_handle, cl->cl_grp->qfg_index, weight, qlimit,
		    flags, QFCF_BITS);
	}

	return cl;
}

int
qfq_remove_queue(struct qfq_if *qif, u_int32_t qid)
{
	struct qfq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if ((cl = qfq_clh_to_clp(qif, qid)) == NULL) {
		return EINVAL;
	}

	return qfq_class_destroy(qif, cl);
}

static int
qfq_class_destroy(struct qfq_if *qif, struct qfq_class *cl)
{
	struct ifclassq *ifq = qif->qif_ifq;
	int i;
#if !MACH_ASSERT
#pragma unused(ifq)
#endif

	IFCQ_LOCK_ASSERT_HELD(ifq);

	qfq_purgeq(qif, cl, 0, NULL, NULL);

	if (cl->cl_inv_w != 0) {
		qif->qif_wsum -= (QFQ_ONE_FP / cl->cl_inv_w);
		cl->cl_inv_w = 0;	/* reset weight to avoid run twice */
	}

	for (i = 0; i < qif->qif_maxclasses; i++) {
		if (qif->qif_class_tbl[i] == cl) {
			qif->qif_class_tbl[i] = NULL;
			break;
		}
	}
	qif->qif_classes--;

	if (cl->cl_qalg.ptr != NULL) {
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
			sfb_destroy(cl->cl_sfb);
		}
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	}

	if (qif->qif_default == cl) {
		qif->qif_default = NULL;
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s destroyed qid=%d\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle);
	}

	zfree(qfq_cl_zone, cl);

	return 0;
}

/*
 * Calculate a mask to mimic what would be ffs_from()
 */
static inline pktsched_bitmap_t
mask_from(pktsched_bitmap_t bitmap, int from)
{
	return bitmap & ~((1UL << from) - 1);
}
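
/*
 * Worked example (illustrative values): with bitmap = 0b101100 and
 * from = 3, (1UL << 3) - 1 = 0b000111, so mask_from() returns
 * 0b101000 -- only the bits for indices >= 3 survive, which is what a
 * real ffs_from() would go on to scan.
 */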

/*
 * The state computation relies on ER=0, IR=1, EB=2, IB=3
 * First compute eligibility comparing grp->qfg_S, qif->qif_V,
 * then check if someone is blocking us and possibly add EB
 */
static inline u_int32_t
qfq_calc_state(struct qfq_if *qif, struct qfq_group *grp)
{
	/* if S > V we are not eligible */
	u_int32_t state = qfq_gt(grp->qfg_S, qif->qif_V);
	pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER],
	    grp->qfg_index);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(qif, mask);
		if (qfq_gt(grp->qfg_F, next->qfg_F)) {
			state |= EB;
		}
	}

	return state;
}
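
/*
 * Worked example (illustrative): if grp->qfg_S > qif->qif_V, the group
 * is not eligible and state starts out as 1 (IR).  If some group with
 * index >= ours is already in ER and finishes earlier than we do
 * (qfq_gt(grp->qfg_F, next->qfg_F)), EB (2) is OR-ed in and the result
 * is 3 (IB); with no such blocker the state stays IR.
 */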

/*
 * In principle
 *	qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
 *	qif->qif_bitmaps[src] &= ~mask;
 * but we should make sure that src != dst
 */
static inline void
qfq_move_groups(struct qfq_if *qif, pktsched_bitmap_t mask, int src, int dst)
{
	qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
	qif->qif_bitmaps[src] &= ~mask;
}

static inline void
qfq_unblock_groups(struct qfq_if *qif, int index, u_int64_t old_finish)
{
	pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER], index + 1);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(qif, mask);
		if (!qfq_gt(next->qfg_F, old_finish)) {
			return;
		}
	}

	mask = (1UL << index) - 1;
	qfq_move_groups(qif, mask, EB, ER);
	qfq_move_groups(qif, mask, IB, IR);
}

/*
 * perhaps
 *
 *	old_V ^= qif->qif_V;
 *	old_V >>= QFQ_MIN_SLOT_SHIFT;
 *	if (old_V) {
 *		...
 *	}
 */
static inline void
qfq_make_eligible(struct qfq_if *qif, u_int64_t old_V)
{
	pktsched_bitmap_t mask, vslot, old_vslot;

	vslot = qif->qif_V >> QFQ_MIN_SLOT_SHIFT;
	old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;

	if (vslot != old_vslot) {
		mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
		qfq_move_groups(qif, mask, IR, ER);
		qfq_move_groups(qif, mask, IB, EB);
	}
}
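
/*
 * Worked example (illustrative values): if old_vslot == 0b0101 and the
 * new vslot == 0b0110, then vslot ^ old_vslot == 0b0011, __fls() of
 * that is 1, and mask = (2UL << 1) - 1 = 0b0011; groups 0 and 1 -- the
 * ones whose slot boundaries were crossed by the advance of qif_V --
 * move from the ineligible bitmaps to the eligible ones.
 */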

/*
 * XXX we should make sure that slot becomes less than 32.
 * This is guaranteed by the input values.
 * roundedS is always cl->cl_S rounded on grp->qfg_slot_shift bits.
 */
static inline void
qfq_slot_insert(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl, u_int64_t roundedS)
{
	u_int64_t slot = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
	u_int32_t i = (grp->qfg_front + slot) % qif->qif_maxslots;

	cl->cl_next = grp->qfg_slots[i];
	grp->qfg_slots[i] = cl;
	pktsched_bit_set(slot, &grp->qfg_full_slots);
}
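
/*
 * Worked example (illustrative values): with grp->qfg_S = 0x400,
 * roundedS = 0xc00 and qfg_slot_shift = 10, slot = (0xc00 - 0x400) >>
 * 10 = 2; with qfg_front = 5 and qif_maxslots = 32 the class is linked
 * at the head of bucket (5 + 2) % 32 = 7 and bit 2 of qfg_full_slots
 * is set.
 */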

/*
 * remove the entry from the slot
 */
static inline void
qfq_front_slot_remove(struct qfq_group *grp)
{
	struct qfq_class **h = &grp->qfg_slots[grp->qfg_front];

	*h = (*h)->cl_next;
	if (!*h) {
		pktsched_bit_clr(0, &grp->qfg_full_slots);
	}
}

/*
 * Returns the first full queue in a group.  As a side effect,
 * adjust the bucket list so the first non-empty bucket is at
 * position 0 in qfg_full_slots.
 */
static inline struct qfq_class *
qfq_slot_scan(struct qfq_if *qif, struct qfq_group *grp)
{
	int i;

	if (pktsched_verbose > 2) {
		log(LOG_DEBUG, "%s: %s grp=%d full_slots=0x%x\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif), grp->qfg_index,
		    grp->qfg_full_slots);
	}

	if (grp->qfg_full_slots == 0) {
		return NULL;
	}

	i = pktsched_ffs(grp->qfg_full_slots) - 1; /* zero-based */
	if (i > 0) {
		grp->qfg_front = (grp->qfg_front + i) % qif->qif_maxslots;
		grp->qfg_full_slots >>= i;
	}

	return grp->qfg_slots[grp->qfg_front];
}

/*
 * adjust the bucket list.  When the start time of a group decreases,
 * we move the index down (modulo qif->qif_maxslots) so we don't need to
 * move the objects.  The mask of occupied slots must be shifted
 * because we use ffs() to find the first non-empty slot.
 * This covers decreases in the group's start time, but what about
 * increases of the start time?
 * Here too we should make sure that i is less than 32.
 */
static inline void
qfq_slot_rotate(struct qfq_if *qif, struct qfq_group *grp, u_int64_t roundedS)
{
#pragma unused(qif)
	u_int32_t i = (grp->qfg_S - roundedS) >> grp->qfg_slot_shift;

	grp->qfg_full_slots <<= i;
	grp->qfg_front = (grp->qfg_front - i) % qif->qif_maxslots;
}

static inline void
qfq_update_eligible(struct qfq_if *qif, u_int64_t old_V)
{
	pktsched_bitmap_t ineligible;

	ineligible = qif->qif_bitmaps[IR] | qif->qif_bitmaps[IB];
	if (ineligible) {
		if (!qif->qif_bitmaps[ER]) {
			struct qfq_group *grp;
			grp = qfq_ffs(qif, ineligible);
			if (qfq_gt(grp->qfg_S, qif->qif_V)) {
				qif->qif_V = grp->qfg_S;
			}
		}
		qfq_make_eligible(qif, old_V);
	}
}

/*
 * Updates the class, returns true if also the group needs to be updated.
 */
static inline int
qfq_update_class(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl)
{
#pragma unused(qif)
	cl->cl_S = cl->cl_F;
	if (qempty(&cl->cl_q)) {
		qfq_front_slot_remove(grp);
	} else {
		u_int32_t len;
		u_int64_t roundedS;

		len = m_pktlen((struct mbuf *)qhead(&cl->cl_q));
		cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
		roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
		if (roundedS == grp->qfg_S) {
			return 0;
		}

		qfq_front_slot_remove(grp);
		qfq_slot_insert(qif, grp, cl, roundedS);
	}
	return 1;
}

/*
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
void
qfq_dequeue(struct qfq_if *qif, pktsched_pkt_t *pkt)
{
	pktsched_bitmap_t er_bits = qif->qif_bitmaps[ER];
	struct ifclassq *ifq = qif->qif_ifq;
	struct qfq_group *grp;
	struct qfq_class *cl;
	u_int64_t old_V;
	u_int32_t len;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	pkt->pktsched_pkt = NULL;

	for (;;) {
		if (er_bits == 0) {
#if QFQ_DEBUG
			if (qif->qif_queued && pktsched_verbose > 1) {
				qfq_dump_sched(qif, "start dequeue");
			}
#endif /* QFQ_DEBUG */
			/* no eligible and ready packet */
			return;
		}
		grp = qfq_ffs(qif, er_bits);
		/* if group is non-empty, use it */
		if (grp->qfg_full_slots != 0) {
			break;
		}
		pktsched_bit_clr(grp->qfg_index, &er_bits);
#if QFQ_DEBUG
		qif->qif_emptygrp++;
#endif /* QFQ_DEBUG */
	}
	VERIFY(!IFCQ_IS_EMPTY(ifq));

	cl = grp->qfg_slots[grp->qfg_front];
	VERIFY(cl != NULL && !qempty(&cl->cl_q));

	qfq_getq(cl, pkt);
	VERIFY(pkt->pktsched_pkt != NULL); /* qalg must be work conserving */
	len = pktsched_get_pkt_len(pkt);

#if QFQ_DEBUG
	qif->qif_queued--;
#endif /* QFQ_DEBUG */

	IFCQ_DEC_LEN(ifq);
	IFCQ_DEC_BYTES(ifq, len);
	if (qempty(&cl->cl_q)) {
		cl->cl_period++;
	}
	PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
	IFCQ_XMIT_ADD(ifq, 1, len);

	old_V = qif->qif_V;
	qif->qif_V += (u_int64_t)len * QFQ_IWSUM;

	if (pktsched_verbose > 2) {
		log(LOG_DEBUG, "%s: %s qid=%d dequeue pkt=0x%llx F=0x%llx "
		    "V=0x%llx", if_name(QFQIF_IFP(qif)), qfq_style(qif),
		    cl->cl_handle,
		    (uint64_t)VM_KERNEL_ADDRPERM(pkt->pktsched_pkt), cl->cl_F,
		    qif->qif_V);
	}

	if (qfq_update_class(qif, grp, cl)) {
		u_int64_t old_F = grp->qfg_F;

		cl = qfq_slot_scan(qif, grp);
		if (!cl) { /* group gone, remove from ER */
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
		} else {
			u_int32_t s;
			u_int64_t roundedS =
			    qfq_round_down(cl->cl_S, grp->qfg_slot_shift);

			if (grp->qfg_S == roundedS) {
				goto skip_unblock;
			}

			grp->qfg_S = roundedS;
			grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);

			/* remove from ER and put in the new set */
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
			s = qfq_calc_state(qif, grp);
			pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
		}
		/* we need to unblock even if the group has gone away */
		qfq_unblock_groups(qif, grp->qfg_index, old_F);
	}

skip_unblock:
	qfq_update_eligible(qif, old_V);

#if QFQ_DEBUG
	if (!qif->qif_bitmaps[ER] && qif->qif_queued && pktsched_verbose > 1) {
		qfq_dump_sched(qif, "end dequeue");
	}
#endif /* QFQ_DEBUG */
}

/*
 * Assign a reasonable start time for a new flow k in group i.
 * Admissible values for hat(F) are multiples of sigma_i
 * no greater than V+sigma_i.  Larger values mean that
 * we had a wraparound so we consider the timestamp to be stale.
 *
 * If F is not stale and F >= V then we set S = F.
 * Otherwise we should assign S = V, but this may violate
 * the ordering in ER.  So, if we have groups in ER, set S to
 * the F_j of the first group j which would be blocking us.
 * We are guaranteed not to move S backward because
 * otherwise our group i would still be blocked.
 */
static inline void
qfq_update_start(struct qfq_if *qif, struct qfq_class *cl)
{
	pktsched_bitmap_t mask;
	u_int64_t limit, roundedF;
	int slot_shift = cl->cl_grp->qfg_slot_shift;

	roundedF = qfq_round_down(cl->cl_F, slot_shift);
	limit = qfq_round_down(qif->qif_V, slot_shift) + (1UL << slot_shift);

	if (!qfq_gt(cl->cl_F, qif->qif_V) || qfq_gt(roundedF, limit)) {
		/* timestamp was stale */
		mask = mask_from(qif->qif_bitmaps[ER], cl->cl_grp->qfg_index);
		if (mask) {
			struct qfq_group *next = qfq_ffs(qif, mask);
			if (qfq_gt(roundedF, next->qfg_F)) {
				cl->cl_S = next->qfg_F;
				return;
			}
		}
		cl->cl_S = qif->qif_V;
	} else { /* timestamp is not stale */
		cl->cl_S = cl->cl_F;
	}
}

int
qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, pktsched_pkt_t *pkt,
    struct pf_mtag *t)
{
	struct ifclassq *ifq = qif->qif_ifq;
	struct qfq_group *grp;
	u_int64_t roundedS;
	int len, ret, s;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(cl == NULL || cl->cl_qif == qif);

	if (cl == NULL) {
		cl = qfq_clh_to_clp(qif, 0);
		if (cl == NULL) {
			cl = qif->qif_default;
			if (cl == NULL) {
				IFCQ_CONVERT_LOCK(ifq);
				return CLASSQEQ_DROP;
			}
		}
	}

	VERIFY(pkt->pktsched_ptype == qptype(&cl->cl_q));
	len = pktsched_get_pkt_len(pkt);

	ret = qfq_addq(cl, pkt, t);
	if ((ret != 0) && (ret != CLASSQEQ_SUCCESS_FC)) {
		VERIFY(ret == CLASSQEQ_DROP ||
		    ret == CLASSQEQ_DROP_FC ||
		    ret == CLASSQEQ_DROP_SP);
		PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
		IFCQ_DROP_ADD(ifq, 1, len);
		return ret;
	}
	IFCQ_INC_LEN(ifq);
	IFCQ_INC_BYTES(ifq, len);

#if QFQ_DEBUG
	qif->qif_queued++;
#endif /* QFQ_DEBUG */

	/* queue was not idle, we're done */
	if (qlen(&cl->cl_q) > 1) {
		goto done;
	}

	/* queue was idle */
	grp = cl->cl_grp;
	qfq_update_start(qif, cl);	/* adjust start time */

	/* compute new finish time and rounded start */
	cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
	roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);

	/*
	 * Insert cl in the correct bucket.
	 *
	 * If cl->cl_S >= grp->qfg_S we don't need to adjust the bucket list
	 * and simply go to the insertion phase.  Otherwise grp->qfg_S is
	 * decreasing, we must make room in the bucket list, and also
	 * recompute the group state.  Finally, if there were no flows
	 * in this group and nobody was in ER make sure to adjust V.
	 */
	if (grp->qfg_full_slots != 0) {
		if (!qfq_gt(grp->qfg_S, cl->cl_S)) {
			goto skip_update;
		}

		/* create a slot for this cl->cl_S */
		qfq_slot_rotate(qif, grp, roundedS);

		/* group was surely ineligible, remove */
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
	} else if (!qif->qif_bitmaps[ER] && qfq_gt(roundedS, qif->qif_V)) {
		qif->qif_V = roundedS;
	}

	grp->qfg_S = roundedS;
	grp->qfg_F =
	    roundedS + (2ULL << grp->qfg_slot_shift);	/* i.e. 2 sigma_i */
	s = qfq_calc_state(qif, grp);
	pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);

	if (pktsched_verbose > 2) {
		log(LOG_DEBUG, "%s: %s qid=%d enqueue m=0x%llx state=%s 0x%x "
		    "S=0x%llx F=0x%llx V=0x%llx\n", if_name(QFQIF_IFP(qif)),
		    qfq_style(qif), cl->cl_handle,
		    (uint64_t)VM_KERNEL_ADDRPERM(pkt->pktsched_pkt),
		    qfq_state2str(s),
		    qif->qif_bitmaps[s], cl->cl_S, cl->cl_F, qif->qif_V);
	}

skip_update:
	qfq_slot_insert(qif, grp, cl, roundedS);

done:
	/* successfully queued. */
	return ret;
}

static inline void
qfq_slot_remove(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl)
{
#pragma unused(qif)
	struct qfq_class **pprev;
	u_int32_t i, offset;
	u_int64_t roundedS;

	roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
	offset = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
	i = (grp->qfg_front + offset) % qif->qif_maxslots;

	pprev = &grp->qfg_slots[i];
	while (*pprev && *pprev != cl) {
		pprev = &(*pprev)->cl_next;
	}

	*pprev = cl->cl_next;
	if (!grp->qfg_slots[i]) {
		pktsched_bit_clr(offset, &grp->qfg_full_slots);
	}
}

/*
 * Called to forcibly destroy a queue.
 * If the queue is not in the front bucket, or if it has
 * other queues in the front bucket, we can simply remove
 * the queue with no other side effects.
 * Otherwise we must propagate the event up.
 * XXX description to be completed.
 */
static void
qfq_deactivate_class(struct qfq_if *qif, struct qfq_class *cl)
{
	struct qfq_group *grp = cl->cl_grp;
	pktsched_bitmap_t mask;
	u_int64_t roundedS;
	int s;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s deactivate qid=%d grp=%d "
		    "full_slots=0x%x front=%d bitmaps={ER=0x%x,EB=0x%x,"
		    "IR=0x%x,IB=0x%x}\n",
		    if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
		    cl->cl_handle, grp->qfg_index, grp->qfg_full_slots,
		    grp->qfg_front, qif->qif_bitmaps[ER], qif->qif_bitmaps[EB],
		    qif->qif_bitmaps[IR], qif->qif_bitmaps[IB]);
#if QFQ_DEBUG
		if (pktsched_verbose > 1) {
			qfq_dump_sched(qif, "start deactivate");
		}
#endif /* QFQ_DEBUG */
	}

	cl->cl_F = cl->cl_S;	/* not needed if the class goes away */
	qfq_slot_remove(qif, grp, cl);

	if (grp->qfg_full_slots == 0) {
		/*
		 * Nothing left in the group, remove from all sets.
		 * Do ER last because if we were blocking other groups
		 * we must unblock them.
		 */
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);

		if (pktsched_bit_tst(grp->qfg_index, &qif->qif_bitmaps[ER]) &&
		    !(qif->qif_bitmaps[ER] & ~((1UL << grp->qfg_index) - 1))) {
			mask = qif->qif_bitmaps[ER] &
			    ((1UL << grp->qfg_index) - 1);
			if (mask) {
				mask = ~((1UL << __fls(mask)) - 1);
			} else {
				mask = (pktsched_bitmap_t)~0UL;
			}
			qfq_move_groups(qif, mask, EB, ER);
			qfq_move_groups(qif, mask, IB, IR);
		}
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
	} else if (!grp->qfg_slots[grp->qfg_front]) {
		cl = qfq_slot_scan(qif, grp);
		roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
		if (grp->qfg_S != roundedS) {
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
			grp->qfg_S = roundedS;
			grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
			s = qfq_calc_state(qif, grp);
			pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
		}
	}
	qfq_update_eligible(qif, qif->qif_V);

#if QFQ_DEBUG
	if (pktsched_verbose > 1) {
		qfq_dump_sched(qif, "end deactivate");
	}
#endif /* QFQ_DEBUG */
}

static const char *
qfq_state2str(int s)
{
	const char *c;

	switch (s) {
	case ER:
		c = "ER";
		break;
	case IR:
		c = "IR";
		break;
	case EB:
		c = "EB";
		break;
	case IB:
		c = "IB";
		break;
	default:
		c = "?";
		break;
	}
	return c;
}

static inline int
qfq_addq(struct qfq_class *cl, pktsched_pkt_t *pkt, struct pf_mtag *t)
{
	struct qfq_if *qif = cl->cl_qif;
	struct ifclassq *ifq = qif->qif_ifq;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb == NULL) {
			struct ifnet *ifp = QFQIF_IFP(qif);

			VERIFY(cl->cl_flags & QFCF_LAZY);
			cl->cl_flags &= ~QFCF_LAZY;

			IFCQ_CONVERT_LOCK(ifq);
			cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
			    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb == NULL) {
				/* fall back to droptail */
				qtype(&cl->cl_q) = Q_DROPTAIL;
				cl->cl_flags &= ~QFCF_SFB;
				cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

				log(LOG_ERR, "%s: %s SFB lazy allocation "
				    "failed for qid=%d grp=%d, falling back "
				    "to DROPTAIL\n", if_name(ifp),
				    qfq_style(qif), cl->cl_handle,
				    cl->cl_grp->qfg_index);
			} else if (qif->qif_throttle != IFNET_THROTTLE_OFF) {
				/* if there's pending throttling, set it */
				cqrq_throttle_t tr = { 1, qif->qif_throttle };
				int err = qfq_throttle(qif, &tr);

				if (err == EALREADY) {
					err = 0;
				}
				if (err != 0) {
					tr.level = IFNET_THROTTLE_OFF;
					(void) qfq_throttle(qif, &tr);
				}
			}
		}
		if (cl->cl_sfb != NULL) {
			return sfb_addq(cl->cl_sfb, &cl->cl_q, pkt, t);
		}
	} else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
		IFCQ_CONVERT_LOCK(ifq);
		return CLASSQEQ_DROP;
	}

#if PF_ECN
	if (cl->cl_flags & QFCF_CLEARDSCP) {
		/* not supported for non-mbuf type packets */
		VERIFY(pkt->pktsched_ptype == QP_MBUF);
		write_dsfield(m, t, 0);
	}
#endif /* PF_ECN */

	VERIFY(pkt->pktsched_ptype == qptype(&cl->cl_q));
	_addq(&cl->cl_q, pkt->pktsched_pkt);

	return 0;
}

static inline void
qfq_getq(struct qfq_class *cl, pktsched_pkt_t *pkt)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);

	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
		return sfb_getq(cl->cl_sfb, &cl->cl_q, pkt);
	}

	return pktsched_pkt_encap(pkt, qptype(&cl->cl_q), _getq(&cl->cl_q));
}

static void
qfq_purgeq(struct qfq_if *qif, struct qfq_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
	struct ifclassq *ifq = qif->qif_ifq;
	u_int32_t cnt = 0, len = 0, qlen;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if ((qlen = qlen(&cl->cl_q)) == 0) {
		goto done;
	}

	IFCQ_CONVERT_LOCK(ifq);
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
		sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
	} else {
		_flushq_flow(&cl->cl_q, flow, &cnt, &len);
	}

	if (cnt > 0) {
		VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
#if QFQ_DEBUG
		VERIFY(qif->qif_queued >= cnt);
		qif->qif_queued -= cnt;
#endif /* QFQ_DEBUG */

		PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
		IFCQ_DROP_ADD(ifq, cnt, len);

		VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
		IFCQ_LEN(ifq) -= cnt;

		if (qempty(&cl->cl_q)) {
			qfq_deactivate_class(qif, cl);
		}

		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s purge qid=%d weight=%d "
			    "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
			    if_name(QFQIF_IFP(qif)),
			    qfq_style(qif), cl->cl_handle,
			    (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w), qlen,
			    qlen(&cl->cl_q), cnt, len, flow);
		}
	}
done:
	if (packets != NULL) {
		*packets = cnt;
	}
	if (bytes != NULL) {
		*bytes = len;
	}
}

static void
qfq_updateq(struct qfq_if *qif, struct qfq_class *cl, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s update qid=%d weight=%d event=%s\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif),
		    cl->cl_handle, (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w),
		    ifclassq_ev2str(ev));
	}

	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
		return sfb_updateq(cl->cl_sfb, ev);
	}
}

int
qfq_get_class_stats(struct qfq_if *qif, u_int32_t qid,
    struct qfq_classstats *sp)
{
	struct qfq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if ((cl = qfq_clh_to_clp(qif, qid)) == NULL) {
		return EINVAL;
	}

	sp->class_handle = cl->cl_handle;
	sp->index = cl->cl_grp->qfg_index;
	sp->weight = (QFQ_ONE_FP / cl->cl_inv_w);
	sp->lmax = cl->cl_lmax;
	sp->qlength = qlen(&cl->cl_q);
	sp->qlimit = qlimit(&cl->cl_q);
	sp->period = cl->cl_period;
	sp->xmitcnt = cl->cl_xmitcnt;
	sp->dropcnt = cl->cl_dropcnt;

	sp->qtype = qtype(&cl->cl_q);
	sp->qstate = qstate(&cl->cl_q);

	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
		sfb_getstats(cl->cl_sfb, &sp->sfb);
	}

	return 0;
}

static int
qfq_stat_sc(struct qfq_if *qif, cqrq_stat_sc_t *sr)
{
	struct ifclassq *ifq = qif->qif_ifq;
	struct qfq_class *cl;
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc));

	i = MBUF_SCIDX(sr->sc);
	VERIFY(i < IFCQ_SC_MAX);

	cl = ifq->ifcq_disc_slots[i].cl;
	sr->packets = qlen(&cl->cl_q);
	sr->bytes = qsize(&cl->cl_q);

	return 0;
}

/* convert a class handle to the corresponding class pointer */
static inline struct qfq_class *
qfq_clh_to_clp(struct qfq_if *qif, u_int32_t chandle)
{
	struct qfq_class *cl;
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	/*
	 * First, try optimistically the slot matching the lower bits of
	 * the handle.  If it fails, do the linear table search.
	 */
	i = chandle % qif->qif_maxclasses;
	if ((cl = qif->qif_class_tbl[i]) != NULL && cl->cl_handle == chandle) {
		return cl;
	}
	for (i = 0; i < qif->qif_maxclasses; i++) {
		if ((cl = qif->qif_class_tbl[i]) != NULL &&
		    cl->cl_handle == chandle) {
			return cl;
		}
	}

	return NULL;
}

static const char *
qfq_style(struct qfq_if *qif)
{
#pragma unused(qif)
	return "QFQ";
}

/*
 * Generic comparison function, handling wraparound
 */
static inline int
qfq_gt(u_int64_t a, u_int64_t b)
{
	return (int64_t)(a - b) > 0;
}
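
/*
 * Example (using 8-bit arithmetic for brevity): a = 0x02 and b = 0xfe
 * give a - b = 0x04, which is positive as a signed value, so a is
 * treated as "after" b even though it is numerically smaller; the
 * comparison therefore stays correct across timestamp wraparound.
 */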

/*
 * Round a precise timestamp to its slotted value
 */
static inline u_int64_t
qfq_round_down(u_int64_t ts, u_int32_t shift)
{
	return ts & ~((1ULL << shift) - 1);
}
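
/*
 * Example: qfq_round_down(0x1234, 8) masks off the low 8 bits and
 * yields 0x1200, the start of the 256-unit slot that contains the
 * timestamp.
 */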

/*
 * Return the pointer to the group with lowest index in the bitmap
 */
static inline struct qfq_group *
qfq_ffs(struct qfq_if *qif, pktsched_bitmap_t bitmap)
{
	int index = pktsched_ffs(bitmap) - 1; /* zero-based */
	VERIFY(index >= 0 && index <= QFQ_MAX_INDEX &&
	    qif->qif_groups[index] != NULL);
	return qif->qif_groups[index];
}

/*
 * Calculate a flow index, given its weight and maximum packet length.
 * index = log_2(maxlen/weight) but we need to apply the scaling.
 * This is used only once at flow creation.
 */
static int
qfq_calc_index(struct qfq_class *cl, u_int32_t inv_w, u_int32_t maxlen)
{
	u_int64_t slot_size = (u_int64_t)maxlen * inv_w;
	pktsched_bitmap_t size_map;
	int index = 0;

	size_map = (pktsched_bitmap_t)(slot_size >> QFQ_MIN_SLOT_SHIFT);
	if (!size_map) {
		goto out;
	}

	index = __fls(size_map) + 1;	/* basically a log_2() */
	index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));

	if (index < 0) {
		index = 0;
	}
out:
	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s qid=%d grp=%d W=%u, L=%u, I=%d\n",
		    if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
		    cl->cl_handle, index, (u_int32_t)(QFQ_ONE_FP / inv_w),
		    maxlen, index);
	}
	return index;
}
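
/*
 * Worked example (illustrative values; assumes QFQ_MIN_SLOT_SHIFT is 8
 * for the sake of the arithmetic): with maxlen = 1500 and inv_w = 4096,
 * slot_size = 1500 * 4096 = 6144000 and size_map = 6144000 >> 8 =
 * 24000.  The highest set bit of 24000 is bit 14, so index starts at
 * 15; since slot_size is not an exact power of two the correction term
 * subtracts nothing and the flow lands in group 15.
 */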

#if QFQ_DEBUG
static void
qfq_dump_groups(struct qfq_if *qif, u_int32_t mask)
{
	int i, j;

	for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
		struct qfq_group *g = qif->qif_groups[i];

		if (0 == (mask & (1 << i))) {
			continue;
		}
		if (g == NULL) {
			continue;
		}

		log(LOG_DEBUG, "%s: %s [%2d] full_slots 0x%x\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif), i,
		    g->qfg_full_slots);
		log(LOG_DEBUG, "%s: %s S 0x%20llx F 0x%llx %c\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif),
		    g->qfg_S, g->qfg_F, mask & (1 << i) ? '1' : '0');

		for (j = 0; j < qif->qif_maxslots; j++) {
			if (g->qfg_slots[j]) {
				log(LOG_DEBUG, "%s: %s bucket %d 0x%llx "
				    "qid %d\n", if_name(QFQIF_IFP(qif)),
				    qfq_style(qif), j,
				    (uint64_t)VM_KERNEL_ADDRPERM(
				    g->qfg_slots[j]),
				    g->qfg_slots[j]->cl_handle);
			}
		}
	}
}

static void
qfq_dump_sched(struct qfq_if *qif, const char *msg)
{
	log(LOG_DEBUG, "%s: %s --- in %s: ---\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), msg);
	log(LOG_DEBUG, "%s: %s emptygrp %d queued %d V 0x%llx\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_emptygrp,
	    qif->qif_queued, qif->qif_V);
	log(LOG_DEBUG, "%s: %s ER 0x%08x\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[ER]);
	log(LOG_DEBUG, "%s: %s EB 0x%08x\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[EB]);
	log(LOG_DEBUG, "%s: %s IR 0x%08x\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IR]);
	log(LOG_DEBUG, "%s: %s IB 0x%08x\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IB]);
	qfq_dump_groups(qif, 0xffffffff);
}
#endif /* QFQ_DEBUG */

/*
 * qfq_enqueue_ifclassq is an enqueue function to be registered to
 * (*ifcq_enqueue) in struct ifclassq.
 */
static int
qfq_enqueue_ifclassq(struct ifclassq *ifq, void *p, classq_pkt_type_t ptype,
    boolean_t *pdrop)
{
	u_int32_t i = 0;
	int ret;
	pktsched_pkt_t pkt;
	struct pf_mtag *t = NULL;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	switch (ptype) {
	case QP_MBUF: {
		struct mbuf *m = p;
		if (!(m->m_flags & M_PKTHDR)) {
			/* should not happen */
			log(LOG_ERR, "%s: packet does not have pkthdr\n",
			    if_name(ifq->ifcq_ifp));
			IFCQ_CONVERT_LOCK(ifq);
			m_freem(m);
			*pdrop = TRUE;
			return ENOBUFS;
		}
		i = MBUF_SCIDX(mbuf_get_service_class(m));
		t = m_pftag(m);
		break;
	}

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	VERIFY((u_int32_t)i < IFCQ_SC_MAX);

	pktsched_pkt_encap(&pkt, ptype, p);

	ret = qfq_enqueue(ifq->ifcq_disc,
	    ifq->ifcq_disc_slots[i].cl, &pkt, t);

	if ((ret != 0) && (ret != CLASSQEQ_SUCCESS_FC)) {
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
	} else {
		*pdrop = FALSE;
	}

	switch (ret) {
	case CLASSQEQ_DROP:
		ret = ENOBUFS;
		break;
	case CLASSQEQ_DROP_FC:
		ret = EQFULL;
		break;
	case CLASSQEQ_DROP_SP:
		ret = EQSUSPENDED;
		break;
	case CLASSQEQ_SUCCESS_FC:
		ret = EQFULL;
		break;
	case CLASSQEQ_SUCCESS:
		ret = 0;
		break;
	default:
		VERIFY(0);
	}
	return ret;
}

/*
 * qfq_dequeue_ifclassq is a dequeue function to be registered to
 * (*ifcq_dequeue) in struct ifclassq.
 *
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
static void *
qfq_dequeue_ifclassq(struct ifclassq *ifq, classq_pkt_type_t *ptype)
{
	pktsched_pkt_t pkt;

	bzero(&pkt, sizeof(pkt));
	qfq_dequeue(ifq->ifcq_disc, &pkt);
	*ptype = pkt.pktsched_ptype;
	return pkt.pktsched_pkt;
}

static int
qfq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
{
	struct qfq_if *qif = (struct qfq_if *)ifq->ifcq_disc;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	switch (req) {
	case CLASSQRQ_PURGE:
		qfq_purge(qif);
		break;

	case CLASSQRQ_PURGE_SC:
		qfq_purge_sc(qif, (cqrq_purge_sc_t *)arg);
		break;

	case CLASSQRQ_EVENT:
		qfq_event(qif, (cqev_t)arg);
		break;

	case CLASSQRQ_THROTTLE:
		err = qfq_throttle(qif, (cqrq_throttle_t *)arg);
		break;

	case CLASSQRQ_STAT_SC:
		err = qfq_stat_sc(qif, (cqrq_stat_sc_t *)arg);
		break;
	}
	return err;
}

int
qfq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	struct qfq_class *cl0, *cl1, *cl2, *cl3, *cl4;
	struct qfq_class *cl5, *cl6, *cl7, *cl8, *cl9;
	struct qfq_if *qif;
	u_int32_t maxlen = 0, qflags = 0;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	if (flags & PKTSCHEDF_QALG_SFB) {
		qflags |= QFCF_SFB;
	}
	if (flags & PKTSCHEDF_QALG_ECN) {
		qflags |= QFCF_ECN;
	}
	if (flags & PKTSCHEDF_QALG_FLOWCTL) {
		qflags |= QFCF_FLOWCTL;
	}
	if (flags & PKTSCHEDF_QALG_DELAYBASED) {
		qflags |= QFCF_DELAYBASED;
	}

	qif = qfq_alloc(ifp, M_WAITOK);
	if (qif == NULL) {
		return ENOMEM;
	}

	if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
		maxlen = if_sndq_maxlen;
	}

	if ((err = qfq_add_queue(qif, maxlen, 300, 1200,
	    qflags | QFCF_LAZY, SCIDX_BK_SYS, &cl0, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 600, 1400,
	    qflags | QFCF_LAZY, SCIDX_BK, &cl1, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 2400, 600,
	    qflags | QFCF_DEFAULTCLASS, SCIDX_BE, &cl2, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 2700, 600,
	    qflags | QFCF_LAZY, SCIDX_RD, &cl3, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 3000, 400,
	    qflags | QFCF_LAZY, SCIDX_OAM, &cl4, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 8000, 1000,
	    qflags | QFCF_LAZY, SCIDX_AV, &cl5, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 15000, 1200,
	    qflags | QFCF_LAZY, SCIDX_RV, &cl6, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 20000, 1400,
	    qflags | QFCF_LAZY, SCIDX_VI, &cl7, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 23000, 200,
	    qflags | QFCF_LAZY, SCIDX_VO, &cl8, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = qfq_add_queue(qif, maxlen, 25000, 200,
	    qflags, SCIDX_CTL, &cl9, ptype)) != 0) {
		goto cleanup;
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_QFQ, qif,
	    qfq_enqueue_ifclassq, qfq_dequeue_ifclassq, NULL,
	    NULL, NULL, qfq_request_ifclassq);

	/* cache these for faster lookup */
	if (err == 0) {
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;

		ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
		ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;

		ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;

		ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
		ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;

		ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
		ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;

		ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
		ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;

		ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
		ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;

		ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;

		ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
		ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;

		ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
		ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
	}

cleanup:
	if (err != 0) {
		(void) qfq_destroy_locked(qif);
	}

	return err;
}

int
qfq_teardown_ifclassq(struct ifclassq *ifq)
{
	struct qfq_if *qif = ifq->ifcq_disc;
	int i;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(qif != NULL && ifq->ifcq_type == PKTSCHEDT_QFQ);

	(void) qfq_destroy_locked(qif);

	ifq->ifcq_disc = NULL;
	for (i = 0; i < IFCQ_SC_MAX; i++) {
		ifq->ifcq_disc_slots[i].qid = 0;
		ifq->ifcq_disc_slots[i].cl = NULL;
	}

	return ifclassq_detach(ifq);
}

int
qfq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
    struct if_ifclassq_stats *ifqs)
{
	struct qfq_if *qif = ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_QFQ);

	if (slot >= IFCQ_SC_MAX) {
		return EINVAL;
	}

	return qfq_get_class_stats(qif, ifq->ifcq_disc_slots[slot].qid,
	    &ifqs->ifqs_qfq_stats);
}

static int
qfq_throttle(struct qfq_if *qif, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = qif->qif_ifq;
	struct qfq_class *cl;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!tr->set) {
		tr->level = qif->qif_throttle;
		return 0;
	}

	if (tr->level == qif->qif_throttle) {
		return EALREADY;
	}

	/* Current throttling levels only involve BK_SYS class */
	cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;

	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		err = qfq_resumeq(qif, cl);
		break;

	case IFNET_THROTTLE_OPPORTUNISTIC:
		err = qfq_suspendq(qif, cl);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (err == 0 || err == ENXIO) {
		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
			    if_name(QFQIF_IFP(qif)), qfq_style(qif),
			    (err == 0) ? "" : "lazy ", qif->qif_throttle,
			    tr->level);
		}
		qif->qif_throttle = tr->level;
		if (err != 0) {
			err = 0;
		} else {
			qfq_purgeq(qif, cl, 0, NULL, NULL);
		}
	} else {
		log(LOG_ERR, "%s: %s unable to set throttling level "
		    "%d->%d [error=%d]\n", if_name(QFQIF_IFP(qif)),
		    qfq_style(qif), qif->qif_throttle, tr->level, err);
	}

	return err;
}

static int
qfq_resumeq(struct qfq_if *qif, struct qfq_class *cl)
{
	struct ifclassq *ifq = qif->qif_ifq;
	int err = 0;
#if !MACH_ASSERT
#pragma unused(ifq)
#endif
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
		err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);
	}

	if (err == 0) {
		qstate(&cl->cl_q) = QS_RUNNING;
	}

	return err;
}

static int
qfq_suspendq(struct qfq_if *qif, struct qfq_class *cl)
{
	struct ifclassq *ifq = qif->qif_ifq;
	int err = 0;
#if !MACH_ASSERT
#pragma unused(ifq)
#endif
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb != NULL) {
			err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
		} else {
			VERIFY(cl->cl_flags & QFCF_LAZY);
			err = ENXIO;	/* delayed throttling */
		}
	}

	if (err == 0 || err == ENXIO) {
		qstate(&cl->cl_q) = QS_SUSPENDED;
	}

	return err;
}
316670eb 1965}