apple/xnu.git: bsd/net/pktsched/pktsched_qfq.c (xnu-2050.22.13)
1 /*
2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
31 * All rights reserved
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 */
54
55 /*
56 * Quick Fair Queueing is described in
57 * "QFQ: Efficient Packet Scheduling with Tight Bandwidth Distribution
58 * Guarantees" by Fabio Checconi, Paolo Valente, and Luigi Rizzo.
59 *
60 * This code is ported from the dummynet(4) QFQ implementation.
61 * See also http://info.iet.unipi.it/~luigi/qfq/
62 */
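/*
 * Informal sketch of the math used below (a reading aid only; see the
 * paper for the precise invariants): each class keeps virtual start and
 * finish timestamps S and F, with F = S + len * cl_inv_w, where
 * cl_inv_w = QFQ_ONE_FP / weight, so F - S is len/weight in fixed point.
 * The system virtual time qif_V advances on every dequeue in proportion
 * to the packet length divided by the total weight.  Classes with a
 * similar lmax/weight ratio share a group whose slot size is a power of
 * two; within a group, timestamps rounded to that slot size fall into a
 * small ring of buckets (qfg_slots).  Each group sits in one of four
 * sets, ER/IR/EB/IB (Eligible or Ineligible crossed with Ready or
 * Blocked), tracked by the qif_bitmaps[] bitmaps; the dequeue path
 * always serves the lowest-indexed group in ER.
 */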
63
64 #include <sys/cdefs.h>
65 #include <sys/param.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/systm.h>
69 #include <sys/errno.h>
70 #include <sys/kernel.h>
71 #include <sys/syslog.h>
72
73 #include <kern/zalloc.h>
74
75 #include <net/if.h>
76 #include <net/net_osdep.h>
77
78 #include <net/pktsched/pktsched_qfq.h>
79 #include <netinet/in.h>
80
81 /*
82 * function prototypes
83 */
84 static int qfq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
85 static struct mbuf *qfq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
86 static int qfq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
87 static int qfq_clear_interface(struct qfq_if *);
88 static struct qfq_class *qfq_class_create(struct qfq_if *, u_int32_t,
89 u_int32_t, u_int32_t, u_int32_t, u_int32_t);
90 static int qfq_class_destroy(struct qfq_if *, struct qfq_class *);
91 static int qfq_destroy_locked(struct qfq_if *);
92 static inline int qfq_addq(struct qfq_class *, struct mbuf *, struct pf_mtag *);
93 static inline struct mbuf *qfq_getq(struct qfq_class *);
94 static inline struct mbuf *qfq_pollq(struct qfq_class *);
95 static void qfq_purgeq(struct qfq_if *, struct qfq_class *, u_int32_t,
96 u_int32_t *, u_int32_t *);
97 static void qfq_purge_sc(struct qfq_if *, cqrq_purge_sc_t *);
98 static void qfq_updateq(struct qfq_if *, struct qfq_class *, cqev_t);
99 static int qfq_throttle(struct qfq_if *, cqrq_throttle_t *);
100 static int qfq_resumeq(struct qfq_if *, struct qfq_class *);
101 static int qfq_suspendq(struct qfq_if *, struct qfq_class *);
102 static inline struct qfq_class *qfq_clh_to_clp(struct qfq_if *, u_int32_t);
103 static const char *qfq_style(struct qfq_if *);
104
105 static inline int qfq_gt(u_int64_t, u_int64_t);
106 static inline u_int64_t qfq_round_down(u_int64_t, u_int32_t);
107 static inline struct qfq_group *qfq_ffs(struct qfq_if *, pktsched_bitmap_t);
108 static int qfq_calc_index(struct qfq_class *, u_int32_t, u_int32_t);
109 static inline pktsched_bitmap_t mask_from(pktsched_bitmap_t, int);
110 static inline u_int32_t qfq_calc_state(struct qfq_if *, struct qfq_group *);
111 static inline void qfq_move_groups(struct qfq_if *, pktsched_bitmap_t,
112 int, int);
113 static inline void qfq_unblock_groups(struct qfq_if *, int, u_int64_t);
114 static inline void qfq_make_eligible(struct qfq_if *, u_int64_t);
115 static inline void qfq_slot_insert(struct qfq_if *, struct qfq_group *,
116 struct qfq_class *, u_int64_t);
117 static inline void qfq_front_slot_remove(struct qfq_group *);
118 static inline struct qfq_class *qfq_slot_scan(struct qfq_if *,
119 struct qfq_group *);
120 static inline void qfq_slot_rotate(struct qfq_if *, struct qfq_group *,
121 u_int64_t);
122 static inline void qfq_update_eligible(struct qfq_if *, u_int64_t);
123 static inline int qfq_update_class(struct qfq_if *, struct qfq_group *,
124 struct qfq_class *);
125 static inline void qfq_update_start(struct qfq_if *, struct qfq_class *);
126 static inline void qfq_slot_remove(struct qfq_if *, struct qfq_group *,
127 struct qfq_class *);
128 static void qfq_deactivate_class(struct qfq_if *, struct qfq_class *);
129 static const char *qfq_state2str(int);
130 #if QFQ_DEBUG
131 static void qfq_dump_groups(struct qfq_if *, u_int32_t);
132 static void qfq_dump_sched(struct qfq_if *, const char *);
133 #endif /* QFQ_DEBUG */
134
135 #define QFQ_ZONE_MAX 32 /* maximum elements in zone */
136 #define QFQ_ZONE_NAME "pktsched_qfq" /* zone name */
137
138 static unsigned int qfq_size; /* size of zone element */
139 static struct zone *qfq_zone; /* zone for qfq */
140
141 #define QFQ_CL_ZONE_MAX 32 /* maximum elements in zone */
142 #define QFQ_CL_ZONE_NAME "pktsched_qfq_cl" /* zone name */
143
144 static unsigned int qfq_cl_size; /* size of zone element */
145 static struct zone *qfq_cl_zone; /* zone for qfq_class */
146
147 /*
148 * Maximum number of consecutive slots occupied by backlogged classes
149 * inside a group. This is approx lmax/lmin + 5. Used when ALTQ is
150 * available.
151 *
152 * XXX check because it poses constraints on MAX_INDEX
153 */
154 #define QFQ_MAX_SLOTS 32 /* default when ALTQ is available */
155
156 void
157 qfq_init(void)
158 {
159 qfq_size = sizeof (struct qfq_if);
160 qfq_zone = zinit(qfq_size, QFQ_ZONE_MAX * qfq_size,
161 0, QFQ_ZONE_NAME);
162 if (qfq_zone == NULL) {
163 panic("%s: failed allocating %s", __func__, QFQ_ZONE_NAME);
164 /* NOTREACHED */
165 }
166 zone_change(qfq_zone, Z_EXPAND, TRUE);
167 zone_change(qfq_zone, Z_CALLERACCT, TRUE);
168
169 qfq_cl_size = sizeof (struct qfq_class);
170 qfq_cl_zone = zinit(qfq_cl_size, QFQ_CL_ZONE_MAX * qfq_cl_size,
171 0, QFQ_CL_ZONE_NAME);
172 if (qfq_cl_zone == NULL) {
173 panic("%s: failed allocating %s", __func__, QFQ_CL_ZONE_NAME);
174 /* NOTREACHED */
175 }
176 zone_change(qfq_cl_zone, Z_EXPAND, TRUE);
177 zone_change(qfq_cl_zone, Z_CALLERACCT, TRUE);
178 }
179
180 struct qfq_if *
181 qfq_alloc(struct ifnet *ifp, int how, boolean_t altq)
182 {
183 struct qfq_if *qif;
184
185 qif = (how == M_WAITOK) ? zalloc(qfq_zone) : zalloc_noblock(qfq_zone);
186 if (qif == NULL)
187 return (NULL);
188
189 bzero(qif, qfq_size);
190 qif->qif_ifq = &ifp->if_snd;
191 if (altq) {
192 qif->qif_maxclasses = QFQ_MAX_CLASSES;
193 qif->qif_maxslots = QFQ_MAX_SLOTS;
194 qif->qif_flags |= QFQIFF_ALTQ;
195 } else {
196 qif->qif_maxclasses = IFCQ_SC_MAX;
197 /*
198 * TODO: adi@apple.com
199 *
200 * Ideally I would like to have the following
201 * but QFQ needs further modifications.
202 *
203 * qif->qif_maxslots = IFCQ_SC_MAX;
204 */
205 qif->qif_maxslots = QFQ_MAX_SLOTS;
206 }
207
208 if ((qif->qif_class_tbl = _MALLOC(sizeof (struct qfq_class *) *
209 qif->qif_maxclasses, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
210 log(LOG_ERR, "%s: %s unable to allocate class table array\n",
211 if_name(ifp), qfq_style(qif));
212 goto error;
213 }
214
215 if ((qif->qif_groups = _MALLOC(sizeof (struct qfq_group *) *
216 (QFQ_MAX_INDEX + 1), M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
217 log(LOG_ERR, "%s: %s unable to allocate group array\n",
218 if_name(ifp), qfq_style(qif));
219 goto error;
220 }
221
222 if (pktsched_verbose) {
223 log(LOG_DEBUG, "%s: %s scheduler allocated\n",
224 if_name(ifp), qfq_style(qif));
225 }
226
227 return (qif);
228
229 error:
230 if (qif->qif_class_tbl != NULL) {
231 _FREE(qif->qif_class_tbl, M_DEVBUF);
232 qif->qif_class_tbl = NULL;
233 }
234 if (qif->qif_groups != NULL) {
235 _FREE(qif->qif_groups, M_DEVBUF);
236 qif->qif_groups = NULL;
237 }
238 zfree(qfq_zone, qif);
239
240 return (NULL);
241 }
242
243 int
244 qfq_destroy(struct qfq_if *qif)
245 {
246 struct ifclassq *ifq = qif->qif_ifq;
247 int err;
248
249 IFCQ_LOCK(ifq);
250 err = qfq_destroy_locked(qif);
251 IFCQ_UNLOCK(ifq);
252
253 return (err);
254 }
255
256 static int
257 qfq_destroy_locked(struct qfq_if *qif)
258 {
259 int i;
260
261 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
262
263 (void) qfq_clear_interface(qif);
264
265 VERIFY(qif->qif_class_tbl != NULL);
266 _FREE(qif->qif_class_tbl, M_DEVBUF);
267 qif->qif_class_tbl = NULL;
268
269 VERIFY(qif->qif_groups != NULL);
270 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
271 struct qfq_group *grp = qif->qif_groups[i];
272
273 if (grp != NULL) {
274 VERIFY(grp->qfg_slots != NULL);
275 _FREE(grp->qfg_slots, M_DEVBUF);
276 grp->qfg_slots = NULL;
277 _FREE(grp, M_DEVBUF);
278 qif->qif_groups[i] = NULL;
279 }
280 }
281 _FREE(qif->qif_groups, M_DEVBUF);
282 qif->qif_groups = NULL;
283
284 if (pktsched_verbose) {
285 log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
286 if_name(QFQIF_IFP(qif)), qfq_style(qif));
287 }
288
289 zfree(qfq_zone, qif);
290
291 return (0);
292 }
293
294 /*
295 * bring the interface back to the initial state by discarding
296 * all the filters and classes.
297 */
298 static int
299 qfq_clear_interface(struct qfq_if *qif)
300 {
301 struct qfq_class *cl;
302 int i;
303
304 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
305
306 /* clear out the classes */
307 for (i = 0; i < qif->qif_maxclasses; i++)
308 if ((cl = qif->qif_class_tbl[i]) != NULL)
309 qfq_class_destroy(qif, cl);
310
311 return (0);
312 }
313
314 /* discard all the queued packets on the interface */
315 void
316 qfq_purge(struct qfq_if *qif)
317 {
318 struct qfq_class *cl;
319 int i;
320
321 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
322
323 for (i = 0; i < qif->qif_maxclasses; i++) {
324 if ((cl = qif->qif_class_tbl[i]) != NULL)
325 qfq_purgeq(qif, cl, 0, NULL, NULL);
326 }
327 #if !PF_ALTQ
328 /*
329 * This assertion is safe to be made only when PF_ALTQ is not
330 * configured; otherwise, IFCQ_LEN represents the sum of the
331 * packets managed by ifcq_disc and altq_disc instances, which
332 * is possible when transitioning between the two.
333 */
334 VERIFY(IFCQ_LEN(qif->qif_ifq) == 0);
335 #endif /* !PF_ALTQ */
336 }
337
338 static void
339 qfq_purge_sc(struct qfq_if *qif, cqrq_purge_sc_t *pr)
340 {
341 struct ifclassq *ifq = qif->qif_ifq;
342 u_int32_t i;
343
344 IFCQ_LOCK_ASSERT_HELD(ifq);
345
346 VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
347 VERIFY(pr->flow != 0);
348
349 if (pr->sc != MBUF_SC_UNSPEC) {
350 i = MBUF_SCIDX(pr->sc);
351 VERIFY(i < IFCQ_SC_MAX);
352
353 qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
354 pr->flow, &pr->packets, &pr->bytes);
355 } else {
356 u_int32_t cnt, len;
357
358 pr->packets = 0;
359 pr->bytes = 0;
360
361 for (i = 0; i < IFCQ_SC_MAX; i++) {
362 qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
363 pr->flow, &cnt, &len);
364 pr->packets += cnt;
365 pr->bytes += len;
366 }
367 }
368 }
369
370 void
371 qfq_event(struct qfq_if *qif, cqev_t ev)
372 {
373 struct qfq_class *cl;
374 int i;
375
376 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
377
378 for (i = 0; i < qif->qif_maxclasses; i++)
379 if ((cl = qif->qif_class_tbl[i]) != NULL)
380 qfq_updateq(qif, cl, ev);
381 }
382
383 int
384 qfq_add_queue(struct qfq_if *qif, u_int32_t qlimit, u_int32_t weight,
385 u_int32_t maxsz, u_int32_t flags, u_int32_t qid, struct qfq_class **clp)
386 {
387 struct qfq_class *cl;
388 u_int32_t w;
389
390 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
391
392 if (qfq_clh_to_clp(qif, qid) != NULL)
393 return (EBUSY);
394
395 /* check parameters */
396 if (weight == 0 || weight > QFQ_MAX_WEIGHT)
397 return (EINVAL);
398
399 w = (QFQ_ONE_FP / (QFQ_ONE_FP / weight));
400 if (qif->qif_wsum + w > QFQ_MAX_WSUM)
401 return (EINVAL);
402
403 if (maxsz == 0 || maxsz > (1 << QFQ_MTU_SHIFT))
404 return (EINVAL);
405
406 cl = qfq_class_create(qif, weight, qlimit, flags, maxsz, qid);
407 if (cl == NULL)
408 return (ENOMEM);
409
410 if (clp != NULL)
411 *clp = cl;
412
413 return (0);
414 }
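/*
 * Illustrative only: a hypothetical caller, already holding the ifclassq
 * lock, could create a queue much like qfq_setup_ifclassq() does below.
 * The handle 42 and the other parameters are made up for the example.
 *
 *	struct qfq_class *cl;
 *	int err;
 *
 *	err = qfq_add_queue(qif, 128, 600, 1500, QFCF_SFB, 42, &cl);
 *	if (err != 0)
 *		log(LOG_ERR, "example qfq_add_queue failed: %d\n", err);
 */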
415
416 static struct qfq_class *
417 qfq_class_create(struct qfq_if *qif, u_int32_t weight, u_int32_t qlimit,
418 u_int32_t flags, u_int32_t maxsz, u_int32_t qid)
419 {
420 struct ifnet *ifp;
421 struct ifclassq *ifq;
422 struct qfq_group *grp;
423 struct qfq_class *cl;
424 u_int32_t w; /* approximated weight */
425 int i;
426
427 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
428
429 /* Sanitize flags unless internally configured */
430 if (qif->qif_flags & QFQIFF_ALTQ)
431 flags &= QFCF_USERFLAGS;
432
433 if (qif->qif_classes >= qif->qif_maxclasses) {
434 log(LOG_ERR, "%s: %s out of classes! (max %d)\n",
435 if_name(QFQIF_IFP(qif)), qfq_style(qif),
436 qif->qif_maxclasses);
437 return (NULL);
438 }
439
440 #if !CLASSQ_RED
441 if (flags & QFCF_RED) {
442 log(LOG_ERR, "%s: %s RED not available!\n",
443 if_name(QFQIF_IFP(qif)), qfq_style(qif));
444 return (NULL);
445 }
446 #endif /* !CLASSQ_RED */
447
448 #if !CLASSQ_RIO
449 if (flags & QFCF_RIO) {
450 log(LOG_ERR, "%s: %s RIO not available!\n",
451 if_name(QFQIF_IFP(qif)), qfq_style(qif));
452 return (NULL);
453 }
454 #endif /* !CLASSQ_RIO */
455
456 #if !CLASSQ_BLUE
457 if (flags & QFCF_BLUE) {
458 log(LOG_ERR, "%s: %s BLUE not available!\n",
459 if_name(QFQIF_IFP(qif)), qfq_style(qif));
460 return (NULL);
461 }
462 #endif /* !CLASSQ_BLUE */
463
464 /* These are mutually exclusive */
465 if ((flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) &&
466 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RED &&
467 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RIO &&
468 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_BLUE &&
469 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_SFB) {
470 log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
471 if_name(QFQIF_IFP(qif)), qfq_style(qif));
472 return (NULL);
473 }
474
475 ifq = qif->qif_ifq;
476 ifp = QFQIF_IFP(qif);
477
478 cl = zalloc(qfq_cl_zone);
479 if (cl == NULL)
480 return (NULL);
481
482 bzero(cl, qfq_cl_size);
483
484 if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
485 qlimit = IFCQ_MAXLEN(ifq);
486 if (qlimit == 0)
487 qlimit = DEFAULT_QLIMIT; /* use default */
488 }
489 _qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
490 cl->cl_qif = qif;
491 cl->cl_flags = flags;
492 cl->cl_handle = qid;
493
494 /*
495 * Find a free slot in the class table. If the slot matching
496 * the lower bits of qid is free, use this slot. Otherwise,
497 * use the first free slot.
498 */
499 i = qid % qif->qif_maxclasses;
500 if (qif->qif_class_tbl[i] == NULL) {
501 qif->qif_class_tbl[i] = cl;
502 } else {
503 for (i = 0; i < qif->qif_maxclasses; i++) {
504 if (qif->qif_class_tbl[i] == NULL) {
505 qif->qif_class_tbl[i] = cl;
506 break;
507 }
508 }
509 if (i == qif->qif_maxclasses) {
510 zfree(qfq_cl_zone, cl);
511 return (NULL);
512 }
513 }
514
515 w = weight;
516 VERIFY(w > 0 && w <= QFQ_MAX_WEIGHT);
517 cl->cl_lmax = maxsz;
518 cl->cl_inv_w = (QFQ_ONE_FP / w);
519 w = (QFQ_ONE_FP / cl->cl_inv_w);
520 VERIFY(qif->qif_wsum + w <= QFQ_MAX_WSUM);
521
522 i = qfq_calc_index(cl, cl->cl_inv_w, cl->cl_lmax);
523 VERIFY(i <= QFQ_MAX_INDEX);
524 grp = qif->qif_groups[i];
525 if (grp == NULL) {
526 grp = _MALLOC(sizeof (*grp), M_DEVBUF, M_WAITOK|M_ZERO);
527 if (grp != NULL) {
528 grp->qfg_index = i;
529 grp->qfg_slot_shift =
530 QFQ_MTU_SHIFT + QFQ_FRAC_BITS - (QFQ_MAX_INDEX - i);
531 grp->qfg_slots = _MALLOC(sizeof (struct qfq_class *) *
532 qif->qif_maxslots, M_DEVBUF, M_WAITOK|M_ZERO);
533 if (grp->qfg_slots == NULL) {
534 log(LOG_ERR, "%s: %s unable to allocate group "
535 "slots for index %d\n", if_name(ifp),
536 qfq_style(qif), i);
537 }
538 } else {
539 log(LOG_ERR, "%s: %s unable to allocate group for "
540 "qid=%d\n", if_name(ifp), qfq_style(qif),
541 cl->cl_handle);
542 }
543 if (grp == NULL || grp->qfg_slots == NULL) {
544 qif->qif_class_tbl[qid % qif->qif_maxclasses] = NULL;
545 if (grp != NULL)
546 _FREE(grp, M_DEVBUF);
547 zfree(qfq_cl_zone, cl);
548 return (NULL);
549 } else {
550 qif->qif_groups[i] = grp;
551 }
552 }
553 cl->cl_grp = grp;
554 qif->qif_wsum += w;
555 /* XXX cl->cl_S = qif->qif_V; ? */
556 /* XXX compute qif->qif_i_wsum */
557
558 qif->qif_classes++;
559
560 if (flags & QFCF_DEFAULTCLASS)
561 qif->qif_default = cl;
562
563 if (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) {
564 #if CLASSQ_RED || CLASSQ_RIO
565 u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
566 int pkttime;
567 #endif /* CLASSQ_RED || CLASSQ_RIO */
568
569 cl->cl_qflags = 0;
570 if (flags & QFCF_ECN) {
571 if (flags & QFCF_BLUE)
572 cl->cl_qflags |= BLUEF_ECN;
573 else if (flags & QFCF_SFB)
574 cl->cl_qflags |= SFBF_ECN;
575 else if (flags & QFCF_RED)
576 cl->cl_qflags |= REDF_ECN;
577 else if (flags & QFCF_RIO)
578 cl->cl_qflags |= RIOF_ECN;
579 }
580 if (flags & QFCF_FLOWCTL) {
581 if (flags & QFCF_SFB)
582 cl->cl_qflags |= SFBF_FLOWCTL;
583 }
584 if (flags & QFCF_CLEARDSCP) {
585 if (flags & QFCF_RIO)
586 cl->cl_qflags |= RIOF_CLEARDSCP;
587 }
588 #if CLASSQ_RED || CLASSQ_RIO
589 /*
590 * XXX: RED & RIO should be watching link speed and MTU
591 * events and recompute pkttime accordingly.
592 */
593 if (ifbandwidth < 8)
594 pkttime = 1000 * 1000 * 1000; /* 1 sec */
595 else
596 pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
597 (ifbandwidth / 8);
598
599 /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
600 #if CLASSQ_RED
601 if (flags & QFCF_RED) {
602 cl->cl_red = red_alloc(ifp, 0, 0,
603 qlimit(&cl->cl_q) * 10/100,
604 qlimit(&cl->cl_q) * 30/100,
605 cl->cl_qflags, pkttime);
606 if (cl->cl_red != NULL)
607 qtype(&cl->cl_q) = Q_RED;
608 }
609 #endif /* CLASSQ_RED */
610 #if CLASSQ_RIO
611 if (flags & QFCF_RIO) {
612 cl->cl_rio =
613 rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
614 if (cl->cl_rio != NULL)
615 qtype(&cl->cl_q) = Q_RIO;
616 }
617 #endif /* CLASSQ_RIO */
618 #endif /* CLASSQ_RED || CLASSQ_RIO */
619 #if CLASSQ_BLUE
620 if (flags & QFCF_BLUE) {
621 cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
622 if (cl->cl_blue != NULL)
623 qtype(&cl->cl_q) = Q_BLUE;
624 }
625 #endif /* CLASSQ_BLUE */
626 if (flags & QFCF_SFB) {
627 if (!(cl->cl_flags & QFCF_LAZY))
628 cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
629 qlimit(&cl->cl_q), cl->cl_qflags);
630 if (cl->cl_sfb != NULL || (cl->cl_flags & QFCF_LAZY))
631 qtype(&cl->cl_q) = Q_SFB;
632 }
633 }
634
635 if (pktsched_verbose) {
636 log(LOG_DEBUG, "%s: %s created qid=%d grp=%d weight=%d "
637 "qlimit=%d flags=%b\n", if_name(ifp), qfq_style(qif),
638 cl->cl_handle, cl->cl_grp->qfg_index, weight, qlimit,
639 flags, QFCF_BITS);
640 }
641
642 return (cl);
643 }
644
645 int
646 qfq_remove_queue(struct qfq_if *qif, u_int32_t qid)
647 {
648 struct qfq_class *cl;
649
650 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
651
652 if ((cl = qfq_clh_to_clp(qif, qid)) == NULL)
653 return (EINVAL);
654
655 return (qfq_class_destroy(qif, cl));
656 }
657
658 static int
659 qfq_class_destroy(struct qfq_if *qif, struct qfq_class *cl)
660 {
661 struct ifclassq *ifq = qif->qif_ifq;
662 int i;
663
664 IFCQ_LOCK_ASSERT_HELD(ifq);
665
666 qfq_purgeq(qif, cl, 0, NULL, NULL);
667
668 if (cl->cl_inv_w != 0) {
669 qif->qif_wsum -= (QFQ_ONE_FP / cl->cl_inv_w);
670 cl->cl_inv_w = 0; /* reset weight to avoid run twice */
671 }
672
673 for (i = 0; i < qif->qif_maxclasses; i++) {
674 if (qif->qif_class_tbl[i] == cl) {
675 qif->qif_class_tbl[i] = NULL;
676 break;
677 }
678 }
679 qif->qif_classes--;
680
681 if (cl->cl_qalg.ptr != NULL) {
682 #if CLASSQ_RIO
683 if (q_is_rio(&cl->cl_q))
684 rio_destroy(cl->cl_rio);
685 #endif /* CLASSQ_RIO */
686 #if CLASSQ_RED
687 if (q_is_red(&cl->cl_q))
688 red_destroy(cl->cl_red);
689 #endif /* CLASSQ_RED */
690 #if CLASSQ_BLUE
691 if (q_is_blue(&cl->cl_q))
692 blue_destroy(cl->cl_blue);
693 #endif /* CLASSQ_BLUE */
694 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
695 sfb_destroy(cl->cl_sfb);
696 cl->cl_qalg.ptr = NULL;
697 qtype(&cl->cl_q) = Q_DROPTAIL;
698 qstate(&cl->cl_q) = QS_RUNNING;
699 }
700
701 if (qif->qif_default == cl)
702 qif->qif_default = NULL;
703
704 if (pktsched_verbose) {
705 log(LOG_DEBUG, "%s: %s destroyed qid=%d\n",
706 if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle);
707 }
708
709 zfree(qfq_cl_zone, cl);
710
711 return (0);
712 }
713
714 /*
715 * Calculate a mask to mimic what would be ffs_from()
716 */
717 static inline pktsched_bitmap_t
718 mask_from(pktsched_bitmap_t bitmap, int from)
719 {
720 return (bitmap & ~((1UL << from) - 1));
721 }
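/*
 * Example (illustration only): mask_from(0x2c, 3) masks off bit 2 and
 * returns 0x28, i.e. only groups with index >= 3 survive in the result.
 */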
722
723 /*
724  * The state computation relies on ER=0, IR=1, EB=2, IB=3.
725  * First compute eligibility by comparing grp->qfg_S with qif->qif_V,
726  * then check whether someone is blocking us and possibly add EB.
727 */
728 static inline u_int32_t
729 qfq_calc_state(struct qfq_if *qif, struct qfq_group *grp)
730 {
731 /* if S > V we are not eligible */
732 u_int32_t state = qfq_gt(grp->qfg_S, qif->qif_V);
733 pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER],
734 grp->qfg_index);
735 struct qfq_group *next;
736
737 if (mask) {
738 next = qfq_ffs(qif, mask);
739 if (qfq_gt(grp->qfg_F, next->qfg_F))
740 state |= EB;
741 }
742
743 return (state);
744 }
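/*
 * Worked example (illustration only): if qfg_S > qif_V the class's group
 * is ineligible, so bit 0 of the state is set (IR); if on top of that
 * some group already in ER, with index at least ours, finishes strictly
 * earlier than we do, bit 1 is also set (EB), and the combined state 3
 * is IB (ineligible and blocked).
 */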
745
746 /*
747 * In principle
748 * qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
749 * qif->qif_bitmaps[src] &= ~mask;
750 * but we should make sure that src != dst
751 */
752 static inline void
753 qfq_move_groups(struct qfq_if *qif, pktsched_bitmap_t mask, int src, int dst)
754 {
755 qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
756 qif->qif_bitmaps[src] &= ~mask;
757 }
758
759 static inline void
760 qfq_unblock_groups(struct qfq_if *qif, int index, u_int64_t old_finish)
761 {
762 pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER], index + 1);
763 struct qfq_group *next;
764
765 if (mask) {
766 next = qfq_ffs(qif, mask);
767 if (!qfq_gt(next->qfg_F, old_finish))
768 return;
769 }
770
771 mask = (1UL << index) - 1;
772 qfq_move_groups(qif, mask, EB, ER);
773 qfq_move_groups(qif, mask, IB, IR);
774 }
775
776 /*
777 * perhaps
778 *
779 * old_V ^= qif->qif_V;
780 * old_V >>= QFQ_MIN_SLOT_SHIFT;
781 * if (old_V) {
782 * ...
783 * }
784 */
785 static inline void
786 qfq_make_eligible(struct qfq_if *qif, u_int64_t old_V)
787 {
788 pktsched_bitmap_t mask, vslot, old_vslot;
789
790 vslot = qif->qif_V >> QFQ_MIN_SLOT_SHIFT;
791 old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
792
793 if (vslot != old_vslot) {
794 mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
795 qfq_move_groups(qif, mask, IR, ER);
796 qfq_move_groups(qif, mask, IB, EB);
797 }
798 }
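/*
 * Worked example (illustration only): if old_V and qif_V map to vslots
 * 5 (0b0101) and 9 (0b1001), the XOR is 0b1100, __fls() returns 3 and
 * mask = (2UL << 3) - 1 = 0xf, so groups 0-3 move from the ineligible
 * sets to the eligible ones (IR to ER, IB to EB); the virtual time
 * crossed a multiple of each of their slot sizes while advancing.
 */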
799
800 /*
801 * XXX we should make sure that slot becomes less than 32.
802 * This is guaranteed by the input values.
803 * roundedS is always cl->qfg_S rounded on grp->qfg_slot_shift bits.
804 */
805 static inline void
806 qfq_slot_insert(struct qfq_if *qif, struct qfq_group *grp,
807 struct qfq_class *cl, u_int64_t roundedS)
808 {
809 u_int64_t slot = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
810 u_int32_t i = (grp->qfg_front + slot) % qif->qif_maxslots;
811
812 cl->cl_next = grp->qfg_slots[i];
813 grp->qfg_slots[i] = cl;
814 pktsched_bit_set(slot, &grp->qfg_full_slots);
815 }
816
817 /*
818 * remove the entry from the slot
819 */
820 static inline void
821 qfq_front_slot_remove(struct qfq_group *grp)
822 {
823 struct qfq_class **h = &grp->qfg_slots[grp->qfg_front];
824
825 *h = (*h)->cl_next;
826 if (!*h)
827 pktsched_bit_clr(0, &grp->qfg_full_slots);
828 }
829
830 /*
831 * Returns the first full queue in a group. As a side effect,
832  * adjusts the bucket list so the first non-empty bucket is at
833 * position 0 in qfg_full_slots.
834 */
835 static inline struct qfq_class *
836 qfq_slot_scan(struct qfq_if *qif, struct qfq_group *grp)
837 {
838 int i;
839
840 if (pktsched_verbose > 2) {
841 log(LOG_DEBUG, "%s: %s grp=%d full_slots=0x%x\n",
842 if_name(QFQIF_IFP(qif)), qfq_style(qif), grp->qfg_index,
843 grp->qfg_full_slots);
844 }
845
846 if (grp->qfg_full_slots == 0)
847 return (NULL);
848
849 i = pktsched_ffs(grp->qfg_full_slots) - 1; /* zero-based */
850 if (i > 0) {
851 grp->qfg_front = (grp->qfg_front + i) % qif->qif_maxslots;
852 grp->qfg_full_slots >>= i;
853 }
854
855 return (grp->qfg_slots[grp->qfg_front]);
856 }
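/*
 * Worked example (illustration only): with qfg_full_slots == 0x6 the
 * first backlogged bucket is slot 1, so qfg_front advances by one (mod
 * qif_maxslots) and the mask is shifted down to 0x3 before the head
 * class of the new front bucket is returned.
 */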
857
858 /*
859 * adjust the bucket list. When the start time of a group decreases,
860 * we move the index down (modulo qif->qif_maxslots) so we don't need to
861 * move the objects. The mask of occupied slots must be shifted
862 * because we use ffs() to find the first non-empty slot.
863 * This covers decreases in the group's start time, but what about
864  * increases of the start time?
865  * Here too we should make sure that i is less than 32.
866 */
867 static inline void
868 qfq_slot_rotate(struct qfq_if *qif, struct qfq_group *grp, u_int64_t roundedS)
869 {
870 #pragma unused(qif)
871 u_int32_t i = (grp->qfg_S - roundedS) >> grp->qfg_slot_shift;
872
873 grp->qfg_full_slots <<= i;
874 grp->qfg_front = (grp->qfg_front - i) % qif->qif_maxslots;
875 }
876
877 static inline void
878 qfq_update_eligible(struct qfq_if *qif, u_int64_t old_V)
879 {
880 pktsched_bitmap_t ineligible;
881
882 ineligible = qif->qif_bitmaps[IR] | qif->qif_bitmaps[IB];
883 if (ineligible) {
884 if (!qif->qif_bitmaps[ER]) {
885 struct qfq_group *grp;
886 grp = qfq_ffs(qif, ineligible);
887 if (qfq_gt(grp->qfg_S, qif->qif_V))
888 qif->qif_V = grp->qfg_S;
889 }
890 qfq_make_eligible(qif, old_V);
891 }
892 }
893
894 /*
895 * Updates the class, returns true if also the group needs to be updated.
896 */
897 static inline int
898 qfq_update_class(struct qfq_if *qif, struct qfq_group *grp,
899 struct qfq_class *cl)
900 {
901 #pragma unused(qif)
902 cl->cl_S = cl->cl_F;
903 if (qempty(&cl->cl_q)) {
904 qfq_front_slot_remove(grp);
905 } else {
906 u_int32_t len;
907 u_int64_t roundedS;
908
909 len = m_pktlen(qhead(&cl->cl_q));
910 cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
911 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
912 if (roundedS == grp->qfg_S)
913 return (0);
914
915 qfq_front_slot_remove(grp);
916 qfq_slot_insert(qif, grp, cl, roundedS);
917 }
918 return (1);
919 }
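/*
 * Worked example (illustration only): suppose the class weight is 100,
 * so cl_inv_w == QFQ_ONE_FP / 100, and the new head packet is 1000
 * bytes.  The finish time then moves to F = S + 1000 * cl_inv_w, i.e.
 * roughly S plus 10 units of QFQ_ONE_FP: len/weight expressed in fixed
 * point.  The caller only has to touch the group when the class leaves
 * its current bucket or drains.
 */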
920
921 /*
922 * note: CLASSQDQ_POLL returns the next packet without removing the packet
923 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
924 * CLASSQDQ_REMOVE must return the same packet if called immediately
925 * after CLASSQDQ_POLL.
926 */
927 struct mbuf *
928 qfq_dequeue(struct qfq_if *qif, cqdq_op_t op)
929 {
930 pktsched_bitmap_t er_bits = qif->qif_bitmaps[ER];
931 struct ifclassq *ifq = qif->qif_ifq;
932 struct qfq_group *grp;
933 struct qfq_class *cl;
934 struct mbuf *m;
935 u_int64_t old_V;
936 u_int32_t len;
937
938 IFCQ_LOCK_ASSERT_HELD(ifq);
939
940 for (;;) {
941 if (er_bits == 0) {
942 #if QFQ_DEBUG
943 if (qif->qif_queued && pktsched_verbose > 1)
944 qfq_dump_sched(qif, "start dequeue");
945 #endif /* QFQ_DEBUG */
946 /* no eligible and ready packet */
947 return (NULL);
948 }
949 grp = qfq_ffs(qif, er_bits);
950 /* if group is non-empty, use it */
951 if (grp->qfg_full_slots != 0)
952 break;
953 pktsched_bit_clr(grp->qfg_index, &er_bits);
954 #if QFQ_DEBUG
955 qif->qif_emptygrp++;
956 #endif /* QFQ_DEBUG */
957 }
958 VERIFY(!IFCQ_IS_EMPTY(ifq));
959
960 cl = grp->qfg_slots[grp->qfg_front];
961 VERIFY(cl != NULL && !qempty(&cl->cl_q));
962
963 if (op == CLASSQDQ_POLL)
964 return (qfq_pollq(cl));
965
966 m = qfq_getq(cl);
967 VERIFY(m != NULL); /* qalg must be work conserving */
968 len = m_pktlen(m);
969
970 #if QFQ_DEBUG
971 qif->qif_queued--;
972 #endif /* QFQ_DEBUG */
973
974 IFCQ_DEC_LEN(ifq);
975 if (qempty(&cl->cl_q))
976 cl->cl_period++;
977 PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
978 IFCQ_XMIT_ADD(ifq, 1, len);
979
980 old_V = qif->qif_V;
981 qif->qif_V += (u_int64_t)len * QFQ_IWSUM;
982
983 if (pktsched_verbose > 2) {
984 		log(LOG_DEBUG, "%s: %s qid=%d dequeue m=%p F=0x%llx V=0x%llx\n",
985 if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle,
986 m, cl->cl_F, qif->qif_V);
987 }
988
989 if (qfq_update_class(qif, grp, cl)) {
990 u_int64_t old_F = grp->qfg_F;
991
992 cl = qfq_slot_scan(qif, grp);
993 if (!cl) { /* group gone, remove from ER */
994 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
995 } else {
996 u_int32_t s;
997 u_int64_t roundedS =
998 qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
999
1000 if (grp->qfg_S == roundedS)
1001 goto skip_unblock;
1002
1003 grp->qfg_S = roundedS;
1004 grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
1005
1006 /* remove from ER and put in the new set */
1007 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
1008 s = qfq_calc_state(qif, grp);
1009 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
1010 }
1011 /* we need to unblock even if the group has gone away */
1012 qfq_unblock_groups(qif, grp->qfg_index, old_F);
1013 }
1014
1015 skip_unblock:
1016 qfq_update_eligible(qif, old_V);
1017
1018 #if QFQ_DEBUG
1019 if (!qif->qif_bitmaps[ER] && qif->qif_queued && pktsched_verbose > 1)
1020 qfq_dump_sched(qif, "end dequeue");
1021 #endif /* QFQ_DEBUG */
1022
1023 return (m);
1024 }
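/*
 * Note on the virtual time update above (reading aid, not normative):
 * qif_V grows by len * QFQ_IWSUM per dequeued packet, where QFQ_IWSUM
 * is, by name, the inverse of the weight sum in the same fixed-point
 * scale as the class timestamps, so V advances by roughly len/wsum.
 * A backlogged class of weight w is therefore served at roughly w/wsum
 * of the link rate, which is the bandwidth guarantee QFQ aims for.
 */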
1025
1026 /*
1027 * Assign a reasonable start time for a new flow k in group i.
1028 * Admissible values for hat(F) are multiples of sigma_i
1029  * no greater than V+sigma_i.  Larger values mean that
1030  * we had a wraparound, so we consider the timestamp to be stale.
1031 *
1032 * If F is not stale and F >= V then we set S = F.
1033 * Otherwise we should assign S = V, but this may violate
1034 * the ordering in ER. So, if we have groups in ER, set S to
1035 * the F_j of the first group j which would be blocking us.
1036 * We are guaranteed not to move S backward because
1037 * otherwise our group i would still be blocked.
1038 */
1039 static inline void
1040 qfq_update_start(struct qfq_if *qif, struct qfq_class *cl)
1041 {
1042 pktsched_bitmap_t mask;
1043 u_int64_t limit, roundedF;
1044 int slot_shift = cl->cl_grp->qfg_slot_shift;
1045
1046 roundedF = qfq_round_down(cl->cl_F, slot_shift);
1047 limit = qfq_round_down(qif->qif_V, slot_shift) + (1UL << slot_shift);
1048
1049 if (!qfq_gt(cl->cl_F, qif->qif_V) || qfq_gt(roundedF, limit)) {
1050 /* timestamp was stale */
1051 mask = mask_from(qif->qif_bitmaps[ER], cl->cl_grp->qfg_index);
1052 if (mask) {
1053 struct qfq_group *next = qfq_ffs(qif, mask);
1054 if (qfq_gt(roundedF, next->qfg_F)) {
1055 cl->cl_S = next->qfg_F;
1056 return;
1057 }
1058 }
1059 cl->cl_S = qif->qif_V;
1060 } else { /* timestamp is not stale */
1061 cl->cl_S = cl->cl_F;
1062 }
1063 }
1064
1065 int
1066 qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m,
1067 struct pf_mtag *t)
1068 {
1069 struct ifclassq *ifq = qif->qif_ifq;
1070 struct qfq_group *grp;
1071 u_int64_t roundedS;
1072 int len, ret, s;
1073
1074 IFCQ_LOCK_ASSERT_HELD(ifq);
1075 VERIFY(cl == NULL || cl->cl_qif == qif);
1076
1077 if (cl == NULL) {
1078 cl = qfq_clh_to_clp(qif, t->pftag_qid);
1079 if (cl == NULL) {
1080 cl = qif->qif_default;
1081 if (cl == NULL) {
1082 IFCQ_CONVERT_LOCK(ifq);
1083 m_freem(m);
1084 return (ENOBUFS);
1085 }
1086 }
1087 }
1088
1089 len = m_pktlen(m);
1090
1091 ret = qfq_addq(cl, m, t);
1092 if (ret != 0) {
1093 if (ret == CLASSQEQ_SUCCESS_FC) {
1094 /* packet enqueued, return advisory feedback */
1095 ret = EQFULL;
1096 } else {
1097 VERIFY(ret == CLASSQEQ_DROPPED ||
1098 ret == CLASSQEQ_DROPPED_FC ||
1099 ret == CLASSQEQ_DROPPED_SP);
1100 /* packet has been freed in qfq_addq */
1101 PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
1102 IFCQ_DROP_ADD(ifq, 1, len);
1103 switch (ret) {
1104 case CLASSQEQ_DROPPED:
1105 return (ENOBUFS);
1106 case CLASSQEQ_DROPPED_FC:
1107 return (EQFULL);
1108 case CLASSQEQ_DROPPED_SP:
1109 return (EQSUSPENDED);
1110 }
1111 			/* NOTREACHED */
1112 }
1113 }
1114 IFCQ_INC_LEN(ifq);
1115
1116 #if QFQ_DEBUG
1117 qif->qif_queued++;
1118 #endif /* QFQ_DEBUG */
1119
1120 /* queue was not idle, we're done */
1121 if (qlen(&cl->cl_q) > 1)
1122 goto done;
1123
1124 /* queue was idle */
1125 grp = cl->cl_grp;
1126 qfq_update_start(qif, cl); /* adjust start time */
1127
1128 /* compute new finish time and rounded start */
1129 cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
1130 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
1131
1132 /*
1133 * Insert cl in the correct bucket.
1134 *
1135 * If cl->cl_S >= grp->qfg_S we don't need to adjust the bucket list
1136 * and simply go to the insertion phase. Otherwise grp->qfg_S is
1137 * decreasing, we must make room in the bucket list, and also
1138 * recompute the group state. Finally, if there were no flows
1139 * in this group and nobody was in ER make sure to adjust V.
1140 */
1141 if (grp->qfg_full_slots != 0) {
1142 if (!qfq_gt(grp->qfg_S, cl->cl_S))
1143 goto skip_update;
1144
1145 /* create a slot for this cl->cl_S */
1146 qfq_slot_rotate(qif, grp, roundedS);
1147
1148 /* group was surely ineligible, remove */
1149 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
1150 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
1151 } else if (!qif->qif_bitmaps[ER] && qfq_gt(roundedS, qif->qif_V)) {
1152 qif->qif_V = roundedS;
1153 }
1154
1155 grp->qfg_S = roundedS;
1156 grp->qfg_F =
1157 roundedS + (2ULL << grp->qfg_slot_shift); /* i.e. 2 sigma_i */
1158 s = qfq_calc_state(qif, grp);
1159 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
1160
1161 if (pktsched_verbose > 2) {
1162 log(LOG_DEBUG, "%s: %s qid=%d enqueue m=%p state=%s 0x%x "
1163 "S=0x%llx F=0x%llx V=0x%llx\n", if_name(QFQIF_IFP(qif)),
1164 qfq_style(qif), cl->cl_handle, m, qfq_state2str(s),
1165 qif->qif_bitmaps[s], cl->cl_S, cl->cl_F, qif->qif_V);
1166 }
1167
1168 skip_update:
1169 qfq_slot_insert(qif, grp, cl, roundedS);
1170
1171 done:
1172 /* successfully queued. */
1173 return (ret);
1174 }
1175
1176 static inline void
1177 qfq_slot_remove(struct qfq_if *qif, struct qfq_group *grp,
1178 struct qfq_class *cl)
1179 {
1180 #pragma unused(qif)
1181 struct qfq_class **pprev;
1182 u_int32_t i, offset;
1183 u_int64_t roundedS;
1184
1185 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
1186 offset = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
1187 i = (grp->qfg_front + offset) % qif->qif_maxslots;
1188
1189 pprev = &grp->qfg_slots[i];
1190 while (*pprev && *pprev != cl)
1191 pprev = &(*pprev)->cl_next;
1192
1193 *pprev = cl->cl_next;
1194 if (!grp->qfg_slots[i])
1195 pktsched_bit_clr(offset, &grp->qfg_full_slots);
1196 }
1197
1198 /*
1199 * Called to forcibly destroy a queue.
1200 * If the queue is not in the front bucket, or if it has
1201 * other queues in the front bucket, we can simply remove
1202 * the queue with no other side effects.
1203 * Otherwise we must propagate the event up.
1204 * XXX description to be completed.
1205 */
1206 static void
1207 qfq_deactivate_class(struct qfq_if *qif, struct qfq_class *cl)
1208 {
1209 struct qfq_group *grp = cl->cl_grp;
1210 pktsched_bitmap_t mask;
1211 u_int64_t roundedS;
1212 int s;
1213
1214 if (pktsched_verbose) {
1215 log(LOG_DEBUG, "%s: %s deactivate qid=%d grp=%d "
1216 "full_slots=0x%x front=%d bitmaps={ER=0x%x,EB=0x%x,"
1217 "IR=0x%x,IB=0x%x}\n",
1218 if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
1219 cl->cl_handle, grp->qfg_index, grp->qfg_full_slots,
1220 grp->qfg_front, qif->qif_bitmaps[ER], qif->qif_bitmaps[EB],
1221 qif->qif_bitmaps[IR], qif->qif_bitmaps[IB]);
1222 #if QFQ_DEBUG
1223 if (pktsched_verbose > 1)
1224 qfq_dump_sched(qif, "start deactivate");
1225 #endif /* QFQ_DEBUG */
1226 }
1227
1228 cl->cl_F = cl->cl_S; /* not needed if the class goes away */
1229 qfq_slot_remove(qif, grp, cl);
1230
1231 if (grp->qfg_full_slots == 0) {
1232 /*
1233 * Nothing left in the group, remove from all sets.
1234 * Do ER last because if we were blocking other groups
1235 * we must unblock them.
1236 */
1237 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
1238 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
1239 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
1240
1241 if (pktsched_bit_tst(grp->qfg_index, &qif->qif_bitmaps[ER]) &&
1242 !(qif->qif_bitmaps[ER] & ~((1UL << grp->qfg_index) - 1))) {
1243 mask = qif->qif_bitmaps[ER] &
1244 ((1UL << grp->qfg_index) - 1);
1245 if (mask)
1246 mask = ~((1UL << __fls(mask)) - 1);
1247 else
1248 mask = (pktsched_bitmap_t)~0UL;
1249 qfq_move_groups(qif, mask, EB, ER);
1250 qfq_move_groups(qif, mask, IB, IR);
1251 }
1252 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
1253 } else if (!grp->qfg_slots[grp->qfg_front]) {
1254 cl = qfq_slot_scan(qif, grp);
1255 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
1256 if (grp->qfg_S != roundedS) {
1257 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
1258 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
1259 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
1260 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
1261 grp->qfg_S = roundedS;
1262 grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
1263 s = qfq_calc_state(qif, grp);
1264 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
1265 }
1266 }
1267 qfq_update_eligible(qif, qif->qif_V);
1268
1269 #if QFQ_DEBUG
1270 if (pktsched_verbose > 1)
1271 qfq_dump_sched(qif, "end deactivate");
1272 #endif /* QFQ_DEBUG */
1273 }
1274
1275 static const char *
1276 qfq_state2str(int s)
1277 {
1278 const char *c;
1279
1280 switch (s) {
1281 case ER:
1282 c = "ER";
1283 break;
1284 case IR:
1285 c = "IR";
1286 break;
1287 case EB:
1288 c = "EB";
1289 break;
1290 case IB:
1291 c = "IB";
1292 break;
1293 default:
1294 c = "?";
1295 break;
1296 }
1297 return (c);
1298 }
1299
1300 static inline int
1301 qfq_addq(struct qfq_class *cl, struct mbuf *m, struct pf_mtag *t)
1302 {
1303 struct qfq_if *qif = cl->cl_qif;
1304 struct ifclassq *ifq = qif->qif_ifq;
1305
1306 IFCQ_LOCK_ASSERT_HELD(ifq);
1307
1308 #if CLASSQ_RIO
1309 if (q_is_rio(&cl->cl_q))
1310 return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
1311 else
1312 #endif /* CLASSQ_RIO */
1313 #if CLASSQ_RED
1314 if (q_is_red(&cl->cl_q))
1315 return (red_addq(cl->cl_red, &cl->cl_q, m, t));
1316 else
1317 #endif /* CLASSQ_RED */
1318 #if CLASSQ_BLUE
1319 if (q_is_blue(&cl->cl_q))
1320 return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
1321 else
1322 #endif /* CLASSQ_BLUE */
1323 if (q_is_sfb(&cl->cl_q)) {
1324 if (cl->cl_sfb == NULL) {
1325 struct ifnet *ifp = QFQIF_IFP(qif);
1326
1327 VERIFY(cl->cl_flags & QFCF_LAZY);
1328 cl->cl_flags &= ~QFCF_LAZY;
1329 IFCQ_CONVERT_LOCK(ifq);
1330
1331 cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
1332 qlimit(&cl->cl_q), cl->cl_qflags);
1333 if (cl->cl_sfb == NULL) {
1334 /* fall back to droptail */
1335 qtype(&cl->cl_q) = Q_DROPTAIL;
1336 cl->cl_flags &= ~QFCF_SFB;
1337 cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);
1338
1339 log(LOG_ERR, "%s: %s SFB lazy allocation "
1340 "failed for qid=%d grp=%d, falling back "
1341 "to DROPTAIL\n", if_name(ifp),
1342 qfq_style(qif), cl->cl_handle,
1343 cl->cl_grp->qfg_index);
1344 } else if (qif->qif_throttle != IFNET_THROTTLE_OFF) {
1345 /* if there's pending throttling, set it */
1346 cqrq_throttle_t tr = { 1, qif->qif_throttle };
1347 int err = qfq_throttle(qif, &tr);
1348
1349 if (err == EALREADY)
1350 err = 0;
1351 if (err != 0) {
1352 tr.level = IFNET_THROTTLE_OFF;
1353 (void) qfq_throttle(qif, &tr);
1354 }
1355 }
1356 }
1357 if (cl->cl_sfb != NULL)
1358 return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
1359 } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
1360 IFCQ_CONVERT_LOCK(ifq);
1361 m_freem(m);
1362 return (CLASSQEQ_DROPPED);
1363 }
1364
1365 if (cl->cl_flags & QFCF_CLEARDSCP)
1366 write_dsfield(m, t, 0);
1367
1368 _addq(&cl->cl_q, m);
1369
1370 return (0);
1371 }
1372
1373 static inline struct mbuf *
1374 qfq_getq(struct qfq_class *cl)
1375 {
1376 IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);
1377
1378 #if CLASSQ_RIO
1379 if (q_is_rio(&cl->cl_q))
1380 return (rio_getq(cl->cl_rio, &cl->cl_q));
1381 else
1382 #endif /* CLASSQ_RIO */
1383 #if CLASSQ_RED
1384 if (q_is_red(&cl->cl_q))
1385 return (red_getq(cl->cl_red, &cl->cl_q));
1386 else
1387 #endif /* CLASSQ_RED */
1388 #if CLASSQ_BLUE
1389 if (q_is_blue(&cl->cl_q))
1390 return (blue_getq(cl->cl_blue, &cl->cl_q));
1391 else
1392 #endif /* CLASSQ_BLUE */
1393 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1394 return (sfb_getq(cl->cl_sfb, &cl->cl_q));
1395
1396 return (_getq(&cl->cl_q));
1397 }
1398
1399 static inline struct mbuf *
1400 qfq_pollq(struct qfq_class *cl)
1401 {
1402 IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);
1403
1404 return (qhead(&cl->cl_q));
1405 }
1406
1407 static void
1408 qfq_purgeq(struct qfq_if *qif, struct qfq_class *cl, u_int32_t flow,
1409 u_int32_t *packets, u_int32_t *bytes)
1410 {
1411 struct ifclassq *ifq = qif->qif_ifq;
1412 u_int32_t cnt = 0, len = 0, qlen;
1413
1414 IFCQ_LOCK_ASSERT_HELD(ifq);
1415
1416 if ((qlen = qlen(&cl->cl_q)) == 0)
1417 goto done;
1418
1419 /* become regular mutex before freeing mbufs */
1420 IFCQ_CONVERT_LOCK(ifq);
1421
1422 #if CLASSQ_RIO
1423 if (q_is_rio(&cl->cl_q))
1424 rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
1425 else
1426 #endif /* CLASSQ_RIO */
1427 #if CLASSQ_RED
1428 if (q_is_red(&cl->cl_q))
1429 red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
1430 else
1431 #endif /* CLASSQ_RED */
1432 #if CLASSQ_BLUE
1433 if (q_is_blue(&cl->cl_q))
1434 blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
1435 else
1436 #endif /* CLASSQ_BLUE */
1437 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1438 sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
1439 else
1440 _flushq_flow(&cl->cl_q, flow, &cnt, &len);
1441
1442 if (cnt > 0) {
1443 VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
1444 #if QFQ_DEBUG
1445 VERIFY(qif->qif_queued >= cnt);
1446 qif->qif_queued -= cnt;
1447 #endif /* QFQ_DEBUG */
1448
1449 PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
1450 IFCQ_DROP_ADD(ifq, cnt, len);
1451
1452 VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
1453 IFCQ_LEN(ifq) -= cnt;
1454
1455 if (qempty(&cl->cl_q))
1456 qfq_deactivate_class(qif, cl);
1457
1458 if (pktsched_verbose) {
1459 log(LOG_DEBUG, "%s: %s purge qid=%d weight=%d "
1460 "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
1461 if_name(QFQIF_IFP(qif)),
1462 qfq_style(qif), cl->cl_handle,
1463 (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w), qlen,
1464 qlen(&cl->cl_q), cnt, len, flow);
1465 }
1466 }
1467 done:
1468 if (packets != NULL)
1469 *packets = cnt;
1470 if (bytes != NULL)
1471 *bytes = len;
1472 }
1473
1474 static void
1475 qfq_updateq(struct qfq_if *qif, struct qfq_class *cl, cqev_t ev)
1476 {
1477 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
1478
1479 if (pktsched_verbose) {
1480 log(LOG_DEBUG, "%s: %s update qid=%d weight=%d event=%s\n",
1481 if_name(QFQIF_IFP(qif)), qfq_style(qif),
1482 cl->cl_handle, (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w),
1483 ifclassq_ev2str(ev));
1484 }
1485
1486 #if CLASSQ_RIO
1487 if (q_is_rio(&cl->cl_q))
1488 return (rio_updateq(cl->cl_rio, ev));
1489 #endif /* CLASSQ_RIO */
1490 #if CLASSQ_RED
1491 if (q_is_red(&cl->cl_q))
1492 return (red_updateq(cl->cl_red, ev));
1493 #endif /* CLASSQ_RED */
1494 #if CLASSQ_BLUE
1495 if (q_is_blue(&cl->cl_q))
1496 return (blue_updateq(cl->cl_blue, ev));
1497 #endif /* CLASSQ_BLUE */
1498 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1499 return (sfb_updateq(cl->cl_sfb, ev));
1500 }
1501
1502 int
1503 qfq_get_class_stats(struct qfq_if *qif, u_int32_t qid,
1504 struct qfq_classstats *sp)
1505 {
1506 struct qfq_class *cl;
1507
1508 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
1509
1510 if ((cl = qfq_clh_to_clp(qif, qid)) == NULL)
1511 return (EINVAL);
1512
1513 sp->class_handle = cl->cl_handle;
1514 sp->index = cl->cl_grp->qfg_index;
1515 sp->weight = (QFQ_ONE_FP / cl->cl_inv_w);
1516 sp->lmax = cl->cl_lmax;
1517 sp->qlength = qlen(&cl->cl_q);
1518 sp->qlimit = qlimit(&cl->cl_q);
1519 sp->period = cl->cl_period;
1520 sp->xmitcnt = cl->cl_xmitcnt;
1521 sp->dropcnt = cl->cl_dropcnt;
1522
1523 sp->qtype = qtype(&cl->cl_q);
1524 sp->qstate = qstate(&cl->cl_q);
1525 #if CLASSQ_RED
1526 if (q_is_red(&cl->cl_q))
1527 red_getstats(cl->cl_red, &sp->red[0]);
1528 #endif /* CLASSQ_RED */
1529 #if CLASSQ_RIO
1530 if (q_is_rio(&cl->cl_q))
1531 rio_getstats(cl->cl_rio, &sp->red[0]);
1532 #endif /* CLASSQ_RIO */
1533 #if CLASSQ_BLUE
1534 if (q_is_blue(&cl->cl_q))
1535 blue_getstats(cl->cl_blue, &sp->blue);
1536 #endif /* CLASSQ_BLUE */
1537 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1538 sfb_getstats(cl->cl_sfb, &sp->sfb);
1539
1540 return (0);
1541 }
1542
1543 /* convert a class handle to the corresponding class pointer */
1544 static inline struct qfq_class *
1545 qfq_clh_to_clp(struct qfq_if *qif, u_int32_t chandle)
1546 {
1547 struct qfq_class *cl;
1548 int i;
1549
1550 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
1551
1552 /*
1553 * First, try optimistically the slot matching the lower bits of
1554 * the handle. If it fails, do the linear table search.
1555 */
1556 i = chandle % qif->qif_maxclasses;
1557 if ((cl = qif->qif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
1558 return (cl);
1559 for (i = 0; i < qif->qif_maxclasses; i++)
1560 if ((cl = qif->qif_class_tbl[i]) != NULL &&
1561 cl->cl_handle == chandle)
1562 return (cl);
1563
1564 return (NULL);
1565 }
1566
1567 static const char *
1568 qfq_style(struct qfq_if *qif)
1569 {
1570 return ((qif->qif_flags & QFQIFF_ALTQ) ? "ALTQ_QFQ" : "QFQ");
1571 }
1572
1573 /*
1574 * Generic comparison function, handling wraparound
1575 */
1576 static inline int
1577 qfq_gt(u_int64_t a, u_int64_t b)
1578 {
1579 return ((int64_t)(a - b) > 0);
1580 }
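/*
 * Example (illustration only): qfq_gt(1, 0xffffffffffffffffULL) is true
 * because the unsigned difference, reinterpreted as int64_t, is +2; a
 * timestamp just past the wrap point still compares as "later".
 */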
1581
1582 /*
1583 * Round a precise timestamp to its slotted value
1584 */
1585 static inline u_int64_t
1586 qfq_round_down(u_int64_t ts, u_int32_t shift)
1587 {
1588 return (ts & ~((1ULL << shift) - 1));
1589 }
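/*
 * Example (illustration only): with shift == 10 the timestamp is rounded
 * down to a multiple of 1024, so qfq_round_down(1234, 10) == 1024.
 */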
1590
1591 /*
1592 * Return the pointer to the group with lowest index in the bitmap
1593 */
1594 static inline struct qfq_group *
1595 qfq_ffs(struct qfq_if *qif, pktsched_bitmap_t bitmap)
1596 {
1597 int index = pktsched_ffs(bitmap) - 1; /* zero-based */
1598 VERIFY(index >= 0 && index <= QFQ_MAX_INDEX &&
1599 qif->qif_groups[index] != NULL);
1600 return (qif->qif_groups[index]);
1601 }
1602
1603 /*
1604 * Calculate a flow index, given its weight and maximum packet length.
1605 * index = log_2(maxlen/weight) but we need to apply the scaling.
1606 * This is used only once at flow creation.
1607 */
1608 static int
1609 qfq_calc_index(struct qfq_class *cl, u_int32_t inv_w, u_int32_t maxlen)
1610 {
1611 	u_int64_t slot_size = (u_int64_t)maxlen * inv_w;
1612 pktsched_bitmap_t size_map;
1613 int index = 0;
1614
1615 size_map = (pktsched_bitmap_t)(slot_size >> QFQ_MIN_SLOT_SHIFT);
1616 if (!size_map)
1617 goto out;
1618
1619 index = __fls(size_map) + 1; /* basically a log_2() */
1620 index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
1621
1622 if (index < 0)
1623 index = 0;
1624 out:
1625 if (pktsched_verbose) {
1626 log(LOG_DEBUG, "%s: %s qid=%d grp=%d W=%u, L=%u, I=%d\n",
1627 if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
1628 cl->cl_handle, index, (u_int32_t)(QFQ_ONE_FP/inv_w),
1629 maxlen, index);
1630 }
1631 return (index);
1632 }
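/*
 * Worked example (illustration only): slot_size = maxlen * inv_w is the
 * class's lmax/weight ratio in fixed point.  If size_map, i.e. slot_size
 * shifted down by QFQ_MIN_SLOT_SHIFT, is 0x30, then __fls() gives 5 and
 * the index is 6; if slot_size is exactly that power of two (size_map ==
 * 0x20 with no low bits set), the correction term drops the index back
 * to 5.  A class thus lands in the group whose slot size is the smallest
 * power of two that is at least lmax/weight in the same fixed-point
 * units.
 */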
1633
1634 #if QFQ_DEBUG
1635 static void
1636 qfq_dump_groups(struct qfq_if *qif, u_int32_t mask)
1637 {
1638 int i, j;
1639
1640 for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
1641 struct qfq_group *g = qif->qif_groups[i];
1642
1643 if (0 == (mask & (1 << i)))
1644 continue;
1645 if (g == NULL)
1646 continue;
1647
1648 log(LOG_DEBUG, "%s: %s [%2d] full_slots 0x%x\n",
1649 if_name(QFQIF_IFP(qif)), qfq_style(qif), i,
1650 g->qfg_full_slots);
1651 log(LOG_DEBUG, "%s: %s S 0x%20llx F 0x%llx %c\n",
1652 if_name(QFQIF_IFP(qif)), qfq_style(qif),
1653 g->qfg_S, g->qfg_F, mask & (1 << i) ? '1' : '0');
1654
1655 for (j = 0; j < qif->qif_maxslots; j++) {
1656 if (g->qfg_slots[j]) {
1657 log(LOG_DEBUG, "%s: %s bucket %d %p "
1658 "qid %d\n", if_name(QFQIF_IFP(qif)),
1659 qfq_style(qif), j, g->qfg_slots[j],
1660 g->qfg_slots[j]->cl_handle);
1661 }
1662 }
1663 }
1664 }
1665
1666 static void
1667 qfq_dump_sched(struct qfq_if *qif, const char *msg)
1668 {
1669 log(LOG_DEBUG, "%s: %s --- in %s: ---\n",
1670 if_name(QFQIF_IFP(qif)), qfq_style(qif), msg);
1671 log(LOG_DEBUG, "%s: %s emptygrp %d queued %d V 0x%llx\n",
1672 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_emptygrp,
1673 qif->qif_queued, qif->qif_V);
1674 log(LOG_DEBUG, "%s: %s ER 0x%08x\n",
1675 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[ER]);
1676 log(LOG_DEBUG, "%s: %s EB 0x%08x\n",
1677 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[EB]);
1678 log(LOG_DEBUG, "%s: %s IR 0x%08x\n",
1679 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IR]);
1680 log(LOG_DEBUG, "%s: %s IB 0x%08x\n",
1681 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IB]);
1682 qfq_dump_groups(qif, 0xffffffff);
1683 }
1684 #endif /* QFQ_DEBUG */
1685
1686 /*
1687 * qfq_enqueue_ifclassq is an enqueue function to be registered to
1688 * (*ifcq_enqueue) in struct ifclassq.
1689 */
1690 static int
1691 qfq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
1692 {
1693 u_int32_t i;
1694
1695 IFCQ_LOCK_ASSERT_HELD(ifq);
1696
1697 if (!(m->m_flags & M_PKTHDR)) {
1698 /* should not happen */
1699 log(LOG_ERR, "%s: packet does not have pkthdr\n",
1700 if_name(ifq->ifcq_ifp));
1701 IFCQ_CONVERT_LOCK(ifq);
1702 m_freem(m);
1703 return (ENOBUFS);
1704 }
1705
1706 i = MBUF_SCIDX(mbuf_get_service_class(m));
1707 VERIFY((u_int32_t)i < IFCQ_SC_MAX);
1708
1709 return (qfq_enqueue(ifq->ifcq_disc,
1710 ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
1711 }
1712
1713 /*
1714 * qfq_dequeue_ifclassq is a dequeue function to be registered to
1715  * (*ifcq_dequeue) in struct ifclassq.
1716 *
1717 * note: CLASSQDQ_POLL returns the next packet without removing the packet
1718 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
1719 * CLASSQDQ_REMOVE must return the same packet if called immediately
1720 * after CLASSQDQ_POLL.
1721 */
1722 static struct mbuf *
1723 qfq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op)
1724 {
1725 return (qfq_dequeue(ifq->ifcq_disc, op));
1726 }
1727
1728 static int
1729 qfq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
1730 {
1731 struct qfq_if *qif = (struct qfq_if *)ifq->ifcq_disc;
1732 int err = 0;
1733
1734 IFCQ_LOCK_ASSERT_HELD(ifq);
1735
1736 switch (req) {
1737 case CLASSQRQ_PURGE:
1738 qfq_purge(qif);
1739 break;
1740
1741 case CLASSQRQ_PURGE_SC:
1742 qfq_purge_sc(qif, (cqrq_purge_sc_t *)arg);
1743 break;
1744
1745 case CLASSQRQ_EVENT:
1746 qfq_event(qif, (cqev_t)arg);
1747 break;
1748
1749 case CLASSQRQ_THROTTLE:
1750 err = qfq_throttle(qif, (cqrq_throttle_t *)arg);
1751 break;
1752 }
1753 return (err);
1754 }
1755
1756 int
1757 qfq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
1758 {
1759 struct ifnet *ifp = ifq->ifcq_ifp;
1760 struct qfq_class *cl0, *cl1, *cl2, *cl3, *cl4;
1761 struct qfq_class *cl5, *cl6, *cl7, *cl8, *cl9;
1762 struct qfq_if *qif;
1763 u_int32_t maxlen = 0, qflags = 0;
1764 int err = 0;
1765
1766 IFCQ_LOCK_ASSERT_HELD(ifq);
1767 VERIFY(ifq->ifcq_disc == NULL);
1768 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
1769
1770 if (flags & PKTSCHEDF_QALG_RED)
1771 qflags |= QFCF_RED;
1772 if (flags & PKTSCHEDF_QALG_RIO)
1773 qflags |= QFCF_RIO;
1774 if (flags & PKTSCHEDF_QALG_BLUE)
1775 qflags |= QFCF_BLUE;
1776 if (flags & PKTSCHEDF_QALG_SFB)
1777 qflags |= QFCF_SFB;
1778 if (flags & PKTSCHEDF_QALG_ECN)
1779 qflags |= QFCF_ECN;
1780 if (flags & PKTSCHEDF_QALG_FLOWCTL)
1781 qflags |= QFCF_FLOWCTL;
1782
1783 qif = qfq_alloc(ifp, M_WAITOK, FALSE);
1784 if (qif == NULL)
1785 return (ENOMEM);
1786
1787 if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
1788 maxlen = if_sndq_maxlen;
1789
1790 if ((err = qfq_add_queue(qif, maxlen, 300, 1200,
1791 qflags | QFCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0)
1792 goto cleanup;
1793
1794 if ((err = qfq_add_queue(qif, maxlen, 600, 1400,
1795 qflags | QFCF_LAZY, SCIDX_BK, &cl1)) != 0)
1796 goto cleanup;
1797
1798 if ((err = qfq_add_queue(qif, maxlen, 2400, 600,
1799 qflags | QFCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0)
1800 goto cleanup;
1801
1802 if ((err = qfq_add_queue(qif, maxlen, 2700, 600,
1803 qflags | QFCF_LAZY, SCIDX_RD, &cl3)) != 0)
1804 goto cleanup;
1805
1806 if ((err = qfq_add_queue(qif, maxlen, 3000, 400,
1807 qflags | QFCF_LAZY, SCIDX_OAM, &cl4)) != 0)
1808 goto cleanup;
1809
1810 if ((err = qfq_add_queue(qif, maxlen, 8000, 1000,
1811 qflags | QFCF_LAZY, SCIDX_AV, &cl5)) != 0)
1812 goto cleanup;
1813
1814 if ((err = qfq_add_queue(qif, maxlen, 15000, 1200,
1815 qflags | QFCF_LAZY, SCIDX_RV, &cl6)) != 0)
1816 goto cleanup;
1817
1818 if ((err = qfq_add_queue(qif, maxlen, 20000, 1400,
1819 qflags | QFCF_LAZY, SCIDX_VI, &cl7)) != 0)
1820 goto cleanup;
1821
1822 if ((err = qfq_add_queue(qif, maxlen, 23000, 200,
1823 qflags | QFCF_LAZY, SCIDX_VO, &cl8)) != 0)
1824 goto cleanup;
1825
1826 if ((err = qfq_add_queue(qif, maxlen, 25000, 200,
1827 qflags, SCIDX_CTL, &cl9)) != 0)
1828 goto cleanup;
1829
1830 err = ifclassq_attach(ifq, PKTSCHEDT_QFQ, qif,
1831 qfq_enqueue_ifclassq, qfq_dequeue_ifclassq, NULL,
1832 qfq_request_ifclassq);
1833
1834 /* cache these for faster lookup */
1835 if (err == 0) {
1836 ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
1837 ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;
1838
1839 ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
1840 ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;
1841
1842 ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
1843 ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;
1844
1845 ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
1846 ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;
1847
1848 ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
1849 ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;
1850
1851 ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
1852 ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;
1853
1854 ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
1855 ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;
1856
1857 ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
1858 ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;
1859
1860 ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
1861 ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;
1862
1863 ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
1864 ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
1865 }
1866
1867 cleanup:
1868 if (err != 0)
1869 (void) qfq_destroy_locked(qif);
1870
1871 return (err);
1872 }
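/*
 * Reading aid (arithmetic only): the ten weights configured above --
 * 300, 600, 2400, 2700, 3000, 8000, 15000, 20000, 23000 and 25000 --
 * sum to 100000, so under saturation each service class receives its
 * weight as a share per 100000: e.g. SCIDX_VO gets about 23% of the
 * link and SCIDX_BK_SYS about 0.3%.
 */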
1873
1874 int
1875 qfq_teardown_ifclassq(struct ifclassq *ifq)
1876 {
1877 struct qfq_if *qif = ifq->ifcq_disc;
1878 int i;
1879
1880 IFCQ_LOCK_ASSERT_HELD(ifq);
1881 VERIFY(qif != NULL && ifq->ifcq_type == PKTSCHEDT_QFQ);
1882
1883 (void) qfq_destroy_locked(qif);
1884
1885 ifq->ifcq_disc = NULL;
1886 for (i = 0; i < IFCQ_SC_MAX; i++) {
1887 ifq->ifcq_disc_slots[i].qid = 0;
1888 ifq->ifcq_disc_slots[i].cl = NULL;
1889 }
1890
1891 return (ifclassq_detach(ifq));
1892 }
1893
1894 int
1895 qfq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
1896 struct if_ifclassq_stats *ifqs)
1897 {
1898 struct qfq_if *qif = ifq->ifcq_disc;
1899
1900 IFCQ_LOCK_ASSERT_HELD(ifq);
1901 VERIFY(ifq->ifcq_type == PKTSCHEDT_QFQ);
1902
1903 if (slot >= IFCQ_SC_MAX)
1904 return (EINVAL);
1905
1906 return (qfq_get_class_stats(qif, ifq->ifcq_disc_slots[slot].qid,
1907 &ifqs->ifqs_qfq_stats));
1908 }
1909
1910 static int
1911 qfq_throttle(struct qfq_if *qif, cqrq_throttle_t *tr)
1912 {
1913 struct ifclassq *ifq = qif->qif_ifq;
1914 struct qfq_class *cl;
1915 int err;
1916
1917 IFCQ_LOCK_ASSERT_HELD(ifq);
1918 VERIFY(!(qif->qif_flags & QFQIFF_ALTQ));
1919
1920 if (!tr->set) {
1921 tr->level = qif->qif_throttle;
1922 return (0);
1923 }
1924
1925 if (tr->level == qif->qif_throttle)
1926 return (EALREADY);
1927
1928 /* Current throttling levels only involve BK_SYS class */
1929 cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;
1930
1931 switch (tr->level) {
1932 case IFNET_THROTTLE_OFF:
1933 err = qfq_resumeq(qif, cl);
1934 break;
1935
1936 case IFNET_THROTTLE_OPPORTUNISTIC:
1937 err = qfq_suspendq(qif, cl);
1938 break;
1939
1940 default:
1941 VERIFY(0);
1942 /* NOTREACHED */
1943 }
1944
1945 if (err == 0 || err == ENXIO) {
1946 if (pktsched_verbose) {
1947 log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
1948 if_name(QFQIF_IFP(qif)), qfq_style(qif),
1949 (err == 0) ? "" : "lazy ", qif->qif_throttle,
1950 tr->level);
1951 }
1952 qif->qif_throttle = tr->level;
1953 if (err != 0)
1954 err = 0;
1955 else
1956 qfq_purgeq(qif, cl, 0, NULL, NULL);
1957 } else {
1958 log(LOG_ERR, "%s: %s unable to set throttling level "
1959 "%d->%d [error=%d]\n", if_name(QFQIF_IFP(qif)),
1960 qfq_style(qif), qif->qif_throttle, tr->level, err);
1961 }
1962
1963 return (err);
1964 }
1965
1966 static int
1967 qfq_resumeq(struct qfq_if *qif, struct qfq_class *cl)
1968 {
1969 struct ifclassq *ifq = qif->qif_ifq;
1970 int err = 0;
1971
1972 IFCQ_LOCK_ASSERT_HELD(ifq);
1973
1974 #if CLASSQ_RIO
1975 if (q_is_rio(&cl->cl_q))
1976 err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
1977 else
1978 #endif /* CLASSQ_RIO */
1979 #if CLASSQ_RED
1980 if (q_is_red(&cl->cl_q))
1981 err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
1982 else
1983 #endif /* CLASSQ_RED */
1984 #if CLASSQ_BLUE
1985 if (q_is_blue(&cl->cl_q))
1986 err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
1987 else
1988 #endif /* CLASSQ_BLUE */
1989 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1990 err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);
1991
1992 if (err == 0)
1993 qstate(&cl->cl_q) = QS_RUNNING;
1994
1995 return (err);
1996 }
1997
1998 static int
1999 qfq_suspendq(struct qfq_if *qif, struct qfq_class *cl)
2000 {
2001 struct ifclassq *ifq = qif->qif_ifq;
2002 int err = 0;
2003
2004 IFCQ_LOCK_ASSERT_HELD(ifq);
2005
2006 #if CLASSQ_RIO
2007 if (q_is_rio(&cl->cl_q))
2008 err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
2009 else
2010 #endif /* CLASSQ_RIO */
2011 #if CLASSQ_RED
2012 if (q_is_red(&cl->cl_q))
2013 err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
2014 else
2015 #endif /* CLASSQ_RED */
2016 #if CLASSQ_BLUE
2017 if (q_is_blue(&cl->cl_q))
2018 err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
2019 else
2020 #endif /* CLASSQ_BLUE */
2021 if (q_is_sfb(&cl->cl_q)) {
2022 if (cl->cl_sfb != NULL) {
2023 err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
2024 } else {
2025 VERIFY(cl->cl_flags & QFCF_LAZY);
2026 err = ENXIO; /* delayed throttling */
2027 }
2028 }
2029
2030 if (err == 0 || err == ENXIO)
2031 qstate(&cl->cl_q) = QS_SUSPENDED;
2032
2033 return (err);
2034 }