apple/xnu: bsd/net/pktsched/pktsched_qfq.c (blob bc7cc221524a004b24af1fff42887119c7ed202e)
1 /*
2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
31 * All rights reserved
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 */
54
55 /*
56 * Quick Fair Queueing is described in
57 * "QFQ: Efficient Packet Scheduling with Tight Bandwidth Distribution
58 * Guarantees" by Fabio Checconi, Paolo Valente, and Luigi Rizzo.
59 *
60 * This code is ported from the dummynet(4) QFQ implementation.
61 * See also http://info.iet.unipi.it/~luigi/qfq/
62 */
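/*
 * Implementation notes, summarizing the structures used throughout this
 * file:
 *
 * Each class (one per queue created via qfq_add_queue()) has a weight and
 * a maximum packet length (lmax).  Classes with similar lmax/weight ratios
 * are aggregated into groups (qif_groups[]); the group index is computed
 * by qfq_calc_index().
 *
 * The scheduler keeps a global virtual time V (qif_V), and every class and
 * group carries approximated start (S) and finish (F) timestamps in the
 * same virtual-time units.  Groups live in one of four sets tracked by the
 * bitmaps qif_bitmaps[ER/IR/EB/IB]; in the terminology of the QFQ paper
 * these combine Eligible/Ineligible with Ready/Blocked, where "eligible"
 * means the group's S does not exceed V and "blocked" means another
 * eligible group must be served first (see qfq_calc_state()).
 *
 * Within a group, backlogged classes are kept in a circular array of
 * buckets (qfg_slots[]) indexed by rounded start time, with qfg_full_slots
 * serving as a bitmap of occupied buckets.
 */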
63
64 #include <sys/cdefs.h>
65 #include <sys/param.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/systm.h>
69 #include <sys/errno.h>
70 #include <sys/kernel.h>
71 #include <sys/syslog.h>
72
73 #include <kern/zalloc.h>
74
75 #include <net/if.h>
76 #include <net/net_osdep.h>
77
78 #include <net/pktsched/pktsched_qfq.h>
79 #include <netinet/in.h>
80
81 /*
82 * function prototypes
83 */
84 static int qfq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
85 static struct mbuf *qfq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
86 static int qfq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
87 static int qfq_clear_interface(struct qfq_if *);
88 static struct qfq_class *qfq_class_create(struct qfq_if *, u_int32_t,
89 u_int32_t, u_int32_t, u_int32_t, u_int32_t);
90 static int qfq_class_destroy(struct qfq_if *, struct qfq_class *);
91 static int qfq_destroy_locked(struct qfq_if *);
92 static inline int qfq_addq(struct qfq_class *, struct mbuf *, struct pf_mtag *);
93 static inline struct mbuf *qfq_getq(struct qfq_class *);
94 static inline struct mbuf *qfq_pollq(struct qfq_class *);
95 static void qfq_purgeq(struct qfq_if *, struct qfq_class *, u_int32_t,
96 u_int32_t *, u_int32_t *);
97 static void qfq_purge_sc(struct qfq_if *, cqrq_purge_sc_t *);
98 static void qfq_updateq(struct qfq_if *, struct qfq_class *, cqev_t);
99 static int qfq_throttle(struct qfq_if *, cqrq_throttle_t *);
100 static int qfq_resumeq(struct qfq_if *, struct qfq_class *);
101 static int qfq_suspendq(struct qfq_if *, struct qfq_class *);
102 static int qfq_stat_sc(struct qfq_if *, cqrq_stat_sc_t *);
103 static inline struct qfq_class *qfq_clh_to_clp(struct qfq_if *, u_int32_t);
104 static const char *qfq_style(struct qfq_if *);
105
106 static inline int qfq_gt(u_int64_t, u_int64_t);
107 static inline u_int64_t qfq_round_down(u_int64_t, u_int32_t);
108 static inline struct qfq_group *qfq_ffs(struct qfq_if *, pktsched_bitmap_t);
109 static int qfq_calc_index(struct qfq_class *, u_int32_t, u_int32_t);
110 static inline pktsched_bitmap_t mask_from(pktsched_bitmap_t, int);
111 static inline u_int32_t qfq_calc_state(struct qfq_if *, struct qfq_group *);
112 static inline void qfq_move_groups(struct qfq_if *, pktsched_bitmap_t,
113 int, int);
114 static inline void qfq_unblock_groups(struct qfq_if *, int, u_int64_t);
115 static inline void qfq_make_eligible(struct qfq_if *, u_int64_t);
116 static inline void qfq_slot_insert(struct qfq_if *, struct qfq_group *,
117 struct qfq_class *, u_int64_t);
118 static inline void qfq_front_slot_remove(struct qfq_group *);
119 static inline struct qfq_class *qfq_slot_scan(struct qfq_if *,
120 struct qfq_group *);
121 static inline void qfq_slot_rotate(struct qfq_if *, struct qfq_group *,
122 u_int64_t);
123 static inline void qfq_update_eligible(struct qfq_if *, u_int64_t);
124 static inline int qfq_update_class(struct qfq_if *, struct qfq_group *,
125 struct qfq_class *);
126 static inline void qfq_update_start(struct qfq_if *, struct qfq_class *);
127 static inline void qfq_slot_remove(struct qfq_if *, struct qfq_group *,
128 struct qfq_class *);
129 static void qfq_deactivate_class(struct qfq_if *, struct qfq_class *);
130 static const char *qfq_state2str(int);
131 #if QFQ_DEBUG
132 static void qfq_dump_groups(struct qfq_if *, u_int32_t);
133 static void qfq_dump_sched(struct qfq_if *, const char *);
134 #endif /* QFQ_DEBUG */
135
136 #define QFQ_ZONE_MAX 32 /* maximum elements in zone */
137 #define QFQ_ZONE_NAME "pktsched_qfq" /* zone name */
138
139 static unsigned int qfq_size; /* size of zone element */
140 static struct zone *qfq_zone; /* zone for qfq */
141
142 #define QFQ_CL_ZONE_MAX 32 /* maximum elements in zone */
143 #define QFQ_CL_ZONE_NAME "pktsched_qfq_cl" /* zone name */
144
145 static unsigned int qfq_cl_size; /* size of zone element */
146 static struct zone *qfq_cl_zone; /* zone for qfq_class */
147
148 /*
149 * Maximum number of consecutive slots occupied by backlogged classes
150 * inside a group. This is approx lmax/lmin + 5. Used when ALTQ is
151 * available.
152 *
153 * XXX check because it poses constraints on MAX_INDEX
154 */
155 #define QFQ_MAX_SLOTS 32 /* default when ALTQ is available */
156
157 void
158 qfq_init(void)
159 {
160 qfq_size = sizeof (struct qfq_if);
161 qfq_zone = zinit(qfq_size, QFQ_ZONE_MAX * qfq_size,
162 0, QFQ_ZONE_NAME);
163 if (qfq_zone == NULL) {
164 panic("%s: failed allocating %s", __func__, QFQ_ZONE_NAME);
165 /* NOTREACHED */
166 }
167 zone_change(qfq_zone, Z_EXPAND, TRUE);
168 zone_change(qfq_zone, Z_CALLERACCT, TRUE);
169
170 qfq_cl_size = sizeof (struct qfq_class);
171 qfq_cl_zone = zinit(qfq_cl_size, QFQ_CL_ZONE_MAX * qfq_cl_size,
172 0, QFQ_CL_ZONE_NAME);
173 if (qfq_cl_zone == NULL) {
174 panic("%s: failed allocating %s", __func__, QFQ_CL_ZONE_NAME);
175 /* NOTREACHED */
176 }
177 zone_change(qfq_cl_zone, Z_EXPAND, TRUE);
178 zone_change(qfq_cl_zone, Z_CALLERACCT, TRUE);
179 }
180
181 struct qfq_if *
182 qfq_alloc(struct ifnet *ifp, int how, boolean_t altq)
183 {
184 struct qfq_if *qif;
185
186 qif = (how == M_WAITOK) ? zalloc(qfq_zone) : zalloc_noblock(qfq_zone);
187 if (qif == NULL)
188 return (NULL);
189
190 bzero(qif, qfq_size);
191 qif->qif_ifq = &ifp->if_snd;
192 if (altq) {
193 qif->qif_maxclasses = QFQ_MAX_CLASSES;
194 qif->qif_maxslots = QFQ_MAX_SLOTS;
195 qif->qif_flags |= QFQIFF_ALTQ;
196 } else {
197 qif->qif_maxclasses = IFCQ_SC_MAX;
198 /*
199 * TODO: adi@apple.com
200 *
201 * Ideally I would like to have the following
202 * but QFQ needs further modifications.
203 *
204 * qif->qif_maxslots = IFCQ_SC_MAX;
205 */
206 qif->qif_maxslots = QFQ_MAX_SLOTS;
207 }
208
209 if ((qif->qif_class_tbl = _MALLOC(sizeof (struct qfq_class *) *
210 qif->qif_maxclasses, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
211 log(LOG_ERR, "%s: %s unable to allocate class table array\n",
212 if_name(ifp), qfq_style(qif));
213 goto error;
214 }
215
216 if ((qif->qif_groups = _MALLOC(sizeof (struct qfq_group *) *
217 (QFQ_MAX_INDEX + 1), M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
218 log(LOG_ERR, "%s: %s unable to allocate group array\n",
219 if_name(ifp), qfq_style(qif));
220 goto error;
221 }
222
223 if (pktsched_verbose) {
224 log(LOG_DEBUG, "%s: %s scheduler allocated\n",
225 if_name(ifp), qfq_style(qif));
226 }
227
228 return (qif);
229
230 error:
231 if (qif->qif_class_tbl != NULL) {
232 _FREE(qif->qif_class_tbl, M_DEVBUF);
233 qif->qif_class_tbl = NULL;
234 }
235 if (qif->qif_groups != NULL) {
236 _FREE(qif->qif_groups, M_DEVBUF);
237 qif->qif_groups = NULL;
238 }
239 zfree(qfq_zone, qif);
240
241 return (NULL);
242 }
243
244 int
245 qfq_destroy(struct qfq_if *qif)
246 {
247 struct ifclassq *ifq = qif->qif_ifq;
248 int err;
249
250 IFCQ_LOCK(ifq);
251 err = qfq_destroy_locked(qif);
252 IFCQ_UNLOCK(ifq);
253
254 return (err);
255 }
256
257 static int
258 qfq_destroy_locked(struct qfq_if *qif)
259 {
260 int i;
261
262 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
263
264 (void) qfq_clear_interface(qif);
265
266 VERIFY(qif->qif_class_tbl != NULL);
267 _FREE(qif->qif_class_tbl, M_DEVBUF);
268 qif->qif_class_tbl = NULL;
269
270 VERIFY(qif->qif_groups != NULL);
271 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
272 struct qfq_group *grp = qif->qif_groups[i];
273
274 if (grp != NULL) {
275 VERIFY(grp->qfg_slots != NULL);
276 _FREE(grp->qfg_slots, M_DEVBUF);
277 grp->qfg_slots = NULL;
278 _FREE(grp, M_DEVBUF);
279 qif->qif_groups[i] = NULL;
280 }
281 }
282 _FREE(qif->qif_groups, M_DEVBUF);
283 qif->qif_groups = NULL;
284
285 if (pktsched_verbose) {
286 log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
287 if_name(QFQIF_IFP(qif)), qfq_style(qif));
288 }
289
290 zfree(qfq_zone, qif);
291
292 return (0);
293 }
294
295 /*
296 * bring the interface back to the initial state by discarding
297 * all the filters and classes.
298 */
299 static int
300 qfq_clear_interface(struct qfq_if *qif)
301 {
302 struct qfq_class *cl;
303 int i;
304
305 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
306
307 /* clear out the classes */
308 for (i = 0; i < qif->qif_maxclasses; i++)
309 if ((cl = qif->qif_class_tbl[i]) != NULL)
310 qfq_class_destroy(qif, cl);
311
312 return (0);
313 }
314
315 /* discard all the queued packets on the interface */
316 void
317 qfq_purge(struct qfq_if *qif)
318 {
319 struct qfq_class *cl;
320 int i;
321
322 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
323
324 for (i = 0; i < qif->qif_maxclasses; i++) {
325 if ((cl = qif->qif_class_tbl[i]) != NULL)
326 qfq_purgeq(qif, cl, 0, NULL, NULL);
327 }
328 #if !PF_ALTQ
329 /*
330 * This assertion is safe to be made only when PF_ALTQ is not
331 * configured; otherwise, IFCQ_LEN represents the sum of the
332 * packets managed by ifcq_disc and altq_disc instances, which
333 * is possible when transitioning between the two.
334 */
335 VERIFY(IFCQ_LEN(qif->qif_ifq) == 0);
336 #endif /* !PF_ALTQ */
337 }
338
339 static void
340 qfq_purge_sc(struct qfq_if *qif, cqrq_purge_sc_t *pr)
341 {
342 struct ifclassq *ifq = qif->qif_ifq;
343 u_int32_t i;
344
345 IFCQ_LOCK_ASSERT_HELD(ifq);
346
347 VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
348 VERIFY(pr->flow != 0);
349
350 if (pr->sc != MBUF_SC_UNSPEC) {
351 i = MBUF_SCIDX(pr->sc);
352 VERIFY(i < IFCQ_SC_MAX);
353
354 qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
355 pr->flow, &pr->packets, &pr->bytes);
356 } else {
357 u_int32_t cnt, len;
358
359 pr->packets = 0;
360 pr->bytes = 0;
361
362 for (i = 0; i < IFCQ_SC_MAX; i++) {
363 qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
364 pr->flow, &cnt, &len);
365 pr->packets += cnt;
366 pr->bytes += len;
367 }
368 }
369 }
370
371 void
372 qfq_event(struct qfq_if *qif, cqev_t ev)
373 {
374 struct qfq_class *cl;
375 int i;
376
377 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
378
379 for (i = 0; i < qif->qif_maxclasses; i++)
380 if ((cl = qif->qif_class_tbl[i]) != NULL)
381 qfq_updateq(qif, cl, ev);
382 }
383
384 int
385 qfq_add_queue(struct qfq_if *qif, u_int32_t qlimit, u_int32_t weight,
386 u_int32_t maxsz, u_int32_t flags, u_int32_t qid, struct qfq_class **clp)
387 {
388 struct qfq_class *cl;
389 u_int32_t w;
390
391 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
392
393 if (qfq_clh_to_clp(qif, qid) != NULL)
394 return (EBUSY);
395
396 /* check parameters */
397 if (weight == 0 || weight > QFQ_MAX_WEIGHT)
398 return (EINVAL);
399
400 w = (QFQ_ONE_FP / (QFQ_ONE_FP / weight));
401 if (qif->qif_wsum + w > QFQ_MAX_WSUM)
402 return (EINVAL);
403
404 if (maxsz == 0 || maxsz > (1 << QFQ_MTU_SHIFT))
405 return (EINVAL);
406
407 cl = qfq_class_create(qif, weight, qlimit, flags, maxsz, qid);
408 if (cl == NULL)
409 return (ENOMEM);
410
411 if (clp != NULL)
412 *clp = cl;
413
414 return (0);
415 }
416
417 static struct qfq_class *
418 qfq_class_create(struct qfq_if *qif, u_int32_t weight, u_int32_t qlimit,
419 u_int32_t flags, u_int32_t maxsz, u_int32_t qid)
420 {
421 struct ifnet *ifp;
422 struct ifclassq *ifq;
423 struct qfq_group *grp;
424 struct qfq_class *cl;
425 u_int32_t w; /* approximated weight */
426 int i;
427
428 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
429
430 /* Sanitize flags unless internally configured */
431 if (qif->qif_flags & QFQIFF_ALTQ)
432 flags &= QFCF_USERFLAGS;
433
434 if (qif->qif_classes >= qif->qif_maxclasses) {
435 log(LOG_ERR, "%s: %s out of classes! (max %d)\n",
436 if_name(QFQIF_IFP(qif)), qfq_style(qif),
437 qif->qif_maxclasses);
438 return (NULL);
439 }
440
441 #if !CLASSQ_RED
442 if (flags & QFCF_RED) {
443 log(LOG_ERR, "%s: %s RED not available!\n",
444 if_name(QFQIF_IFP(qif)), qfq_style(qif));
445 return (NULL);
446 }
447 #endif /* !CLASSQ_RED */
448
449 #if !CLASSQ_RIO
450 if (flags & QFCF_RIO) {
451 log(LOG_ERR, "%s: %s RIO not available!\n",
452 if_name(QFQIF_IFP(qif)), qfq_style(qif));
453 return (NULL);
454 }
455 #endif /* !CLASSQ_RIO */
456
457 #if !CLASSQ_BLUE
458 if (flags & QFCF_BLUE) {
459 log(LOG_ERR, "%s: %s BLUE not available!\n",
460 if_name(QFQIF_IFP(qif)), qfq_style(qif));
461 return (NULL);
462 }
463 #endif /* !CLASSQ_BLUE */
464
465 /* These are mutually exclusive */
466 if ((flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) &&
467 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RED &&
468 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RIO &&
469 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_BLUE &&
470 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_SFB) {
471 log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
472 if_name(QFQIF_IFP(qif)), qfq_style(qif));
473 return (NULL);
474 }
475
476 ifq = qif->qif_ifq;
477 ifp = QFQIF_IFP(qif);
478
479 cl = zalloc(qfq_cl_zone);
480 if (cl == NULL)
481 return (NULL);
482
483 bzero(cl, qfq_cl_size);
484
485 if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
486 qlimit = IFCQ_MAXLEN(ifq);
487 if (qlimit == 0)
488 qlimit = DEFAULT_QLIMIT; /* use default */
489 }
490 _qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
491 cl->cl_qif = qif;
492 cl->cl_flags = flags;
493 cl->cl_handle = qid;
494
495 /*
496 * Find a free slot in the class table. If the slot matching
497 * the lower bits of qid is free, use this slot. Otherwise,
498 * use the first free slot.
499 */
500 i = qid % qif->qif_maxclasses;
501 if (qif->qif_class_tbl[i] == NULL) {
502 qif->qif_class_tbl[i] = cl;
503 } else {
504 for (i = 0; i < qif->qif_maxclasses; i++) {
505 if (qif->qif_class_tbl[i] == NULL) {
506 qif->qif_class_tbl[i] = cl;
507 break;
508 }
509 }
510 if (i == qif->qif_maxclasses) {
511 zfree(qfq_cl_zone, cl);
512 return (NULL);
513 }
514 }
515
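	/*
	 * Weights are stored as fixed-point inverses: cl_inv_w is
	 * QFQ_ONE_FP / w, so a packet of len bytes advances the class
	 * finish time by len * cl_inv_w, i.e. len/w scaled by QFQ_ONE_FP
	 * (see qfq_update_class() and qfq_enqueue()).  Recomputing w from
	 * cl_inv_w below yields the weight actually in effect after
	 * rounding, which is what gets charged against qif_wsum.
	 */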
516 w = weight;
517 VERIFY(w > 0 && w <= QFQ_MAX_WEIGHT);
518 cl->cl_lmax = maxsz;
519 cl->cl_inv_w = (QFQ_ONE_FP / w);
520 w = (QFQ_ONE_FP / cl->cl_inv_w);
521 VERIFY(qif->qif_wsum + w <= QFQ_MAX_WSUM);
522
523 i = qfq_calc_index(cl, cl->cl_inv_w, cl->cl_lmax);
524 VERIFY(i <= QFQ_MAX_INDEX);
525 grp = qif->qif_groups[i];
526 if (grp == NULL) {
527 grp = _MALLOC(sizeof (*grp), M_DEVBUF, M_WAITOK|M_ZERO);
528 if (grp != NULL) {
529 grp->qfg_index = i;
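			/*
			 * qfg_slot_shift fixes the width of this group's
			 * buckets in virtual time: timestamps are rounded
			 * down to multiples of 2^qfg_slot_shift (see
			 * qfq_round_down()), and each successive group
			 * index doubles the bucket size.
			 */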
530 grp->qfg_slot_shift =
531 QFQ_MTU_SHIFT + QFQ_FRAC_BITS - (QFQ_MAX_INDEX - i);
532 grp->qfg_slots = _MALLOC(sizeof (struct qfq_class *) *
533 qif->qif_maxslots, M_DEVBUF, M_WAITOK|M_ZERO);
534 if (grp->qfg_slots == NULL) {
535 log(LOG_ERR, "%s: %s unable to allocate group "
536 "slots for index %d\n", if_name(ifp),
537 qfq_style(qif), i);
538 }
539 } else {
540 log(LOG_ERR, "%s: %s unable to allocate group for "
541 "qid=%d\n", if_name(ifp), qfq_style(qif),
542 cl->cl_handle);
543 }
544 		if (grp == NULL || grp->qfg_slots == NULL) {
			/* clear whichever table slot points at cl; it may
			 * not be the slot matching the lower bits of qid */
			for (i = 0; i < qif->qif_maxclasses; i++) {
				if (qif->qif_class_tbl[i] == cl) {
					qif->qif_class_tbl[i] = NULL;
					break;
				}
			}
546 			if (grp != NULL)
547 				_FREE(grp, M_DEVBUF);
548 			zfree(qfq_cl_zone, cl);
549 			return (NULL);
550 } else {
551 qif->qif_groups[i] = grp;
552 }
553 }
554 cl->cl_grp = grp;
555 qif->qif_wsum += w;
556 /* XXX cl->cl_S = qif->qif_V; ? */
557 /* XXX compute qif->qif_i_wsum */
558
559 qif->qif_classes++;
560
561 if (flags & QFCF_DEFAULTCLASS)
562 qif->qif_default = cl;
563
564 if (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) {
565 #if CLASSQ_RED || CLASSQ_RIO
566 u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
567 int pkttime;
568 #endif /* CLASSQ_RED || CLASSQ_RIO */
569
570 cl->cl_qflags = 0;
571 if (flags & QFCF_ECN) {
572 if (flags & QFCF_BLUE)
573 cl->cl_qflags |= BLUEF_ECN;
574 else if (flags & QFCF_SFB)
575 cl->cl_qflags |= SFBF_ECN;
576 else if (flags & QFCF_RED)
577 cl->cl_qflags |= REDF_ECN;
578 else if (flags & QFCF_RIO)
579 cl->cl_qflags |= RIOF_ECN;
580 }
581 if (flags & QFCF_FLOWCTL) {
582 if (flags & QFCF_SFB)
583 cl->cl_qflags |= SFBF_FLOWCTL;
584 }
585 if (flags & QFCF_DELAYBASED) {
586 if (flags & QFCF_SFB)
587 cl->cl_qflags |= SFBF_DELAYBASED;
588 }
589 if (flags & QFCF_CLEARDSCP) {
590 if (flags & QFCF_RIO)
591 cl->cl_qflags |= RIOF_CLEARDSCP;
592 }
593 #if CLASSQ_RED || CLASSQ_RIO
594 /*
595 * XXX: RED & RIO should be watching link speed and MTU
596 * events and recompute pkttime accordingly.
597 */
598 if (ifbandwidth < 8)
599 pkttime = 1000 * 1000 * 1000; /* 1 sec */
600 else
601 pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
602 (ifbandwidth / 8);
603
604 /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
605 #if CLASSQ_RED
606 if (flags & QFCF_RED) {
607 cl->cl_red = red_alloc(ifp, 0, 0,
608 qlimit(&cl->cl_q) * 10/100,
609 qlimit(&cl->cl_q) * 30/100,
610 cl->cl_qflags, pkttime);
611 if (cl->cl_red != NULL)
612 qtype(&cl->cl_q) = Q_RED;
613 }
614 #endif /* CLASSQ_RED */
615 #if CLASSQ_RIO
616 if (flags & QFCF_RIO) {
617 cl->cl_rio =
618 rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
619 if (cl->cl_rio != NULL)
620 qtype(&cl->cl_q) = Q_RIO;
621 }
622 #endif /* CLASSQ_RIO */
623 #endif /* CLASSQ_RED || CLASSQ_RIO */
624 #if CLASSQ_BLUE
625 if (flags & QFCF_BLUE) {
626 cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
627 if (cl->cl_blue != NULL)
628 qtype(&cl->cl_q) = Q_BLUE;
629 }
630 #endif /* CLASSQ_BLUE */
631 if (flags & QFCF_SFB) {
632 if (!(cl->cl_flags & QFCF_LAZY))
633 cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
634 qlimit(&cl->cl_q), cl->cl_qflags);
635 if (cl->cl_sfb != NULL || (cl->cl_flags & QFCF_LAZY))
636 qtype(&cl->cl_q) = Q_SFB;
637 }
638 }
639
640 if (pktsched_verbose) {
641 log(LOG_DEBUG, "%s: %s created qid=%d grp=%d weight=%d "
642 "qlimit=%d flags=%b\n", if_name(ifp), qfq_style(qif),
643 cl->cl_handle, cl->cl_grp->qfg_index, weight, qlimit,
644 flags, QFCF_BITS);
645 }
646
647 return (cl);
648 }
649
650 int
651 qfq_remove_queue(struct qfq_if *qif, u_int32_t qid)
652 {
653 struct qfq_class *cl;
654
655 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
656
657 if ((cl = qfq_clh_to_clp(qif, qid)) == NULL)
658 return (EINVAL);
659
660 return (qfq_class_destroy(qif, cl));
661 }
662
663 static int
664 qfq_class_destroy(struct qfq_if *qif, struct qfq_class *cl)
665 {
666 struct ifclassq *ifq = qif->qif_ifq;
667 int i;
668
669 IFCQ_LOCK_ASSERT_HELD(ifq);
670
671 qfq_purgeq(qif, cl, 0, NULL, NULL);
672
673 if (cl->cl_inv_w != 0) {
674 qif->qif_wsum -= (QFQ_ONE_FP / cl->cl_inv_w);
675 cl->cl_inv_w = 0; /* reset weight to avoid run twice */
676 }
677
678 for (i = 0; i < qif->qif_maxclasses; i++) {
679 if (qif->qif_class_tbl[i] == cl) {
680 qif->qif_class_tbl[i] = NULL;
681 break;
682 }
683 }
684 qif->qif_classes--;
685
686 if (cl->cl_qalg.ptr != NULL) {
687 #if CLASSQ_RIO
688 if (q_is_rio(&cl->cl_q))
689 rio_destroy(cl->cl_rio);
690 #endif /* CLASSQ_RIO */
691 #if CLASSQ_RED
692 if (q_is_red(&cl->cl_q))
693 red_destroy(cl->cl_red);
694 #endif /* CLASSQ_RED */
695 #if CLASSQ_BLUE
696 if (q_is_blue(&cl->cl_q))
697 blue_destroy(cl->cl_blue);
698 #endif /* CLASSQ_BLUE */
699 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
700 sfb_destroy(cl->cl_sfb);
701 cl->cl_qalg.ptr = NULL;
702 qtype(&cl->cl_q) = Q_DROPTAIL;
703 qstate(&cl->cl_q) = QS_RUNNING;
704 }
705
706 if (qif->qif_default == cl)
707 qif->qif_default = NULL;
708
709 if (pktsched_verbose) {
710 log(LOG_DEBUG, "%s: %s destroyed qid=%d\n",
711 if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle);
712 }
713
714 zfree(qfq_cl_zone, cl);
715
716 return (0);
717 }
718
719 /*
720 * Calculate a mask to mimic what would be ffs_from()
721 */
722 static inline pktsched_bitmap_t
723 mask_from(pktsched_bitmap_t bitmap, int from)
724 {
725 return (bitmap & ~((1UL << from) - 1));
726 }
727
728 /*
729 * The state computation relies on ER=0, IR=1, EB=2, IB=3
730 * First compute eligibility comparing grp->qfg_S, qif->qif_V,
731 * then check if someone is blocking us and possibly add EB
732 */
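/*
 * Concretely: bit 0 (the "ineligible" bit) is set when qfg_S > qif_V;
 * bit 1 (the "blocked" bit) is set when the lowest-indexed group at or
 * above this one that is already in ER has an earlier finish time, i.e.
 * it must complete before this group may be served.
 */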
733 static inline u_int32_t
734 qfq_calc_state(struct qfq_if *qif, struct qfq_group *grp)
735 {
736 /* if S > V we are not eligible */
737 u_int32_t state = qfq_gt(grp->qfg_S, qif->qif_V);
738 pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER],
739 grp->qfg_index);
740 struct qfq_group *next;
741
742 if (mask) {
743 next = qfq_ffs(qif, mask);
744 if (qfq_gt(grp->qfg_F, next->qfg_F))
745 state |= EB;
746 }
747
748 return (state);
749 }
750
751 /*
752 * In principle
753 * qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
754 * qif->qif_bitmaps[src] &= ~mask;
755 * but we should make sure that src != dst
756 */
757 static inline void
758 qfq_move_groups(struct qfq_if *qif, pktsched_bitmap_t mask, int src, int dst)
759 {
760 qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
761 qif->qif_bitmaps[src] &= ~mask;
762 }
763
764 static inline void
765 qfq_unblock_groups(struct qfq_if *qif, int index, u_int64_t old_finish)
766 {
767 pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER], index + 1);
768 struct qfq_group *next;
769
770 if (mask) {
771 next = qfq_ffs(qif, mask);
772 if (!qfq_gt(next->qfg_F, old_finish))
773 return;
774 }
775
776 mask = (1UL << index) - 1;
777 qfq_move_groups(qif, mask, EB, ER);
778 qfq_move_groups(qif, mask, IB, IR);
779 }
780
781 /*
782 * perhaps
783 *
784 * old_V ^= qif->qif_V;
785 * old_V >>= QFQ_MIN_SLOT_SHIFT;
786 * if (old_V) {
787 * ...
788 * }
789 */
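/*
 * Groups become eligible when V crosses a multiple of their slot size:
 * the mask below selects every group index up to the most significant
 * bit that changed in V >> QFQ_MIN_SLOT_SHIFT, and those groups are
 * moved from IR to ER and from IB to EB.
 */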
790 static inline void
791 qfq_make_eligible(struct qfq_if *qif, u_int64_t old_V)
792 {
793 pktsched_bitmap_t mask, vslot, old_vslot;
794
795 vslot = qif->qif_V >> QFQ_MIN_SLOT_SHIFT;
796 old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
797
798 if (vslot != old_vslot) {
799 mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
800 qfq_move_groups(qif, mask, IR, ER);
801 qfq_move_groups(qif, mask, IB, EB);
802 }
803 }
804
805 /*
806 * XXX we should make sure that slot becomes less than 32.
807 * This is guaranteed by the input values.
808  * roundedS is always cl->cl_S rounded on grp->qfg_slot_shift bits.
809 */
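/*
 * The slot number is the distance, in buckets of 2^qfg_slot_shift
 * virtual-time units, between the class's rounded start time and the
 * group's start time; the class is prepended to that bucket's list and
 * the bucket is marked occupied in qfg_full_slots.
 */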
810 static inline void
811 qfq_slot_insert(struct qfq_if *qif, struct qfq_group *grp,
812 struct qfq_class *cl, u_int64_t roundedS)
813 {
814 u_int64_t slot = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
815 u_int32_t i = (grp->qfg_front + slot) % qif->qif_maxslots;
816
817 cl->cl_next = grp->qfg_slots[i];
818 grp->qfg_slots[i] = cl;
819 pktsched_bit_set(slot, &grp->qfg_full_slots);
820 }
821
822 /*
823 * remove the entry from the slot
824 */
825 static inline void
826 qfq_front_slot_remove(struct qfq_group *grp)
827 {
828 struct qfq_class **h = &grp->qfg_slots[grp->qfg_front];
829
830 *h = (*h)->cl_next;
831 if (!*h)
832 pktsched_bit_clr(0, &grp->qfg_full_slots);
833 }
834
835 /*
836 * Returns the first full queue in a group. As a side effect,
837 * adjust the bucket list so the first non-empty bucket is at
838 * position 0 in qfg_full_slots.
839 */
840 static inline struct qfq_class *
841 qfq_slot_scan(struct qfq_if *qif, struct qfq_group *grp)
842 {
843 int i;
844
845 if (pktsched_verbose > 2) {
846 log(LOG_DEBUG, "%s: %s grp=%d full_slots=0x%x\n",
847 if_name(QFQIF_IFP(qif)), qfq_style(qif), grp->qfg_index,
848 grp->qfg_full_slots);
849 }
850
851 if (grp->qfg_full_slots == 0)
852 return (NULL);
853
854 i = pktsched_ffs(grp->qfg_full_slots) - 1; /* zero-based */
855 if (i > 0) {
856 grp->qfg_front = (grp->qfg_front + i) % qif->qif_maxslots;
857 grp->qfg_full_slots >>= i;
858 }
859
860 return (grp->qfg_slots[grp->qfg_front]);
861 }
862
863 /*
864 * adjust the bucket list. When the start time of a group decreases,
865 * we move the index down (modulo qif->qif_maxslots) so we don't need to
866 * move the objects. The mask of occupied slots must be shifted
867 * because we use ffs() to find the first non-empty slot.
868 * This covers decreases in the group's start time, but what about
869  * increases of the start time?
870 * Here too we should make sure that i is less than 32
871 */
872 static inline void
873 qfq_slot_rotate(struct qfq_if *qif, struct qfq_group *grp, u_int64_t roundedS)
874 {
875 #pragma unused(qif)
876 u_int32_t i = (grp->qfg_S - roundedS) >> grp->qfg_slot_shift;
877
878 grp->qfg_full_slots <<= i;
879 grp->qfg_front = (grp->qfg_front - i) % qif->qif_maxslots;
880 }
881
882 static inline void
883 qfq_update_eligible(struct qfq_if *qif, u_int64_t old_V)
884 {
885 pktsched_bitmap_t ineligible;
886
887 ineligible = qif->qif_bitmaps[IR] | qif->qif_bitmaps[IB];
888 if (ineligible) {
889 if (!qif->qif_bitmaps[ER]) {
890 struct qfq_group *grp;
891 grp = qfq_ffs(qif, ineligible);
892 if (qfq_gt(grp->qfg_S, qif->qif_V))
893 qif->qif_V = grp->qfg_S;
894 }
895 qfq_make_eligible(qif, old_V);
896 }
897 }
898
899 /*
900 * Updates the class, returns true if also the group needs to be updated.
901 */
902 static inline int
903 qfq_update_class(struct qfq_if *qif, struct qfq_group *grp,
904 struct qfq_class *cl)
905 {
906 #pragma unused(qif)
907 cl->cl_S = cl->cl_F;
908 if (qempty(&cl->cl_q)) {
909 qfq_front_slot_remove(grp);
910 } else {
911 u_int32_t len;
912 u_int64_t roundedS;
913
914 len = m_pktlen(qhead(&cl->cl_q));
915 cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
916 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
917 if (roundedS == grp->qfg_S)
918 return (0);
919
920 qfq_front_slot_remove(grp);
921 qfq_slot_insert(qif, grp, cl, roundedS);
922 }
923 return (1);
924 }
925
926 /*
927 * note: CLASSQDQ_POLL returns the next packet without removing the packet
928 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
929 * CLASSQDQ_REMOVE must return the same packet if called immediately
930 * after CLASSQDQ_POLL.
931 */
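/*
 * Dequeue in outline: pick the lowest-indexed group in ER that has a
 * non-empty bucket, serve the class at its front bucket, advance the
 * system virtual time V, update the class's timestamps and its position
 * within the group, recompute the group's state (possibly unblocking
 * other groups), and finally refresh eligibility.
 */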
932 struct mbuf *
933 qfq_dequeue(struct qfq_if *qif, cqdq_op_t op)
934 {
935 pktsched_bitmap_t er_bits = qif->qif_bitmaps[ER];
936 struct ifclassq *ifq = qif->qif_ifq;
937 struct qfq_group *grp;
938 struct qfq_class *cl;
939 struct mbuf *m;
940 u_int64_t old_V;
941 u_int32_t len;
942
943 IFCQ_LOCK_ASSERT_HELD(ifq);
944
945 for (;;) {
946 if (er_bits == 0) {
947 #if QFQ_DEBUG
948 if (qif->qif_queued && pktsched_verbose > 1)
949 qfq_dump_sched(qif, "start dequeue");
950 #endif /* QFQ_DEBUG */
951 /* no eligible and ready packet */
952 return (NULL);
953 }
954 grp = qfq_ffs(qif, er_bits);
955 /* if group is non-empty, use it */
956 if (grp->qfg_full_slots != 0)
957 break;
958 pktsched_bit_clr(grp->qfg_index, &er_bits);
959 #if QFQ_DEBUG
960 qif->qif_emptygrp++;
961 #endif /* QFQ_DEBUG */
962 }
963 VERIFY(!IFCQ_IS_EMPTY(ifq));
964
965 cl = grp->qfg_slots[grp->qfg_front];
966 VERIFY(cl != NULL && !qempty(&cl->cl_q));
967
968 if (op == CLASSQDQ_POLL)
969 return (qfq_pollq(cl));
970
971 m = qfq_getq(cl);
972 VERIFY(m != NULL); /* qalg must be work conserving */
973 len = m_pktlen(m);
974
975 #if QFQ_DEBUG
976 qif->qif_queued--;
977 #endif /* QFQ_DEBUG */
978
979 IFCQ_DEC_LEN(ifq);
980 IFCQ_DEC_BYTES(ifq, len);
981 if (qempty(&cl->cl_q))
982 cl->cl_period++;
983 PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
984 IFCQ_XMIT_ADD(ifq, 1, len);
985
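	/*
	 * Save old_V so eligibility can be recomputed below, then advance
	 * V by the bytes just served scaled by QFQ_IWSUM, the precomputed
	 * fixed-point inverse of the weight-sum budget; i.e. V grows by
	 * roughly len/wsum in virtual time.
	 */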
986 old_V = qif->qif_V;
987 qif->qif_V += (u_int64_t)len * QFQ_IWSUM;
988
989 if (pktsched_verbose > 2) {
990 log(LOG_DEBUG, "%s: %s qid=%d dequeue m=0x%llx F=0x%llx "
991 "V=0x%llx", if_name(QFQIF_IFP(qif)), qfq_style(qif),
992 cl->cl_handle, (uint64_t)VM_KERNEL_ADDRPERM(m), cl->cl_F,
993 qif->qif_V);
994 }
995
996 if (qfq_update_class(qif, grp, cl)) {
997 u_int64_t old_F = grp->qfg_F;
998
999 cl = qfq_slot_scan(qif, grp);
1000 if (!cl) { /* group gone, remove from ER */
1001 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
1002 } else {
1003 u_int32_t s;
1004 u_int64_t roundedS =
1005 qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
1006
1007 if (grp->qfg_S == roundedS)
1008 goto skip_unblock;
1009
1010 grp->qfg_S = roundedS;
1011 grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
1012
1013 /* remove from ER and put in the new set */
1014 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
1015 s = qfq_calc_state(qif, grp);
1016 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
1017 }
1018 /* we need to unblock even if the group has gone away */
1019 qfq_unblock_groups(qif, grp->qfg_index, old_F);
1020 }
1021
1022 skip_unblock:
1023 qfq_update_eligible(qif, old_V);
1024
1025 #if QFQ_DEBUG
1026 if (!qif->qif_bitmaps[ER] && qif->qif_queued && pktsched_verbose > 1)
1027 qfq_dump_sched(qif, "end dequeue");
1028 #endif /* QFQ_DEBUG */
1029
1030 return (m);
1031 }
1032
1033 /*
1034 * Assign a reasonable start time for a new flow k in group i.
1035 * Admissible values for hat(F) are multiples of sigma_i
1036 * no greater than V+sigma_i . Larger values mean that
1037 * we had a wraparound so we consider the timestamp to be stale.
1038 *
1039 * If F is not stale and F >= V then we set S = F.
1040 * Otherwise we should assign S = V, but this may violate
1041 * the ordering in ER. So, if we have groups in ER, set S to
1042 * the F_j of the first group j which would be blocking us.
1043 * We are guaranteed not to move S backward because
1044 * otherwise our group i would still be blocked.
1045 */
1046 static inline void
1047 qfq_update_start(struct qfq_if *qif, struct qfq_class *cl)
1048 {
1049 pktsched_bitmap_t mask;
1050 u_int64_t limit, roundedF;
1051 int slot_shift = cl->cl_grp->qfg_slot_shift;
1052
1053 roundedF = qfq_round_down(cl->cl_F, slot_shift);
1054 limit = qfq_round_down(qif->qif_V, slot_shift) + (1UL << slot_shift);
1055
1056 if (!qfq_gt(cl->cl_F, qif->qif_V) || qfq_gt(roundedF, limit)) {
1057 /* timestamp was stale */
1058 mask = mask_from(qif->qif_bitmaps[ER], cl->cl_grp->qfg_index);
1059 if (mask) {
1060 struct qfq_group *next = qfq_ffs(qif, mask);
1061 if (qfq_gt(roundedF, next->qfg_F)) {
1062 cl->cl_S = next->qfg_F;
1063 return;
1064 }
1065 }
1066 cl->cl_S = qif->qif_V;
1067 } else { /* timestamp is not stale */
1068 cl->cl_S = cl->cl_F;
1069 }
1070 }
1071
1072 int
1073 qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m,
1074 struct pf_mtag *t)
1075 {
1076 struct ifclassq *ifq = qif->qif_ifq;
1077 struct qfq_group *grp;
1078 u_int64_t roundedS;
1079 int len, ret, s;
1080
1081 IFCQ_LOCK_ASSERT_HELD(ifq);
1082 VERIFY(cl == NULL || cl->cl_qif == qif);
1083
1084 if (cl == NULL) {
1085 #if PF_ALTQ
1086 cl = qfq_clh_to_clp(qif, t->pftag_qid);
1087 #else /* !PF_ALTQ */
1088 cl = qfq_clh_to_clp(qif, 0);
1089 #endif /* !PF_ALTQ */
1090 if (cl == NULL) {
1091 cl = qif->qif_default;
1092 if (cl == NULL) {
1093 IFCQ_CONVERT_LOCK(ifq);
1094 m_freem(m);
1095 return (ENOBUFS);
1096 }
1097 }
1098 }
1099
1100 len = m_pktlen(m);
1101
1102 ret = qfq_addq(cl, m, t);
1103 if (ret != 0) {
1104 if (ret == CLASSQEQ_SUCCESS_FC) {
1105 /* packet enqueued, return advisory feedback */
1106 ret = EQFULL;
1107 } else {
1108 VERIFY(ret == CLASSQEQ_DROPPED ||
1109 ret == CLASSQEQ_DROPPED_FC ||
1110 ret == CLASSQEQ_DROPPED_SP);
1111 /* packet has been freed in qfq_addq */
1112 PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
1113 IFCQ_DROP_ADD(ifq, 1, len);
1114 switch (ret) {
1115 case CLASSQEQ_DROPPED:
1116 return (ENOBUFS);
1117 case CLASSQEQ_DROPPED_FC:
1118 return (EQFULL);
1119 case CLASSQEQ_DROPPED_SP:
1120 return (EQSUSPENDED);
1121 }
1122 /* NOT REACHED */
1123 }
1124 }
1125 IFCQ_INC_LEN(ifq);
1126 IFCQ_INC_BYTES(ifq, len);
1127
1128 #if QFQ_DEBUG
1129 qif->qif_queued++;
1130 #endif /* QFQ_DEBUG */
1131
1132 /* queue was not idle, we're done */
1133 if (qlen(&cl->cl_q) > 1)
1134 goto done;
1135
1136 /* queue was idle */
1137 grp = cl->cl_grp;
1138 qfq_update_start(qif, cl); /* adjust start time */
1139
1140 /* compute new finish time and rounded start */
1141 cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
1142 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
1143
1144 /*
1145 * Insert cl in the correct bucket.
1146 *
1147 * If cl->cl_S >= grp->qfg_S we don't need to adjust the bucket list
1148 * and simply go to the insertion phase. Otherwise grp->qfg_S is
1149 * decreasing, we must make room in the bucket list, and also
1150 * recompute the group state. Finally, if there were no flows
1151 * in this group and nobody was in ER make sure to adjust V.
1152 */
1153 if (grp->qfg_full_slots != 0) {
1154 if (!qfq_gt(grp->qfg_S, cl->cl_S))
1155 goto skip_update;
1156
1157 /* create a slot for this cl->cl_S */
1158 qfq_slot_rotate(qif, grp, roundedS);
1159
1160 /* group was surely ineligible, remove */
1161 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
1162 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
1163 } else if (!qif->qif_bitmaps[ER] && qfq_gt(roundedS, qif->qif_V)) {
1164 qif->qif_V = roundedS;
1165 }
1166
1167 grp->qfg_S = roundedS;
1168 grp->qfg_F =
1169 roundedS + (2ULL << grp->qfg_slot_shift); /* i.e. 2 sigma_i */
1170 s = qfq_calc_state(qif, grp);
1171 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
1172
1173 if (pktsched_verbose > 2) {
1174 log(LOG_DEBUG, "%s: %s qid=%d enqueue m=0x%llx state=%s 0x%x "
1175 "S=0x%llx F=0x%llx V=0x%llx\n", if_name(QFQIF_IFP(qif)),
1176 qfq_style(qif), cl->cl_handle,
1177 (uint64_t)VM_KERNEL_ADDRPERM(m), qfq_state2str(s),
1178 qif->qif_bitmaps[s], cl->cl_S, cl->cl_F, qif->qif_V);
1179 }
1180
1181 skip_update:
1182 qfq_slot_insert(qif, grp, cl, roundedS);
1183
1184 done:
1185 /* successfully queued. */
1186 return (ret);
1187 }
1188
1189 static inline void
1190 qfq_slot_remove(struct qfq_if *qif, struct qfq_group *grp,
1191 struct qfq_class *cl)
1192 {
1193 #pragma unused(qif)
1194 struct qfq_class **pprev;
1195 u_int32_t i, offset;
1196 u_int64_t roundedS;
1197
1198 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
1199 offset = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
1200 i = (grp->qfg_front + offset) % qif->qif_maxslots;
1201
1202 pprev = &grp->qfg_slots[i];
1203 while (*pprev && *pprev != cl)
1204 pprev = &(*pprev)->cl_next;
1205
1206 *pprev = cl->cl_next;
1207 if (!grp->qfg_slots[i])
1208 pktsched_bit_clr(offset, &grp->qfg_full_slots);
1209 }
1210
1211 /*
1212 * Called to forcibly destroy a queue.
1213 * If the queue is not in the front bucket, or if it has
1214 * other queues in the front bucket, we can simply remove
1215 * the queue with no other side effects.
1216 * Otherwise we must propagate the event up.
1217 * XXX description to be completed.
1218 */
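/*
 * In outline: the class is unlinked from its bucket; if the group is now
 * empty it is removed from every set (taking care to unblock groups it
 * may have been blocking before leaving ER); otherwise, if the front
 * bucket emptied, the group's S/F are realigned to the new front class
 * and its state is recomputed.
 */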
1219 static void
1220 qfq_deactivate_class(struct qfq_if *qif, struct qfq_class *cl)
1221 {
1222 struct qfq_group *grp = cl->cl_grp;
1223 pktsched_bitmap_t mask;
1224 u_int64_t roundedS;
1225 int s;
1226
1227 if (pktsched_verbose) {
1228 log(LOG_DEBUG, "%s: %s deactivate qid=%d grp=%d "
1229 "full_slots=0x%x front=%d bitmaps={ER=0x%x,EB=0x%x,"
1230 "IR=0x%x,IB=0x%x}\n",
1231 if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
1232 cl->cl_handle, grp->qfg_index, grp->qfg_full_slots,
1233 grp->qfg_front, qif->qif_bitmaps[ER], qif->qif_bitmaps[EB],
1234 qif->qif_bitmaps[IR], qif->qif_bitmaps[IB]);
1235 #if QFQ_DEBUG
1236 if (pktsched_verbose > 1)
1237 qfq_dump_sched(qif, "start deactivate");
1238 #endif /* QFQ_DEBUG */
1239 }
1240
1241 cl->cl_F = cl->cl_S; /* not needed if the class goes away */
1242 qfq_slot_remove(qif, grp, cl);
1243
1244 if (grp->qfg_full_slots == 0) {
1245 /*
1246 * Nothing left in the group, remove from all sets.
1247 * Do ER last because if we were blocking other groups
1248 * we must unblock them.
1249 */
1250 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
1251 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
1252 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
1253
1254 if (pktsched_bit_tst(grp->qfg_index, &qif->qif_bitmaps[ER]) &&
1255 !(qif->qif_bitmaps[ER] & ~((1UL << grp->qfg_index) - 1))) {
1256 mask = qif->qif_bitmaps[ER] &
1257 ((1UL << grp->qfg_index) - 1);
1258 if (mask)
1259 mask = ~((1UL << __fls(mask)) - 1);
1260 else
1261 mask = (pktsched_bitmap_t)~0UL;
1262 qfq_move_groups(qif, mask, EB, ER);
1263 qfq_move_groups(qif, mask, IB, IR);
1264 }
1265 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
1266 } else if (!grp->qfg_slots[grp->qfg_front]) {
1267 cl = qfq_slot_scan(qif, grp);
1268 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
1269 if (grp->qfg_S != roundedS) {
1270 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
1271 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
1272 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
1273 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
1274 grp->qfg_S = roundedS;
1275 grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
1276 s = qfq_calc_state(qif, grp);
1277 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
1278 }
1279 }
1280 qfq_update_eligible(qif, qif->qif_V);
1281
1282 #if QFQ_DEBUG
1283 if (pktsched_verbose > 1)
1284 qfq_dump_sched(qif, "end deactivate");
1285 #endif /* QFQ_DEBUG */
1286 }
1287
1288 static const char *
1289 qfq_state2str(int s)
1290 {
1291 const char *c;
1292
1293 switch (s) {
1294 case ER:
1295 c = "ER";
1296 break;
1297 case IR:
1298 c = "IR";
1299 break;
1300 case EB:
1301 c = "EB";
1302 break;
1303 case IB:
1304 c = "IB";
1305 break;
1306 default:
1307 c = "?";
1308 break;
1309 }
1310 return (c);
1311 }
1312
1313 static inline int
1314 qfq_addq(struct qfq_class *cl, struct mbuf *m, struct pf_mtag *t)
1315 {
1316 struct qfq_if *qif = cl->cl_qif;
1317 struct ifclassq *ifq = qif->qif_ifq;
1318
1319 IFCQ_LOCK_ASSERT_HELD(ifq);
1320
1321 #if CLASSQ_RIO
1322 if (q_is_rio(&cl->cl_q))
1323 return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
1324 else
1325 #endif /* CLASSQ_RIO */
1326 #if CLASSQ_RED
1327 if (q_is_red(&cl->cl_q))
1328 return (red_addq(cl->cl_red, &cl->cl_q, m, t));
1329 else
1330 #endif /* CLASSQ_RED */
1331 #if CLASSQ_BLUE
1332 if (q_is_blue(&cl->cl_q))
1333 return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
1334 else
1335 #endif /* CLASSQ_BLUE */
1336 if (q_is_sfb(&cl->cl_q)) {
1337 if (cl->cl_sfb == NULL) {
1338 struct ifnet *ifp = QFQIF_IFP(qif);
1339
1340 VERIFY(cl->cl_flags & QFCF_LAZY);
1341 cl->cl_flags &= ~QFCF_LAZY;
1342 IFCQ_CONVERT_LOCK(ifq);
1343
1344 cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
1345 qlimit(&cl->cl_q), cl->cl_qflags);
1346 if (cl->cl_sfb == NULL) {
1347 /* fall back to droptail */
1348 qtype(&cl->cl_q) = Q_DROPTAIL;
1349 cl->cl_flags &= ~QFCF_SFB;
1350 cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);
1351
1352 log(LOG_ERR, "%s: %s SFB lazy allocation "
1353 "failed for qid=%d grp=%d, falling back "
1354 "to DROPTAIL\n", if_name(ifp),
1355 qfq_style(qif), cl->cl_handle,
1356 cl->cl_grp->qfg_index);
1357 } else if (qif->qif_throttle != IFNET_THROTTLE_OFF) {
1358 /* if there's pending throttling, set it */
1359 cqrq_throttle_t tr = { 1, qif->qif_throttle };
1360 int err = qfq_throttle(qif, &tr);
1361
1362 if (err == EALREADY)
1363 err = 0;
1364 if (err != 0) {
1365 tr.level = IFNET_THROTTLE_OFF;
1366 (void) qfq_throttle(qif, &tr);
1367 }
1368 }
1369 }
1370 if (cl->cl_sfb != NULL)
1371 return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
1372 } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
1373 IFCQ_CONVERT_LOCK(ifq);
1374 m_freem(m);
1375 return (CLASSQEQ_DROPPED);
1376 }
1377
1378 #if PF_ECN
1379 if (cl->cl_flags & QFCF_CLEARDSCP)
1380 write_dsfield(m, t, 0);
1381 #endif /* PF_ECN */
1382
1383 _addq(&cl->cl_q, m);
1384
1385 return (0);
1386 }
1387
1388 static inline struct mbuf *
1389 qfq_getq(struct qfq_class *cl)
1390 {
1391 IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);
1392
1393 #if CLASSQ_RIO
1394 if (q_is_rio(&cl->cl_q))
1395 return (rio_getq(cl->cl_rio, &cl->cl_q));
1396 else
1397 #endif /* CLASSQ_RIO */
1398 #if CLASSQ_RED
1399 if (q_is_red(&cl->cl_q))
1400 return (red_getq(cl->cl_red, &cl->cl_q));
1401 else
1402 #endif /* CLASSQ_RED */
1403 #if CLASSQ_BLUE
1404 if (q_is_blue(&cl->cl_q))
1405 return (blue_getq(cl->cl_blue, &cl->cl_q));
1406 else
1407 #endif /* CLASSQ_BLUE */
1408 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1409 return (sfb_getq(cl->cl_sfb, &cl->cl_q));
1410
1411 return (_getq(&cl->cl_q));
1412 }
1413
1414 static inline struct mbuf *
1415 qfq_pollq(struct qfq_class *cl)
1416 {
1417 IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);
1418
1419 return (qhead(&cl->cl_q));
1420 }
1421
1422 static void
1423 qfq_purgeq(struct qfq_if *qif, struct qfq_class *cl, u_int32_t flow,
1424 u_int32_t *packets, u_int32_t *bytes)
1425 {
1426 struct ifclassq *ifq = qif->qif_ifq;
1427 u_int32_t cnt = 0, len = 0, qlen;
1428
1429 IFCQ_LOCK_ASSERT_HELD(ifq);
1430
1431 if ((qlen = qlen(&cl->cl_q)) == 0)
1432 goto done;
1433
1434 /* become regular mutex before freeing mbufs */
1435 IFCQ_CONVERT_LOCK(ifq);
1436
1437 #if CLASSQ_RIO
1438 if (q_is_rio(&cl->cl_q))
1439 rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
1440 else
1441 #endif /* CLASSQ_RIO */
1442 #if CLASSQ_RED
1443 if (q_is_red(&cl->cl_q))
1444 red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
1445 else
1446 #endif /* CLASSQ_RED */
1447 #if CLASSQ_BLUE
1448 if (q_is_blue(&cl->cl_q))
1449 blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
1450 else
1451 #endif /* CLASSQ_BLUE */
1452 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1453 sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
1454 else
1455 _flushq_flow(&cl->cl_q, flow, &cnt, &len);
1456
1457 if (cnt > 0) {
1458 VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
1459 #if QFQ_DEBUG
1460 VERIFY(qif->qif_queued >= cnt);
1461 qif->qif_queued -= cnt;
1462 #endif /* QFQ_DEBUG */
1463
1464 PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
1465 IFCQ_DROP_ADD(ifq, cnt, len);
1466
1467 VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
1468 IFCQ_LEN(ifq) -= cnt;
1469
1470 if (qempty(&cl->cl_q))
1471 qfq_deactivate_class(qif, cl);
1472
1473 if (pktsched_verbose) {
1474 log(LOG_DEBUG, "%s: %s purge qid=%d weight=%d "
1475 "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
1476 if_name(QFQIF_IFP(qif)),
1477 qfq_style(qif), cl->cl_handle,
1478 (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w), qlen,
1479 qlen(&cl->cl_q), cnt, len, flow);
1480 }
1481 }
1482 done:
1483 if (packets != NULL)
1484 *packets = cnt;
1485 if (bytes != NULL)
1486 *bytes = len;
1487 }
1488
1489 static void
1490 qfq_updateq(struct qfq_if *qif, struct qfq_class *cl, cqev_t ev)
1491 {
1492 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
1493
1494 if (pktsched_verbose) {
1495 log(LOG_DEBUG, "%s: %s update qid=%d weight=%d event=%s\n",
1496 if_name(QFQIF_IFP(qif)), qfq_style(qif),
1497 cl->cl_handle, (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w),
1498 ifclassq_ev2str(ev));
1499 }
1500
1501 #if CLASSQ_RIO
1502 if (q_is_rio(&cl->cl_q))
1503 return (rio_updateq(cl->cl_rio, ev));
1504 #endif /* CLASSQ_RIO */
1505 #if CLASSQ_RED
1506 if (q_is_red(&cl->cl_q))
1507 return (red_updateq(cl->cl_red, ev));
1508 #endif /* CLASSQ_RED */
1509 #if CLASSQ_BLUE
1510 if (q_is_blue(&cl->cl_q))
1511 return (blue_updateq(cl->cl_blue, ev));
1512 #endif /* CLASSQ_BLUE */
1513 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1514 return (sfb_updateq(cl->cl_sfb, ev));
1515 }
1516
1517 int
1518 qfq_get_class_stats(struct qfq_if *qif, u_int32_t qid,
1519 struct qfq_classstats *sp)
1520 {
1521 struct qfq_class *cl;
1522
1523 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
1524
1525 if ((cl = qfq_clh_to_clp(qif, qid)) == NULL)
1526 return (EINVAL);
1527
1528 sp->class_handle = cl->cl_handle;
1529 sp->index = cl->cl_grp->qfg_index;
1530 sp->weight = (QFQ_ONE_FP / cl->cl_inv_w);
1531 sp->lmax = cl->cl_lmax;
1532 sp->qlength = qlen(&cl->cl_q);
1533 sp->qlimit = qlimit(&cl->cl_q);
1534 sp->period = cl->cl_period;
1535 sp->xmitcnt = cl->cl_xmitcnt;
1536 sp->dropcnt = cl->cl_dropcnt;
1537
1538 sp->qtype = qtype(&cl->cl_q);
1539 sp->qstate = qstate(&cl->cl_q);
1540 #if CLASSQ_RED
1541 if (q_is_red(&cl->cl_q))
1542 red_getstats(cl->cl_red, &sp->red[0]);
1543 #endif /* CLASSQ_RED */
1544 #if CLASSQ_RIO
1545 if (q_is_rio(&cl->cl_q))
1546 rio_getstats(cl->cl_rio, &sp->red[0]);
1547 #endif /* CLASSQ_RIO */
1548 #if CLASSQ_BLUE
1549 if (q_is_blue(&cl->cl_q))
1550 blue_getstats(cl->cl_blue, &sp->blue);
1551 #endif /* CLASSQ_BLUE */
1552 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
1553 sfb_getstats(cl->cl_sfb, &sp->sfb);
1554
1555 return (0);
1556 }
1557
1558 static int
1559 qfq_stat_sc(struct qfq_if *qif, cqrq_stat_sc_t *sr)
1560 {
1561 struct ifclassq *ifq = qif->qif_ifq;
1562 struct qfq_class *cl;
1563 u_int32_t i;
1564
1565 IFCQ_LOCK_ASSERT_HELD(ifq);
1566
1567 VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc));
1568
1569 i = MBUF_SCIDX(sr->sc);
1570 VERIFY(i < IFCQ_SC_MAX);
1571
1572 cl = ifq->ifcq_disc_slots[i].cl;
1573 sr->packets = qlen(&cl->cl_q);
1574 sr->bytes = qsize(&cl->cl_q);
1575
1576 return (0);
1577 }
1578
1579 /* convert a class handle to the corresponding class pointer */
1580 static inline struct qfq_class *
1581 qfq_clh_to_clp(struct qfq_if *qif, u_int32_t chandle)
1582 {
1583 struct qfq_class *cl;
1584 int i;
1585
1586 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
1587
1588 /*
1589  * First, optimistically try the slot matching the lower bits of
1590  * the handle; if that fails, fall back to the linear table search.
1591 */
1592 i = chandle % qif->qif_maxclasses;
1593 if ((cl = qif->qif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
1594 return (cl);
1595 for (i = 0; i < qif->qif_maxclasses; i++)
1596 if ((cl = qif->qif_class_tbl[i]) != NULL &&
1597 cl->cl_handle == chandle)
1598 return (cl);
1599
1600 return (NULL);
1601 }
1602
1603 static const char *
1604 qfq_style(struct qfq_if *qif)
1605 {
1606 return ((qif->qif_flags & QFQIFF_ALTQ) ? "ALTQ_QFQ" : "QFQ");
1607 }
1608
1609 /*
1610 * Generic comparison function, handling wraparound
1611 */
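/*
 * The subtraction is performed modulo 2^64 and interpreted as signed, so
 * the comparison stays correct across timestamp wraparound as long as
 * the two values are within 2^63 of each other.
 */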
1612 static inline int
1613 qfq_gt(u_int64_t a, u_int64_t b)
1614 {
1615 return ((int64_t)(a - b) > 0);
1616 }
1617
1618 /*
1619 * Round a precise timestamp to its slotted value
1620 */
1621 static inline u_int64_t
1622 qfq_round_down(u_int64_t ts, u_int32_t shift)
1623 {
1624 return (ts & ~((1ULL << shift) - 1));
1625 }
1626
1627 /*
1628 * Return the pointer to the group with lowest index in the bitmap
1629 */
1630 static inline struct qfq_group *
1631 qfq_ffs(struct qfq_if *qif, pktsched_bitmap_t bitmap)
1632 {
1633 int index = pktsched_ffs(bitmap) - 1; /* zero-based */
1634 VERIFY(index >= 0 && index <= QFQ_MAX_INDEX &&
1635 qif->qif_groups[index] != NULL);
1636 return (qif->qif_groups[index]);
1637 }
1638
1639 /*
1640 * Calculate a flow index, given its weight and maximum packet length.
1641 * index = log_2(maxlen/weight) but we need to apply the scaling.
1642 * This is used only once at flow creation.
1643 */
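/*
 * slot_size below is lmax/w expressed in QFQ_ONE_FP fixed point
 * (maxlen * inv_w); the returned index is in effect
 * ceil(log2(slot_size)) - QFQ_MIN_SLOT_SHIFT, clamped at zero, so
 * classes whose lmax/w ratios fall within the same power-of-two band
 * share a group.
 */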
1644 static int
1645 qfq_calc_index(struct qfq_class *cl, u_int32_t inv_w, u_int32_t maxlen)
1646 {
1647 	u_int64_t slot_size = (u_int64_t)maxlen * inv_w;
1648 pktsched_bitmap_t size_map;
1649 int index = 0;
1650
1651 size_map = (pktsched_bitmap_t)(slot_size >> QFQ_MIN_SLOT_SHIFT);
1652 if (!size_map)
1653 goto out;
1654
1655 index = __fls(size_map) + 1; /* basically a log_2() */
1656 index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
1657
1658 if (index < 0)
1659 index = 0;
1660 out:
1661 if (pktsched_verbose) {
1662 log(LOG_DEBUG, "%s: %s qid=%d grp=%d W=%u, L=%u, I=%d\n",
1663 if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
1664 cl->cl_handle, index, (u_int32_t)(QFQ_ONE_FP/inv_w),
1665 maxlen, index);
1666 }
1667 return (index);
1668 }
1669
1670 #if QFQ_DEBUG
1671 static void
1672 qfq_dump_groups(struct qfq_if *qif, u_int32_t mask)
1673 {
1674 int i, j;
1675
1676 for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
1677 struct qfq_group *g = qif->qif_groups[i];
1678
1679 if (0 == (mask & (1 << i)))
1680 continue;
1681 if (g == NULL)
1682 continue;
1683
1684 log(LOG_DEBUG, "%s: %s [%2d] full_slots 0x%x\n",
1685 if_name(QFQIF_IFP(qif)), qfq_style(qif), i,
1686 g->qfg_full_slots);
1687 log(LOG_DEBUG, "%s: %s S 0x%20llx F 0x%llx %c\n",
1688 if_name(QFQIF_IFP(qif)), qfq_style(qif),
1689 g->qfg_S, g->qfg_F, mask & (1 << i) ? '1' : '0');
1690
1691 for (j = 0; j < qif->qif_maxslots; j++) {
1692 if (g->qfg_slots[j]) {
1693 log(LOG_DEBUG, "%s: %s bucket %d 0x%llx "
1694 "qid %d\n", if_name(QFQIF_IFP(qif)),
1695 qfq_style(qif), j,
1696 (uint64_t)VM_KERNEL_ADDRPERM(
1697 g->qfg_slots[j]),
1698 g->qfg_slots[j]->cl_handle);
1699 }
1700 }
1701 }
1702 }
1703
1704 static void
1705 qfq_dump_sched(struct qfq_if *qif, const char *msg)
1706 {
1707 log(LOG_DEBUG, "%s: %s --- in %s: ---\n",
1708 if_name(QFQIF_IFP(qif)), qfq_style(qif), msg);
1709 log(LOG_DEBUG, "%s: %s emptygrp %d queued %d V 0x%llx\n",
1710 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_emptygrp,
1711 qif->qif_queued, qif->qif_V);
1712 log(LOG_DEBUG, "%s: %s ER 0x%08x\n",
1713 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[ER]);
1714 log(LOG_DEBUG, "%s: %s EB 0x%08x\n",
1715 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[EB]);
1716 log(LOG_DEBUG, "%s: %s IR 0x%08x\n",
1717 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IR]);
1718 log(LOG_DEBUG, "%s: %s IB 0x%08x\n",
1719 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IB]);
1720 qfq_dump_groups(qif, 0xffffffff);
1721 }
1722 #endif /* QFQ_DEBUG */
1723
1724 /*
1725 * qfq_enqueue_ifclassq is an enqueue function to be registered to
1726 * (*ifcq_enqueue) in struct ifclassq.
1727 */
1728 static int
1729 qfq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
1730 {
1731 u_int32_t i;
1732
1733 IFCQ_LOCK_ASSERT_HELD(ifq);
1734
1735 if (!(m->m_flags & M_PKTHDR)) {
1736 /* should not happen */
1737 log(LOG_ERR, "%s: packet does not have pkthdr\n",
1738 if_name(ifq->ifcq_ifp));
1739 IFCQ_CONVERT_LOCK(ifq);
1740 m_freem(m);
1741 return (ENOBUFS);
1742 }
1743
1744 i = MBUF_SCIDX(mbuf_get_service_class(m));
1745 VERIFY((u_int32_t)i < IFCQ_SC_MAX);
1746
1747 return (qfq_enqueue(ifq->ifcq_disc,
1748 ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
1749 }
1750
1751 /*
1752 * qfq_dequeue_ifclassq is a dequeue function to be registered to
1753  * (*ifcq_dequeue) in struct ifclassq.
1754 *
1755 * note: CLASSQDQ_POLL returns the next packet without removing the packet
1756 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
1757 * CLASSQDQ_REMOVE must return the same packet if called immediately
1758 * after CLASSQDQ_POLL.
1759 */
1760 static struct mbuf *
1761 qfq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op)
1762 {
1763 return (qfq_dequeue(ifq->ifcq_disc, op));
1764 }
1765
1766 static int
1767 qfq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
1768 {
1769 struct qfq_if *qif = (struct qfq_if *)ifq->ifcq_disc;
1770 int err = 0;
1771
1772 IFCQ_LOCK_ASSERT_HELD(ifq);
1773
1774 switch (req) {
1775 case CLASSQRQ_PURGE:
1776 qfq_purge(qif);
1777 break;
1778
1779 case CLASSQRQ_PURGE_SC:
1780 qfq_purge_sc(qif, (cqrq_purge_sc_t *)arg);
1781 break;
1782
1783 case CLASSQRQ_EVENT:
1784 qfq_event(qif, (cqev_t)arg);
1785 break;
1786
1787 case CLASSQRQ_THROTTLE:
1788 err = qfq_throttle(qif, (cqrq_throttle_t *)arg);
1789 break;
1790 case CLASSQRQ_STAT_SC:
1791 err = qfq_stat_sc(qif, (cqrq_stat_sc_t *)arg);
1792 break;
1793 }
1794 return (err);
1795 }
1796
1797 int
1798 qfq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
1799 {
1800 struct ifnet *ifp = ifq->ifcq_ifp;
1801 struct qfq_class *cl0, *cl1, *cl2, *cl3, *cl4;
1802 struct qfq_class *cl5, *cl6, *cl7, *cl8, *cl9;
1803 struct qfq_if *qif;
1804 u_int32_t maxlen = 0, qflags = 0;
1805 int err = 0;
1806
1807 IFCQ_LOCK_ASSERT_HELD(ifq);
1808 VERIFY(ifq->ifcq_disc == NULL);
1809 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
1810
1811 if (flags & PKTSCHEDF_QALG_RED)
1812 qflags |= QFCF_RED;
1813 if (flags & PKTSCHEDF_QALG_RIO)
1814 qflags |= QFCF_RIO;
1815 if (flags & PKTSCHEDF_QALG_BLUE)
1816 qflags |= QFCF_BLUE;
1817 if (flags & PKTSCHEDF_QALG_SFB)
1818 qflags |= QFCF_SFB;
1819 if (flags & PKTSCHEDF_QALG_ECN)
1820 qflags |= QFCF_ECN;
1821 if (flags & PKTSCHEDF_QALG_FLOWCTL)
1822 qflags |= QFCF_FLOWCTL;
1823 if (flags & PKTSCHEDF_QALG_DELAYBASED)
1824 qflags |= QFCF_DELAYBASED;
1825
1826 qif = qfq_alloc(ifp, M_WAITOK, FALSE);
1827 if (qif == NULL)
1828 return (ENOMEM);
1829
1830 if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
1831 maxlen = if_sndq_maxlen;
1832
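	/*
	 * The ten built-in classes below cover one service class each.
	 * Their weights sum to 100000, so each weight reads as a link
	 * share in thousandths of a percent (e.g. 25000 for SCIDX_CTL is
	 * a 25% share).  All classes except BE and CTL are created
	 * QFCF_LAZY, which defers SFB allocation (when SFB is configured)
	 * until the class first sees traffic (see qfq_addq()).
	 */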
1833 if ((err = qfq_add_queue(qif, maxlen, 300, 1200,
1834 qflags | QFCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0)
1835 goto cleanup;
1836
1837 if ((err = qfq_add_queue(qif, maxlen, 600, 1400,
1838 qflags | QFCF_LAZY, SCIDX_BK, &cl1)) != 0)
1839 goto cleanup;
1840
1841 if ((err = qfq_add_queue(qif, maxlen, 2400, 600,
1842 qflags | QFCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0)
1843 goto cleanup;
1844
1845 if ((err = qfq_add_queue(qif, maxlen, 2700, 600,
1846 qflags | QFCF_LAZY, SCIDX_RD, &cl3)) != 0)
1847 goto cleanup;
1848
1849 if ((err = qfq_add_queue(qif, maxlen, 3000, 400,
1850 qflags | QFCF_LAZY, SCIDX_OAM, &cl4)) != 0)
1851 goto cleanup;
1852
1853 if ((err = qfq_add_queue(qif, maxlen, 8000, 1000,
1854 qflags | QFCF_LAZY, SCIDX_AV, &cl5)) != 0)
1855 goto cleanup;
1856
1857 if ((err = qfq_add_queue(qif, maxlen, 15000, 1200,
1858 qflags | QFCF_LAZY, SCIDX_RV, &cl6)) != 0)
1859 goto cleanup;
1860
1861 if ((err = qfq_add_queue(qif, maxlen, 20000, 1400,
1862 qflags | QFCF_LAZY, SCIDX_VI, &cl7)) != 0)
1863 goto cleanup;
1864
1865 if ((err = qfq_add_queue(qif, maxlen, 23000, 200,
1866 qflags | QFCF_LAZY, SCIDX_VO, &cl8)) != 0)
1867 goto cleanup;
1868
1869 if ((err = qfq_add_queue(qif, maxlen, 25000, 200,
1870 qflags, SCIDX_CTL, &cl9)) != 0)
1871 goto cleanup;
1872
1873 err = ifclassq_attach(ifq, PKTSCHEDT_QFQ, qif,
1874 qfq_enqueue_ifclassq, qfq_dequeue_ifclassq, NULL,
1875 qfq_request_ifclassq);
1876
1877 /* cache these for faster lookup */
1878 if (err == 0) {
1879 ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
1880 ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;
1881
1882 ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
1883 ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;
1884
1885 ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
1886 ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;
1887
1888 ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
1889 ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;
1890
1891 ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
1892 ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;
1893
1894 ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
1895 ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;
1896
1897 ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
1898 ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;
1899
1900 ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
1901 ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;
1902
1903 ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
1904 ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;
1905
1906 ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
1907 ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
1908 }
1909
1910 cleanup:
1911 if (err != 0)
1912 (void) qfq_destroy_locked(qif);
1913
1914 return (err);
1915 }
1916
1917 int
1918 qfq_teardown_ifclassq(struct ifclassq *ifq)
1919 {
1920 struct qfq_if *qif = ifq->ifcq_disc;
1921 int i;
1922
1923 IFCQ_LOCK_ASSERT_HELD(ifq);
1924 VERIFY(qif != NULL && ifq->ifcq_type == PKTSCHEDT_QFQ);
1925
1926 (void) qfq_destroy_locked(qif);
1927
1928 ifq->ifcq_disc = NULL;
1929 for (i = 0; i < IFCQ_SC_MAX; i++) {
1930 ifq->ifcq_disc_slots[i].qid = 0;
1931 ifq->ifcq_disc_slots[i].cl = NULL;
1932 }
1933
1934 return (ifclassq_detach(ifq));
1935 }
1936
1937 int
1938 qfq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
1939 struct if_ifclassq_stats *ifqs)
1940 {
1941 struct qfq_if *qif = ifq->ifcq_disc;
1942
1943 IFCQ_LOCK_ASSERT_HELD(ifq);
1944 VERIFY(ifq->ifcq_type == PKTSCHEDT_QFQ);
1945
1946 if (slot >= IFCQ_SC_MAX)
1947 return (EINVAL);
1948
1949 return (qfq_get_class_stats(qif, ifq->ifcq_disc_slots[slot].qid,
1950 &ifqs->ifqs_qfq_stats));
1951 }
1952
1953 static int
1954 qfq_throttle(struct qfq_if *qif, cqrq_throttle_t *tr)
1955 {
1956 struct ifclassq *ifq = qif->qif_ifq;
1957 struct qfq_class *cl;
1958 int err = 0;
1959
1960 IFCQ_LOCK_ASSERT_HELD(ifq);
1961 VERIFY(!(qif->qif_flags & QFQIFF_ALTQ));
1962
1963 if (!tr->set) {
1964 tr->level = qif->qif_throttle;
1965 return (0);
1966 }
1967
1968 if (tr->level == qif->qif_throttle)
1969 return (EALREADY);
1970
1971 /* Current throttling levels only involve BK_SYS class */
1972 cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;
1973
1974 switch (tr->level) {
1975 case IFNET_THROTTLE_OFF:
1976 err = qfq_resumeq(qif, cl);
1977 break;
1978
1979 case IFNET_THROTTLE_OPPORTUNISTIC:
1980 err = qfq_suspendq(qif, cl);
1981 break;
1982
1983 default:
1984 VERIFY(0);
1985 /* NOTREACHED */
1986 }
1987
1988 if (err == 0 || err == ENXIO) {
1989 if (pktsched_verbose) {
1990 log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
1991 if_name(QFQIF_IFP(qif)), qfq_style(qif),
1992 (err == 0) ? "" : "lazy ", qif->qif_throttle,
1993 tr->level);
1994 }
1995 qif->qif_throttle = tr->level;
1996 if (err != 0)
1997 err = 0;
1998 else
1999 qfq_purgeq(qif, cl, 0, NULL, NULL);
2000 } else {
2001 log(LOG_ERR, "%s: %s unable to set throttling level "
2002 "%d->%d [error=%d]\n", if_name(QFQIF_IFP(qif)),
2003 qfq_style(qif), qif->qif_throttle, tr->level, err);
2004 }
2005
2006 return (err);
2007 }
2008
2009 static int
2010 qfq_resumeq(struct qfq_if *qif, struct qfq_class *cl)
2011 {
2012 struct ifclassq *ifq = qif->qif_ifq;
2013 int err = 0;
2014
2015 IFCQ_LOCK_ASSERT_HELD(ifq);
2016
2017 #if CLASSQ_RIO
2018 if (q_is_rio(&cl->cl_q))
2019 err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
2020 else
2021 #endif /* CLASSQ_RIO */
2022 #if CLASSQ_RED
2023 if (q_is_red(&cl->cl_q))
2024 err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
2025 else
2026 #endif /* CLASSQ_RED */
2027 #if CLASSQ_BLUE
2028 if (q_is_blue(&cl->cl_q))
2029 err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
2030 else
2031 #endif /* CLASSQ_BLUE */
2032 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
2033 err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);
2034
2035 if (err == 0)
2036 qstate(&cl->cl_q) = QS_RUNNING;
2037
2038 return (err);
2039 }
2040
2041 static int
2042 qfq_suspendq(struct qfq_if *qif, struct qfq_class *cl)
2043 {
2044 struct ifclassq *ifq = qif->qif_ifq;
2045 int err = 0;
2046
2047 IFCQ_LOCK_ASSERT_HELD(ifq);
2048
2049 #if CLASSQ_RIO
2050 if (q_is_rio(&cl->cl_q))
2051 err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
2052 else
2053 #endif /* CLASSQ_RIO */
2054 #if CLASSQ_RED
2055 if (q_is_red(&cl->cl_q))
2056 err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
2057 else
2058 #endif /* CLASSQ_RED */
2059 #if CLASSQ_BLUE
2060 if (q_is_blue(&cl->cl_q))
2061 err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
2062 else
2063 #endif /* CLASSQ_BLUE */
2064 if (q_is_sfb(&cl->cl_q)) {
2065 if (cl->cl_sfb != NULL) {
2066 err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
2067 } else {
2068 VERIFY(cl->cl_flags & QFCF_LAZY);
2069 err = ENXIO; /* delayed throttling */
2070 }
2071 }
2072
2073 if (err == 0 || err == ENXIO)
2074 qstate(&cl->cl_q) = QS_SUSPENDED;
2075
2076 return (err);
2077 }