/*
 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*	$OpenBSD: altq_priq.c,v 1.21 2007/09/13 20:40:02 chl Exp $	*/
/*	$KAME: altq_priq.c,v 1.1 2000/10/18 09:15:23 kjc Exp $	*/

/*
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>

#if PKTSCHED_PRIQ
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>

#include <net/pktsched/pktsched_priq.h>
#include <netinet/in.h>

static int priq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
static struct mbuf *priq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
static int priq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
static int priq_clear_interface(struct priq_if *);
static struct priq_class *priq_class_create(struct priq_if *, int, u_int32_t,
    int, u_int32_t);
static int priq_class_destroy(struct priq_if *, struct priq_class *);
static int priq_destroy_locked(struct priq_if *);
static inline int priq_addq(struct priq_class *, struct mbuf *,
    struct pf_mtag *);
static inline struct mbuf *priq_getq(struct priq_class *);
static inline struct mbuf *priq_pollq(struct priq_class *);
static void priq_purgeq(struct priq_if *, struct priq_class *, u_int32_t,
    u_int32_t *, u_int32_t *);
static void priq_purge_sc(struct priq_if *, cqrq_purge_sc_t *);
static void priq_updateq(struct priq_if *, struct priq_class *, cqev_t);
static int priq_throttle(struct priq_if *, cqrq_throttle_t *);
static int priq_resumeq(struct priq_if *, struct priq_class *);
static int priq_suspendq(struct priq_if *, struct priq_class *);
static int priq_stat_sc(struct priq_if *, cqrq_stat_sc_t *);
static inline struct priq_class *priq_clh_to_clp(struct priq_if *, u_int32_t);
static const char *priq_style(struct priq_if *);

#define	PRIQ_ZONE_MAX	32		/* maximum elements in zone */
#define	PRIQ_ZONE_NAME	"pktsched_priq"	/* zone name */

static unsigned int priq_size;		/* size of zone element */
static struct zone *priq_zone;		/* zone for priq */

#define	PRIQ_CL_ZONE_MAX	32	/* maximum elements in zone */
#define	PRIQ_CL_ZONE_NAME	"pktsched_priq_cl" /* zone name */

static unsigned int priq_cl_size;	/* size of zone element */
static struct zone *priq_cl_zone;	/* zone for priq_class */

void
priq_init(void)
{
	priq_size = sizeof (struct priq_if);
	priq_zone = zinit(priq_size, PRIQ_ZONE_MAX * priq_size,
	    0, PRIQ_ZONE_NAME);
	if (priq_zone == NULL) {
		panic("%s: failed allocating %s", __func__, PRIQ_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(priq_zone, Z_EXPAND, TRUE);
	zone_change(priq_zone, Z_CALLERACCT, TRUE);

	priq_cl_size = sizeof (struct priq_class);
	priq_cl_zone = zinit(priq_cl_size, PRIQ_CL_ZONE_MAX * priq_cl_size,
	    0, PRIQ_CL_ZONE_NAME);
	if (priq_cl_zone == NULL) {
		panic("%s: failed allocating %s", __func__, PRIQ_CL_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(priq_cl_zone, Z_EXPAND, TRUE);
	zone_change(priq_cl_zone, Z_CALLERACCT, TRUE);
}

struct priq_if *
priq_alloc(struct ifnet *ifp, int how, boolean_t altq)
{
	struct priq_if *pif;

	pif = (how == M_WAITOK) ? zalloc(priq_zone) : zalloc_noblock(priq_zone);
	if (pif == NULL)
		return (NULL);

	bzero(pif, priq_size);
	pif->pif_maxpri = -1;
	pif->pif_ifq = &ifp->if_snd;
	if (altq)
		pif->pif_flags |= PRIQIFF_ALTQ;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler allocated\n",
		    if_name(ifp), priq_style(pif));
	}

	return (pif);
}

int
priq_destroy(struct priq_if *pif)
{
	struct ifclassq *ifq = pif->pif_ifq;
	int err;

	IFCQ_LOCK(ifq);
	err = priq_destroy_locked(pif);
	IFCQ_UNLOCK(ifq);

	return (err);
}

static int
priq_destroy_locked(struct priq_if *pif)
{
	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	(void) priq_clear_interface(pif);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
	}

	zfree(priq_zone, pif);

	return (0);
}

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
priq_clear_interface(struct priq_if *pif)
{
	struct priq_class *cl;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	/* clear out the classes */
	for (pri = 0; pri <= pif->pif_maxpri; pri++)
		if ((cl = pif->pif_classes[pri]) != NULL)
			priq_class_destroy(pif, cl);

	return (0);
}

/* discard all the queued packets on the interface */
void
priq_purge(struct priq_if *pif)
{
	struct priq_class *cl;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL && !qempty(&cl->cl_q))
			priq_purgeq(pif, cl, 0, NULL, NULL);
	}
#if !PF_ALTQ
	/*
	 * This assertion is safe to be made only when PF_ALTQ is not
	 * configured; otherwise, IFCQ_LEN represents the sum of the
	 * packets managed by ifcq_disc and altq_disc instances, which
	 * is possible when transitioning between the two.
	 */
	VERIFY(IFCQ_LEN(pif->pif_ifq) == 0);
#endif /* !PF_ALTQ */
}

static void
priq_purge_sc(struct priq_if *pif, cqrq_purge_sc_t *pr)
{
	struct ifclassq *ifq = pif->pif_ifq;
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
	VERIFY(pr->flow != 0);

	if (pr->sc != MBUF_SC_UNSPEC) {
		i = MBUF_SCIDX(pr->sc);
		VERIFY(i < IFCQ_SC_MAX);

		priq_purgeq(pif, ifq->ifcq_disc_slots[i].cl,
		    pr->flow, &pr->packets, &pr->bytes);
	} else {
		u_int32_t cnt, len;

		pr->packets = 0;
		pr->bytes = 0;

		for (i = 0; i < IFCQ_SC_MAX; i++) {
			priq_purgeq(pif, ifq->ifcq_disc_slots[i].cl,
			    pr->flow, &cnt, &len);
			pr->packets += cnt;
			pr->bytes += len;
		}
	}
}

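/*
 * Usage sketch (editorial, not part of the original source): purge one
 * flow from a single service class; the field names follow the use of
 * cqrq_purge_sc_t above (sc, flow, packets, bytes), and MBUF_SC_BK_SYS
 * is assumed to be a valid service-class constant.
 *
 *	cqrq_purge_sc_t pr;
 *
 *	pr.sc = MBUF_SC_BK_SYS;		// one specific service class
 *	pr.flow = flowhash;		// non-zero flow identifier
 *	priq_purge_sc(pif, &pr);
 *	// pr.packets and pr.bytes now report what was dropped
 */
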
void
priq_event(struct priq_if *pif, cqev_t ev)
{
	struct priq_class *cl;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	for (pri = 0; pri <= pif->pif_maxpri; pri++)
		if ((cl = pif->pif_classes[pri]) != NULL)
			priq_updateq(pif, cl, ev);
}

int
priq_add_queue(struct priq_if *pif, int priority, u_int32_t qlimit,
    int flags, u_int32_t qid, struct priq_class **clp)
{
	struct priq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	/* check parameters */
	if (priority >= PRIQ_MAXPRI)
		return (EINVAL);
	if (pif->pif_classes[priority] != NULL)
		return (EBUSY);
	if (priq_clh_to_clp(pif, qid) != NULL)
		return (EBUSY);

	cl = priq_class_create(pif, priority, qlimit, flags, qid);
	if (cl == NULL)
		return (ENOMEM);

	if (clp != NULL)
		*clp = cl;

	return (0);
}

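/*
 * Usage sketch (editorial, not part of the original source): create a
 * default class at the lowest priority with a plain tail-drop queue; a
 * qlimit of 0 picks up the interface queue limit per the logic in
 * priq_class_create() below.
 *
 *	struct priq_class *cl;
 *	int err;
 *
 *	err = priq_add_queue(pif, 0, 0, PRCF_DEFAULTCLASS, qid, &cl);
 *	if (err != 0)
 *		;	// EINVAL, EBUSY or ENOMEM per the checks above
 */
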
static struct priq_class *
priq_class_create(struct priq_if *pif, int pri, u_int32_t qlimit,
    int flags, u_int32_t qid)
{
	struct ifnet *ifp;
	struct ifclassq *ifq;
	struct priq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	/* Sanitize flags unless internally configured */
	if (pif->pif_flags & PRIQIFF_ALTQ)
		flags &= PRCF_USERFLAGS;

#if !CLASSQ_RED
	if (flags & PRCF_RED) {
		log(LOG_ERR, "%s: %s RED not available!\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
		return (NULL);
	}
#endif /* !CLASSQ_RED */

#if !CLASSQ_RIO
	if (flags & PRCF_RIO) {
		log(LOG_ERR, "%s: %s RIO not available!\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
		return (NULL);
	}
#endif /* !CLASSQ_RIO */

#if !CLASSQ_BLUE
	if (flags & PRCF_BLUE) {
		log(LOG_ERR, "%s: %s BLUE not available!\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
		return (NULL);
	}
#endif /* !CLASSQ_BLUE */

	/* These are mutually exclusive */
	if ((flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) &&
	    (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_RED &&
	    (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_RIO &&
	    (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_BLUE &&
	    (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_SFB) {
		log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
		return (NULL);
	}

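	/*
	 * Editorial note: the test above accepts zero or exactly one of
	 * RED|RIO|BLUE|SFB.  An equivalent, more compact form checks that
	 * the masked value has at most one bit set:
	 *
	 *	u_int32_t q = flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB);
	 *	if (q & (q - 1))
	 *		return (NULL);	// more than one discipline
	 *
	 * The explicit comparisons are kept as in the original for clarity.
	 */
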
	ifq = pif->pif_ifq;
	ifp = PRIQIF_IFP(pif);

	if ((cl = pif->pif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		if (!qempty(&cl->cl_q))
			priq_purgeq(pif, cl, 0, NULL, NULL);
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	} else {
		cl = zalloc(priq_cl_zone);
		if (cl == NULL)
			return (NULL);

		bzero(cl, priq_cl_size);
	}

	pif->pif_classes[pri] = cl;
	if (flags & PRCF_DEFAULTCLASS)
		pif->pif_default = cl;
	if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
		qlimit = IFCQ_MAXLEN(ifq);
		if (qlimit == 0)
			qlimit = DEFAULT_QLIMIT;	/* use default */
	}
	_qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
	cl->cl_flags = flags;
	cl->cl_pri = pri;
	if (pri > pif->pif_maxpri)
		pif->pif_maxpri = pri;
	cl->cl_pif = pif;
	cl->cl_handle = qid;

	if (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) {
#if CLASSQ_RED || CLASSQ_RIO
		u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
		int pkttime;
#endif /* CLASSQ_RED || CLASSQ_RIO */

		cl->cl_qflags = 0;
		if (flags & PRCF_ECN) {
			if (flags & PRCF_BLUE)
				cl->cl_qflags |= BLUEF_ECN;
			else if (flags & PRCF_SFB)
				cl->cl_qflags |= SFBF_ECN;
			else if (flags & PRCF_RED)
				cl->cl_qflags |= REDF_ECN;
			else if (flags & PRCF_RIO)
				cl->cl_qflags |= RIOF_ECN;
		}
		if (flags & PRCF_FLOWCTL) {
			if (flags & PRCF_SFB)
				cl->cl_qflags |= SFBF_FLOWCTL;
		}
		if (flags & PRCF_CLEARDSCP) {
			if (flags & PRCF_RIO)
				cl->cl_qflags |= RIOF_CLEARDSCP;
		}
#if CLASSQ_RED || CLASSQ_RIO
		/*
		 * XXX: RED & RIO should be watching link speed and MTU
		 * events and recompute pkttime accordingly.
		 */
		if (ifbandwidth < 8)
			pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
			    (ifbandwidth / 8);

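		/*
		 * Worked example (editorial): pkttime approximates the time
		 * to transmit one MTU-sized packet, in nanoseconds.  For a
		 * 1500-byte MTU on a 100 Mbps link, ifbandwidth / 8 is
		 * 12,500,000 bytes/sec, so:
		 *
		 *	pkttime = 1500 * 1000000000 / 12500000 = 120000 ns
		 *
		 * i.e. 120 usec per packet, which RED/RIO use below to
		 * scale their averaging intervals.
		 */
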
		/* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
#if CLASSQ_RED
		if (flags & PRCF_RED) {
			cl->cl_red = red_alloc(ifp, 0, 0,
			    qlimit(&cl->cl_q) * 10/100,
			    qlimit(&cl->cl_q) * 30/100,
			    cl->cl_qflags, pkttime);
			if (cl->cl_red != NULL)
				qtype(&cl->cl_q) = Q_RED;
		}
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
		if (flags & PRCF_RIO) {
			cl->cl_rio =
			    rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
			if (cl->cl_rio != NULL)
				qtype(&cl->cl_q) = Q_RIO;
		}
#endif /* CLASSQ_RIO */
#endif /* CLASSQ_RED || CLASSQ_RIO */
#if CLASSQ_BLUE
		if (flags & PRCF_BLUE) {
			cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
			if (cl->cl_blue != NULL)
				qtype(&cl->cl_q) = Q_BLUE;
		}
#endif /* CLASSQ_BLUE */
		if (flags & PRCF_SFB) {
			if (!(cl->cl_flags & PRCF_LAZY))
				cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
				    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb != NULL || (cl->cl_flags & PRCF_LAZY))
				qtype(&cl->cl_q) = Q_SFB;
		}
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
		    "flags=%b\n", if_name(ifp), priq_style(pif),
		    cl->cl_handle, cl->cl_pri, qlimit, flags, PRCF_BITS);
	}

	return (cl);
}

int
priq_remove_queue(struct priq_if *pif, u_int32_t qid)
{
	struct priq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	if ((cl = priq_clh_to_clp(pif, qid)) == NULL)
		return (EINVAL);

	return (priq_class_destroy(pif, cl));
}

static int
priq_class_destroy(struct priq_if *pif, struct priq_class *cl)
{
	struct ifclassq *ifq = pif->pif_ifq;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!qempty(&cl->cl_q))
		priq_purgeq(pif, cl, 0, NULL, NULL);

	VERIFY(cl->cl_pri < PRIQ_MAXPRI);
	VERIFY(!pktsched_bit_tst(cl->cl_pri, &pif->pif_bitmap));

	pif->pif_classes[cl->cl_pri] = NULL;
	if (pif->pif_maxpri == cl->cl_pri) {
		for (pri = cl->cl_pri; pri >= 0; pri--)
			if (pif->pif_classes[pri] != NULL) {
				pif->pif_maxpri = pri;
				break;
			}
		if (pri < 0)
			pif->pif_maxpri = -1;
	}

	if (pif->pif_default == cl)
		pif->pif_default = NULL;

	if (cl->cl_qalg.ptr != NULL) {
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif),
		    cl->cl_handle, cl->cl_pri);
	}

	zfree(priq_cl_zone, cl);

	return (0);
}

int
priq_enqueue(struct priq_if *pif, struct priq_class *cl, struct mbuf *m,
    struct pf_mtag *t)
{
	struct ifclassq *ifq = pif->pif_ifq;
	u_int32_t pri;
	int len, ret;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(cl == NULL || cl->cl_pif == pif);

	if (cl == NULL) {
#if PF_ALTQ
		cl = priq_clh_to_clp(pif, t->pftag_qid);
#else /* !PF_ALTQ */
		cl = priq_clh_to_clp(pif, 0);
#endif /* !PF_ALTQ */
		if (cl == NULL) {
			cl = pif->pif_default;
			if (cl == NULL) {
				IFCQ_CONVERT_LOCK(ifq);
				m_freem(m);
				return (ENOBUFS);
			}
		}
	}

	pri = cl->cl_pri;
	VERIFY(pri < PRIQ_MAXPRI);

	len = m_pktlen(m);

	ret = priq_addq(cl, m, t);
	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
		} else {
			VERIFY(ret == CLASSQEQ_DROPPED ||
			    ret == CLASSQEQ_DROPPED_FC ||
			    ret == CLASSQEQ_DROPPED_SP);
			/* packet has been freed in priq_addq */
			PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
			IFCQ_DROP_ADD(ifq, 1, len);
			switch (ret) {
			case CLASSQEQ_DROPPED:
				return (ENOBUFS);
			case CLASSQEQ_DROPPED_FC:
				return (EQFULL);
			case CLASSQEQ_DROPPED_SP:
				return (EQSUSPENDED);
			}
			/* NOTREACHED */
		}
	}
	IFCQ_INC_LEN(ifq);
	IFCQ_INC_BYTES(ifq, len);

	/* class is now active; indicate it as such */
	if (!pktsched_bit_tst(pri, &pif->pif_bitmap))
		pktsched_bit_set(pri, &pif->pif_bitmap);

	/* successfully queued. */
	return (ret);
}

/*
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
struct mbuf *
priq_dequeue(struct priq_if *pif, cqdq_op_t op)
{
	struct ifclassq *ifq = pif->pif_ifq;
	struct priq_class *cl;
	struct mbuf *m;
	u_int32_t pri, len;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (pif->pif_bitmap == 0) {
		/* no active class; nothing to dequeue */
		return (NULL);
	}
	VERIFY(!IFCQ_IS_EMPTY(ifq));

	pri = pktsched_fls(pif->pif_bitmap) - 1;	/* zero based */
	VERIFY(pri < PRIQ_MAXPRI);
	cl = pif->pif_classes[pri];
	VERIFY(cl != NULL && !qempty(&cl->cl_q));

	if (op == CLASSQDQ_POLL)
		return (priq_pollq(cl));

	m = priq_getq(cl);
	VERIFY(m != NULL);	/* qalg must be work conserving */
	len = m_pktlen(m);

	IFCQ_DEC_LEN(ifq);
	IFCQ_DEC_BYTES(ifq, len);
	if (qempty(&cl->cl_q)) {
		cl->cl_period++;
		/* class is now inactive; indicate it as such */
		pktsched_bit_clr(pri, &pif->pif_bitmap);
	}
	PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
	IFCQ_XMIT_ADD(ifq, 1, len);

	return (m);
}

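/*
 * Example (editorial, not part of the original source): pif_bitmap keeps
 * one bit per active priority, so the highest backlogged class is found
 * with a find-last-set on the bitmap.  Assuming classes at priorities 2
 * and 5 are backlogged:
 *
 *	pif->pif_bitmap == 0x24		(bits 2 and 5 set)
 *	pktsched_fls(0x24) == 6		(1-based) => pri = 5
 *
 * and CLASSQDQ_POLL followed by CLASSQDQ_REMOVE returns that same head
 * packet from the priority-5 class, as the contract above requires.
 */
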
static inline int
priq_addq(struct priq_class *cl, struct mbuf *m, struct pf_mtag *t)
{
	struct priq_if *pif = cl->cl_pif;
	struct ifclassq *ifq = pif->pif_ifq;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_addq(cl->cl_red, &cl->cl_q, m, t));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb == NULL) {
			struct ifnet *ifp = PRIQIF_IFP(pif);

			VERIFY(cl->cl_flags & PRCF_LAZY);
			cl->cl_flags &= ~PRCF_LAZY;
			IFCQ_CONVERT_LOCK(ifq);

			cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
			    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb == NULL) {
				/* fall back to droptail */
				qtype(&cl->cl_q) = Q_DROPTAIL;
				cl->cl_flags &= ~PRCF_SFB;
				cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

				log(LOG_ERR, "%s: %s SFB lazy allocation "
				    "failed for qid=%d pri=%d, falling back "
				    "to DROPTAIL\n", if_name(ifp),
				    priq_style(pif), cl->cl_handle,
				    cl->cl_pri);
			} else if (pif->pif_throttle != IFNET_THROTTLE_OFF) {
				/* if there's pending throttling, set it */
				cqrq_throttle_t tr = { 1, pif->pif_throttle };
				int err = priq_throttle(pif, &tr);

				if (err == EALREADY)
					err = 0;
				if (err != 0) {
					tr.level = IFNET_THROTTLE_OFF;
					(void) priq_throttle(pif, &tr);
				}
			}
		}
		if (cl->cl_sfb != NULL)
			return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
	} else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (CLASSQEQ_DROPPED);
	}

	if (cl->cl_flags & PRCF_CLEARDSCP)
		write_dsfield(m, t, 0);

	return (_addq(&cl->cl_q, m));
}

static inline struct mbuf *
priq_getq(struct priq_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_pif->pif_ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_getq(cl->cl_rio, &cl->cl_q));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_getq(cl->cl_red, &cl->cl_q));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_getq(cl->cl_blue, &cl->cl_q));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_getq(cl->cl_sfb, &cl->cl_q));

	return (_getq(&cl->cl_q));
}

static inline struct mbuf *
priq_pollq(struct priq_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_pif->pif_ifq);

	return (qhead(&cl->cl_q));
}

static void
priq_purgeq(struct priq_if *pif, struct priq_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
	struct ifclassq *ifq = pif->pif_ifq;
	u_int32_t cnt = 0, len = 0, qlen;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if ((qlen = qlen(&cl->cl_q)) == 0) {
		VERIFY(!pktsched_bit_tst(cl->cl_pri, &pif->pif_bitmap));
		goto done;
	}

	/* become regular mutex before freeing mbufs */
	IFCQ_CONVERT_LOCK(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
	else
		_flushq_flow(&cl->cl_q, flow, &cnt, &len);

	if (cnt > 0) {
		VERIFY(qlen(&cl->cl_q) == (qlen - cnt));

		PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
		IFCQ_DROP_ADD(ifq, cnt, len);

		VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
		IFCQ_LEN(ifq) -= cnt;

		if (qempty(&cl->cl_q))
			pktsched_bit_clr(cl->cl_pri, &pif->pif_bitmap);

		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
			    "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
			    if_name(PRIQIF_IFP(pif)), priq_style(pif),
			    cl->cl_handle, cl->cl_pri, qlen, qlen(&cl->cl_q),
			    cnt, len, flow);
		}
	}
done:
	if (packets != NULL)
		*packets = cnt;
	if (bytes != NULL)
		*bytes = len;
}

static void
priq_updateq(struct priq_if *pif, struct priq_class *cl, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s update qid=%d pri=%d event=%s\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif),
		    cl->cl_handle, cl->cl_pri, ifclassq_ev2str(ev));
	}

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_updateq(cl->cl_rio, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_updateq(cl->cl_red, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_updateq(cl->cl_blue, ev));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_updateq(cl->cl_sfb, ev));
}

int
priq_get_class_stats(struct priq_if *pif, u_int32_t qid,
    struct priq_classstats *sp)
{
	struct priq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	if ((cl = priq_clh_to_clp(pif, qid)) == NULL)
		return (EINVAL);

	sp->class_handle = cl->cl_handle;
	sp->priority = cl->cl_pri;
	sp->qlength = qlen(&cl->cl_q);
	sp->qlimit = qlimit(&cl->cl_q);
	sp->period = cl->cl_period;
	sp->xmitcnt = cl->cl_xmitcnt;
	sp->dropcnt = cl->cl_dropcnt;

	sp->qtype = qtype(&cl->cl_q);
	sp->qstate = qstate(&cl->cl_q);
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_getstats(cl->cl_red, &sp->red[0]);
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_getstats(cl->cl_rio, &sp->red[0]);
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_getstats(cl->cl_blue, &sp->blue);
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_getstats(cl->cl_sfb, &sp->sfb);

	return (0);
}

static int
priq_stat_sc(struct priq_if *pif, cqrq_stat_sc_t *sr)
{
	struct ifclassq *ifq = pif->pif_ifq;
	struct priq_class *cl;
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc));

	i = MBUF_SCIDX(sr->sc);
	VERIFY(i < IFCQ_SC_MAX);

	cl = ifq->ifcq_disc_slots[i].cl;
	sr->packets = qlen(&cl->cl_q);
	sr->bytes = qsize(&cl->cl_q);

	return (0);
}

/* convert a class handle to the corresponding class pointer */
static inline struct priq_class *
priq_clh_to_clp(struct priq_if *pif, u_int32_t chandle)
{
	struct priq_class *cl;
	int idx;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	for (idx = pif->pif_maxpri; idx >= 0; idx--)
		if ((cl = pif->pif_classes[idx]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);

	return (NULL);
}

static const char *
priq_style(struct priq_if *pif)
{
	return ((pif->pif_flags & PRIQIFF_ALTQ) ? "ALTQ_PRIQ" : "PRIQ");
}

/*
 * priq_enqueue_ifclassq is an enqueue function to be registered to
 * (*ifcq_enqueue) in struct ifclassq.
 */
static int
priq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
{
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(m->m_flags & M_PKTHDR)) {
		/* should not happen */
		log(LOG_ERR, "%s: packet does not have pkthdr\n",
		    if_name(ifq->ifcq_ifp));
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (ENOBUFS);
	}

	i = MBUF_SCIDX(mbuf_get_service_class(m));
	VERIFY((u_int32_t)i < IFCQ_SC_MAX);

	return (priq_enqueue(ifq->ifcq_disc,
	    ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
}

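/*
 * Example (editorial, not part of the original source): the slot index
 * comes solely from the mbuf's service class, so a packet stamped
 * MBUF_SC_VO upstream (constant assumed here for illustration) is
 * steered as:
 *
 *	i = MBUF_SCIDX(MBUF_SC_VO);		// == SCIDX_VO
 *	cl = ifq->ifcq_disc_slots[i].cl;	// class at priority 8
 */
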
/*
 * priq_dequeue_ifclassq is a dequeue function to be registered to
 * (*ifcq_dequeue) in struct ifclassq.
 *
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
static struct mbuf *
priq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op)
{
	return (priq_dequeue(ifq->ifcq_disc, op));
}

static int
priq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
{
	struct priq_if *pif = (struct priq_if *)ifq->ifcq_disc;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	switch (req) {
	case CLASSQRQ_PURGE:
		priq_purge(pif);
		break;

	case CLASSQRQ_PURGE_SC:
		priq_purge_sc(pif, (cqrq_purge_sc_t *)arg);
		break;

	case CLASSQRQ_EVENT:
		priq_event(pif, (cqev_t)arg);
		break;

	case CLASSQRQ_THROTTLE:
		err = priq_throttle(pif, (cqrq_throttle_t *)arg);
		break;

	case CLASSQRQ_STAT_SC:
		err = priq_stat_sc(pif, (cqrq_stat_sc_t *)arg);
		break;
	}
	return (err);
}

int
priq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	struct priq_class *cl0, *cl1, *cl2, *cl3, *cl4;
	struct priq_class *cl5, *cl6, *cl7, *cl8, *cl9;
	struct priq_if *pif;
	u_int32_t maxlen = 0, qflags = 0;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	if (flags & PKTSCHEDF_QALG_RED)
		qflags |= PRCF_RED;
	if (flags & PKTSCHEDF_QALG_RIO)
		qflags |= PRCF_RIO;
	if (flags & PKTSCHEDF_QALG_BLUE)
		qflags |= PRCF_BLUE;
	if (flags & PKTSCHEDF_QALG_SFB)
		qflags |= PRCF_SFB;
	if (flags & PKTSCHEDF_QALG_ECN)
		qflags |= PRCF_ECN;
	if (flags & PKTSCHEDF_QALG_FLOWCTL)
		qflags |= PRCF_FLOWCTL;

	pif = priq_alloc(ifp, M_WAITOK, FALSE);
	if (pif == NULL)
		return (ENOMEM);

	if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
		maxlen = if_sndq_maxlen;

	if ((err = priq_add_queue(pif, 0, maxlen,
	    qflags | PRCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 1, maxlen,
	    qflags | PRCF_LAZY, SCIDX_BK, &cl1)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 2, maxlen,
	    qflags | PRCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 3, maxlen,
	    qflags | PRCF_LAZY, SCIDX_RD, &cl3)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 4, maxlen,
	    qflags | PRCF_LAZY, SCIDX_OAM, &cl4)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 5, maxlen,
	    qflags | PRCF_LAZY, SCIDX_AV, &cl5)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 6, maxlen,
	    qflags | PRCF_LAZY, SCIDX_RV, &cl6)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 7, maxlen,
	    qflags | PRCF_LAZY, SCIDX_VI, &cl7)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 8, maxlen,
	    qflags | PRCF_LAZY, SCIDX_VO, &cl8)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 9, maxlen,
	    qflags, SCIDX_CTL, &cl9)) != 0)
		goto cleanup;

	err = ifclassq_attach(ifq, PKTSCHEDT_PRIQ, pif,
	    priq_enqueue_ifclassq, priq_dequeue_ifclassq, NULL,
	    NULL, priq_request_ifclassq);

	/* cache these for faster lookup */
	if (err == 0) {
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;

		ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
		ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;

		ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;

		ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
		ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;

		ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
		ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;

		ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
		ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;

		ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
		ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;

		ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;

		ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
		ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;

		ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
		ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
	}

cleanup:
	if (err != 0)
		(void) priq_destroy_locked(pif);

	return (err);
}

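/*
 * Usage sketch (editorial, not part of the original source): a caller
 * holding the ifclassq lock would typically attach PRIQ with SFB and
 * flow control enabled on all ten service classes:
 *
 *	IFCQ_LOCK(ifq);
 *	err = priq_setup_ifclassq(ifq,
 *	    PKTSCHEDF_QALG_SFB | PKTSCHEDF_QALG_FLOWCTL);
 *	IFCQ_UNLOCK(ifq);
 */
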
int
priq_teardown_ifclassq(struct ifclassq *ifq)
{
	struct priq_if *pif = ifq->ifcq_disc;
	int i;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(pif != NULL && ifq->ifcq_type == PKTSCHEDT_PRIQ);

	(void) priq_destroy_locked(pif);

	ifq->ifcq_disc = NULL;
	for (i = 0; i < IFCQ_SC_MAX; i++) {
		ifq->ifcq_disc_slots[i].qid = 0;
		ifq->ifcq_disc_slots[i].cl = NULL;
	}

	return (ifclassq_detach(ifq));
}

int
priq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
    struct if_ifclassq_stats *ifqs)
{
	struct priq_if *pif = ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_PRIQ);

	if (slot >= IFCQ_SC_MAX)
		return (EINVAL);

	return (priq_get_class_stats(pif, ifq->ifcq_disc_slots[slot].qid,
	    &ifqs->ifqs_priq_stats));
}

static int
priq_throttle(struct priq_if *pif, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = pif->pif_ifq;
	struct priq_class *cl;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(!(pif->pif_flags & PRIQIFF_ALTQ));

	if (!tr->set) {
		tr->level = pif->pif_throttle;
		return (0);
	}

	if (tr->level == pif->pif_throttle)
		return (EALREADY);

	/* Current throttling levels only involve BK_SYS class */
	cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;

	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		err = priq_resumeq(pif, cl);
		break;

	case IFNET_THROTTLE_OPPORTUNISTIC:
		err = priq_suspendq(pif, cl);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (err == 0 || err == ENXIO) {
		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
			    if_name(PRIQIF_IFP(pif)), priq_style(pif),
			    (err == 0) ? "" : "lazy ", pif->pif_throttle,
			    tr->level);
		}
		pif->pif_throttle = tr->level;
		if (err != 0)
			err = 0;
		else
			priq_purgeq(pif, cl, 0, NULL, NULL);
	} else {
		log(LOG_ERR, "%s: %s unable to set throttling level "
		    "%d->%d [error=%d]\n", if_name(PRIQIF_IFP(pif)),
		    priq_style(pif), pif->pif_throttle, tr->level, err);
	}

	return (err);
}

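/*
 * Example (editorial, not part of the original source): throttling is
 * driven through the generic request path; the initializer layout
 * { set, level } matches the use of cqrq_throttle_t in priq_addq() above.
 *
 *	cqrq_throttle_t tr = { 1, IFNET_THROTTLE_OPPORTUNISTIC };
 *	err = priq_request_ifclassq(ifq, CLASSQRQ_THROTTLE, &tr);	// set
 *
 *	tr.set = 0;
 *	(void) priq_request_ifclassq(ifq, CLASSQRQ_THROTTLE, &tr);	// query
 *	// tr.level now reflects the current throttling state
 */
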
static int
priq_resumeq(struct priq_if *pif, struct priq_class *cl)
{
	struct ifclassq *ifq = pif->pif_ifq;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);

	if (err == 0)
		qstate(&cl->cl_q) = QS_RUNNING;

	return (err);
}

1273 priq_suspendq(struct priq_if
*pif
, struct priq_class
*cl
)
1275 struct ifclassq
*ifq
= pif
->pif_ifq
;
1278 IFCQ_LOCK_ASSERT_HELD(ifq
);
1281 if (q_is_rio(&cl
->cl_q
))
1282 err
= rio_suspendq(cl
->cl_rio
, &cl
->cl_q
, TRUE
);
1284 #endif /* CLASSQ_RIO */
1286 if (q_is_red(&cl
->cl_q
))
1287 err
= red_suspendq(cl
->cl_red
, &cl
->cl_q
, TRUE
);
1289 #endif /* CLASSQ_RED */
1291 if (q_is_blue(&cl
->cl_q
))
1292 err
= blue_suspendq(cl
->cl_blue
, &cl
->cl_q
, TRUE
);
1294 #endif /* CLASSQ_BLUE */
1295 if (q_is_sfb(&cl
->cl_q
)) {
1296 if (cl
->cl_sfb
!= NULL
) {
1297 err
= sfb_suspendq(cl
->cl_sfb
, &cl
->cl_q
, TRUE
);
1299 VERIFY(cl
->cl_flags
& PRCF_LAZY
);
1300 err
= ENXIO
; /* delayed throttling */
1304 if (err
== 0 || err
== ENXIO
)
1305 qstate(&cl
->cl_q
) = QS_SUSPENDED
;
1309 #endif /* PKTSCHED_PRIQ */