/*
 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* $OpenBSD: altq_priq.c,v 1.21 2007/09/13 20:40:02 chl Exp $ */
/* $KAME: altq_priq.c,v 1.1 2000/10/18 09:15:23 kjc Exp $ */
/*
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * priority queue
 */

#if PKTSCHED_PRIQ

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>

#include <net/pktsched/pktsched_priq.h>
#include <netinet/in.h>
static int priq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
static struct mbuf *priq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
static int priq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
static int priq_clear_interface(struct priq_if *);
static struct priq_class *priq_class_create(struct priq_if *, int, u_int32_t,
    int, u_int32_t);
static int priq_class_destroy(struct priq_if *, struct priq_class *);
static int priq_destroy_locked(struct priq_if *);
static inline int priq_addq(struct priq_class *, struct mbuf *,
    struct pf_mtag *);
static inline struct mbuf *priq_getq(struct priq_class *);
static inline struct mbuf *priq_pollq(struct priq_class *);
static void priq_purgeq(struct priq_if *, struct priq_class *, u_int32_t,
    u_int32_t *, u_int32_t *);
static void priq_purge_sc(struct priq_if *, cqrq_purge_sc_t *);
static void priq_updateq(struct priq_if *, struct priq_class *, cqev_t);
static int priq_throttle(struct priq_if *, cqrq_throttle_t *);
static int priq_resumeq(struct priq_if *, struct priq_class *);
static int priq_suspendq(struct priq_if *, struct priq_class *);
static int priq_stat_sc(struct priq_if *, cqrq_stat_sc_t *);
static inline struct priq_class *priq_clh_to_clp(struct priq_if *, u_int32_t);
static const char *priq_style(struct priq_if *);
#define PRIQ_ZONE_MAX       32              /* maximum elements in zone */
#define PRIQ_ZONE_NAME      "pktsched_priq" /* zone name */

static unsigned int priq_size;              /* size of zone element */
static struct zone *priq_zone;              /* zone for priq */

#define PRIQ_CL_ZONE_MAX    32              /* maximum elements in zone */
#define PRIQ_CL_ZONE_NAME   "pktsched_priq_cl" /* zone name */

static unsigned int priq_cl_size;           /* size of zone element */
static struct zone *priq_cl_zone;           /* zone for priq_class */
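
/*
 * Scheduler instances (struct priq_if) and classes (struct priq_class)
 * are carved out of two dedicated zalloc zones, sized in priq_init()
 * below.  Z_EXPAND allows each zone to grow beyond its initial maximum,
 * and Z_CALLERACCT attributes allocations to the calling task.
 */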
void
priq_init(void)
{
    priq_size = sizeof (struct priq_if);
    priq_zone = zinit(priq_size, PRIQ_ZONE_MAX * priq_size,
        0, PRIQ_ZONE_NAME);
    if (priq_zone == NULL) {
        panic("%s: failed allocating %s", __func__, PRIQ_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(priq_zone, Z_EXPAND, TRUE);
    zone_change(priq_zone, Z_CALLERACCT, TRUE);

    priq_cl_size = sizeof (struct priq_class);
    priq_cl_zone = zinit(priq_cl_size, PRIQ_CL_ZONE_MAX * priq_cl_size,
        0, PRIQ_CL_ZONE_NAME);
    if (priq_cl_zone == NULL) {
        panic("%s: failed allocating %s", __func__, PRIQ_CL_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(priq_cl_zone, Z_EXPAND, TRUE);
    zone_change(priq_cl_zone, Z_CALLERACCT, TRUE);
}
struct priq_if *
priq_alloc(struct ifnet *ifp, int how, boolean_t altq)
{
    struct priq_if *pif;

    pif = (how == M_WAITOK) ? zalloc(priq_zone) : zalloc_noblock(priq_zone);
    if (pif == NULL)
        return (NULL);

    bzero(pif, priq_size);
    pif->pif_maxpri = -1;
    pif->pif_ifq = &ifp->if_snd;
    if (altq)
        pif->pif_flags |= PRIQIFF_ALTQ;

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s scheduler allocated\n",
            if_name(ifp), priq_style(pif));
    }

    return (pif);
}
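
/*
 * priq_destroy() is the unlocked entry point: it takes the ifclassq lock
 * around priq_destroy_locked(), which clears all classes and returns the
 * priq_if to its zone.
 */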
int
priq_destroy(struct priq_if *pif)
{
    struct ifclassq *ifq = pif->pif_ifq;
    int err;

    IFCQ_LOCK(ifq);
    err = priq_destroy_locked(pif);
    IFCQ_UNLOCK(ifq);

    return (err);
}
static int
priq_destroy_locked(struct priq_if *pif)
{
    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    (void) priq_clear_interface(pif);

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
            if_name(PRIQIF_IFP(pif)), priq_style(pif));
    }

    zfree(priq_zone, pif);

    return (0);
}
/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
priq_clear_interface(struct priq_if *pif)
{
    struct priq_class *cl;
    int pri;

    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    /* clear out the classes */
    for (pri = 0; pri <= pif->pif_maxpri; pri++)
        if ((cl = pif->pif_classes[pri]) != NULL)
            priq_class_destroy(pif, cl);

    return (0);
}
/* discard all the queued packets on the interface */
void
priq_purge(struct priq_if *pif)
{
    struct priq_class *cl;
    int pri;

    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    for (pri = 0; pri <= pif->pif_maxpri; pri++) {
        if ((cl = pif->pif_classes[pri]) != NULL && !qempty(&cl->cl_q))
            priq_purgeq(pif, cl, 0, NULL, NULL);
    }
#if !PF_ALTQ
    /*
     * This assertion is safe to be made only when PF_ALTQ is not
     * configured; otherwise, IFCQ_LEN represents the sum of the
     * packets managed by ifcq_disc and altq_disc instances, which
     * is possible when transitioning between the two.
     */
    VERIFY(IFCQ_LEN(pif->pif_ifq) == 0);
#endif /* !PF_ALTQ */
}
static void
priq_purge_sc(struct priq_if *pif, cqrq_purge_sc_t *pr)
{
    struct ifclassq *ifq = pif->pif_ifq;
    u_int32_t i;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
    VERIFY(pr->flow != 0);

    if (pr->sc != MBUF_SC_UNSPEC) {
        i = MBUF_SCIDX(pr->sc);
        VERIFY(i < IFCQ_SC_MAX);

        priq_purgeq(pif, ifq->ifcq_disc_slots[i].cl,
            pr->flow, &pr->packets, &pr->bytes);
    } else {
        u_int32_t cnt, len;

        pr->packets = 0;
        pr->bytes = 0;

        for (i = 0; i < IFCQ_SC_MAX; i++) {
            priq_purgeq(pif, ifq->ifcq_disc_slots[i].cl,
                pr->flow, &cnt, &len);
            pr->packets += cnt;
            pr->bytes += len;
        }
    }
}
void
priq_event(struct priq_if *pif, cqev_t ev)
{
    struct priq_class *cl;
    int pri;

    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    for (pri = 0; pri <= pif->pif_maxpri; pri++)
        if ((cl = pif->pif_classes[pri]) != NULL)
            priq_updateq(pif, cl, ev);
}
int
priq_add_queue(struct priq_if *pif, int priority, u_int32_t qlimit,
    int flags, u_int32_t qid, struct priq_class **clp)
{
    struct priq_class *cl;

    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    /* check parameters */
    if (priority >= PRIQ_MAXPRI)
        return (EINVAL);
    if (pif->pif_classes[priority] != NULL)
        return (EBUSY);
    if (priq_clh_to_clp(pif, qid) != NULL)
        return (EBUSY);

    cl = priq_class_create(pif, priority, qlimit, flags, qid);
    if (cl == NULL)
        return (ENOMEM);

    if (clp != NULL)
        *clp = cl;

    return (0);
}
static struct priq_class *
priq_class_create(struct priq_if *pif, int pri, u_int32_t qlimit,
    int flags, u_int32_t qid)
{
    struct ifnet *ifp;
    struct ifclassq *ifq;
    struct priq_class *cl;

    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    /* Sanitize flags unless internally configured */
    if (pif->pif_flags & PRIQIFF_ALTQ)
        flags &= PRCF_USERFLAGS;

#if !CLASSQ_RED
    if (flags & PRCF_RED) {
        log(LOG_ERR, "%s: %s RED not available!\n",
            if_name(PRIQIF_IFP(pif)), priq_style(pif));
        return (NULL);
    }
#endif /* !CLASSQ_RED */

#if !CLASSQ_RIO
    if (flags & PRCF_RIO) {
        log(LOG_ERR, "%s: %s RIO not available!\n",
            if_name(PRIQIF_IFP(pif)), priq_style(pif));
        return (NULL);
    }
#endif /* !CLASSQ_RIO */

#if !CLASSQ_BLUE
    if (flags & PRCF_BLUE) {
        log(LOG_ERR, "%s: %s BLUE not available!\n",
            if_name(PRIQIF_IFP(pif)), priq_style(pif));
        return (NULL);
    }
#endif /* !CLASSQ_BLUE */

    /* These are mutually exclusive */
    if ((flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) &&
        (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_RED &&
        (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_RIO &&
        (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_BLUE &&
        (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_SFB) {
        log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
            if_name(PRIQIF_IFP(pif)), priq_style(pif));
        return (NULL);
    }

    ifq = pif->pif_ifq;
    ifp = PRIQIF_IFP(pif);

    if ((cl = pif->pif_classes[pri]) != NULL) {
        /* modify the class instead of creating a new one */
        if (!qempty(&cl->cl_q))
            priq_purgeq(pif, cl, 0, NULL, NULL);
#if CLASSQ_RIO
        if (q_is_rio(&cl->cl_q))
            rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
        if (q_is_red(&cl->cl_q))
            red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
        if (q_is_blue(&cl->cl_q))
            blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
        if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
            sfb_destroy(cl->cl_sfb);
        cl->cl_qalg.ptr = NULL;
        qtype(&cl->cl_q) = Q_DROPTAIL;
        qstate(&cl->cl_q) = QS_RUNNING;
    } else {
        cl = zalloc(priq_cl_zone);
        if (cl == NULL)
            return (NULL);

        bzero(cl, priq_cl_size);
    }

    pif->pif_classes[pri] = cl;
    if (flags & PRCF_DEFAULTCLASS)
        pif->pif_default = cl;
    if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
        qlimit = IFCQ_MAXLEN(ifq);
        if (qlimit == 0)
            qlimit = DEFAULT_QLIMIT;  /* use default */
    }
    _qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
    cl->cl_flags = flags;
    cl->cl_pri = pri;
    if (pri > pif->pif_maxpri)
        pif->pif_maxpri = pri;
    cl->cl_pif = pif;
    cl->cl_handle = qid;

    if (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) {
#if CLASSQ_RED || CLASSQ_RIO
        u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
        int pkttime;
#endif /* CLASSQ_RED || CLASSQ_RIO */

        cl->cl_qflags = 0;
        if (flags & PRCF_ECN) {
            if (flags & PRCF_BLUE)
                cl->cl_qflags |= BLUEF_ECN;
            else if (flags & PRCF_SFB)
                cl->cl_qflags |= SFBF_ECN;
            else if (flags & PRCF_RED)
                cl->cl_qflags |= REDF_ECN;
            else if (flags & PRCF_RIO)
                cl->cl_qflags |= RIOF_ECN;
        }
        if (flags & PRCF_FLOWCTL) {
            if (flags & PRCF_SFB)
                cl->cl_qflags |= SFBF_FLOWCTL;
        }
        if (flags & PRCF_CLEARDSCP) {
            if (flags & PRCF_RIO)
                cl->cl_qflags |= RIOF_CLEARDSCP;
        }
#if CLASSQ_RED || CLASSQ_RIO
        /*
         * XXX: RED & RIO should be watching link speed and MTU
         * events and recompute pkttime accordingly.
         */
        if (ifbandwidth < 8)
            pkttime = 1000 * 1000 * 1000; /* 1 sec */
        else
            pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
                (ifbandwidth / 8);

        /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
#if CLASSQ_RED
        if (flags & PRCF_RED) {
            cl->cl_red = red_alloc(ifp, 0, 0,
                qlimit(&cl->cl_q) * 10/100,
                qlimit(&cl->cl_q) * 30/100,
                cl->cl_qflags, pkttime);
            if (cl->cl_red != NULL)
                qtype(&cl->cl_q) = Q_RED;
        }
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
        if (flags & PRCF_RIO) {
            cl->cl_rio =
                rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
            if (cl->cl_rio != NULL)
                qtype(&cl->cl_q) = Q_RIO;
        }
#endif /* CLASSQ_RIO */
#endif /* CLASSQ_RED || CLASSQ_RIO */
#if CLASSQ_BLUE
        if (flags & PRCF_BLUE) {
            cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
            if (cl->cl_blue != NULL)
                qtype(&cl->cl_q) = Q_BLUE;
        }
#endif /* CLASSQ_BLUE */
        if (flags & PRCF_SFB) {
            if (!(cl->cl_flags & PRCF_LAZY))
                cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
                    qlimit(&cl->cl_q), cl->cl_qflags);
            if (cl->cl_sfb != NULL || (cl->cl_flags & PRCF_LAZY))
                qtype(&cl->cl_q) = Q_SFB;
        }
    }

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
            "flags=%b\n", if_name(ifp), priq_style(pif),
            cl->cl_handle, cl->cl_pri, qlimit, flags, PRCF_BITS);
    }

    return (cl);
}
int
priq_remove_queue(struct priq_if *pif, u_int32_t qid)
{
    struct priq_class *cl;

    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    if ((cl = priq_clh_to_clp(pif, qid)) == NULL)
        return (EINVAL);

    return (priq_class_destroy(pif, cl));
}
static int
priq_class_destroy(struct priq_if *pif, struct priq_class *cl)
{
    struct ifclassq *ifq = pif->pif_ifq;
    int pri;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (!qempty(&cl->cl_q))
        priq_purgeq(pif, cl, 0, NULL, NULL);

    VERIFY(cl->cl_pri < PRIQ_MAXPRI);
    VERIFY(!pktsched_bit_tst(cl->cl_pri, &pif->pif_bitmap));

    pif->pif_classes[cl->cl_pri] = NULL;
    if (pif->pif_maxpri == cl->cl_pri) {
        for (pri = cl->cl_pri; pri >= 0; pri--)
            if (pif->pif_classes[pri] != NULL) {
                pif->pif_maxpri = pri;
                break;
            }
        if (pri < 0)
            pif->pif_maxpri = -1;
    }

    if (pif->pif_default == cl)
        pif->pif_default = NULL;

    if (cl->cl_qalg.ptr != NULL) {
#if CLASSQ_RIO
        if (q_is_rio(&cl->cl_q))
            rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
        if (q_is_red(&cl->cl_q))
            red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
        if (q_is_blue(&cl->cl_q))
            blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
        if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
            sfb_destroy(cl->cl_sfb);
        cl->cl_qalg.ptr = NULL;
        qtype(&cl->cl_q) = Q_DROPTAIL;
        qstate(&cl->cl_q) = QS_RUNNING;
    }

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
            if_name(PRIQIF_IFP(pif)), priq_style(pif),
            cl->cl_handle, cl->cl_pri);
    }

    zfree(priq_cl_zone, cl);

    return (0);
}
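
/*
 * Enqueue a packet into the class picked by the caller (or, failing
 * that, the class matching the packet's qid, or the default class).
 * On success the class is marked active in pif_bitmap so that
 * priq_dequeue() will consider it; drop results from priq_addq() are
 * translated into ENOBUFS/EQFULL/EQSUSPENDED feedback for the caller.
 */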
int
priq_enqueue(struct priq_if *pif, struct priq_class *cl, struct mbuf *m,
    struct pf_mtag *t)
{
    struct ifclassq *ifq = pif->pif_ifq;
    u_int32_t pri;
    int len, ret;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(cl == NULL || cl->cl_pif == pif);

    if (cl == NULL) {
#if PF_ALTQ
        cl = priq_clh_to_clp(pif, t->pftag_qid);
#else /* !PF_ALTQ */
        cl = priq_clh_to_clp(pif, 0);
#endif /* !PF_ALTQ */
        if (cl == NULL) {
            cl = pif->pif_default;
            if (cl == NULL) {
                IFCQ_CONVERT_LOCK(ifq);
                m_freem(m);
                return (ENOBUFS);
            }
        }
    }
    pri = cl->cl_pri;
    VERIFY(pri < PRIQ_MAXPRI);

    len = m_pktlen(m);

    ret = priq_addq(cl, m, t);
    if (ret != 0) {
        if (ret == CLASSQEQ_SUCCESS_FC) {
            /* packet enqueued, return advisory feedback */
            ret = EQFULL;
        } else {
            VERIFY(ret == CLASSQEQ_DROPPED ||
                ret == CLASSQEQ_DROPPED_FC ||
                ret == CLASSQEQ_DROPPED_SP);
            /* packet has been freed in priq_addq */
            PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
            IFCQ_DROP_ADD(ifq, 1, len);
            switch (ret) {
            case CLASSQEQ_DROPPED:
                return (ENOBUFS);
            case CLASSQEQ_DROPPED_FC:
                return (EQFULL);
            case CLASSQEQ_DROPPED_SP:
                return (EQSUSPENDED);
            }
            /* NOT REACHED */
        }
    }
    IFCQ_INC_LEN(ifq);

    /* class is now active; indicate it as such */
    if (!pktsched_bit_tst(pri, &pif->pif_bitmap))
        pktsched_bit_set(pri, &pif->pif_bitmap);

    /* successfully queued. */
    return (ret);
}
/*
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 * from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 * CLASSQDQ_REMOVE must return the same packet if called immediately
 * after CLASSQDQ_POLL.
 */
struct mbuf *
priq_dequeue(struct priq_if *pif, cqdq_op_t op)
{
    struct ifclassq *ifq = pif->pif_ifq;
    struct priq_class *cl;
    struct mbuf *m;
    u_int32_t pri, len;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (pif->pif_bitmap == 0) {
        /* no active class; nothing to dequeue */
        return (NULL);
    }
    VERIFY(!IFCQ_IS_EMPTY(ifq));

    pri = pktsched_fls(pif->pif_bitmap) - 1;    /* zero based */
    VERIFY(pri < PRIQ_MAXPRI);
    cl = pif->pif_classes[pri];
    VERIFY(cl != NULL && !qempty(&cl->cl_q));

    if (op == CLASSQDQ_POLL)
        return (priq_pollq(cl));

    m = priq_getq(cl);
    VERIFY(m != NULL);    /* qalg must be work conserving */
    len = m_pktlen(m);

    IFCQ_DEC_LEN(ifq);
    if (qempty(&cl->cl_q)) {
        cl->cl_period++;
        /* class is now inactive; indicate it as such */
        pktsched_bit_clr(pri, &pif->pif_bitmap);
    }
    PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
    IFCQ_XMIT_ADD(ifq, 1, len);

    return (m);
}
static inline int
priq_addq(struct priq_class *cl, struct mbuf *m, struct pf_mtag *t)
{
    struct priq_if *pif = cl->cl_pif;
    struct ifclassq *ifq = pif->pif_ifq;

    IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
    if (q_is_rio(&cl->cl_q))
        return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
    else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
    if (q_is_red(&cl->cl_q))
        return (red_addq(cl->cl_red, &cl->cl_q, m, t));
    else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
    if (q_is_blue(&cl->cl_q))
        return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
    else
#endif /* CLASSQ_BLUE */
    if (q_is_sfb(&cl->cl_q)) {
        if (cl->cl_sfb == NULL) {
            struct ifnet *ifp = PRIQIF_IFP(pif);

            VERIFY(cl->cl_flags & PRCF_LAZY);
            cl->cl_flags &= ~PRCF_LAZY;
            IFCQ_CONVERT_LOCK(ifq);

            cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
                qlimit(&cl->cl_q), cl->cl_qflags);
            if (cl->cl_sfb == NULL) {
                /* fall back to droptail */
                qtype(&cl->cl_q) = Q_DROPTAIL;
                cl->cl_flags &= ~PRCF_SFB;
                cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

                log(LOG_ERR, "%s: %s SFB lazy allocation "
                    "failed for qid=%d pri=%d, falling back "
                    "to DROPTAIL\n", if_name(ifp),
                    priq_style(pif), cl->cl_handle,
                    cl->cl_pri);
            } else if (pif->pif_throttle != IFNET_THROTTLE_OFF) {
                /* if there's pending throttling, set it */
                cqrq_throttle_t tr = { 1, pif->pif_throttle };
                int err = priq_throttle(pif, &tr);

                if (err == EALREADY)
                    err = 0;
                if (err != 0) {
                    tr.level = IFNET_THROTTLE_OFF;
                    (void) priq_throttle(pif, &tr);
                }
            }
        }
        if (cl->cl_sfb != NULL)
            return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
    } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
        IFCQ_CONVERT_LOCK(ifq);
        m_freem(m);
        return (CLASSQEQ_DROPPED);
    }

    if (cl->cl_flags & PRCF_CLEARDSCP)
        write_dsfield(m, t, 0);

    _addq(&cl->cl_q, m);

    return (0);
}
static inline struct mbuf *
priq_getq(struct priq_class *cl)
{
    IFCQ_LOCK_ASSERT_HELD(cl->cl_pif->pif_ifq);

#if CLASSQ_RIO
    if (q_is_rio(&cl->cl_q))
        return (rio_getq(cl->cl_rio, &cl->cl_q));
    else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
    if (q_is_red(&cl->cl_q))
        return (red_getq(cl->cl_red, &cl->cl_q));
    else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
    if (q_is_blue(&cl->cl_q))
        return (blue_getq(cl->cl_blue, &cl->cl_q));
    else
#endif /* CLASSQ_BLUE */
    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
        return (sfb_getq(cl->cl_sfb, &cl->cl_q));

    return (_getq(&cl->cl_q));
}
static inline struct mbuf *
priq_pollq(struct priq_class *cl)
{
    IFCQ_LOCK_ASSERT_HELD(cl->cl_pif->pif_ifq);

    return (qhead(&cl->cl_q));
}
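
/*
 * Purge packets belonging to a given flow (flow == 0 means all packets)
 * from a class queue, updating drop counters and the interface queue
 * length, and deactivating the class if its queue becomes empty.
 */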
static void
priq_purgeq(struct priq_if *pif, struct priq_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
    struct ifclassq *ifq = pif->pif_ifq;
    u_int32_t cnt = 0, len = 0, qlen;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if ((qlen = qlen(&cl->cl_q)) == 0) {
        VERIFY(!pktsched_bit_tst(cl->cl_pri, &pif->pif_bitmap));
        goto done;
    }

    /* become regular mutex before freeing mbufs */
    IFCQ_CONVERT_LOCK(ifq);

#if CLASSQ_RIO
    if (q_is_rio(&cl->cl_q))
        rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
    else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
    if (q_is_red(&cl->cl_q))
        red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
    else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
    if (q_is_blue(&cl->cl_q))
        blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
    else
#endif /* CLASSQ_BLUE */
    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
        sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
    else
        _flushq_flow(&cl->cl_q, flow, &cnt, &len);

    if (cnt > 0) {
        VERIFY(qlen(&cl->cl_q) == (qlen - cnt));

        PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
        IFCQ_DROP_ADD(ifq, cnt, len);

        VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
        IFCQ_LEN(ifq) -= cnt;

        if (qempty(&cl->cl_q))
            pktsched_bit_clr(cl->cl_pri, &pif->pif_bitmap);

        if (pktsched_verbose) {
            log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
                "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
                if_name(PRIQIF_IFP(pif)), priq_style(pif),
                cl->cl_handle, cl->cl_pri, qlen, qlen(&cl->cl_q),
                cnt, len, flow);
        }
    }
done:
    if (packets != NULL)
        *packets = cnt;
    if (bytes != NULL)
        *bytes = len;
}
static void
priq_updateq(struct priq_if *pif, struct priq_class *cl, cqev_t ev)
{
    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    if (pktsched_verbose) {
        log(LOG_DEBUG, "%s: %s update qid=%d pri=%d event=%s\n",
            if_name(PRIQIF_IFP(pif)), priq_style(pif),
            cl->cl_handle, cl->cl_pri, ifclassq_ev2str(ev));
    }

#if CLASSQ_RIO
    if (q_is_rio(&cl->cl_q))
        return (rio_updateq(cl->cl_rio, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
    if (q_is_red(&cl->cl_q))
        return (red_updateq(cl->cl_red, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
    if (q_is_blue(&cl->cl_q))
        return (blue_updateq(cl->cl_blue, ev));
#endif /* CLASSQ_BLUE */
    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
        return (sfb_updateq(cl->cl_sfb, ev));
}
int
priq_get_class_stats(struct priq_if *pif, u_int32_t qid,
    struct priq_classstats *sp)
{
    struct priq_class *cl;

    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    if ((cl = priq_clh_to_clp(pif, qid)) == NULL)
        return (EINVAL);

    sp->class_handle = cl->cl_handle;
    sp->priority = cl->cl_pri;
    sp->qlength = qlen(&cl->cl_q);
    sp->qlimit = qlimit(&cl->cl_q);
    sp->period = cl->cl_period;
    sp->xmitcnt = cl->cl_xmitcnt;
    sp->dropcnt = cl->cl_dropcnt;

    sp->qtype = qtype(&cl->cl_q);
    sp->qstate = qstate(&cl->cl_q);
#if CLASSQ_RED
    if (q_is_red(&cl->cl_q))
        red_getstats(cl->cl_red, &sp->red[0]);
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
    if (q_is_rio(&cl->cl_q))
        rio_getstats(cl->cl_rio, &sp->red[0]);
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
    if (q_is_blue(&cl->cl_q))
        blue_getstats(cl->cl_blue, &sp->blue);
#endif /* CLASSQ_BLUE */
    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
        sfb_getstats(cl->cl_sfb, &sp->sfb);

    return (0);
}
static int
priq_stat_sc(struct priq_if *pif, cqrq_stat_sc_t *sr)
{
    struct ifclassq *ifq = pif->pif_ifq;
    struct priq_class *cl;
    u_int32_t i;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc));

    i = MBUF_SCIDX(sr->sc);
    VERIFY(i < IFCQ_SC_MAX);

    cl = ifq->ifcq_disc_slots[i].cl;
    sr->packets = qlen(&cl->cl_q);
    sr->bytes = qsize(&cl->cl_q);

    return (0);
}
/* convert a class handle to the corresponding class pointer */
static inline struct priq_class *
priq_clh_to_clp(struct priq_if *pif, u_int32_t chandle)
{
    struct priq_class *cl;
    int idx;

    IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

    for (idx = pif->pif_maxpri; idx >= 0; idx--)
        if ((cl = pif->pif_classes[idx]) != NULL &&
            cl->cl_handle == chandle)
            return (cl);

    return (NULL);
}
static const char *
priq_style(struct priq_if *pif)
{
    return ((pif->pif_flags & PRIQIFF_ALTQ) ? "ALTQ_PRIQ" : "PRIQ");
}
/*
 * priq_enqueue_ifclassq is an enqueue function to be registered to
 * (*ifcq_enqueue) in struct ifclassq.
 */
static int
priq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
{
    u_int32_t i;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (!(m->m_flags & M_PKTHDR)) {
        /* should not happen */
        log(LOG_ERR, "%s: packet does not have pkthdr\n",
            if_name(ifq->ifcq_ifp));
        IFCQ_CONVERT_LOCK(ifq);
        m_freem(m);
        return (ENOBUFS);
    }

    i = MBUF_SCIDX(mbuf_get_service_class(m));
    VERIFY((u_int32_t)i < IFCQ_SC_MAX);

    return (priq_enqueue(ifq->ifcq_disc,
        ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
}
/*
 * priq_dequeue_ifclassq is a dequeue function to be registered to
 * (*ifcq_dequeue) in struct ifclassq.
 *
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 * from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 * CLASSQDQ_REMOVE must return the same packet if called immediately
 * after CLASSQDQ_POLL.
 */
static struct mbuf *
priq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op)
{
    return (priq_dequeue(ifq->ifcq_disc, op));
}
static int
priq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
{
    struct priq_if *pif = (struct priq_if *)ifq->ifcq_disc;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    switch (req) {
    case CLASSQRQ_PURGE:
        priq_purge(pif);
        break;

    case CLASSQRQ_PURGE_SC:
        priq_purge_sc(pif, (cqrq_purge_sc_t *)arg);
        break;

    case CLASSQRQ_EVENT:
        priq_event(pif, (cqev_t)arg);
        break;

    case CLASSQRQ_THROTTLE:
        err = priq_throttle(pif, (cqrq_throttle_t *)arg);
        break;

    case CLASSQRQ_STAT_SC:
        err = priq_stat_sc(pif, (cqrq_stat_sc_t *)arg);
        break;
    }
    return (err);
}
int
priq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
    struct ifnet *ifp = ifq->ifcq_ifp;
    struct priq_class *cl0, *cl1, *cl2, *cl3, *cl4;
    struct priq_class *cl5, *cl6, *cl7, *cl8, *cl9;
    struct priq_if *pif;
    u_int32_t maxlen = 0, qflags = 0;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(ifq->ifcq_disc == NULL);
    VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

    if (flags & PKTSCHEDF_QALG_RED)
        qflags |= PRCF_RED;
    if (flags & PKTSCHEDF_QALG_RIO)
        qflags |= PRCF_RIO;
    if (flags & PKTSCHEDF_QALG_BLUE)
        qflags |= PRCF_BLUE;
    if (flags & PKTSCHEDF_QALG_SFB)
        qflags |= PRCF_SFB;
    if (flags & PKTSCHEDF_QALG_ECN)
        qflags |= PRCF_ECN;
    if (flags & PKTSCHEDF_QALG_FLOWCTL)
        qflags |= PRCF_FLOWCTL;

    pif = priq_alloc(ifp, M_WAITOK, FALSE);
    if (pif == NULL)
        return (ENOMEM);

    if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
        maxlen = if_sndq_maxlen;

    if ((err = priq_add_queue(pif, 0, maxlen,
        qflags | PRCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 1, maxlen,
        qflags | PRCF_LAZY, SCIDX_BK, &cl1)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 2, maxlen,
        qflags | PRCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 3, maxlen,
        qflags | PRCF_LAZY, SCIDX_RD, &cl3)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 4, maxlen,
        qflags | PRCF_LAZY, SCIDX_OAM, &cl4)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 5, maxlen,
        qflags | PRCF_LAZY, SCIDX_AV, &cl5)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 6, maxlen,
        qflags | PRCF_LAZY, SCIDX_RV, &cl6)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 7, maxlen,
        qflags | PRCF_LAZY, SCIDX_VI, &cl7)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 8, maxlen,
        qflags | PRCF_LAZY, SCIDX_VO, &cl8)) != 0)
        goto cleanup;

    if ((err = priq_add_queue(pif, 9, maxlen,
        qflags, SCIDX_CTL, &cl9)) != 0)
        goto cleanup;

    err = ifclassq_attach(ifq, PKTSCHEDT_PRIQ, pif,
        priq_enqueue_ifclassq, priq_dequeue_ifclassq, NULL,
        priq_request_ifclassq);

    /* cache these for faster lookup */
    if (err == 0) {
        ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
        ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;

        ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
        ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;

        ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
        ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;

        ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
        ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;

        ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
        ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;

        ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
        ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;

        ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
        ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;

        ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
        ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;

        ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
        ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;

        ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
        ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
    }

cleanup:
    if (err != 0)
        (void) priq_destroy_locked(pif);

    return (err);
}
int
priq_teardown_ifclassq(struct ifclassq *ifq)
{
    struct priq_if *pif = ifq->ifcq_disc;
    int i;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(pif != NULL && ifq->ifcq_type == PKTSCHEDT_PRIQ);

    (void) priq_destroy_locked(pif);

    ifq->ifcq_disc = NULL;
    for (i = 0; i < IFCQ_SC_MAX; i++) {
        ifq->ifcq_disc_slots[i].qid = 0;
        ifq->ifcq_disc_slots[i].cl = NULL;
    }

    return (ifclassq_detach(ifq));
}
int
priq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
    struct if_ifclassq_stats *ifqs)
{
    struct priq_if *pif = ifq->ifcq_disc;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(ifq->ifcq_type == PKTSCHEDT_PRIQ);

    if (slot >= IFCQ_SC_MAX)
        return (EINVAL);

    return (priq_get_class_stats(pif, ifq->ifcq_disc_slots[slot].qid,
        &ifqs->ifqs_priq_stats));
}
static int
priq_throttle(struct priq_if *pif, cqrq_throttle_t *tr)
{
    struct ifclassq *ifq = pif->pif_ifq;
    struct priq_class *cl;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(!(pif->pif_flags & PRIQIFF_ALTQ));

    if (!tr->set) {
        tr->level = pif->pif_throttle;
        return (0);
    }

    if (tr->level == pif->pif_throttle)
        return (EALREADY);

    /* Current throttling levels only involve BK_SYS class */
    cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;

    switch (tr->level) {
    case IFNET_THROTTLE_OFF:
        err = priq_resumeq(pif, cl);
        break;

    case IFNET_THROTTLE_OPPORTUNISTIC:
        err = priq_suspendq(pif, cl);
        break;
    }

    if (err == 0 || err == ENXIO) {
        if (pktsched_verbose) {
            log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
                if_name(PRIQIF_IFP(pif)), priq_style(pif),
                (err == 0) ? "" : "lazy ", pif->pif_throttle,
                tr->level);
        }
        pif->pif_throttle = tr->level;
        if (err != 0)
            err = 0;
        else
            priq_purgeq(pif, cl, 0, NULL, NULL);
    } else {
        log(LOG_ERR, "%s: %s unable to set throttling level "
            "%d->%d [error=%d]\n", if_name(PRIQIF_IFP(pif)),
            priq_style(pif), pif->pif_throttle, tr->level, err);
    }

    return (err);
}
static int
priq_resumeq(struct priq_if *pif, struct priq_class *cl)
{
    struct ifclassq *ifq = pif->pif_ifq;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
    if (q_is_rio(&cl->cl_q))
        err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
    else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
    if (q_is_red(&cl->cl_q))
        err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
    else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
    if (q_is_blue(&cl->cl_q))
        err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
    else
#endif /* CLASSQ_BLUE */
    if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
        err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);

    if (err == 0)
        qstate(&cl->cl_q) = QS_RUNNING;

    return (err);
}
static int
priq_suspendq(struct priq_if *pif, struct priq_class *cl)
{
    struct ifclassq *ifq = pif->pif_ifq;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
    if (q_is_rio(&cl->cl_q))
        err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
    else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
    if (q_is_red(&cl->cl_q))
        err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
    else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
    if (q_is_blue(&cl->cl_q))
        err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
    else
#endif /* CLASSQ_BLUE */
    if (q_is_sfb(&cl->cl_q)) {
        if (cl->cl_sfb != NULL) {
            err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
        } else {
            VERIFY(cl->cl_flags & PRCF_LAZY);
            err = ENXIO;    /* delayed throttling */
        }
    }

    if (err == 0 || err == ENXIO)
        qstate(&cl->cl_q) = QS_SUSPENDED;

    return (err);
}
#endif /* PKTSCHED_PRIQ */