2 * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 /* $OpenBSD: altq_priq.c,v 1.21 2007/09/13 20:40:02 chl Exp $ */
30 /* $KAME: altq_priq.c,v 1.1 2000/10/18 09:15:23 kjc Exp $ */
33 * Copyright (C) 2000-2003
34 * Sony Computer Science Laboratories Inc. All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
45 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 #include <sys/cdefs.h>
65 #include <sys/param.h>
66 #include <sys/malloc.h>
68 #include <sys/systm.h>
69 #include <sys/errno.h>
70 #include <sys/kernel.h>
71 #include <sys/syslog.h>
73 #include <kern/zalloc.h>
76 #include <net/net_osdep.h>
78 #include <net/pktsched/pktsched_priq.h>
79 #include <netinet/in.h>
84 static int priq_enqueue_ifclassq(struct ifclassq
*, struct mbuf
*);
85 static struct mbuf
*priq_dequeue_ifclassq(struct ifclassq
*, cqdq_op_t
);
86 static int priq_request_ifclassq(struct ifclassq
*, cqrq_t
, void *);
87 static int priq_clear_interface(struct priq_if
*);
88 static struct priq_class
*priq_class_create(struct priq_if
*, int, u_int32_t
,
90 static int priq_class_destroy(struct priq_if
*, struct priq_class
*);
91 static int priq_destroy_locked(struct priq_if
*);
92 static inline int priq_addq(struct priq_class
*, struct mbuf
*,
94 static inline struct mbuf
*priq_getq(struct priq_class
*);
95 static inline struct mbuf
*priq_pollq(struct priq_class
*);
96 static void priq_purgeq(struct priq_if
*, struct priq_class
*, u_int32_t
,
97 u_int32_t
*, u_int32_t
*);
98 static void priq_purge_sc(struct priq_if
*, cqrq_purge_sc_t
*);
99 static void priq_updateq(struct priq_if
*, struct priq_class
*, cqev_t
);
100 static int priq_throttle(struct priq_if
*, cqrq_throttle_t
*);
101 static int priq_resumeq(struct priq_if
*, struct priq_class
*);
102 static int priq_suspendq(struct priq_if
*, struct priq_class
*);
103 static inline struct priq_class
*priq_clh_to_clp(struct priq_if
*, u_int32_t
);
104 static const char *priq_style(struct priq_if
*);
/*
 * Allocation zones used by this scheduler.  The first zone backs the
 * per-interface scheduler state (struct priq_if); the second backs the
 * individual classes (struct priq_class).  The element sizes are recorded
 * separately so that freshly allocated elements can be zeroed.
 */

/* scheduler instance zone */
#define	PRIQ_ZONE_NAME		"pktsched_priq"		/* zone name */
#define	PRIQ_ZONE_MAX		32		/* maximum elements in zone */

static struct zone *priq_zone;			/* zone for priq */
static unsigned int priq_size;			/* size of zone element */

/* class zone */
#define	PRIQ_CL_ZONE_NAME	"pktsched_priq_cl"	/* zone name */
#define	PRIQ_CL_ZONE_MAX	32		/* maximum elements in zone */

static struct zone *priq_cl_zone;		/* zone for priq_class */
static unsigned int priq_cl_size;		/* size of zone element */
121 priq_size
= sizeof (struct priq_if
);
122 priq_zone
= zinit(priq_size
, PRIQ_ZONE_MAX
* priq_size
,
124 if (priq_zone
== NULL
) {
125 panic("%s: failed allocating %s", __func__
, PRIQ_ZONE_NAME
);
128 zone_change(priq_zone
, Z_EXPAND
, TRUE
);
129 zone_change(priq_zone
, Z_CALLERACCT
, TRUE
);
131 priq_cl_size
= sizeof (struct priq_class
);
132 priq_cl_zone
= zinit(priq_cl_size
, PRIQ_CL_ZONE_MAX
* priq_cl_size
,
133 0, PRIQ_CL_ZONE_NAME
);
134 if (priq_cl_zone
== NULL
) {
135 panic("%s: failed allocating %s", __func__
, PRIQ_CL_ZONE_NAME
);
138 zone_change(priq_cl_zone
, Z_EXPAND
, TRUE
);
139 zone_change(priq_cl_zone
, Z_CALLERACCT
, TRUE
);
143 priq_alloc(struct ifnet
*ifp
, int how
, boolean_t altq
)
147 pif
= (how
== M_WAITOK
) ? zalloc(priq_zone
) : zalloc_noblock(priq_zone
);
151 bzero(pif
, priq_size
);
152 pif
->pif_maxpri
= -1;
153 pif
->pif_ifq
= &ifp
->if_snd
;
155 pif
->pif_flags
|= PRIQIFF_ALTQ
;
157 if (pktsched_verbose
) {
158 log(LOG_DEBUG
, "%s: %s scheduler allocated\n",
159 if_name(ifp
), priq_style(pif
));
166 priq_destroy(struct priq_if
*pif
)
168 struct ifclassq
*ifq
= pif
->pif_ifq
;
172 err
= priq_destroy_locked(pif
);
179 priq_destroy_locked(struct priq_if
*pif
)
181 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
183 (void) priq_clear_interface(pif
);
185 if (pktsched_verbose
) {
186 log(LOG_DEBUG
, "%s: %s scheduler destroyed\n",
187 if_name(PRIQIF_IFP(pif
)), priq_style(pif
));
190 zfree(priq_zone
, pif
);
196 * bring the interface back to the initial state by discarding
197 * all the filters and classes.
200 priq_clear_interface(struct priq_if
*pif
)
202 struct priq_class
*cl
;
205 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
207 /* clear out the classes */
208 for (pri
= 0; pri
<= pif
->pif_maxpri
; pri
++)
209 if ((cl
= pif
->pif_classes
[pri
]) != NULL
)
210 priq_class_destroy(pif
, cl
);
215 /* discard all the queued packets on the interface */
217 priq_purge(struct priq_if
*pif
)
219 struct priq_class
*cl
;
222 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
224 for (pri
= 0; pri
<= pif
->pif_maxpri
; pri
++) {
225 if ((cl
= pif
->pif_classes
[pri
]) != NULL
&& !qempty(&cl
->cl_q
))
226 priq_purgeq(pif
, cl
, 0, NULL
, NULL
);
230 * This assertion is safe to be made only when PF_ALTQ is not
231 * configured; otherwise, IFCQ_LEN represents the sum of the
232 * packets managed by ifcq_disc and altq_disc instances, which
233 * is possible when transitioning between the two.
235 VERIFY(IFCQ_LEN(pif
->pif_ifq
) == 0);
236 #endif /* !PF_ALTQ */
240 priq_purge_sc(struct priq_if
*pif
, cqrq_purge_sc_t
*pr
)
242 struct ifclassq
*ifq
= pif
->pif_ifq
;
245 IFCQ_LOCK_ASSERT_HELD(ifq
);
247 VERIFY(pr
->sc
== MBUF_SC_UNSPEC
|| MBUF_VALID_SC(pr
->sc
));
248 VERIFY(pr
->flow
!= 0);
250 if (pr
->sc
!= MBUF_SC_UNSPEC
) {
251 i
= MBUF_SCIDX(pr
->sc
);
252 VERIFY(i
< IFCQ_SC_MAX
);
254 priq_purgeq(pif
, ifq
->ifcq_disc_slots
[i
].cl
,
255 pr
->flow
, &pr
->packets
, &pr
->bytes
);
262 for (i
= 0; i
< IFCQ_SC_MAX
; i
++) {
263 priq_purgeq(pif
, ifq
->ifcq_disc_slots
[i
].cl
,
264 pr
->flow
, &cnt
, &len
);
272 priq_event(struct priq_if
*pif
, cqev_t ev
)
274 struct priq_class
*cl
;
277 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
279 for (pri
= 0; pri
<= pif
->pif_maxpri
; pri
++)
280 if ((cl
= pif
->pif_classes
[pri
]) != NULL
)
281 priq_updateq(pif
, cl
, ev
);
285 priq_add_queue(struct priq_if
*pif
, int priority
, u_int32_t qlimit
,
286 int flags
, u_int32_t qid
, struct priq_class
**clp
)
288 struct priq_class
*cl
;
290 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
292 /* check parameters */
293 if (priority
>= PRIQ_MAXPRI
)
295 if (pif
->pif_classes
[priority
] != NULL
)
297 if (priq_clh_to_clp(pif
, qid
) != NULL
)
300 cl
= priq_class_create(pif
, priority
, qlimit
, flags
, qid
);
310 static struct priq_class
*
311 priq_class_create(struct priq_if
*pif
, int pri
, u_int32_t qlimit
,
312 int flags
, u_int32_t qid
)
315 struct ifclassq
*ifq
;
316 struct priq_class
*cl
;
318 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
320 /* Sanitize flags unless internally configured */
321 if (pif
->pif_flags
& PRIQIFF_ALTQ
)
322 flags
&= PRCF_USERFLAGS
;
325 if (flags
& PRCF_RED
) {
326 log(LOG_ERR
, "%s: %s RED not available!\n",
327 if_name(PRIQIF_IFP(pif
)), priq_style(pif
));
330 #endif /* !CLASSQ_RED */
333 if (flags
& PRCF_RIO
) {
334 log(LOG_ERR
, "%s: %s RIO not available!\n",
335 if_name(PRIQIF_IFP(pif
)), priq_style(pif
));
338 #endif /* !CLASSQ_RIO */
341 if (flags
& PRCF_BLUE
) {
342 log(LOG_ERR
, "%s: %s BLUE not available!\n",
343 if_name(PRIQIF_IFP(pif
)), priq_style(pif
));
346 #endif /* !CLASSQ_BLUE */
348 /* These are mutually exclusive */
349 if ((flags
& (PRCF_RED
|PRCF_RIO
|PRCF_BLUE
|PRCF_SFB
)) &&
350 (flags
& (PRCF_RED
|PRCF_RIO
|PRCF_BLUE
|PRCF_SFB
)) != PRCF_RED
&&
351 (flags
& (PRCF_RED
|PRCF_RIO
|PRCF_BLUE
|PRCF_SFB
)) != PRCF_RIO
&&
352 (flags
& (PRCF_RED
|PRCF_RIO
|PRCF_BLUE
|PRCF_SFB
)) != PRCF_BLUE
&&
353 (flags
& (PRCF_RED
|PRCF_RIO
|PRCF_BLUE
|PRCF_SFB
)) != PRCF_SFB
) {
354 log(LOG_ERR
, "%s: %s more than one RED|RIO|BLUE|SFB\n",
355 if_name(PRIQIF_IFP(pif
)), priq_style(pif
));
360 ifp
= PRIQIF_IFP(pif
);
362 if ((cl
= pif
->pif_classes
[pri
]) != NULL
) {
363 /* modify the class instead of creating a new one */
364 if (!qempty(&cl
->cl_q
))
365 priq_purgeq(pif
, cl
, 0, NULL
, NULL
);
367 if (q_is_rio(&cl
->cl_q
))
368 rio_destroy(cl
->cl_rio
);
369 #endif /* CLASSQ_RIO */
371 if (q_is_red(&cl
->cl_q
))
372 red_destroy(cl
->cl_red
);
373 #endif /* CLASSQ_RED */
375 if (q_is_blue(&cl
->cl_q
))
376 blue_destroy(cl
->cl_blue
);
377 #endif /* CLASSQ_BLUE */
378 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
379 sfb_destroy(cl
->cl_sfb
);
380 cl
->cl_qalg
.ptr
= NULL
;
381 qtype(&cl
->cl_q
) = Q_DROPTAIL
;
382 qstate(&cl
->cl_q
) = QS_RUNNING
;
384 cl
= zalloc(priq_cl_zone
);
388 bzero(cl
, priq_cl_size
);
391 pif
->pif_classes
[pri
] = cl
;
392 if (flags
& PRCF_DEFAULTCLASS
)
393 pif
->pif_default
= cl
;
394 if (qlimit
== 0 || qlimit
> IFCQ_MAXLEN(ifq
)) {
395 qlimit
= IFCQ_MAXLEN(ifq
);
397 qlimit
= DEFAULT_QLIMIT
; /* use default */
399 _qinit(&cl
->cl_q
, Q_DROPTAIL
, qlimit
);
400 cl
->cl_flags
= flags
;
402 if (pri
> pif
->pif_maxpri
)
403 pif
->pif_maxpri
= pri
;
407 if (flags
& (PRCF_RED
|PRCF_RIO
|PRCF_BLUE
|PRCF_SFB
)) {
408 #if CLASSQ_RED || CLASSQ_RIO
409 u_int64_t ifbandwidth
= ifnet_output_linkrate(ifp
);
411 #endif /* CLASSQ_RED || CLASSQ_RIO */
414 if (flags
& PRCF_ECN
) {
415 if (flags
& PRCF_BLUE
)
416 cl
->cl_qflags
|= BLUEF_ECN
;
417 else if (flags
& PRCF_SFB
)
418 cl
->cl_qflags
|= SFBF_ECN
;
419 else if (flags
& PRCF_RED
)
420 cl
->cl_qflags
|= REDF_ECN
;
421 else if (flags
& PRCF_RIO
)
422 cl
->cl_qflags
|= RIOF_ECN
;
424 if (flags
& PRCF_FLOWCTL
) {
425 if (flags
& PRCF_SFB
)
426 cl
->cl_qflags
|= SFBF_FLOWCTL
;
428 if (flags
& PRCF_CLEARDSCP
) {
429 if (flags
& PRCF_RIO
)
430 cl
->cl_qflags
|= RIOF_CLEARDSCP
;
432 #if CLASSQ_RED || CLASSQ_RIO
434 * XXX: RED & RIO should be watching link speed and MTU
435 * events and recompute pkttime accordingly.
438 pkttime
= 1000 * 1000 * 1000; /* 1 sec */
440 pkttime
= (int64_t)ifp
->if_mtu
* 1000 * 1000 * 1000 /
443 /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
445 if (flags
& PRCF_RED
) {
446 cl
->cl_red
= red_alloc(ifp
, 0, 0,
447 qlimit(&cl
->cl_q
) * 10/100,
448 qlimit(&cl
->cl_q
) * 30/100,
449 cl
->cl_qflags
, pkttime
);
450 if (cl
->cl_red
!= NULL
)
451 qtype(&cl
->cl_q
) = Q_RED
;
453 #endif /* CLASSQ_RED */
455 if (flags
& PRCF_RIO
) {
457 rio_alloc(ifp
, 0, NULL
, cl
->cl_qflags
, pkttime
);
458 if (cl
->cl_rio
!= NULL
)
459 qtype(&cl
->cl_q
) = Q_RIO
;
461 #endif /* CLASSQ_RIO */
462 #endif /* CLASSQ_RED || CLASSQ_RIO */
464 if (flags
& PRCF_BLUE
) {
465 cl
->cl_blue
= blue_alloc(ifp
, 0, 0, cl
->cl_qflags
);
466 if (cl
->cl_blue
!= NULL
)
467 qtype(&cl
->cl_q
) = Q_BLUE
;
469 #endif /* CLASSQ_BLUE */
470 if (flags
& PRCF_SFB
) {
471 if (!(cl
->cl_flags
& PRCF_LAZY
))
472 cl
->cl_sfb
= sfb_alloc(ifp
, cl
->cl_handle
,
473 qlimit(&cl
->cl_q
), cl
->cl_qflags
);
474 if (cl
->cl_sfb
!= NULL
|| (cl
->cl_flags
& PRCF_LAZY
))
475 qtype(&cl
->cl_q
) = Q_SFB
;
479 if (pktsched_verbose
) {
480 log(LOG_DEBUG
, "%s: %s created qid=%d pri=%d qlimit=%d "
481 "flags=%b\n", if_name(ifp
), priq_style(pif
),
482 cl
->cl_handle
, cl
->cl_pri
, qlimit
, flags
, PRCF_BITS
);
489 priq_remove_queue(struct priq_if
*pif
, u_int32_t qid
)
491 struct priq_class
*cl
;
493 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
495 if ((cl
= priq_clh_to_clp(pif
, qid
)) == NULL
)
498 return (priq_class_destroy(pif
, cl
));
502 priq_class_destroy(struct priq_if
*pif
, struct priq_class
*cl
)
504 struct ifclassq
*ifq
= pif
->pif_ifq
;
507 IFCQ_LOCK_ASSERT_HELD(ifq
);
509 if (!qempty(&cl
->cl_q
))
510 priq_purgeq(pif
, cl
, 0, NULL
, NULL
);
512 VERIFY(cl
->cl_pri
< PRIQ_MAXPRI
);
513 VERIFY(!pktsched_bit_tst(cl
->cl_pri
, &pif
->pif_bitmap
));
515 pif
->pif_classes
[cl
->cl_pri
] = NULL
;
516 if (pif
->pif_maxpri
== cl
->cl_pri
) {
517 for (pri
= cl
->cl_pri
; pri
>= 0; pri
--)
518 if (pif
->pif_classes
[pri
] != NULL
) {
519 pif
->pif_maxpri
= pri
;
523 pif
->pif_maxpri
= -1;
526 if (pif
->pif_default
== cl
)
527 pif
->pif_default
= NULL
;
529 if (cl
->cl_qalg
.ptr
!= NULL
) {
531 if (q_is_rio(&cl
->cl_q
))
532 rio_destroy(cl
->cl_rio
);
533 #endif /* CLASSQ_RIO */
535 if (q_is_red(&cl
->cl_q
))
536 red_destroy(cl
->cl_red
);
537 #endif /* CLASSQ_RED */
539 if (q_is_blue(&cl
->cl_q
))
540 blue_destroy(cl
->cl_blue
);
541 #endif /* CLASSQ_BLUE */
542 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
543 sfb_destroy(cl
->cl_sfb
);
544 cl
->cl_qalg
.ptr
= NULL
;
545 qtype(&cl
->cl_q
) = Q_DROPTAIL
;
546 qstate(&cl
->cl_q
) = QS_RUNNING
;
549 if (pktsched_verbose
) {
550 log(LOG_DEBUG
, "%s: %s destroyed qid=%d pri=%d\n",
551 if_name(PRIQIF_IFP(pif
)), priq_style(pif
),
552 cl
->cl_handle
, cl
->cl_pri
);
555 zfree(priq_cl_zone
, cl
);
561 priq_enqueue(struct priq_if
*pif
, struct priq_class
*cl
, struct mbuf
*m
,
564 struct ifclassq
*ifq
= pif
->pif_ifq
;
568 IFCQ_LOCK_ASSERT_HELD(ifq
);
569 VERIFY(cl
== NULL
|| cl
->cl_pif
== pif
);
572 cl
= priq_clh_to_clp(pif
, t
->pftag_qid
);
574 cl
= pif
->pif_default
;
576 IFCQ_CONVERT_LOCK(ifq
);
583 VERIFY(pri
< PRIQ_MAXPRI
);
587 ret
= priq_addq(cl
, m
, t
);
589 if (ret
== CLASSQEQ_SUCCESS_FC
) {
590 /* packet enqueued, return advisory feedback */
593 VERIFY(ret
== CLASSQEQ_DROPPED
||
594 ret
== CLASSQEQ_DROPPED_FC
||
595 ret
== CLASSQEQ_DROPPED_SP
);
596 /* packet has been freed in priq_addq */
597 PKTCNTR_ADD(&cl
->cl_dropcnt
, 1, len
);
598 IFCQ_DROP_ADD(ifq
, 1, len
);
600 case CLASSQEQ_DROPPED
:
602 case CLASSQEQ_DROPPED_FC
:
604 case CLASSQEQ_DROPPED_SP
:
605 return (EQSUSPENDED
);
612 /* class is now active; indicate it as such */
613 if (!pktsched_bit_tst(pri
, &pif
->pif_bitmap
))
614 pktsched_bit_set(pri
, &pif
->pif_bitmap
);
616 /* successfully queued. */
621 * note: CLASSQDQ_POLL returns the next packet without removing the packet
622 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
623 * CLASSQDQ_REMOVE must return the same packet if called immediately
624 * after CLASSQDQ_POLL.
627 priq_dequeue(struct priq_if
*pif
, cqdq_op_t op
)
629 struct ifclassq
*ifq
= pif
->pif_ifq
;
630 struct priq_class
*cl
;
634 IFCQ_LOCK_ASSERT_HELD(ifq
);
636 if (pif
->pif_bitmap
== 0) {
637 /* no active class; nothing to dequeue */
640 VERIFY(!IFCQ_IS_EMPTY(ifq
));
642 pri
= pktsched_fls(pif
->pif_bitmap
) - 1; /* zero based */
643 VERIFY(pri
< PRIQ_MAXPRI
);
644 cl
= pif
->pif_classes
[pri
];
645 VERIFY(cl
!= NULL
&& !qempty(&cl
->cl_q
));
647 if (op
== CLASSQDQ_POLL
)
648 return (priq_pollq(cl
));
651 VERIFY(m
!= NULL
); /* qalg must be work conserving */
655 if (qempty(&cl
->cl_q
)) {
657 /* class is now inactive; indicate it as such */
658 pktsched_bit_clr(pri
, &pif
->pif_bitmap
);
660 PKTCNTR_ADD(&cl
->cl_xmitcnt
, 1, len
);
661 IFCQ_XMIT_ADD(ifq
, 1, len
);
667 priq_addq(struct priq_class
*cl
, struct mbuf
*m
, struct pf_mtag
*t
)
669 struct priq_if
*pif
= cl
->cl_pif
;
670 struct ifclassq
*ifq
= pif
->pif_ifq
;
672 IFCQ_LOCK_ASSERT_HELD(ifq
);
675 if (q_is_rio(&cl
->cl_q
))
676 return (rio_addq(cl
->cl_rio
, &cl
->cl_q
, m
, t
));
678 #endif /* CLASSQ_RIO */
680 if (q_is_red(&cl
->cl_q
))
681 return (red_addq(cl
->cl_red
, &cl
->cl_q
, m
, t
));
683 #endif /* CLASSQ_RED */
685 if (q_is_blue(&cl
->cl_q
))
686 return (blue_addq(cl
->cl_blue
, &cl
->cl_q
, m
, t
));
688 #endif /* CLASSQ_BLUE */
689 if (q_is_sfb(&cl
->cl_q
)) {
690 if (cl
->cl_sfb
== NULL
) {
691 struct ifnet
*ifp
= PRIQIF_IFP(pif
);
693 VERIFY(cl
->cl_flags
& PRCF_LAZY
);
694 cl
->cl_flags
&= ~PRCF_LAZY
;
695 IFCQ_CONVERT_LOCK(ifq
);
697 cl
->cl_sfb
= sfb_alloc(ifp
, cl
->cl_handle
,
698 qlimit(&cl
->cl_q
), cl
->cl_qflags
);
699 if (cl
->cl_sfb
== NULL
) {
700 /* fall back to droptail */
701 qtype(&cl
->cl_q
) = Q_DROPTAIL
;
702 cl
->cl_flags
&= ~PRCF_SFB
;
703 cl
->cl_qflags
&= ~(SFBF_ECN
| SFBF_FLOWCTL
);
705 log(LOG_ERR
, "%s: %s SFB lazy allocation "
706 "failed for qid=%d pri=%d, falling back "
707 "to DROPTAIL\n", if_name(ifp
),
708 priq_style(pif
), cl
->cl_handle
,
710 } else if (pif
->pif_throttle
!= IFNET_THROTTLE_OFF
) {
711 /* if there's pending throttling, set it */
712 cqrq_throttle_t tr
= { 1, pif
->pif_throttle
};
713 int err
= priq_throttle(pif
, &tr
);
718 tr
.level
= IFNET_THROTTLE_OFF
;
719 (void) priq_throttle(pif
, &tr
);
723 if (cl
->cl_sfb
!= NULL
)
724 return (sfb_addq(cl
->cl_sfb
, &cl
->cl_q
, m
, t
));
725 } else if (qlen(&cl
->cl_q
) >= qlimit(&cl
->cl_q
)) {
726 IFCQ_CONVERT_LOCK(ifq
);
728 return (CLASSQEQ_DROPPED
);
731 if (cl
->cl_flags
& PRCF_CLEARDSCP
)
732 write_dsfield(m
, t
, 0);
739 static inline struct mbuf
*
740 priq_getq(struct priq_class
*cl
)
742 IFCQ_LOCK_ASSERT_HELD(cl
->cl_pif
->pif_ifq
);
745 if (q_is_rio(&cl
->cl_q
))
746 return (rio_getq(cl
->cl_rio
, &cl
->cl_q
));
748 #endif /* CLASSQ_RIO */
750 if (q_is_red(&cl
->cl_q
))
751 return (red_getq(cl
->cl_red
, &cl
->cl_q
));
753 #endif /* CLASSQ_RED */
755 if (q_is_blue(&cl
->cl_q
))
756 return (blue_getq(cl
->cl_blue
, &cl
->cl_q
));
758 #endif /* CLASSQ_BLUE */
759 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
760 return (sfb_getq(cl
->cl_sfb
, &cl
->cl_q
));
762 return (_getq(&cl
->cl_q
));
765 static inline struct mbuf
*
766 priq_pollq(struct priq_class
*cl
)
768 IFCQ_LOCK_ASSERT_HELD(cl
->cl_pif
->pif_ifq
);
770 return (qhead(&cl
->cl_q
));
774 priq_purgeq(struct priq_if
*pif
, struct priq_class
*cl
, u_int32_t flow
,
775 u_int32_t
*packets
, u_int32_t
*bytes
)
777 struct ifclassq
*ifq
= pif
->pif_ifq
;
778 u_int32_t cnt
= 0, len
= 0, qlen
;
780 IFCQ_LOCK_ASSERT_HELD(ifq
);
782 if ((qlen
= qlen(&cl
->cl_q
)) == 0) {
783 VERIFY(!pktsched_bit_tst(cl
->cl_pri
, &pif
->pif_bitmap
));
787 /* become regular mutex before freeing mbufs */
788 IFCQ_CONVERT_LOCK(ifq
);
791 if (q_is_rio(&cl
->cl_q
))
792 rio_purgeq(cl
->cl_rio
, &cl
->cl_q
, flow
, &cnt
, &len
);
794 #endif /* CLASSQ_RIO */
796 if (q_is_red(&cl
->cl_q
))
797 red_purgeq(cl
->cl_red
, &cl
->cl_q
, flow
, &cnt
, &len
);
799 #endif /* CLASSQ_RED */
801 if (q_is_blue(&cl
->cl_q
))
802 blue_purgeq(cl
->cl_blue
, &cl
->cl_q
, flow
, &cnt
, &len
);
804 #endif /* CLASSQ_BLUE */
805 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
806 sfb_purgeq(cl
->cl_sfb
, &cl
->cl_q
, flow
, &cnt
, &len
);
808 _flushq_flow(&cl
->cl_q
, flow
, &cnt
, &len
);
811 VERIFY(qlen(&cl
->cl_q
) == (qlen
- cnt
));
813 PKTCNTR_ADD(&cl
->cl_dropcnt
, cnt
, len
);
814 IFCQ_DROP_ADD(ifq
, cnt
, len
);
816 VERIFY(((signed)IFCQ_LEN(ifq
) - cnt
) >= 0);
817 IFCQ_LEN(ifq
) -= cnt
;
819 if (qempty(&cl
->cl_q
))
820 pktsched_bit_clr(cl
->cl_pri
, &pif
->pif_bitmap
);
822 if (pktsched_verbose
) {
823 log(LOG_DEBUG
, "%s: %s purge qid=%d pri=%d "
824 "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
825 if_name(PRIQIF_IFP(pif
)), priq_style(pif
),
826 cl
->cl_handle
, cl
->cl_pri
, qlen
, qlen(&cl
->cl_q
),
838 priq_updateq(struct priq_if
*pif
, struct priq_class
*cl
, cqev_t ev
)
840 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
842 if (pktsched_verbose
) {
843 log(LOG_DEBUG
, "%s: %s update qid=%d pri=%d event=%s\n",
844 if_name(PRIQIF_IFP(pif
)), priq_style(pif
),
845 cl
->cl_handle
, cl
->cl_pri
, ifclassq_ev2str(ev
));
849 if (q_is_rio(&cl
->cl_q
))
850 return (rio_updateq(cl
->cl_rio
, ev
));
851 #endif /* CLASSQ_RIO */
853 if (q_is_red(&cl
->cl_q
))
854 return (red_updateq(cl
->cl_red
, ev
));
855 #endif /* CLASSQ_RED */
857 if (q_is_blue(&cl
->cl_q
))
858 return (blue_updateq(cl
->cl_blue
, ev
));
859 #endif /* CLASSQ_BLUE */
860 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
861 return (sfb_updateq(cl
->cl_sfb
, ev
));
865 priq_get_class_stats(struct priq_if
*pif
, u_int32_t qid
,
866 struct priq_classstats
*sp
)
868 struct priq_class
*cl
;
870 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
872 if ((cl
= priq_clh_to_clp(pif
, qid
)) == NULL
)
875 sp
->class_handle
= cl
->cl_handle
;
876 sp
->priority
= cl
->cl_pri
;
877 sp
->qlength
= qlen(&cl
->cl_q
);
878 sp
->qlimit
= qlimit(&cl
->cl_q
);
879 sp
->period
= cl
->cl_period
;
880 sp
->xmitcnt
= cl
->cl_xmitcnt
;
881 sp
->dropcnt
= cl
->cl_dropcnt
;
883 sp
->qtype
= qtype(&cl
->cl_q
);
884 sp
->qstate
= qstate(&cl
->cl_q
);
886 if (q_is_red(&cl
->cl_q
))
887 red_getstats(cl
->cl_red
, &sp
->red
[0]);
888 #endif /* CLASSQ_RED */
890 if (q_is_rio(&cl
->cl_q
))
891 rio_getstats(cl
->cl_rio
, &sp
->red
[0]);
892 #endif /* CLASSQ_RIO */
894 if (q_is_blue(&cl
->cl_q
))
895 blue_getstats(cl
->cl_blue
, &sp
->blue
);
896 #endif /* CLASSQ_BLUE */
897 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
898 sfb_getstats(cl
->cl_sfb
, &sp
->sfb
);
903 /* convert a class handle to the corresponding class pointer */
904 static inline struct priq_class
*
905 priq_clh_to_clp(struct priq_if
*pif
, u_int32_t chandle
)
907 struct priq_class
*cl
;
910 IFCQ_LOCK_ASSERT_HELD(pif
->pif_ifq
);
912 for (idx
= pif
->pif_maxpri
; idx
>= 0; idx
--)
913 if ((cl
= pif
->pif_classes
[idx
]) != NULL
&&
914 cl
->cl_handle
== chandle
)
921 priq_style(struct priq_if
*pif
)
923 return ((pif
->pif_flags
& PRIQIFF_ALTQ
) ? "ALTQ_PRIQ" : "PRIQ");
927 * priq_enqueue_ifclassq is an enqueue function to be registered to
928 * (*ifcq_enqueue) in struct ifclassq.
931 priq_enqueue_ifclassq(struct ifclassq
*ifq
, struct mbuf
*m
)
935 IFCQ_LOCK_ASSERT_HELD(ifq
);
937 if (!(m
->m_flags
& M_PKTHDR
)) {
938 /* should not happen */
939 log(LOG_ERR
, "%s: packet does not have pkthdr\n",
940 if_name(ifq
->ifcq_ifp
));
941 IFCQ_CONVERT_LOCK(ifq
);
946 i
= MBUF_SCIDX(mbuf_get_service_class(m
));
947 VERIFY((u_int32_t
)i
< IFCQ_SC_MAX
);
949 return (priq_enqueue(ifq
->ifcq_disc
,
950 ifq
->ifcq_disc_slots
[i
].cl
, m
, m_pftag(m
)));
954 * priq_dequeue_ifclassq is a dequeue function to be registered to
955 * (*ifcq_dequeue) in struct ifclassq.
957 * note: CLASSQDQ_POLL returns the next packet without removing the packet
958 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
959 * CLASSQDQ_REMOVE must return the same packet if called immediately
960 * after CLASSQDQ_POLL.
963 priq_dequeue_ifclassq(struct ifclassq
*ifq
, cqdq_op_t op
)
965 return (priq_dequeue(ifq
->ifcq_disc
, op
));
969 priq_request_ifclassq(struct ifclassq
*ifq
, cqrq_t req
, void *arg
)
971 struct priq_if
*pif
= (struct priq_if
*)ifq
->ifcq_disc
;
974 IFCQ_LOCK_ASSERT_HELD(ifq
);
981 case CLASSQRQ_PURGE_SC
:
982 priq_purge_sc(pif
, (cqrq_purge_sc_t
*)arg
);
986 priq_event(pif
, (cqev_t
)arg
);
989 case CLASSQRQ_THROTTLE
:
990 err
= priq_throttle(pif
, (cqrq_throttle_t
*)arg
);
997 priq_setup_ifclassq(struct ifclassq
*ifq
, u_int32_t flags
)
999 struct ifnet
*ifp
= ifq
->ifcq_ifp
;
1000 struct priq_class
*cl0
, *cl1
, *cl2
, *cl3
, *cl4
;
1001 struct priq_class
*cl5
, *cl6
, *cl7
, *cl8
, *cl9
;
1002 struct priq_if
*pif
;
1003 u_int32_t maxlen
= 0, qflags
= 0;
1006 IFCQ_LOCK_ASSERT_HELD(ifq
);
1007 VERIFY(ifq
->ifcq_disc
== NULL
);
1008 VERIFY(ifq
->ifcq_type
== PKTSCHEDT_NONE
);
1010 if (flags
& PKTSCHEDF_QALG_RED
)
1012 if (flags
& PKTSCHEDF_QALG_RIO
)
1014 if (flags
& PKTSCHEDF_QALG_BLUE
)
1015 qflags
|= PRCF_BLUE
;
1016 if (flags
& PKTSCHEDF_QALG_SFB
)
1018 if (flags
& PKTSCHEDF_QALG_ECN
)
1020 if (flags
& PKTSCHEDF_QALG_FLOWCTL
)
1021 qflags
|= PRCF_FLOWCTL
;
1023 pif
= priq_alloc(ifp
, M_WAITOK
, FALSE
);
1027 if ((maxlen
= IFCQ_MAXLEN(ifq
)) == 0)
1028 maxlen
= if_sndq_maxlen
;
1030 if ((err
= priq_add_queue(pif
, 0, maxlen
,
1031 qflags
| PRCF_LAZY
, SCIDX_BK_SYS
, &cl0
)) != 0)
1034 if ((err
= priq_add_queue(pif
, 1, maxlen
,
1035 qflags
| PRCF_LAZY
, SCIDX_BK
, &cl1
)) != 0)
1038 if ((err
= priq_add_queue(pif
, 2, maxlen
,
1039 qflags
| PRCF_DEFAULTCLASS
, SCIDX_BE
, &cl2
)) != 0)
1042 if ((err
= priq_add_queue(pif
, 3, maxlen
,
1043 qflags
| PRCF_LAZY
, SCIDX_RD
, &cl3
)) != 0)
1046 if ((err
= priq_add_queue(pif
, 4, maxlen
,
1047 qflags
| PRCF_LAZY
, SCIDX_OAM
, &cl4
)) != 0)
1050 if ((err
= priq_add_queue(pif
, 5, maxlen
,
1051 qflags
| PRCF_LAZY
, SCIDX_AV
, &cl5
)) != 0)
1054 if ((err
= priq_add_queue(pif
, 6, maxlen
,
1055 qflags
| PRCF_LAZY
, SCIDX_RV
, &cl6
)) != 0)
1058 if ((err
= priq_add_queue(pif
, 7, maxlen
,
1059 qflags
| PRCF_LAZY
, SCIDX_VI
, &cl7
)) != 0)
1062 if ((err
= priq_add_queue(pif
, 8, maxlen
,
1063 qflags
| PRCF_LAZY
, SCIDX_VO
, &cl8
)) != 0)
1066 if ((err
= priq_add_queue(pif
, 9, maxlen
,
1067 qflags
, SCIDX_CTL
, &cl9
)) != 0)
1070 err
= ifclassq_attach(ifq
, PKTSCHEDT_PRIQ
, pif
,
1071 priq_enqueue_ifclassq
, priq_dequeue_ifclassq
, NULL
,
1072 priq_request_ifclassq
);
1074 /* cache these for faster lookup */
1076 ifq
->ifcq_disc_slots
[SCIDX_BK_SYS
].qid
= SCIDX_BK_SYS
;
1077 ifq
->ifcq_disc_slots
[SCIDX_BK_SYS
].cl
= cl0
;
1079 ifq
->ifcq_disc_slots
[SCIDX_BK
].qid
= SCIDX_BK
;
1080 ifq
->ifcq_disc_slots
[SCIDX_BK
].cl
= cl1
;
1082 ifq
->ifcq_disc_slots
[SCIDX_BE
].qid
= SCIDX_BE
;
1083 ifq
->ifcq_disc_slots
[SCIDX_BE
].cl
= cl2
;
1085 ifq
->ifcq_disc_slots
[SCIDX_RD
].qid
= SCIDX_RD
;
1086 ifq
->ifcq_disc_slots
[SCIDX_RD
].cl
= cl3
;
1088 ifq
->ifcq_disc_slots
[SCIDX_OAM
].qid
= SCIDX_OAM
;
1089 ifq
->ifcq_disc_slots
[SCIDX_OAM
].cl
= cl4
;
1091 ifq
->ifcq_disc_slots
[SCIDX_AV
].qid
= SCIDX_AV
;
1092 ifq
->ifcq_disc_slots
[SCIDX_AV
].cl
= cl5
;
1094 ifq
->ifcq_disc_slots
[SCIDX_RV
].qid
= SCIDX_RV
;
1095 ifq
->ifcq_disc_slots
[SCIDX_RV
].cl
= cl6
;
1097 ifq
->ifcq_disc_slots
[SCIDX_VI
].qid
= SCIDX_VI
;
1098 ifq
->ifcq_disc_slots
[SCIDX_VI
].cl
= cl7
;
1100 ifq
->ifcq_disc_slots
[SCIDX_VO
].qid
= SCIDX_VO
;
1101 ifq
->ifcq_disc_slots
[SCIDX_VO
].cl
= cl8
;
1103 ifq
->ifcq_disc_slots
[SCIDX_CTL
].qid
= SCIDX_CTL
;
1104 ifq
->ifcq_disc_slots
[SCIDX_CTL
].cl
= cl9
;
1109 (void) priq_destroy_locked(pif
);
1115 priq_teardown_ifclassq(struct ifclassq
*ifq
)
1117 struct priq_if
*pif
= ifq
->ifcq_disc
;
1120 IFCQ_LOCK_ASSERT_HELD(ifq
);
1121 VERIFY(pif
!= NULL
&& ifq
->ifcq_type
== PKTSCHEDT_PRIQ
);
1123 (void) priq_destroy_locked(pif
);
1125 ifq
->ifcq_disc
= NULL
;
1126 for (i
= 0; i
< IFCQ_SC_MAX
; i
++) {
1127 ifq
->ifcq_disc_slots
[i
].qid
= 0;
1128 ifq
->ifcq_disc_slots
[i
].cl
= NULL
;
1131 return (ifclassq_detach(ifq
));
1135 priq_getqstats_ifclassq(struct ifclassq
*ifq
, u_int32_t slot
,
1136 struct if_ifclassq_stats
*ifqs
)
1138 struct priq_if
*pif
= ifq
->ifcq_disc
;
1140 IFCQ_LOCK_ASSERT_HELD(ifq
);
1141 VERIFY(ifq
->ifcq_type
== PKTSCHEDT_PRIQ
);
1143 if (slot
>= IFCQ_SC_MAX
)
1146 return (priq_get_class_stats(pif
, ifq
->ifcq_disc_slots
[slot
].qid
,
1147 &ifqs
->ifqs_priq_stats
));
1151 priq_throttle(struct priq_if
*pif
, cqrq_throttle_t
*tr
)
1153 struct ifclassq
*ifq
= pif
->pif_ifq
;
1154 struct priq_class
*cl
;
1157 IFCQ_LOCK_ASSERT_HELD(ifq
);
1158 VERIFY(!(pif
->pif_flags
& PRIQIFF_ALTQ
));
1161 tr
->level
= pif
->pif_throttle
;
1165 if (tr
->level
== pif
->pif_throttle
)
1168 /* Current throttling levels only involve BK_SYS class */
1169 cl
= ifq
->ifcq_disc_slots
[SCIDX_BK_SYS
].cl
;
1171 switch (tr
->level
) {
1172 case IFNET_THROTTLE_OFF
:
1173 err
= priq_resumeq(pif
, cl
);
1176 case IFNET_THROTTLE_OPPORTUNISTIC
:
1177 err
= priq_suspendq(pif
, cl
);
1185 if (err
== 0 || err
== ENXIO
) {
1186 if (pktsched_verbose
) {
1187 log(LOG_DEBUG
, "%s: %s throttling level %sset %d->%d\n",
1188 if_name(PRIQIF_IFP(pif
)), priq_style(pif
),
1189 (err
== 0) ? "" : "lazy ", pif
->pif_throttle
,
1192 pif
->pif_throttle
= tr
->level
;
1196 priq_purgeq(pif
, cl
, 0, NULL
, NULL
);
1198 log(LOG_ERR
, "%s: %s unable to set throttling level "
1199 "%d->%d [error=%d]\n", if_name(PRIQIF_IFP(pif
)),
1200 priq_style(pif
), pif
->pif_throttle
, tr
->level
, err
);
1207 priq_resumeq(struct priq_if
*pif
, struct priq_class
*cl
)
1209 struct ifclassq
*ifq
= pif
->pif_ifq
;
1212 IFCQ_LOCK_ASSERT_HELD(ifq
);
1215 if (q_is_rio(&cl
->cl_q
))
1216 err
= rio_suspendq(cl
->cl_rio
, &cl
->cl_q
, FALSE
);
1218 #endif /* CLASSQ_RIO */
1220 if (q_is_red(&cl
->cl_q
))
1221 err
= red_suspendq(cl
->cl_red
, &cl
->cl_q
, FALSE
);
1223 #endif /* CLASSQ_RED */
1225 if (q_is_blue(&cl
->cl_q
))
1226 err
= blue_suspendq(cl
->cl_blue
, &cl
->cl_q
, FALSE
);
1228 #endif /* CLASSQ_BLUE */
1229 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
1230 err
= sfb_suspendq(cl
->cl_sfb
, &cl
->cl_q
, FALSE
);
1233 qstate(&cl
->cl_q
) = QS_RUNNING
;
1239 priq_suspendq(struct priq_if
*pif
, struct priq_class
*cl
)
1241 struct ifclassq
*ifq
= pif
->pif_ifq
;
1244 IFCQ_LOCK_ASSERT_HELD(ifq
);
1247 if (q_is_rio(&cl
->cl_q
))
1248 err
= rio_suspendq(cl
->cl_rio
, &cl
->cl_q
, TRUE
);
1250 #endif /* CLASSQ_RIO */
1252 if (q_is_red(&cl
->cl_q
))
1253 err
= red_suspendq(cl
->cl_red
, &cl
->cl_q
, TRUE
);
1255 #endif /* CLASSQ_RED */
1257 if (q_is_blue(&cl
->cl_q
))
1258 err
= blue_suspendq(cl
->cl_blue
, &cl
->cl_q
, TRUE
);
1260 #endif /* CLASSQ_BLUE */
1261 if (q_is_sfb(&cl
->cl_q
)) {
1262 if (cl
->cl_sfb
!= NULL
) {
1263 err
= sfb_suspendq(cl
->cl_sfb
, &cl
->cl_q
, TRUE
);
1265 VERIFY(cl
->cl_flags
& PRCF_LAZY
);
1266 err
= ENXIO
; /* delayed throttling */
1270 if (err
== 0 || err
== ENXIO
)
1271 qstate(&cl
->cl_q
) = QS_SUSPENDED
;
1275 #endif /* PKTSCHED_PRIQ */