diff --git a/bsd/net/classq/classq_subr.c b/bsd/net/classq/classq_subr.c
new file mode 100644 (file)
index 0000000..738c86e
--- /dev/null
@@ -0,0 +1,794 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/random.h>
+#include <sys/kernel_types.h>
+#include <sys/sysctl.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+#include <net/classq/classq.h>
+#if CLASSQ_RED
+#include <net/classq/classq_red.h>
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+#include <net/classq/classq_rio.h>
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+#include <net/classq/classq_blue.h>
+#endif /* CLASSQ_BLUE */
+#include <net/classq/classq_sfb.h>
+#include <net/pktsched/pktsched.h>
+
+#include <libkern/libkern.h>
+
+#if PF_ALTQ
+#include <net/altq/altq.h>
+#endif /* PF_ALTQ */
+
+static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
+    u_int32_t, struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *,
+    boolean_t);
+static struct mbuf *ifclassq_poll_common(struct ifclassq *,
+    mbuf_svc_class_t, boolean_t);
+static struct mbuf *ifclassq_tbr_dequeue_common(struct ifclassq *, int,
+    mbuf_svc_class_t, boolean_t);
+
+void
+classq_init(void)
+{
+       _CASSERT(MBUF_TC_BE == 0);
+       _CASSERT(MBUF_SC_BE == 0);
+       _CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
+
+#if CLASSQ_RED
+       red_init();
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+       rio_init();
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+       blue_init();
+#endif /* CLASSQ_BLUE */
+       sfb_init();
+}
+
+int
+ifclassq_setup(struct ifnet *ifp, u_int32_t sflags, boolean_t reuse)
+{
+#pragma unused(reuse)
+       struct ifclassq *ifq = &ifp->if_snd;
+       int err = 0;
+
+       IFCQ_LOCK(ifq);
+       VERIFY(IFCQ_IS_EMPTY(ifq));
+       ifq->ifcq_ifp = ifp;
+       IFCQ_LEN(ifq) = 0;
+       bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt));
+       bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt));
+
+       VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
+       VERIFY(ifq->ifcq_flags == 0);
+       VERIFY(ifq->ifcq_sflags == 0);
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(ifq->ifcq_enqueue == NULL);
+       VERIFY(ifq->ifcq_dequeue == NULL);
+       VERIFY(ifq->ifcq_dequeue_sc == NULL);
+       VERIFY(ifq->ifcq_request == NULL);
+
+       if (ifp->if_eflags & IFEF_TXSTART) {
+               u_int32_t maxlen = 0;
+
+               if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
+                       maxlen = if_sndq_maxlen;
+               IFCQ_SET_MAXLEN(ifq, maxlen);
+
+               ifq->ifcq_sflags = sflags;
+               err = ifclassq_pktsched_setup(ifq);
+               if (err == 0)
+                       ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
+       }
+
+#if PF_ALTQ
+       ifq->ifcq_drain = 0;
+       IFCQ_ALTQ(ifq)->altq_ifcq = ifq;
+       VERIFY(IFCQ_ALTQ(ifq)->altq_type == ALTQT_NONE);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_flags == 0);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_disc == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_enqueue == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue_sc == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_request == NULL);
+
+       if ((ifp->if_eflags & IFEF_TXSTART) &&
+           ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED)
+               ALTQ_SET_READY(IFCQ_ALTQ(ifq));
+       else
+               ALTQ_CLEAR_READY(IFCQ_ALTQ(ifq));
+#endif /* PF_ALTQ */
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+void
+ifclassq_teardown(struct ifnet *ifp)
+{
+       struct ifclassq *ifq = &ifp->if_snd;
+
+       IFCQ_LOCK(ifq);
+#if PF_ALTQ
+       if (ALTQ_IS_READY(IFCQ_ALTQ(ifq))) {
+               if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
+                       altq_disable(IFCQ_ALTQ(ifq));
+               if (ALTQ_IS_ATTACHED(IFCQ_ALTQ(ifq)))
+                       altq_detach(IFCQ_ALTQ(ifq));
+               IFCQ_ALTQ(ifq)->altq_flags = 0;
+       }
+       ifq->ifcq_drain = 0;
+       IFCQ_ALTQ(ifq)->altq_ifcq = NULL;
+       VERIFY(IFCQ_ALTQ(ifq)->altq_type == ALTQT_NONE);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_flags == 0);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_disc == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_enqueue == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue_sc == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_request == NULL);
+#endif /* PF_ALTQ */
+
+       if (IFCQ_IS_READY(ifq)) {
+               if (IFCQ_TBR_IS_ENABLED(ifq)) {
+                       struct tb_profile tb = { 0, 0, 0 };
+                       (void) ifclassq_tbr_set(ifq, &tb, FALSE);
+               }
+               (void) pktsched_teardown(ifq);
+               ifq->ifcq_flags = 0;
+       }
+       ifq->ifcq_sflags = 0;
+
+       VERIFY(IFCQ_IS_EMPTY(ifq));
+       VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
+       VERIFY(ifq->ifcq_flags == 0);
+       VERIFY(ifq->ifcq_sflags == 0);
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(ifq->ifcq_enqueue == NULL);
+       VERIFY(ifq->ifcq_dequeue == NULL);
+       VERIFY(ifq->ifcq_dequeue_sc == NULL);
+       VERIFY(ifq->ifcq_request == NULL);
+       IFCQ_LEN(ifq) = 0;
+       IFCQ_MAXLEN(ifq) = 0;
+       bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt));
+       bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt));
+
+       IFCQ_UNLOCK(ifq);
+}
+
+int
+ifclassq_pktsched_setup(struct ifclassq *ifq)
+{
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifp->if_eflags & IFEF_TXSTART);
+
+       switch (ifp->if_output_sched_model) {
+       case IFNET_SCHED_MODEL_DRIVER_MANAGED:
+               err = pktsched_setup(ifq, PKTSCHEDT_TCQ, ifq->ifcq_sflags);
+               break;
+
+       case IFNET_SCHED_MODEL_NORMAL:
+               err = pktsched_setup(ifq, PKTSCHEDT_QFQ, ifq->ifcq_sflags);
+               break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (err);
+}
+
+void
+ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
+{
+       IFCQ_LOCK(ifq);
+       if (maxqlen == 0)
+               maxqlen = if_sndq_maxlen;
+       IFCQ_SET_MAXLEN(ifq, maxqlen);
+       IFCQ_UNLOCK(ifq);
+}
+
+u_int32_t
+ifclassq_get_maxlen(struct ifclassq *ifq)
+{
+       return (IFCQ_MAXLEN(ifq));
+}
+
+u_int32_t
+ifclassq_get_len(struct ifclassq *ifq)
+{
+       return (IFCQ_LEN(ifq));
+}
+
+errno_t
+ifclassq_enqueue(struct ifclassq *ifq, struct mbuf *m)
+{
+       errno_t err;
+
+       IFCQ_LOCK_SPIN(ifq);
+
+#if PF_ALTQ
+       if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) {
+               ALTQ_ENQUEUE(IFCQ_ALTQ(ifq), m, err);
+       } else {
+               u_int32_t qlen = IFCQ_LEN(ifq);
+               IFCQ_ENQUEUE(ifq, m, err);
+               if (IFCQ_LEN(ifq) > qlen)
+                       ifq->ifcq_drain += (IFCQ_LEN(ifq) - qlen);
+       }
+#else /* !PF_ALTQ */
+       IFCQ_ENQUEUE(ifq, m, err);
+#endif /* PF_ALTQ */
+
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+errno_t
+ifclassq_dequeue(struct ifclassq *ifq, u_int32_t limit, struct mbuf **head,
+    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+{
+       return (ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, limit, head, tail,
+           cnt, len, FALSE));
+}
+
+errno_t
+ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
+    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+    u_int32_t *len)
+{
+       return (ifclassq_dequeue_common(ifq, sc, limit, head, tail,
+           cnt, len, TRUE));
+}
+
+static errno_t
+ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
+    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+    u_int32_t *len, boolean_t drvmgt)
+{
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       u_int32_t i = 0, l = 0;
+       struct mbuf **first, *last;
+#if PF_ALTQ
+       struct ifaltq *altq = IFCQ_ALTQ(ifq);
+       boolean_t draining;
+#endif /* PF_ALTQ */
+
+       VERIFY(!drvmgt || MBUF_VALID_SC(sc));
+
+       *head = NULL;
+       first = &(*head);
+       last = NULL;
+
+       ifq = &ifp->if_snd;
+       IFCQ_LOCK_SPIN(ifq);
+
+       while (i < limit) {
+               u_int64_t pktlen;
+#if PF_ALTQ
+               u_int32_t qlen;
+
+               qlen = IFCQ_LEN(ifq);
+               draining = IFCQ_IS_DRAINING(ifq);
+
+               if (drvmgt) {
+                       if (IFCQ_TBR_IS_ENABLED(ifq))
+                               IFCQ_TBR_DEQUEUE_SC(ifq, sc, *head);
+                       else if (draining)
+                               IFCQ_DEQUEUE_SC(ifq, sc, *head);
+                       else if (ALTQ_IS_ENABLED(altq))
+                               ALTQ_DEQUEUE_SC(altq, sc, *head);
+                       else
+                               *head = NULL;
+               } else {
+                       if (IFCQ_TBR_IS_ENABLED(ifq))
+                               IFCQ_TBR_DEQUEUE(ifq, *head);
+                       else if (draining)
+                               IFCQ_DEQUEUE(ifq, *head);
+                       else if (ALTQ_IS_ENABLED(altq))
+                               ALTQ_DEQUEUE(altq, *head);
+                       else
+                               *head = NULL;
+               }
+
+               if (draining && *head != NULL) {
+                       VERIFY(ifq->ifcq_drain >= (qlen - IFCQ_LEN(ifq)));
+                       ifq->ifcq_drain -= (qlen - IFCQ_LEN(ifq));
+               }
+#else /* ! PF_ALTQ */
+               if (drvmgt) {
+                       if (IFCQ_TBR_IS_ENABLED(ifq))
+                               IFCQ_TBR_DEQUEUE_SC(ifq, sc, *head);
+                       else
+                               IFCQ_DEQUEUE_SC(ifq, sc, *head);
+               } else {
+                       if (IFCQ_TBR_IS_ENABLED(ifq))
+                               IFCQ_TBR_DEQUEUE(ifq, *head);
+                       else
+                               IFCQ_DEQUEUE(ifq, *head);
+               }
+#endif /* !PF_ALTQ */
+
+               if (*head == NULL)
+                       break;
+
+               (*head)->m_nextpkt = NULL;
+               last = *head;
+
+               l += (*head)->m_pkthdr.len;
+               pktlen = (*head)->m_pkthdr.len;
+
+               (*head)->m_pkthdr.pf_mtag.pftag_pktseq =
+                   atomic_add_64_ov(&(ifp->if_bw.cur_seq), pktlen);
+
+               head = &(*head)->m_nextpkt;
+               i++;
+       }
+
+       IFCQ_UNLOCK(ifq);
+
+       if (tail != NULL)
+               *tail = last;
+       if (cnt != NULL)
+               *cnt = i;
+       if (len != NULL)
+               *len = l;
+
+       return ((*first != NULL) ? 0 : EAGAIN);
+}
+
+struct mbuf *
+ifclassq_poll(struct ifclassq *ifq)
+{
+       return (ifclassq_poll_common(ifq, MBUF_SC_UNSPEC, FALSE));
+}
+
+struct mbuf *
+ifclassq_poll_sc(struct ifclassq *ifq, mbuf_svc_class_t sc)
+{
+       return (ifclassq_poll_common(ifq, sc, TRUE));
+}
+
+static struct mbuf *
+ifclassq_poll_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
+    boolean_t drvmgt)
+{
+#if PF_ALTQ
+       struct ifaltq *altq = IFCQ_ALTQ(ifq);
+#endif /* PF_ALTQ */
+       struct mbuf *m;
+
+       VERIFY(!drvmgt || MBUF_VALID_SC(sc));
+
+#if PF_ALTQ
+       if (drvmgt) {
+               if (IFCQ_TBR_IS_ENABLED(ifq))
+                       IFCQ_TBR_POLL_SC(ifq, sc, m);
+               else if (IFCQ_IS_DRAINING(ifq))
+                       IFCQ_POLL_SC(ifq, sc, m);
+               else if (ALTQ_IS_ENABLED(altq))
+                       ALTQ_POLL_SC(altq, sc, m);
+               else
+                       m = NULL;
+       } else {
+               if (IFCQ_TBR_IS_ENABLED(ifq))
+                       IFCQ_TBR_POLL(ifq, m);
+               else if (IFCQ_IS_DRAINING(ifq))
+                       IFCQ_POLL(ifq, m);
+               else if (ALTQ_IS_ENABLED(altq))
+                       ALTQ_POLL(altq, m);
+               else
+                       m = NULL;
+       }
+#else /* ! PF_ALTQ */
+       if (drvmgt) {
+               if (IFCQ_TBR_IS_ENABLED(ifq))
+                       IFCQ_TBR_POLL_SC(ifq, sc, m);
+               else
+                       IFCQ_POLL_SC(ifq, sc, m);
+       } else {
+               if (IFCQ_TBR_IS_ENABLED(ifq))
+                       IFCQ_TBR_POLL(ifq, m);
+               else
+                       IFCQ_POLL(ifq, m);
+       }
+#endif /* !PF_ALTQ */
+
+       return (m);
+}
+
+void
+ifclassq_update(struct ifclassq *ifq, cqev_t ev)
+{
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(IFCQ_IS_READY(ifq));
+
+#if PF_ALTQ
+       if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
+               ALTQ_UPDATE(IFCQ_ALTQ(ifq), ev);
+#endif /* PF_ALTQ */
+       IFCQ_UPDATE(ifq, ev);
+}
+
+int
+ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline,
+    ifclassq_enq_func enqueue, ifclassq_deq_func dequeue,
+    ifclassq_deq_sc_func dequeue_sc, ifclassq_req_func request)
+{
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(enqueue != NULL);
+       VERIFY(!(dequeue != NULL && dequeue_sc != NULL));
+       VERIFY(request != NULL);
+
+       ifq->ifcq_type = type;
+       ifq->ifcq_disc = discipline;
+       ifq->ifcq_enqueue = enqueue;
+       ifq->ifcq_dequeue = dequeue;
+       ifq->ifcq_dequeue_sc = dequeue_sc;
+       ifq->ifcq_request = request;
+
+       return (0);
+}
+
+int
+ifclassq_detach(struct ifclassq *ifq)
+{
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(ifq->ifcq_disc == NULL);
+
+       ifq->ifcq_type = PKTSCHEDT_NONE;
+       ifq->ifcq_disc = NULL;
+       ifq->ifcq_enqueue = NULL;
+       ifq->ifcq_dequeue = NULL;
+       ifq->ifcq_dequeue_sc = NULL;
+       ifq->ifcq_request = NULL;
+
+       return (0);
+}
+
+int
+ifclassq_getqstats(struct ifclassq *ifq, u_int32_t qid, void *ubuf,
+    u_int32_t *nbytes)
+{
+       struct if_ifclassq_stats *ifqs;
+       int err;
+
+       if (*nbytes < sizeof (*ifqs))
+               return (EINVAL);
+
+       ifqs = _MALLOC(sizeof (*ifqs), M_TEMP, M_WAITOK | M_ZERO);
+       if (ifqs == NULL)
+               return (ENOMEM);
+
+       IFCQ_LOCK(ifq);
+       if (!IFCQ_IS_READY(ifq)) {
+               IFCQ_UNLOCK(ifq);
+               _FREE(ifqs, M_TEMP);
+               return (ENXIO);
+       }
+
+       ifqs->ifqs_len = IFCQ_LEN(ifq);
+       ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
+       *(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
+       *(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
+       ifqs->ifqs_scheduler = ifq->ifcq_type;
+
+       err = pktsched_getqstats(ifq, qid, ifqs);
+       IFCQ_UNLOCK(ifq);
+
+       if (err == 0 && (err = copyout((caddr_t)ifqs,
+           (user_addr_t)(uintptr_t)ubuf, sizeof (*ifqs))) == 0)
+               *nbytes = sizeof (*ifqs);
+
+       _FREE(ifqs, M_TEMP);
+
+       return (err);
+}
+
+const char *
+ifclassq_ev2str(cqev_t ev)
+{
+       const char *c;
+
+       switch (ev) {
+       case CLASSQ_EV_LINK_SPEED:
+               c = "LINK_SPEED";
+               break;
+
+       case CLASSQ_EV_LINK_MTU:
+               c = "LINK_MTU";
+               break;
+
+       case CLASSQ_EV_LINK_UP:
+               c = "LINK_UP";
+               break;
+
+       case CLASSQ_EV_LINK_DOWN:
+               c = "LINK_DOWN";
+               break;
+
+       default:
+               c = "UNKNOWN";
+               break;
+       }
+
+       return (c);
+}
+
+/*
+ * internal representation of token bucket parameters
+ *     rate:   byte_per_unittime << 32
+ *             (((bits_per_sec) / 8) << 32) / machclk_freq
+ *     depth:  byte << 32
+ *
+ */
+#define        TBR_SHIFT       32
+#define        TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
+#define        TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)
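
As a rough illustration of the fixed-point scheme described above (a sketch only; the 800 MHz machclk_freq figure is an assumption for the example, not a value taken from this file): a 100 Mbit/s profile is 12,500,000 bytes/s, and tbr_rate then holds that byte rate per machine-clock tick, left-shifted by TBR_SHIFT so that fractional bytes per tick survive integer arithmetic.

	/*
	 * Illustrative only: how ifclassq_tbr_set() below scales a rate.
	 * Assumes machclk_freq == 800000000 (800 MHz) purely for the example.
	 */
	u_int64_t bps = 100 * 1000 * 1000;         /* profile rate: 100 Mbit/s */
	u_int64_t bytes_per_sec = bps / 8;         /* 12,500,000 bytes/s */
	int64_t rate = TBR_SCALE(bytes_per_sec) / 800000000;
	                                           /* 0.015625 bytes/tick, <<32 */
	int64_t cost_1500B = TBR_SCALE(1500);      /* tokens charged per MTU-sized packet */
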
+
+struct mbuf *
+ifclassq_tbr_dequeue(struct ifclassq *ifq, int op)
+{
+       return (ifclassq_tbr_dequeue_common(ifq, op, MBUF_SC_UNSPEC, FALSE));
+}
+
+struct mbuf *
+ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, int op, mbuf_svc_class_t sc)
+{
+       return (ifclassq_tbr_dequeue_common(ifq, op, sc, TRUE));
+}
+
+static struct mbuf *
+ifclassq_tbr_dequeue_common(struct ifclassq *ifq, int op,
+    mbuf_svc_class_t sc, boolean_t drvmgt)
+{
+       struct tb_regulator *tbr;
+       struct mbuf *m;
+       int64_t interval;
+       u_int64_t now;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(!drvmgt || MBUF_VALID_SC(sc));
+       VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
+
+       tbr = &ifq->ifcq_tbr;
+       if (op == CLASSQDQ_REMOVE && tbr->tbr_lastop == CLASSQDQ_POLL) {
+               /* if this is a remove after poll, bypass tbr check */
+       } else {
+               /* update token only when it is negative */
+               if (tbr->tbr_token <= 0) {
+                       now = read_machclk();
+                       interval = now - tbr->tbr_last;
+                       if (interval >= tbr->tbr_filluptime) {
+                               tbr->tbr_token = tbr->tbr_depth;
+                       } else {
+                               tbr->tbr_token += interval * tbr->tbr_rate;
+                               if (tbr->tbr_token > tbr->tbr_depth)
+                                       tbr->tbr_token = tbr->tbr_depth;
+                       }
+                       tbr->tbr_last = now;
+               }
+               /* if token is still negative, don't allow dequeue */
+               if (tbr->tbr_token <= 0)
+                       return (NULL);
+       }
+
+       /*
+        * ifclassq takes precedence over ALTQ queue;
+        * ifcq_drain count is adjusted by the caller.
+        */
+#if PF_ALTQ
+       if (IFCQ_IS_DRAINING(ifq)) {
+#endif /* PF_ALTQ */
+               if (op == CLASSQDQ_POLL) {
+                       if (drvmgt)
+                               IFCQ_POLL_SC(ifq, sc, m);
+                       else
+                               IFCQ_POLL(ifq, m);
+               } else {
+                       if (drvmgt)
+                               IFCQ_DEQUEUE_SC(ifq, sc, m);
+                       else
+                               IFCQ_DEQUEUE(ifq, m);
+               }
+#if PF_ALTQ
+       } else {
+               struct ifaltq *altq = IFCQ_ALTQ(ifq);
+               if (ALTQ_IS_ENABLED(altq)) {
+                       if (drvmgt)
+                               m = (*altq->altq_dequeue_sc)(altq, sc, op);
+                       else
+                               m = (*altq->altq_dequeue)(altq, op);
+               } else {
+                       m = NULL;
+               }
+       }
+#endif /* PF_ALTQ */
+
+       if (m != NULL && op == CLASSQDQ_REMOVE)
+               tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
+       tbr->tbr_lastop = op;
+
+       return (m);
+}
+
+/*
+ * set a token bucket regulator.
+ * if the specified rate is zero, the token bucket regulator is deleted.
+ */
+int
+ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
+    boolean_t update)
+{
+       struct tb_regulator *tbr;
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       u_int64_t rate, old_rate;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(IFCQ_IS_READY(ifq));
+
+       VERIFY(machclk_freq != 0);
+
+       tbr = &ifq->ifcq_tbr;
+       old_rate = tbr->tbr_rate_raw;
+
+       rate = profile->rate;
+       if (profile->percent > 0) {
+               u_int64_t eff_rate;
+
+               if (profile->percent > 100)
+                       return (EINVAL);
+               if ((eff_rate = ifp->if_output_bw.eff_bw) == 0)
+                       return (ENODEV);
+               rate = (eff_rate * profile->percent) / 100;
+       }
+
+       if (rate == 0) {
+               if (!IFCQ_TBR_IS_ENABLED(ifq))
+                       return (ENOENT);
+
+               if (pktsched_verbose)
+                       printf("%s: TBR disabled\n", if_name(ifp));
+
+               /* disable this TBR */
+               ifq->ifcq_flags &= ~IFCQF_TBR;
+               bzero(tbr, sizeof (*tbr));
+               ifnet_set_start_cycle(ifp, NULL);
+               if (update)
+                       ifclassq_update(ifq, CLASSQ_EV_LINK_SPEED);
+               return (0);
+       }
+
+       if (pktsched_verbose) {
+               printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
+                   (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
+                   "enabled", rate, profile->depth);
+       }
+
+       /* set the new TBR */
+       bzero(tbr, sizeof (*tbr));
+       tbr->tbr_rate_raw = rate;
+       tbr->tbr_percent = profile->percent;
+       ifq->ifcq_flags |= IFCQF_TBR;
+
+       /*
+        * Note that the TBR fill up time (hence the ifnet restart time)
+        * is directly related to the specified TBR depth.  The ideal
+        * depth value should be computed such that the interval time
+        * between each successive wakeup is adequately spaced apart,
+        * in order to reduce scheduling overheads.  A target interval
+        * of 10 ms seems to provide good performance balance.  This can be
+        * overridden by specifying the depth profile.  Values smaller than
+        * the ideal depth will reduce delay at the expense of CPU cycles.
+        */
+       tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
+       if (tbr->tbr_rate > 0) {
+               u_int32_t mtu = ifp->if_mtu;
+               int64_t ival, idepth = 0;
+               int i;
+
+               if (mtu < IF_MINMTU)
+                       mtu = IF_MINMTU;
+
+               ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */
+
+               for (i = 1; ; i++) {
+                       idepth = TBR_SCALE(i * mtu);
+                       if ((idepth / tbr->tbr_rate) > ival)
+                               break;
+               }
+               VERIFY(idepth > 0);
+
+               tbr->tbr_depth = TBR_SCALE(profile->depth);
+               if (tbr->tbr_depth == 0) {
+                       tbr->tbr_filluptime = idepth / tbr->tbr_rate;
+                       /* a little fudge factor to get closer to rate */
+                       tbr->tbr_depth = idepth + (idepth >> 3);
+               } else {
+                       tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
+               }
+       } else {
+               tbr->tbr_depth = TBR_SCALE(profile->depth);
+               tbr->tbr_filluptime = 0xffffffffffffffffLL;
+       }
+       tbr->tbr_token = tbr->tbr_depth;
+       tbr->tbr_last = read_machclk();
+       tbr->tbr_lastop = CLASSQDQ_REMOVE;
+
+       if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
+               struct timespec ts =
+                   { 0, pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
+               if (pktsched_verbose) {
+                       printf("%s: TBR calculated tokens %lld "
+                           "filluptime %llu ns\n", if_name(ifp),
+                           TBR_UNSCALE(tbr->tbr_token),
+                           pktsched_abs_to_nsecs(tbr->tbr_filluptime));
+               }
+               ifnet_set_start_cycle(ifp, &ts);
+       } else {
+               if (pktsched_verbose) {
+                       if (tbr->tbr_rate == 0) {
+                               printf("%s: TBR calculated tokens %lld "
+                                   "infinite filluptime\n", if_name(ifp),
+                                   TBR_UNSCALE(tbr->tbr_token));
+                       } else if (!(ifp->if_flags & IFF_UP)) {
+                               printf("%s: TBR suspended (link is down)\n",
+                                   if_name(ifp));
+                       }
+               }
+               ifnet_set_start_cycle(ifp, NULL);
+       }
+       if (update && tbr->tbr_rate_raw != old_rate)
+               ifclassq_update(ifq, CLASSQ_EV_LINK_SPEED);
+
+       return (0);
+}
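
To make the depth heuristic in ifclassq_tbr_set() concrete, here is a small worked sketch (the numbers are assumptions for illustration, not values from this file): at 10 Mbit/s with a 1500-byte MTU, 10 ms of traffic is 12,500 bytes, so the loop settles on the ninth multiple of the MTU, 13,500 bytes; with no explicit depth in the profile, tbr_depth then becomes that value plus one eighth of itself as the fudge factor.

	/*
	 * Worked sketch of the depth computation above (illustrative values only).
	 * Assume a 1500-byte MTU, a 10 Mbit/s rate, and one machclk tick per
	 * nanosecond, so the 10 ms target interval is simply 10,000,000 ticks.
	 */
	u_int32_t mtu = 1500;
	int64_t rate = TBR_SCALE(10 * 1000 * 1000 / 8) / 1000000000;
	                                        /* bytes per tick, <<32 */
	int64_t ival = 10 * 1000 * 1000;        /* 10 ms expressed in ticks */
	int64_t idepth = 0;
	int i;

	for (i = 1; ; i++) {
		idepth = TBR_SCALE(i * mtu);
		if ((idepth / rate) > ival)     /* drain time of i MTUs exceeds 10 ms */
			break;                  /* breaks at i == 9: 13,500 bytes */
	}
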