/*
 * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* $OpenBSD: altq_rmclass.c,v 1.13 2007/09/13 20:40:02 chl Exp $ */
/* $KAME: altq_rmclass.c,v 1.10 2001/02/09 07:20:40 kjc Exp $ */
/*
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Network Research
 *      Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer@eng.sun.com, May 1977.
 * For questions and/or comments, please send mail to cbq@ee.lbl.gov
 */
#include <sys/cdefs.h>

#ident "@(#)rm_class.c  1.48    97/12/05 SMI"
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/kernel_types.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_rmclass.h>
#include <net/pktsched/pktsched_rmclass_debug.h>
#include <net/classq/classq_red.h>
#include <net/classq/classq_rio.h>
#include <net/classq/classq_blue.h>
#include <net/classq/classq_sfb.h>
#if PKTSCHED_CBQ

#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }
static int	rmc_satisfied(struct rm_class *, struct timeval *);
static void	rmc_wrr_set_weights(struct rm_ifdat *);
static void	rmc_depth_compute(struct rm_class *);
static void	rmc_depth_recompute(rm_class_t *);

static struct mbuf *_rmc_wrr_dequeue_next(struct rm_ifdat *, cqdq_op_t);
static struct mbuf *_rmc_prr_dequeue_next(struct rm_ifdat *, cqdq_op_t);

static int	_rmc_addq(rm_class_t *, struct mbuf *, struct pf_mtag *);
static void	_rmc_dropq(rm_class_t *);
static struct mbuf *_rmc_getq(rm_class_t *);
static struct mbuf *_rmc_pollq(rm_class_t *);

static int	rmc_under_limit(struct rm_class *, struct timeval *);
static void	rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
static void	rmc_drop_action(struct rm_class *);
static void	rmc_restart(struct rm_class *);
static void	rmc_root_overlimit(rm_class_t *, rm_class_t *);
#define	RMC_ZONE_MAX	32		/* maximum elements in zone */
#define	RMC_ZONE_NAME	"pktsched_cbq_cl" /* zone name (CBQ for now) */

static unsigned int rmc_size;		/* size of zone element */
static struct zone *rmc_zone;		/* zone for rm_class */
void
rmclass_init(void)
{
	if (rmc_zone != NULL)
		return;

	rmc_size = sizeof (struct rm_class);
	rmc_zone = zinit(rmc_size, RMC_ZONE_MAX * rmc_size, 0, RMC_ZONE_NAME);
	if (rmc_zone == NULL) {
		panic("%s: failed allocating %s", __func__, RMC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(rmc_zone, Z_EXPAND, TRUE);
	zone_change(rmc_zone, Z_CALLERACCT, TRUE);
}
#define	BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class borrowing from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define	ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next round.
 *
 * the heuristics for setting the top-level variable (cutoff_) become:
 *	1. if a packet arrives for a not-overlimit class, set cutoff
 *	   to the depth of the class.
 *	2. if cutoff is i, and a packet arrives for an overlimit class
 *	   with an underlimit ancestor at a lower level than i (say j),
 *	   then set cutoff to j.
 *	3. at scheduling a packet, if there is no underlimit class
 *	   due to the current cutoff level, increase cutoff by 1 and
 *	   then try to schedule again.
 */
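/*
 * Editor's illustration (not part of the original source): consider a
 * root (depth 2) over an agency class (depth 1) over leaves (depth 0).
 * A packet arriving for an underlimit leaf drags cutoff_ down to 0, so
 * no class may borrow.  When the scheduler then finds no underlimit
 * class, rule 3 raises cutoff_ to 1, letting an overlimit leaf borrow
 * from an underlimit depth-1 ancestor; rule 2 later pulls cutoff_ back
 * down as soon as a packet arrives for a class whose underlimit
 * ancestor sits below the current cutoff level.
 */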
/*
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte	is the data rate of the interface in nanoseconds/byte.
 *		E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *		than 100% of the bandwidth, this number should be the
 *		'effective' rate for the class.  Let f be the
 *		bandwidth fraction allocated to this class, and let
 *		nsPerByte be the data rate of the output link in
 *		nanoseconds/byte.  Then nsecPerByte is set to
 *		nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *		for a class that gets 50% of an ethernet's bandwidth.
 *
 * action	the routine to call when the class is over limit.
 *
 * maxq		max allowable queue size for class (in packets).
 *
 * parent	parent class pointer.
 *
 * borrow	class to borrow from (should be either 'parent' or null).
 *
 * maxidle	max value allowed for class 'idle' time estimate (this
 *		parameter determines how large an initial burst of packets
 *		can be before overlimit action is invoked).
 *
 * offtime	how long 'delay' action will delay when class goes over
 *		limit (this parameter determines the steady-state burst
 *		size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *	ptime = s * nsPerByte * (1 - f) / f
 *	maxidle = ptime * (1 - g^b) / g^b
 *	minidle = -ptime * (1 / (f - 1))
 *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 *	maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 *	offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
 */
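/*
 * Editor's sketch (not part of the original source): the two scaling
 * rules above, transcribed as code.  Inputs are the raw formula values;
 * the helper names are hypothetical and nothing in this file calls them.
 */
#if 0	/* illustrative only */
static u_int32_t
cbq_scale_maxidle(u_int32_t maxidle, u_int32_t nsecPerByte)
{
	/* maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) */
	return ((maxidle << RM_FILTER_GAIN) * 8 / (1000 * nsecPerByte));
}

static u_int32_t
cbq_scale_offtime(u_int32_t offtime, u_int32_t nsecPerByte)
{
	/* offtime = offtime * 8 / (1000 * nsecPerByte) */
	return (offtime * 8 / (1000 * nsecPerByte));
}
#endif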
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int32_t nsecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), u_int32_t qid, u_int32_t maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int32_t maxidle,
    int minidle, u_int32_t offtime, int pktsize, int flags)
{
	struct rm_class *cl;
	struct ifclassq *ifq;
	struct ifnet *ifp;
	struct rm_class *peer;
	if (nsecPerByte == 0) {
		log(LOG_ERR, "%s: invalid inverse data rate\n", __func__);
		return (NULL);
	}

	if (pri >= RM_MAXPRIO) {
		log(LOG_ERR, "%s: priority %d out of range! (max %d)\n",
		    __func__, pri, RM_MAXPRIO - 1);
		return (NULL);
	}
#if !CLASSQ_RED
	if (flags & RMCF_RED) {
		log(LOG_ERR, "%s: RED not configured for CBQ!\n", __func__);
		return (NULL);
	}
#endif /* !CLASSQ_RED */

#if !CLASSQ_RIO
	if (flags & RMCF_RIO) {
		log(LOG_ERR, "%s: RIO not configured for CBQ!\n", __func__);
		return (NULL);
	}
#endif /* !CLASSQ_RIO */

#if !CLASSQ_BLUE
	if (flags & RMCF_BLUE) {
		log(LOG_ERR, "%s: BLUE not configured for CBQ!\n", __func__);
		return (NULL);
	}
#endif /* !CLASSQ_BLUE */
	/* These are mutually exclusive */
	if ((flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) &&
	    (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_RED &&
	    (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_RIO &&
	    (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_BLUE &&
	    (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_SFB) {
		log(LOG_ERR, "%s: RED|RIO|BLUE|SFB mutually exclusive\n",
		    __func__);
		return (NULL);
	}
	cl = zalloc(rmc_zone);
	if (cl == NULL)
		return (NULL);

	bzero(cl, rmc_size);
	CALLOUT_INIT(&cl->callout_);

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->flags_ = flags;
	cl->ns_per_byte_ = nsecPerByte;

	ifq = ifd->ifq_;
	ifp = ifq->ifcq_ifp;
	if (maxq == 0 || maxq > IFCQ_MAXLEN(ifq)) {
		maxq = IFCQ_MAXLEN(ifq);
		if (maxq == 0)
			maxq = DEFAULT_QLIMIT;	/* use default */
	}
	_qinit(&cl->q_, Q_DROPHEAD, maxq);
	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
	cl->overlimit = action;

	if (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) {
		int pkttime;

		cl->qflags_ = 0;
		if (flags & RMCF_ECN) {
			if (flags & RMCF_BLUE)
				cl->qflags_ |= BLUEF_ECN;
			else if (flags & RMCF_SFB)
				cl->qflags_ |= SFBF_ECN;
			else if (flags & RMCF_RED)
				cl->qflags_ |= REDF_ECN;
			else if (flags & RMCF_RIO)
				cl->qflags_ |= RIOF_ECN;
		}
		if (flags & RMCF_FLOWCTL) {
			if (flags & RMCF_SFB)
				cl->qflags_ |= SFBF_FLOWCTL;
		}
		if (flags & RMCF_FLOWVALVE) {
			if (flags & RMCF_RED)
				cl->qflags_ |= REDF_FLOWVALVE;
		}
		if (flags & RMCF_CLEARDSCP) {
			if (flags & RMCF_RIO)
				cl->qflags_ |= RIOF_CLEARDSCP;
		}
		pkttime = nsecPerByte * pktsize / 1000;
		/* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
#if CLASSQ_RED
		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(ifp, 0, 0,
			    qlimit(&cl->q_) * 10/100,
			    qlimit(&cl->q_) * 30/100,
			    cl->qflags_, pkttime);
			if (cl->red_ != NULL)
				qtype(&cl->q_) = Q_RED;
		}
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
		if (flags & RMCF_RIO) {
			cl->rio_ =
			    rio_alloc(ifp, 0, NULL, cl->qflags_, pkttime);
			if (cl->rio_ != NULL)
				qtype(&cl->q_) = Q_RIO;
		}
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
		if (flags & RMCF_BLUE) {
			cl->blue_ = blue_alloc(ifp, 0, 0, cl->qflags_);
			if (cl->blue_ != NULL)
				qtype(&cl->q_) = Q_BLUE;
		}
#endif /* CLASSQ_BLUE */
		if (flags & RMCF_SFB) {
			if (!(cl->flags_ & RMCF_LAZY))
				cl->sfb_ = sfb_alloc(ifp, qid,
				    qlimit(&cl->q_), cl->qflags_);
			if (cl->sfb_ != NULL || (cl->flags_ & RMCF_LAZY))
				qtype(&cl->q_) = Q_SFB;
		}
	}
	/*
	 * put the class into the class tree
	 */
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (parent != NULL) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}
	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	return (cl);
}
int
rmc_modclass(struct rm_class *cl, u_int32_t nsecPerByte, int maxq,
    u_int32_t maxidle, int minidle, u_int32_t offtime, int pktsize)
{
#pragma unused(pktsize)
	struct rm_ifdat *ifd;
	u_int32_t old_allotment;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(&cl->q_) = maxq;

	cl->minidle_ = (minidle * nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	return (0);
}
/*
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 * the appropriate round robin weights for the CBQ weighted round robin
 * algorithm.
 *
 * Returns:	NONE
 */
static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
	int i;
	struct rm_class *cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0) {
			ifd->M_[i] = 0;
		} else {
			ifd->M_[i] =
			    ifd->alloc_[i] / (ifd->num_[i] * ifd->maxpkt_);
		}
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0) {
					cl->w_allotment_ = 0;
				} else {
					cl->w_allotment_ =
					    cl->allotment_ / ifd->M_[i];
				}
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}
int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}
/*
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 * appropriate depth of class 'cl' and its ancestors.
 *
 * Returns:	NONE
 */
static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t *t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}
/*
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 * the depth of the tree after a class has been deleted.
 *
 * Returns:	NONE
 */
static void
rmc_depth_recompute(rm_class_t *cl)
{
	rm_class_t *p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
}
/*
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 * function deletes a class from the link-sharing structure and frees
 * all resources associated with the class.
 *
 * Returns:	NONE
 */
void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class *p, *head, *previous;

	VERIFY(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 *	 re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			VERIFY(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			VERIFY(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
	rmc_depth_recompute(cl->parent_);

	/*
	 * Free the class structure.
	 */
	if (cl->qalg_.ptr != NULL) {
#if CLASSQ_RIO
		if (q_is_rio(&cl->q_))
			rio_destroy(cl->rio_);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->q_))
			red_destroy(cl->red_);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->q_))
			blue_destroy(cl->blue_);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
			sfb_destroy(cl->sfb_);
		cl->qalg_.ptr = NULL;
		qtype(&cl->q_) = Q_DROPTAIL;
		qstate(&cl->q_) = QS_RUNNING;
	}

	zfree(rmc_zone, cl);
}
/*
 * rmc_init(...) - Initialize the resource management data structures
 * associated with the output portion of interface 'ifp'.  'ifd' is
 * where the structures will be built (for backwards compatibility, the
 * structures aren't kept in the ifnet struct).  'nsecPerByte'
 * gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 * 'restart' is the driver-specific routine that the generic 'delay
 * until under limit' action will call to restart output.  `maxq'
 * is the queue size of the 'link' & 'default' classes.  'maxqueued'
 * is the maximum number of packets that the resource management
 * code will allow to be queued 'downstream' (this is typically 1).
 *
 * Returns:	0 on success
 */
int
rmc_init(struct ifclassq *ifq, struct rm_ifdat *ifd, u_int32_t nsecPerByte,
    void (*restart)(struct ifclassq *), u_int32_t qid, int maxq,
    int maxqueued, u_int32_t maxidle, int minidle, u_int32_t offtime,
    int flags)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	int i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	if (nsecPerByte == 0) {
		log(LOG_ERR, "%s: %s: invalid inverse data rate)\n",
		    __func__, if_name(ifp));
		return (EINVAL);
	}

	mtu = ifp->if_mtu;
	if (mtu < 1) {
		log(LOG_ERR, "%s: %s: invalid MTU (interface not "
		    "initialized?)\n", __func__, if_name(ifp));
		return (EINVAL);
	}
	bzero((char *)ifd, sizeof (*ifd));

	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;

	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd, nsecPerByte,
	    rmc_root_overlimit, qid, maxq, 0, 0, maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		log(LOG_ERR, "rmc_init: root class not allocated\n");
		return (ENOMEM);
	}
	ifd->root_->depth_ = 0;

	return (0);
}
/*
 * rmc_queue_packet(struct rm_class *cl, struct mbuf *m) - Add packet given by
 * mbuf 'm' to queue for resource class 'cl'.  This routine is called
 * by a driver's if_output routine.  This routine must be called with
 * output packet completion interrupts locked out (to avoid racing with
 * rmc_dequeue_next).
 *
 * Returns:	0 on successful queueing
 *		CLASSQEQ_DROPPED when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, struct mbuf *m, struct pf_mtag *t)
{
	struct timeval now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int cpri = cl->pri_;
	int is_empty = qempty(&cl->q_);
	int ret = 0;

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		} else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			    borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob',
					    ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
	}

	ret = _rmc_addq(cl, m, t);
	if (ret != 0 &&
	    (ret == CLASSQEQ_DROPPED || ret == CLASSQEQ_DROPPED_FC ||
	    ret == CLASSQEQ_DROPPED_SP)) {
		/* packet has been freed in _rmc_addq */
		return (ret);
	}
	VERIFY(ret == 0 || ret == CLASSQEQ_SUCCESS_FC);
	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'type', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(&cl->q_) > qlimit(&cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (CLASSQEQ_DROPPED);
	}
	return (ret);
}
/*
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 * classes to see if they are satisfied.
 */
static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int i;
	rm_class_t *p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}
/*
 * rmc_satisfied - Return 1 if the class is satisfied.  0, otherwise.
 */
static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(&cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}
/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */
static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p = cl;
	rm_class_t *top;
	struct rm_ifdat *ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/*
				 * cutoff is taking effect, just
				 * return false without calling
				 * the delay action.
				 */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/*
				 * cutoff is taking effect, use this
				 * class as top.
				 */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}
/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
 * packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */
static struct mbuf *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t op)
{
	struct rm_class *cl = NULL, *first = NULL;
	u_int32_t deficit;
	int cpri;
	struct mbuf *m;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == CLASSQDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	} else {
		/* mode == CLASSQDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
_again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_])" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
_wrr_loop:
		cl = ifd->active_[cpri];
		VERIFY(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(&cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
					ifd->borrowed_[ifd->qi_] = NULL;
				} else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
_wrr_out:
	if (op == CLASSQDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(&cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == CLASSQDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}
/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static struct mbuf *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t op)
{
	struct mbuf *m;
	int cpri;
	struct rm_class *cl, *first = NULL;
	struct timeval now;

	RM_GETTIME(now);
	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == CLASSQDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == CLASSQDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
_again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		VERIFY(cl != NULL);
		do {
			if (!qempty(&cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
_prr_out:
	if (op == CLASSQDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(&cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == CLASSQDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}
/*
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 * is invoked by the packet driver to get the next packet to be
 * dequeued and output on the link.  If WRR is enabled, then the
 * WRR dequeue next routine will determine the next packet to send.
 * Otherwise, packet-by-packet round robin is invoked.
 *
 * Returns:	NULL, if a packet is not available or if all
 *		classes are overlimit.
 *
 *		Otherwise, Pointer to the next packet.
 */
struct mbuf *
rmc_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}
/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
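/*
 * Editor's note (not part of the original source): the three shifts sum
 * to 1/1024 + 1/65536 + 1/131072 ~= 0.00099945, i.e. within about 0.06%
 * of a true divide by 1000.  A quick userland check of that claim:
 */
#if 0	/* illustrative only */
#include <stdio.h>

int
main(void)
{
	unsigned int t = 1500 * 800;	/* 1500 bytes at 800 ns/byte */

	/* prints approx=1198 exact=1200 */
	printf("approx=%u exact=%u\n",
	    (t >> 10) + (t >> 16) + (t >> 17), t / 1000);
	return (0);
}
#endif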
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int idle, avgidle, pktlen;
	int pkt_time, tidle;
	rm_class_t *cl, *borrowed;
	rm_class_t *borrows;
	struct timeval *nowp;
	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, 1, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1 /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif
	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000) {
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;
		}

		/* get pkt_time (for class) in usec */
#if 1 /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;
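		/*
		 * Editor's note (not part of the original source): with
		 * g = 1 - 2^(-RM_FILTER_GAIN), the update above is the
		 * EWMA avgidle <- g*avgidle + (1-g)*idle, computed on an
		 * estimate kept scaled up by 2^RM_FILTER_GAIN as described
		 * in the comment before rmc_newclass().
		 */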
		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo',
			    cl->stats_.handle);
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;

			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
			    (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}
		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, 1, pktlen);
		}

		cl = cl->parent_;
	}
	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
		if ((qlen(&cl->q_) <= 0) ||
		    TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob',
			    borrowed->depth_);
		}
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}
/*
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 * over-limit action routines.  These get invoked by rmc_under_limit()
 * if a class with packets to send is over its bandwidth limit & can't
 * borrow from a parent class.
 *
 * Returns:	NONE
 */
static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;

	VERIFY(qlen(&cl->q_) > 0);
	IFCQ_CONVERT_LOCK(ifd->ifq_);
	_rmc_dropq(cl);
	if (qempty(&cl->q_))
		ifd->na_[cl->pri_]--;
}
void
rmc_drop(struct rm_class *cl, u_int32_t flow, u_int32_t *packets,
    u_int32_t *bytes)
{
	struct rm_ifdat *ifd = cl->ifdat_;
	struct ifclassq *ifq = ifd->ifq_;
	u_int32_t pkt = 0, len = 0, qlen;

	if ((qlen = qlen(&cl->q_)) != 0) {
		IFCQ_CONVERT_LOCK(ifq);
#if CLASSQ_RIO
		if (q_is_rio(&cl->q_))
			rio_purgeq(cl->rio_, &cl->q_, flow, &pkt, &len);
		else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->q_))
			red_purgeq(cl->red_, &cl->q_, flow, &pkt, &len);
		else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->q_))
			blue_purgeq(cl->blue_, &cl->q_, flow, &pkt, &len);
		else
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
			sfb_purgeq(cl->sfb_, &cl->q_, flow, &pkt, &len);
		else
			_flushq_flow(&cl->q_, flow, &pkt, &len);

		VERIFY(qlen(&cl->q_) == (qlen - pkt));

		PKTCNTR_ADD(&cl->stats_.drop_cnt, pkt, len);
		IFCQ_DROP_ADD(ifq, pkt, len);

		VERIFY(((signed)IFCQ_LEN(ifq) - pkt) >= 0);
		IFCQ_LEN(ifq) -= pkt;

		if (qempty(&cl->q_))
			ifd->na_[cl->pri_]--;
	}
	if (packets != NULL)
		*packets = pkt;
	if (bytes != NULL)
		*bytes = len;
}
void
rmc_dropall(struct rm_class *cl)
{
	rmc_drop(cl, 0, NULL, NULL);
}
/*
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 * delay action routine.  It is invoked via rmc_under_limit when the
 * packet is discovered to be overlimit.
 *
 * If the delay action is result of borrow class being overlimit, then
 * delay for the offtime of the borrowing class that is overlimit.
 *
 * Returns:	NONE
 */
void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int ndelay, t, extradelay;

	cl->stats_.overactions++;
	TV_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
#ifndef BORROW_OFFTIME
	ndelay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay,
			    &cl->undertime_);
			ndelay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (ndelay > tick * 2) {
			/*
			 * FreeBSD rounds up the tick;
			 * other BSDs round down the tick.
			 */
			t = hzto(&cl->undertime_) + 1;
		} else {
			t = 2;
		}
		CALLOUT_RESET(&cl->callout_, t,
		    (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}
/*
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 * called by the system timer code & is responsible for checking if the
 * class is still sleeping (it might have been restarted as a side
 * effect of the queue scan on a packet arrival) and, if so, restarting
 * output for the class.  Inspecting the class state & restarting output
 * require locking the class structure.  In general the driver is
 * responsible for locking but this is the only routine that is not
 * called directly or indirectly from the interface driver so it has to
 * know about system locking conventions.
 *
 * Returns:	NONE
 */
static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;

	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
}
/*
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 * handling routine for the root class of the link sharing structure.
 *
 * Returns:	NONE
 */
static void
rmc_root_overlimit(struct rm_class *cl,
    struct rm_class *borrow)
{
#pragma unused(cl, borrow)
	panic("rmc_root_overlimit");
	/* NOTREACHED */
}
/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 * effects on the code whether queues are red queues or droptail
 * queues.
 */
static int
_rmc_addq(rm_class_t *cl, struct mbuf *m, struct pf_mtag *t)
{
#if CLASSQ_RIO
	if (q_is_rio(&cl->q_))
		return (rio_addq(cl->rio_, &cl->q_, m, t));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->q_))
		return (red_addq(cl->red_, &cl->q_, m, t));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->q_))
		return (blue_addq(cl->blue_, &cl->q_, m, t));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->q_)) {
		if (cl->sfb_ == NULL) {
			struct ifclassq *ifq = cl->ifdat_->ifq_;
			struct ifnet *ifp = ifq->ifcq_ifp;

			VERIFY(cl->flags_ & RMCF_LAZY);
			IFCQ_CONVERT_LOCK(ifq);

			cl->sfb_ = sfb_alloc(ifp, cl->stats_.handle,
			    qlimit(&cl->q_), cl->qflags_);
			if (cl->sfb_ == NULL) {
				/* fall back to droptail */
				qtype(&cl->q_) = Q_DROPTAIL;
				cl->flags_ &= ~RMCF_SFB;
				cl->qflags_ &= ~(SFBF_ECN | SFBF_FLOWCTL);

				log(LOG_ERR, "%s: CBQ SFB lazy allocation "
				    "failed for qid=%d pri=%d, falling back "
				    "to DROPTAIL\n", if_name(ifp),
				    cl->stats_.handle, cl->pri_);
			}
		}
		if (cl->sfb_ != NULL)
			return (sfb_addq(cl->sfb_, &cl->q_, m, t));
	}
	else if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, t, 0);

	/* test for qlen > qlimit is done by caller */
	_addq(&cl->q_, m);
	return (0);
}
/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
	struct mbuf *m;

	if ((m = _rmc_getq(cl)) != NULL)
		m_freem(m);
}
static struct mbuf *
_rmc_getq(rm_class_t *cl)
{
#if CLASSQ_RIO
	if (q_is_rio(&cl->q_))
		return (rio_getq(cl->rio_, &cl->q_));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->q_))
		return (red_getq(cl->red_, &cl->q_));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->q_))
		return (blue_getq(cl->blue_, &cl->q_));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
		return (sfb_getq(cl->sfb_, &cl->q_));

	return (_getq(&cl->q_));
}
static struct mbuf *
_rmc_pollq(rm_class_t *cl)
{
	return (qhead(&cl->q_));
}
void
rmc_updateq(rm_class_t *cl, cqev_t ev)
{
#if CLASSQ_RIO
	if (q_is_rio(&cl->q_))
		return (rio_updateq(cl->rio_, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->q_))
		return (red_updateq(cl->red_, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->q_))
		return (blue_updateq(cl->blue_, ev));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
		return (sfb_updateq(cl->sfb_, ev));
}
#ifdef CBQ_TRACE
struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace *cbqtrace_ptr = NULL;
int cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);
static struct rmc_funcs {
	void *func;
	char *name;
} rmc_funcs[] = {
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};
static char *
rmc_funcname(void *func)
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);

	return ("unknown");
}
void
cbqtrace_dump(int counter)
{
	int i, *p;
	char *cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i = 0; i < 20; i++) {
		log(LOG_DEBUG, "[0x%x] ", *p++);
		log(LOG_DEBUG, "%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		log(LOG_DEBUG, "%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		log(LOG_DEBUG, "%d\n", *p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */
#endif /* PKTSCHED_CBQ */