/*
 * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* $OpenBSD: altq_rmclass.c,v 1.13 2007/09/13 20:40:02 chl Exp $ */
/* $KAME: altq_rmclass.c,v 1.10 2001/02/09 07:20:40 kjc Exp $ */

/*
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Network Research
 *	Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer@eng.sun.com, May 1997.
 * For questions and/or comments, please send mail to cbq@ee.lbl.gov
 */

#include <sys/cdefs.h>

#ident "@(#)rm_class.c  1.48     97/12/05 SMI"

#if PKTSCHED_CBQ

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/kernel_types.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_rmclass.h>
#include <net/pktsched/pktsched_rmclass_debug.h>
#include <net/classq/classq_red.h>
#include <net/classq/classq_rio.h>
#include <net/classq/classq_blue.h>
#include <net/classq/classq_sfb.h>

/*
 * Local Macros
 */

#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }

/*
 * Local routines.
 */

static int	rmc_satisfied(struct rm_class *, struct timeval *);
static void	rmc_wrr_set_weights(struct rm_ifdat *);
static void	rmc_depth_compute(struct rm_class *);
static void	rmc_depth_recompute(rm_class_t *);

static struct mbuf *_rmc_wrr_dequeue_next(struct rm_ifdat *, cqdq_op_t);
static struct mbuf *_rmc_prr_dequeue_next(struct rm_ifdat *, cqdq_op_t);

static int	_rmc_addq(rm_class_t *, struct mbuf *, struct pf_mtag *);
static void	_rmc_dropq(rm_class_t *);
static struct mbuf *_rmc_getq(rm_class_t *);
static struct mbuf *_rmc_pollq(rm_class_t *);

static int	rmc_under_limit(struct rm_class *, struct timeval *);
static void	rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
static void	rmc_drop_action(struct rm_class *);
static void	rmc_restart(struct rm_class *);
static void	rmc_root_overlimit(rm_class_t *, rm_class_t *);

#define	RMC_ZONE_MAX	32		/* maximum elements in zone */
#define	RMC_ZONE_NAME	"pktsched_cbq_cl" /* zone name (CBQ for now) */

static unsigned int rmc_size;		/* size of zone element */
static struct zone *rmc_zone;		/* zone for rm_class */

void
rmclass_init(void)
{
	if (rmc_zone != NULL)
		return;

	rmc_size = sizeof (struct rm_class);
	rmc_zone = zinit(rmc_size, RMC_ZONE_MAX * rmc_size, 0, RMC_ZONE_NAME);
	if (rmc_zone == NULL) {
		panic("%s: failed allocating %s", __func__, RMC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(rmc_zone, Z_EXPAND, TRUE);
	zone_change(rmc_zone, Z_CALLERACCT, TRUE);
}

#define	BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class being borrowed from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define	ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * the heuristics for setting the top-level variable (cutoff_) become:
 * 1. if a packet arrives for a not-overlimit class, set cutoff
 *    to the depth of the class.
 * 2. if cutoff is i, and a packet arrives for an overlimit class
 *    with an underlimit ancestor at a lower level than i (say j),
 *    then set cutoff to j.
 * 3. at scheduling a packet, if there is no underlimit class
 *    due to the current cutoff level, increase cutoff by 1 and
 *    then try to schedule again.
 */
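
/*
 * Illustration of the cutoff heuristics (assumed two-level hierarchy,
 * not from the original sources): take a root class at depth 2, an
 * interior "agency" class at depth 1 and leaf classes at depth 0.  If
 * a packet arrives for an overlimit leaf whose depth-1 ancestor is
 * underlimit, heuristic 2 sets cutoff_ to 1: the leaf may borrow from
 * the agency class, but borrowing from the root stays suppressed until
 * heuristic 3 (or reset_cutoff) raises cutoff_ again.
 */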

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte  is the data rate of the interface in nanoseconds/byte.
 *              E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *              than 100% of the bandwidth, this number should be the
 *              'effective' rate for the class.  Let f be the
 *              bandwidth fraction allocated to this class, and let
 *              nsPerByte be the data rate of the output link in
 *              nanoseconds/byte.  Then nsecPerByte is set to
 *              nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *              for a class that gets 50% of an ethernet's bandwidth.
 *
 * action       the routine to call when the class is over limit.
 *
 * maxq         max allowable queue size for class (in packets).
 *
 * parent       parent class pointer.
 *
 * borrow       class to borrow from (should be either 'parent' or null).
 *
 * maxidle      max value allowed for class 'idle' time estimate (this
 *              parameter determines how large an initial burst of packets
 *              can be before overlimit action is invoked).
 *
 * offtime      how long 'delay' action will delay when class goes over
 *              limit (this parameter determines the steady-state burst
 *              size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *	ptime = s * nsPerByte * (1 - f) / f
 *	maxidle = ptime * (1 - g^b) / g^b
 *	minidle = -ptime * (1 / (f - 1))
 *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
 */
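
/*
 * Worked example (illustrative numbers only, assuming RM_FILTER_GAIN
 * is 5 and RM_NS_PER_SEC is 10^9): for a class given 50% of a 10Mb/s
 * ethernet, nsecPerByte = 800 / 0.5 = 1600.  A caller passing
 * maxidle = 40 and offtime = 80 then gets, from the expressions in
 * the body below:
 *
 *	allotment_ = RM_NS_PER_SEC / 1600		(= 625000 bytes/sec)
 *	maxidle_   = (40 * 1600) / 8			(= 8000)
 *	offtime_   = ((80 * 1600) / 8) >> 5		(= 500)
 *
 * i.e. the passed values are rescaled by the link rate and the filter
 * gain exactly as described above.
 */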
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int32_t nsecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), u_int32_t qid, u_int32_t maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int32_t maxidle,
    int minidle, u_int32_t offtime, int pktsize, int flags)
{
	struct ifnet *ifp;
	struct ifclassq *ifq;
	struct rm_class *cl;
	struct rm_class *peer;

	if (nsecPerByte == 0) {
		log(LOG_ERR, "%s: invalid inverse data rate\n", __func__);
		return (NULL);
	}

	if (pri >= RM_MAXPRIO) {
		log(LOG_ERR, "%s: priority %d out of range! (max %d)\n",
		    __func__, pri, RM_MAXPRIO - 1);
		return (NULL);
	}

#if !CLASSQ_RED
	if (flags & RMCF_RED) {
		log(LOG_ERR, "%s: RED not configured for CBQ!\n", __func__);
		return (NULL);
	}
#endif /* !CLASSQ_RED */

#if !CLASSQ_RIO
	if (flags & RMCF_RIO) {
		log(LOG_ERR, "%s: RIO not configured for CBQ!\n", __func__);
		return (NULL);
	}
#endif /* !CLASSQ_RIO */

#if !CLASSQ_BLUE
	if (flags & RMCF_BLUE) {
		log(LOG_ERR, "%s: BLUE not configured for CBQ!\n", __func__);
		return (NULL);
	}
#endif /* !CLASSQ_BLUE */

	/* These are mutually exclusive */
	if ((flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) &&
	    (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_RED &&
	    (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_RIO &&
	    (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_BLUE &&
	    (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_SFB) {
		log(LOG_ERR, "%s: RED|RIO|BLUE|SFB mutually exclusive\n",
		    __func__);
		return (NULL);
	}

	cl = zalloc(rmc_zone);
	if (cl == NULL)
		return (NULL);

	bzero(cl, rmc_size);
	CALLOUT_INIT(&cl->callout_);

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	ifq = ifd->ifq_;
	ifp = ifq->ifcq_ifp;

	if (maxq == 0 || maxq > IFCQ_MAXLEN(ifq)) {
		maxq = IFCQ_MAXLEN(ifq);
		if (maxq == 0)
			maxq = DEFAULT_QLIMIT;	/* use default */
	}
	_qinit(&cl->q_, Q_DROPHEAD, maxq);

	cl->flags_ = flags;

	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;

	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;

	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;

	cl->overlimit = action;

	if (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) {
		int pkttime;

		cl->qflags_ = 0;
		if (flags & RMCF_ECN) {
			if (flags & RMCF_BLUE)
				cl->qflags_ |= BLUEF_ECN;
			else if (flags & RMCF_SFB)
				cl->qflags_ |= SFBF_ECN;
			else if (flags & RMCF_RED)
				cl->qflags_ |= REDF_ECN;
			else if (flags & RMCF_RIO)
				cl->qflags_ |= RIOF_ECN;
		}
		if (flags & RMCF_FLOWCTL) {
			if (flags & RMCF_SFB)
				cl->qflags_ |= SFBF_FLOWCTL;
		}
		if (flags & RMCF_FLOWVALVE) {
			if (flags & RMCF_RED)
				cl->qflags_ |= REDF_FLOWVALVE;
		}
		if (flags & RMCF_CLEARDSCP) {
			if (flags & RMCF_RIO)
				cl->qflags_ |= RIOF_CLEARDSCP;
		}
		pkttime = nsecPerByte * pktsize / 1000;

		/* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
#if CLASSQ_RED
		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(ifp, 0, 0,
			    qlimit(&cl->q_) * 10/100,
			    qlimit(&cl->q_) * 30/100,
			    cl->qflags_, pkttime);
			if (cl->red_ != NULL)
				qtype(&cl->q_) = Q_RED;
		}
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
		if (flags & RMCF_RIO) {
			cl->rio_ =
			    rio_alloc(ifp, 0, NULL, cl->qflags_, pkttime);
			if (cl->rio_ != NULL)
				qtype(&cl->q_) = Q_RIO;
		}
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
		if (flags & RMCF_BLUE) {
			cl->blue_ = blue_alloc(ifp, 0, 0, cl->qflags_);
			if (cl->blue_ != NULL)
				qtype(&cl->q_) = Q_BLUE;
		}
#endif /* CLASSQ_BLUE */
		if (flags & RMCF_SFB) {
			if (!(cl->flags_ & RMCF_LAZY))
				cl->sfb_ = sfb_alloc(ifp, qid,
				    qlimit(&cl->q_), cl->qflags_);
			if (cl->sfb_ != NULL || (cl->flags_ & RMCF_LAZY))
				qtype(&cl->q_) = Q_SFB;
		}
	}

	/*
	 * put the class into the class tree
	 */
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	return (cl);
}

int
rmc_modclass(struct rm_class *cl, u_int32_t nsecPerByte, int maxq,
    u_int32_t maxidle, int minidle, u_int32_t offtime, int pktsize)
{
#pragma unused(pktsize)
	struct rm_ifdat *ifd;
	u_int32_t old_allotment;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(&cl->q_) = maxq;

	cl->minidle_ = (minidle * nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;

	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;

	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;

	/*
	 * If CBQ's WRR is enabled, then update the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	return (0);
}

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 *	the appropriate round-robin weights for the CBQ weighted
 *	round-robin algorithm.
 *
 *	Returns: NONE
 */
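
/*
 * Worked example (illustrative numbers): two classes at one priority
 * with allotments of 750000 and 250000 bytes/sec on an interface with
 * maxpkt_ = 1500 give alloc_[i] = 1000000 and num_[i] = 2, so the
 * code below computes
 *
 *	M_[i] = 1000000 / (2 * 1500) = 333
 *	w_allotment_ = 750000 / 333 = 2252 and 250000 / 333 = 750
 *
 * preserving the intended 3:1 service ratio in bytes per WRR round.
 */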

static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
	int i;
	struct rm_class *cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0) {
			ifd->M_[i] = 0;
		} else {
			ifd->M_[i] =
			    ifd->alloc_[i] / (ifd->num_[i] * ifd->maxpkt_);
		}
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0) {
					cl->w_allotment_ = 0;
				} else {
					cl->w_allotment_ =
					    cl->allotment_ / ifd->M_[i];
				}
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}

int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 *	Returns:	NONE
 */
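
/*
 * Illustration (assumed scenario): attaching a new leaf (depth_ == 0)
 * under a parent that was itself a leaf raises the parent to depth 1,
 * the grandparent to depth 2, and so on up the tree, stopping at the
 * first ancestor that is already deep enough.
 */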

static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t *t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
 *
 *	Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
	rm_class_t *p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 *	Returns:	NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class *p, *head, *previous;

	VERIFY(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 * re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			VERIFY(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			VERIFY(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
	rmc_depth_recompute(cl->parent_);

	/*
	 * Free the class structure.
	 */
	if (cl->qalg_.ptr != NULL) {
#if CLASSQ_RIO
		if (q_is_rio(&cl->q_))
			rio_destroy(cl->rio_);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->q_))
			red_destroy(cl->red_);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->q_))
			blue_destroy(cl->blue_);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
			sfb_destroy(cl->sfb_);
		cl->qalg_.ptr = NULL;
		qtype(&cl->q_) = Q_DROPTAIL;
		qstate(&cl->q_) = QS_RUNNING;
	}
	zfree(rmc_zone, cl);
}


/*
 * int
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 *	Returns:	0 on success
 */

int
rmc_init(struct ifclassq *ifq, struct rm_ifdat *ifd, u_int32_t nsecPerByte,
    void (*restart)(struct ifclassq *), u_int32_t qid, int maxq, int maxqueued,
    u_int32_t maxidle, int minidle, u_int32_t offtime, int flags)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	int i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	if (nsecPerByte == 0) {
		log(LOG_ERR, "%s: %s: invalid inverse data rate\n",
		    __func__, if_name(ifp));
		return (EINVAL);
	}

	mtu = ifp->if_mtu;
	if (mtu < 1) {
		log(LOG_ERR, "%s: %s: invalid MTU (interface not "
		    "initialized?)\n", __func__, if_name(ifp));
		return (EINVAL);
	}
	bzero((char *)ifd, sizeof (*ifd));

	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd, nsecPerByte,
	    rmc_root_overlimit, qid, maxq, 0, 0, maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		log(LOG_ERR, "rmc_init: root class not allocated\n");
		return (ENOMEM);
	}
	ifd->root_->depth_ = 0;

	return (0);
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, struct mbuf *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 *	Returns:	0 on successful queueing
 *			CLASSQEQ_DROPPED when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, struct mbuf *m, struct pf_mtag *t)
{
	struct timeval now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int cpri = cl->pri_;
	int is_empty = qempty(&cl->q_);
	int ret = 0;

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		} else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			    borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob',
					    ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
	}

	ret = _rmc_addq(cl, m, t);
	if (ret != 0 &&
	    (ret == CLASSQEQ_DROPPED || ret == CLASSQEQ_DROPPED_FC ||
	    ret == CLASSQEQ_DROPPED_SP)) {
		/* failed */
		return (ret);
	}
	VERIFY(ret == 0 || ret == CLASSQEQ_SUCCESS_FC);
	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'type', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(&cl->q_) > qlimit(&cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (CLASSQEQ_DROPPED);
	}
	return (ret);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int i;
	rm_class_t *p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(&cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p = cl;
	rm_class_t *top;
	struct rm_ifdat *ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/*
				 * cutoff is taking effect, just
				 * return false without calling
				 * the delay action.
				 */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/*
				 * cutoff is taking effect, use this
				 * class as top.
				 */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR, as opposed
 * to packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

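/*
 * Continuing the illustrative numbers from the rmc_wrr_set_weights()
 * example: with w_allotment_ values of 2252 and 750 bytes, each class
 * gets bytes_alloc_ topped up by its weight once per round and sends
 * until that allocation is exhausted, so over many rounds the first
 * class moves roughly three times the bytes of the second.  The
 * 'deficit' second pass below covers the case where a weight is
 * smaller than the packet at the head of the queue.
 */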
static struct mbuf *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t op)
{
	struct rm_class *cl = NULL, *first = NULL;
	u_int32_t deficit;
	int cpri;
	struct mbuf *m;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == CLASSQDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	} else {
		/* mode == CLASSQDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
_again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
1071 | * "M[cl->pri_])" times "cl->allotment" is greater than | |
		 * the byte size for the largest packet in the class.)
		 */
_wrr_loop:
		cl = ifd->active_[cpri];
		VERIFY(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(&cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				} else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
_wrr_out:
	if (op == CLASSQDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			return (NULL);

		if (qempty(&cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == CLASSQDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static struct mbuf *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t op)
{
	struct mbuf *m;
	int cpri;
	struct rm_class *cl, *first = NULL;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == CLASSQDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == CLASSQDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
_again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		VERIFY(cl != NULL);
		do {
			if (!qempty(&cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
_prr_out:
	if (op == CLASSQDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			return (NULL);

		if (qempty(&cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == CLASSQDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * struct mbuf *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to send.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 *	Returns:	NULL, if a packet is not available or if all
 *			classes are overlimit.
 *
 *			Otherwise, Pointer to the next packet.
 */
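
/*
 * Typical driver usage (sketch; these are the op values used
 * throughout this file):
 *
 *	m = rmc_dequeue_next(ifd, CLASSQDQ_POLL);	(inspect head)
 *	m = rmc_dequeue_next(ifd, CLASSQDQ_REMOVE);	(same mbuf, dequeued)
 *
 * the pollcache_ logic in the workers above guarantees that a REMOVE
 * immediately following a POLL returns the packet that was polled.
 */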

struct mbuf *
rmc_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
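/*
 * Sanity check (illustrative): the shifts sum to
 * 1/1024 + 1/65536 + 1/131072 = 0.00099945..., so e.g. t = 1000000 ns
 * gives 976 + 15 + 7 = 998 usec against an exact 1000 usec, about
 * 0.2% low, which is more than close enough for this estimator.
 */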
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int idle, avgidle, pktlen;
	int pkt_time, tidle;
	rm_class_t *cl, *borrowed;
	rm_class_t *borrows;
	struct timeval *nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, 1, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1	/* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1	/* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1	/* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo',
			    cl->stats_.handle);
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;

			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
			    (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, 1, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
		if ((qlen(&cl->q_) <= 0) ||
		    TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob',
			    borrowed->depth_);
		}
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 *	Returns:	NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;

	VERIFY(qlen(&cl->q_) > 0);
	IFCQ_CONVERT_LOCK(ifd->ifq_);
	_rmc_dropq(cl);
	if (qempty(&cl->q_))
		ifd->na_[cl->pri_]--;
}

void
rmc_drop(struct rm_class *cl, u_int32_t flow, u_int32_t *packets,
    u_int32_t *bytes)
{
	struct rm_ifdat *ifd = cl->ifdat_;
	struct ifclassq *ifq = ifd->ifq_;
	u_int32_t pkt = 0, len = 0, qlen;

	if ((qlen = qlen(&cl->q_)) != 0) {
		IFCQ_CONVERT_LOCK(ifq);
#if CLASSQ_RIO
		if (q_is_rio(&cl->q_))
			rio_purgeq(cl->rio_, &cl->q_, flow, &pkt, &len);
		else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->q_))
			red_purgeq(cl->red_, &cl->q_, flow, &pkt, &len);
		else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->q_))
			blue_purgeq(cl->blue_, &cl->q_, flow, &pkt, &len);
		else
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
			sfb_purgeq(cl->sfb_, &cl->q_, flow, &pkt, &len);
		else
			_flushq_flow(&cl->q_, flow, &pkt, &len);

		if (pkt > 0) {
			VERIFY(qlen(&cl->q_) == (qlen - pkt));

			PKTCNTR_ADD(&cl->stats_.drop_cnt, pkt, len);
			IFCQ_DROP_ADD(ifq, pkt, len);

			VERIFY(((signed)IFCQ_LEN(ifq) - pkt) >= 0);
			IFCQ_LEN(ifq) -= pkt;

			if (qempty(&cl->q_))
				ifd->na_[cl->pri_]--;
		}
	}
	if (packets != NULL)
		*packets = pkt;
	if (bytes != NULL)
		*bytes = len;
}

void
rmc_dropall(struct rm_class *cl)
{
	rmc_drop(cl, 0, NULL, NULL);
}

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 *	If the delay action is the result of the borrow class being overlimit,
 *	then delay for the offtime of the borrowing class that is overlimit.
 *
 *	Returns:	NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int ndelay, t, extradelay;

	cl->stats_.overactions++;
	TV_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
#ifndef BORROW_OFFTIME
	ndelay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay,
			    &cl->undertime_);
			ndelay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (ndelay > tick * 2) {
			/*
			 * FreeBSD rounds up the tick;
			 * other BSDs round down the tick.
			 */
			t = hzto(&cl->undertime_) + 1;
		} else {
			t = 2;
		}
		CALLOUT_RESET(&cl->callout_, t,
		    (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has
 *	to know about system locking conventions.
 *
 *	Returns:	NONE
 */

static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;

	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link sharing structure.
 *
 *	Returns:	NONE
 */
static void
rmc_root_overlimit(struct rm_class *cl,
    struct rm_class *borrow)
{
#pragma unused(cl, borrow)
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 * effects on the code whether queues are red queues or droptail
 * queues.
 */

static int
_rmc_addq(rm_class_t *cl, struct mbuf *m, struct pf_mtag *t)
{
#if CLASSQ_RIO
	if (q_is_rio(&cl->q_))
		return (rio_addq(cl->rio_, &cl->q_, m, t));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->q_))
		return (red_addq(cl->red_, &cl->q_, m, t));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->q_))
		return (blue_addq(cl->blue_, &cl->q_, m, t));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->q_)) {
		if (cl->sfb_ == NULL) {
			struct ifclassq *ifq = cl->ifdat_->ifq_;
			struct ifnet *ifp = ifq->ifcq_ifp;

			VERIFY(cl->flags_ & RMCF_LAZY);
			IFCQ_CONVERT_LOCK(ifq);

			cl->sfb_ = sfb_alloc(ifp, cl->stats_.handle,
			    qlimit(&cl->q_), cl->qflags_);
			if (cl->sfb_ == NULL) {
				/* fall back to droptail */
				qtype(&cl->q_) = Q_DROPTAIL;
				cl->flags_ &= ~RMCF_SFB;
				cl->qflags_ &= ~(SFBF_ECN | SFBF_FLOWCTL);

				log(LOG_ERR, "%s: CBQ SFB lazy allocation "
				    "failed for qid=%d pri=%d, falling back "
				    "to DROPTAIL\n", if_name(ifp),
				    cl->stats_.handle, cl->pri_);
			}
		}
		if (cl->sfb_ != NULL)
			return (sfb_addq(cl->sfb_, &cl->q_, m, t));
	}
#if PF_ECN
	else if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, t, 0);
#endif /* PF_ECN */

	/* test for qlen > qlimit is done by caller */
	_addq(&cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
	struct mbuf *m;

	if ((m = _rmc_getq(cl)) != NULL)
		m_freem(m);
}

static struct mbuf *
_rmc_getq(rm_class_t *cl)
{
#if CLASSQ_RIO
	if (q_is_rio(&cl->q_))
		return (rio_getq(cl->rio_, &cl->q_));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->q_))
		return (red_getq(cl->red_, &cl->q_));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->q_))
		return (blue_getq(cl->blue_, &cl->q_));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
		return (sfb_getq(cl->sfb_, &cl->q_));

	return (_getq(&cl->q_));
}

static struct mbuf *
_rmc_pollq(rm_class_t *cl)
{
	return (qhead(&cl->q_));
}

void
rmc_updateq(rm_class_t *cl, cqev_t ev)
{
#if CLASSQ_RIO
	if (q_is_rio(&cl->q_))
		return (rio_updateq(cl->rio_, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->q_))
		return (red_updateq(cl->red_, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->q_))
		return (blue_updateq(cl->blue_, ev));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
		return (sfb_updateq(cl->sfb_, ev));
}

#ifdef CBQ_TRACE

struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace *cbqtrace_ptr = NULL;
int cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
	void *func;
	char *name;
} rmc_funcs[] = {
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};

static char *
rmc_funcname(void *func)
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

void
cbqtrace_dump(int counter)
{
	int i, *p;
	char *cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i = 0; i < 20; i++) {
		log(LOG_DEBUG, "[0x%x] ", *p++);
		log(LOG_DEBUG, "%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		log(LOG_DEBUG, "%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		log(LOG_DEBUG, "%d\n", *p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */
#endif /* PKTSCHED_CBQ */