]>
git.saurik.com Git - apple/xnu.git/blob - bsd/net/pktsched/pktsched_hfsc.c
2 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 /* $OpenBSD: altq_hfsc.c,v 1.25 2007/09/13 20:40:02 chl Exp $ */
30 /* $KAME: altq_hfsc.c,v 1.17 2002/11/29 07:48:33 kjc Exp $ */
33 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
35 * Permission to use, copy, modify, and distribute this software and
36 * its documentation is hereby granted (including for commercial or
37 * for-profit use), provided that both the copyright notice and this
38 * permission notice appear in all copies of the software, derivative
39 * works, or modified versions, and any portions thereof.
41 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
42 * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
43 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
44 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
45 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
46 * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
48 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
49 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
50 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
51 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
53 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
56 * Carnegie Mellon encourages (but does not require) users of this
57 * software to return any improvements or extensions that they make,
58 * and to grant Carnegie Mellon the rights to redistribute these
59 * changes without encumbrance.
62 * H-FSC is described in Proceedings of SIGCOMM'97,
63 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
64 * Real-Time and Priority Service"
65 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
67 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
68 * when a class has an upperlimit, the fit-time is computed from the
69 * upperlimit service curve. the link-sharing scheduler does not schedule
70 * a class whose fit-time exceeds the current time.
75 #include <sys/cdefs.h>
76 #include <sys/param.h>
77 #include <sys/malloc.h>
79 #include <sys/systm.h>
80 #include <sys/errno.h>
81 #include <sys/kernel.h>
82 #include <sys/syslog.h>
84 #include <kern/zalloc.h>
87 #include <net/net_osdep.h>
89 #include <net/pktsched/pktsched_hfsc.h>
90 #include <netinet/in.h>
96 static int hfsc_enqueue_ifclassq(struct ifclassq
*, struct mbuf
*);
97 static struct mbuf
*hfsc_dequeue_ifclassq(struct ifclassq
*, cqdq_op_t
);
98 static int hfsc_request_ifclassq(struct ifclassq
*, cqrq_t
, void *);
100 static int hfsc_addq(struct hfsc_class
*, struct mbuf
*, struct pf_mtag
*);
101 static struct mbuf
*hfsc_getq(struct hfsc_class
*);
102 static struct mbuf
*hfsc_pollq(struct hfsc_class
*);
103 static void hfsc_purgeq(struct hfsc_if
*, struct hfsc_class
*, u_int32_t
,
104 u_int32_t
*, u_int32_t
*);
105 static void hfsc_print_sc(struct hfsc_if
*, u_int32_t
, u_int64_t
,
106 struct service_curve
*, struct internal_sc
*, const char *);
107 static void hfsc_updateq_linkrate(struct hfsc_if
*, struct hfsc_class
*);
108 static void hfsc_updateq(struct hfsc_if
*, struct hfsc_class
*, cqev_t
);
110 static int hfsc_clear_interface(struct hfsc_if
*);
111 static struct hfsc_class
*hfsc_class_create(struct hfsc_if
*,
112 struct service_curve
*, struct service_curve
*, struct service_curve
*,
113 struct hfsc_class
*, u_int32_t
, int, u_int32_t
);
114 static int hfsc_class_destroy(struct hfsc_if
*, struct hfsc_class
*);
115 static int hfsc_destroy_locked(struct hfsc_if
*);
116 static struct hfsc_class
*hfsc_nextclass(struct hfsc_class
*);
117 static struct hfsc_class
*hfsc_clh_to_clp(struct hfsc_if
*, u_int32_t
);
118 static const char *hfsc_style(struct hfsc_if
*);
120 static void set_active(struct hfsc_class
*, u_int32_t
);
121 static void set_passive(struct hfsc_class
*);
123 static void init_ed(struct hfsc_class
*, u_int32_t
);
124 static void update_ed(struct hfsc_class
*, u_int32_t
);
125 static void update_d(struct hfsc_class
*, u_int32_t
);
126 static void init_vf(struct hfsc_class
*, u_int32_t
);
127 static void update_vf(struct hfsc_class
*, u_int32_t
, u_int64_t
);
128 static void update_cfmin(struct hfsc_class
*);
129 static void ellist_insert(struct hfsc_class
*);
130 static void ellist_remove(struct hfsc_class
*);
131 static void ellist_update(struct hfsc_class
*);
132 static struct hfsc_class
*ellist_get_mindl(ellist_t
*, u_int64_t
);
133 static void actlist_insert(struct hfsc_class
*);
134 static void actlist_remove(struct hfsc_class
*);
135 static void actlist_update(struct hfsc_class
*);
136 static struct hfsc_class
*actlist_firstfit(struct hfsc_class
*, u_int64_t
);
138 static inline u_int64_t
seg_x2y(u_int64_t
, u_int64_t
);
139 static inline u_int64_t
seg_y2x(u_int64_t
, u_int64_t
);
140 static inline u_int64_t
m2sm(u_int64_t
);
141 static inline u_int64_t
m2ism(u_int64_t
);
142 static inline u_int64_t
d2dx(u_int64_t
);
143 static u_int64_t
sm2m(u_int64_t
);
144 static u_int64_t
dx2d(u_int64_t
);
146 static boolean_t
sc2isc(struct hfsc_class
*, struct service_curve
*,
147 struct internal_sc
*, u_int64_t
);
148 static void rtsc_init(struct runtime_sc
*, struct internal_sc
*,
149 u_int64_t
, u_int64_t
);
150 static u_int64_t
rtsc_y2x(struct runtime_sc
*, u_int64_t
);
151 static u_int64_t
rtsc_x2y(struct runtime_sc
*, u_int64_t
);
152 static void rtsc_min(struct runtime_sc
*, struct internal_sc
*,
153 u_int64_t
, u_int64_t
);
155 #define HFSC_ZONE_MAX 32 /* maximum elements in zone */
156 #define HFSC_ZONE_NAME "pktsched_hfsc" /* zone name */
158 static unsigned int hfsc_size
; /* size of zone element */
159 static struct zone
*hfsc_zone
; /* zone for hfsc_if */
161 #define HFSC_CL_ZONE_MAX 32 /* maximum elements in zone */
162 #define HFSC_CL_ZONE_NAME "pktsched_hfsc_cl" /* zone name */
164 static unsigned int hfsc_cl_size
; /* size of zone element */
165 static struct zone
*hfsc_cl_zone
; /* zone for hfsc_class */
170 #define HFSC_IS_A_PARENT_CLASS(cl) ((cl)->cl_children != NULL)
172 #define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
177 hfsc_size
= sizeof (struct hfsc_if
);
178 hfsc_zone
= zinit(hfsc_size
, HFSC_ZONE_MAX
* hfsc_size
,
180 if (hfsc_zone
== NULL
) {
181 panic("%s: failed allocating %s", __func__
, HFSC_ZONE_NAME
);
184 zone_change(hfsc_zone
, Z_EXPAND
, TRUE
);
185 zone_change(hfsc_zone
, Z_CALLERACCT
, TRUE
);
187 hfsc_cl_size
= sizeof (struct hfsc_class
);
188 hfsc_cl_zone
= zinit(hfsc_cl_size
, HFSC_CL_ZONE_MAX
* hfsc_cl_size
,
189 0, HFSC_CL_ZONE_NAME
);
190 if (hfsc_cl_zone
== NULL
) {
191 panic("%s: failed allocating %s", __func__
, HFSC_CL_ZONE_NAME
);
194 zone_change(hfsc_cl_zone
, Z_EXPAND
, TRUE
);
195 zone_change(hfsc_cl_zone
, Z_CALLERACCT
, TRUE
);
199 hfsc_alloc(struct ifnet
*ifp
, int how
, boolean_t altq
)
203 hif
= (how
== M_WAITOK
) ? zalloc(hfsc_zone
) : zalloc_noblock(hfsc_zone
);
207 bzero(hif
, hfsc_size
);
208 TAILQ_INIT(&hif
->hif_eligible
);
209 hif
->hif_ifq
= &ifp
->if_snd
;
211 hif
->hif_maxclasses
= HFSC_MAX_CLASSES
;
212 hif
->hif_flags
|= HFSCIFF_ALTQ
;
214 hif
->hif_maxclasses
= IFCQ_SC_MAX
+ 1; /* incl. root class */
217 if ((hif
->hif_class_tbl
= _MALLOC(sizeof (struct hfsc_class
*) *
218 hif
->hif_maxclasses
, M_DEVBUF
, M_WAITOK
|M_ZERO
)) == NULL
) {
219 log(LOG_ERR
, "%s: %s unable to allocate class table array\n",
220 if_name(ifp
), hfsc_style(hif
));
224 if (pktsched_verbose
) {
225 log(LOG_DEBUG
, "%s: %s scheduler allocated\n",
226 if_name(ifp
), hfsc_style(hif
));
232 if (hif
->hif_class_tbl
!= NULL
) {
233 _FREE(hif
->hif_class_tbl
, M_DEVBUF
);
234 hif
->hif_class_tbl
= NULL
;
236 zfree(hfsc_zone
, hif
);
242 hfsc_destroy(struct hfsc_if
*hif
)
244 struct ifclassq
*ifq
= hif
->hif_ifq
;
248 err
= hfsc_destroy_locked(hif
);
255 hfsc_destroy_locked(struct hfsc_if
*hif
)
257 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
259 (void) hfsc_clear_interface(hif
);
260 (void) hfsc_class_destroy(hif
, hif
->hif_rootclass
);
262 VERIFY(hif
->hif_class_tbl
!= NULL
);
263 _FREE(hif
->hif_class_tbl
, M_DEVBUF
);
264 hif
->hif_class_tbl
= NULL
;
266 if (pktsched_verbose
) {
267 log(LOG_DEBUG
, "%s: %s scheduler destroyed\n",
268 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
271 zfree(hfsc_zone
, hif
);
277 * bring the interface back to the initial state by discarding
278 * all the filters and classes except the root class.
281 hfsc_clear_interface(struct hfsc_if
*hif
)
283 struct hfsc_class
*cl
;
285 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
287 /* clear out the classes */
288 while (hif
->hif_rootclass
!= NULL
&&
289 (cl
= hif
->hif_rootclass
->cl_children
) != NULL
) {
291 * remove the first leaf class found in the hierarchy
294 for (; cl
!= NULL
; cl
= hfsc_nextclass(cl
)) {
295 if (!HFSC_IS_A_PARENT_CLASS(cl
)) {
296 (void) hfsc_class_destroy(hif
, cl
);
305 /* discard all the queued packets on the interface */
307 hfsc_purge(struct hfsc_if
*hif
)
309 struct hfsc_class
*cl
;
311 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
313 for (cl
= hif
->hif_rootclass
; cl
!= NULL
; cl
= hfsc_nextclass(cl
)) {
314 if (!qempty(&cl
->cl_q
))
315 hfsc_purgeq(hif
, cl
, 0, NULL
, NULL
);
319 * This assertion is safe to be made only when PF_ALTQ is not
320 * configured; otherwise, IFCQ_LEN represents the sum of the
321 * packets managed by ifcq_disc and altq_disc instances, which
322 * is possible when transitioning between the two.
324 VERIFY(IFCQ_LEN(hif
->hif_ifq
) == 0);
325 #endif /* !PF_ALTQ */
329 hfsc_event(struct hfsc_if
*hif
, cqev_t ev
)
331 struct hfsc_class
*cl
;
333 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
335 for (cl
= hif
->hif_rootclass
; cl
!= NULL
; cl
= hfsc_nextclass(cl
))
336 hfsc_updateq(hif
, cl
, ev
);
340 hfsc_add_queue(struct hfsc_if
*hif
, struct service_curve
*rtsc
,
341 struct service_curve
*lssc
, struct service_curve
*ulsc
,
342 u_int32_t qlimit
, int flags
, u_int32_t parent_qid
, u_int32_t qid
,
343 struct hfsc_class
**clp
)
345 struct hfsc_class
*cl
= NULL
, *parent
;
347 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
349 if (parent_qid
== HFSC_NULLCLASS_HANDLE
&& hif
->hif_rootclass
== NULL
)
351 else if ((parent
= hfsc_clh_to_clp(hif
, parent_qid
)) == NULL
)
354 if (hfsc_clh_to_clp(hif
, qid
) != NULL
)
357 cl
= hfsc_class_create(hif
, rtsc
, lssc
, ulsc
, parent
,
368 static struct hfsc_class
*
369 hfsc_class_create(struct hfsc_if
*hif
, struct service_curve
*rsc
,
370 struct service_curve
*fsc
, struct service_curve
*usc
,
371 struct hfsc_class
*parent
, u_int32_t qlimit
, int flags
, u_int32_t qid
)
374 struct ifclassq
*ifq
;
375 struct hfsc_class
*cl
, *p
;
379 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
381 /* Sanitize flags unless internally configured */
382 if (hif
->hif_flags
& HFSCIFF_ALTQ
)
383 flags
&= HFCF_USERFLAGS
;
385 if (hif
->hif_classes
>= hif
->hif_maxclasses
) {
386 log(LOG_ERR
, "%s: %s out of classes! (max %d)\n",
387 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
),
388 hif
->hif_maxclasses
);
393 if (flags
& HFCF_RED
) {
394 log(LOG_ERR
, "%s: %s RED not available!\n",
395 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
398 #endif /* !CLASSQ_RED */
401 if (flags
& HFCF_RIO
) {
402 log(LOG_ERR
, "%s: %s RIO not available!\n",
403 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
406 #endif /* CLASSQ_RIO */
409 if (flags
& HFCF_BLUE
) {
410 log(LOG_ERR
, "%s: %s BLUE not available!\n",
411 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
414 #endif /* CLASSQ_BLUE */
416 /* These are mutually exclusive */
417 if ((flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) &&
418 (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) != HFCF_RED
&&
419 (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) != HFCF_RIO
&&
420 (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) != HFCF_BLUE
&&
421 (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) != HFCF_SFB
) {
422 log(LOG_ERR
, "%s: %s more than one RED|RIO|BLUE|SFB\n",
423 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
427 cl
= zalloc(hfsc_cl_zone
);
431 bzero(cl
, hfsc_cl_size
);
432 TAILQ_INIT(&cl
->cl_actc
);
434 ifp
= HFSCIF_IFP(hif
);
436 if (qlimit
== 0 || qlimit
> IFCQ_MAXLEN(ifq
)) {
437 qlimit
= IFCQ_MAXLEN(ifq
);
439 qlimit
= DEFAULT_QLIMIT
; /* use default */
441 _qinit(&cl
->cl_q
, Q_DROPTAIL
, qlimit
);
443 cl
->cl_flags
= flags
;
444 if (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) {
445 #if CLASSQ_RED || CLASSQ_RIO
447 #endif /* CLASSQ_RED || CLASSQ_RIO */
451 if (rsc
!= NULL
&& rsc
->m2
> m2
)
453 if (fsc
!= NULL
&& fsc
->m2
> m2
)
455 if (usc
!= NULL
&& usc
->m2
> m2
)
459 if (flags
& HFCF_ECN
) {
460 if (flags
& HFCF_BLUE
)
461 cl
->cl_qflags
|= BLUEF_ECN
;
462 else if (flags
& HFCF_SFB
)
463 cl
->cl_qflags
|= SFBF_ECN
;
464 else if (flags
& HFCF_RED
)
465 cl
->cl_qflags
|= REDF_ECN
;
466 else if (flags
& HFCF_RIO
)
467 cl
->cl_qflags
|= RIOF_ECN
;
469 if (flags
& HFCF_FLOWCTL
) {
470 if (flags
& HFCF_SFB
)
471 cl
->cl_qflags
|= SFBF_FLOWCTL
;
473 if (flags
& HFCF_CLEARDSCP
) {
474 if (flags
& HFCF_RIO
)
475 cl
->cl_qflags
|= RIOF_CLEARDSCP
;
477 #if CLASSQ_RED || CLASSQ_RIO
479 * XXX: RED & RIO should be watching link speed and MTU
480 * events and recompute pkttime accordingly.
483 pkttime
= 1000 * 1000 * 1000; /* 1 sec */
485 pkttime
= (int64_t)ifp
->if_mtu
* 1000 * 1000 * 1000 /
488 /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
490 if (flags
& HFCF_RED
) {
491 cl
->cl_red
= red_alloc(ifp
, 0, 0,
492 qlimit(&cl
->cl_q
) * 10/100,
493 qlimit(&cl
->cl_q
) * 30/100,
494 cl
->cl_qflags
, pkttime
);
495 if (cl
->cl_red
!= NULL
)
496 qtype(&cl
->cl_q
) = Q_RED
;
498 #endif /* CLASSQ_RED */
500 if (flags
& HFCF_RIO
) {
502 rio_alloc(ifp
, 0, NULL
, cl
->cl_qflags
, pkttime
);
503 if (cl
->cl_rio
!= NULL
)
504 qtype(&cl
->cl_q
) = Q_RIO
;
506 #endif /* CLASSQ_RIO */
507 #endif /* CLASSQ_RED || CLASSQ_RIO */
509 if (flags
& HFCF_BLUE
) {
510 cl
->cl_blue
= blue_alloc(ifp
, 0, 0, cl
->cl_qflags
);
511 if (cl
->cl_blue
!= NULL
)
512 qtype(&cl
->cl_q
) = Q_BLUE
;
514 #endif /* CLASSQ_BLUE */
515 if (flags
& HFCF_SFB
) {
516 if (!(cl
->cl_flags
& HFCF_LAZY
))
517 cl
->cl_sfb
= sfb_alloc(ifp
, qid
,
518 qlimit(&cl
->cl_q
), cl
->cl_qflags
);
519 if (cl
->cl_sfb
!= NULL
|| (cl
->cl_flags
& HFCF_LAZY
))
520 qtype(&cl
->cl_q
) = Q_SFB
;
524 cl
->cl_id
= hif
->hif_classid
++;
527 cl
->cl_parent
= parent
;
529 eff_rate
= ifnet_output_linkrate(HFSCIF_IFP(hif
));
530 hif
->hif_eff_rate
= eff_rate
;
532 if (rsc
!= NULL
&& (rsc
->m1
!= 0 || rsc
->m2
!= 0) &&
533 (!(rsc
->fl
& HFSCF_M1_PCT
) || (rsc
->m1
> 0 && rsc
->m1
<= 100)) &&
534 (!(rsc
->fl
& HFSCF_M2_PCT
) || (rsc
->m2
> 0 && rsc
->m2
<= 100))) {
535 rsc
->fl
&= HFSCF_USERFLAGS
;
536 cl
->cl_flags
|= HFCF_RSC
;
538 (void) sc2isc(cl
, &cl
->cl_rsc0
, &cl
->cl_rsc
, eff_rate
);
539 rtsc_init(&cl
->cl_deadline
, &cl
->cl_rsc
, 0, 0);
540 rtsc_init(&cl
->cl_eligible
, &cl
->cl_rsc
, 0, 0);
542 if (fsc
!= NULL
&& (fsc
->m1
!= 0 || fsc
->m2
!= 0) &&
543 (!(fsc
->fl
& HFSCF_M1_PCT
) || (fsc
->m1
> 0 && fsc
->m1
<= 100)) &&
544 (!(fsc
->fl
& HFSCF_M2_PCT
) || (fsc
->m2
> 0 && fsc
->m2
<= 100))) {
545 fsc
->fl
&= HFSCF_USERFLAGS
;
546 cl
->cl_flags
|= HFCF_FSC
;
548 (void) sc2isc(cl
, &cl
->cl_fsc0
, &cl
->cl_fsc
, eff_rate
);
549 rtsc_init(&cl
->cl_virtual
, &cl
->cl_fsc
, 0, 0);
551 if (usc
!= NULL
&& (usc
->m1
!= 0 || usc
->m2
!= 0) &&
552 (!(usc
->fl
& HFSCF_M1_PCT
) || (usc
->m1
> 0 && usc
->m1
<= 100)) &&
553 (!(usc
->fl
& HFSCF_M2_PCT
) || (usc
->m2
> 0 && usc
->m2
<= 100))) {
554 usc
->fl
&= HFSCF_USERFLAGS
;
555 cl
->cl_flags
|= HFCF_USC
;
557 (void) sc2isc(cl
, &cl
->cl_usc0
, &cl
->cl_usc
, eff_rate
);
558 rtsc_init(&cl
->cl_ulimit
, &cl
->cl_usc
, 0, 0);
562 * find a free slot in the class table. if the slot matching
563 * the lower bits of qid is free, use this slot. otherwise,
564 * use the first free slot.
566 i
= qid
% hif
->hif_maxclasses
;
567 if (hif
->hif_class_tbl
[i
] == NULL
) {
568 hif
->hif_class_tbl
[i
] = cl
;
570 for (i
= 0; i
< hif
->hif_maxclasses
; i
++)
571 if (hif
->hif_class_tbl
[i
] == NULL
) {
572 hif
->hif_class_tbl
[i
] = cl
;
575 if (i
== hif
->hif_maxclasses
) {
581 if (flags
& HFCF_DEFAULTCLASS
)
582 hif
->hif_defaultclass
= cl
;
584 if (parent
== NULL
) {
585 /* this is root class */
586 hif
->hif_rootclass
= cl
;
588 /* add this class to the children list of the parent */
589 if ((p
= parent
->cl_children
) == NULL
)
590 parent
->cl_children
= cl
;
592 while (p
->cl_siblings
!= NULL
)
598 if (pktsched_verbose
) {
599 log(LOG_DEBUG
, "%s: %s created qid=%d pqid=%d qlimit=%d "
600 "flags=%b\n", if_name(ifp
), hfsc_style(hif
), cl
->cl_handle
,
601 (cl
->cl_parent
!= NULL
) ? cl
->cl_parent
->cl_handle
: 0,
602 qlimit(&cl
->cl_q
), cl
->cl_flags
, HFCF_BITS
);
603 if (cl
->cl_flags
& HFCF_RSC
) {
604 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
605 &cl
->cl_rsc0
, &cl
->cl_rsc
, "rsc");
607 if (cl
->cl_flags
& HFCF_FSC
) {
608 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
609 &cl
->cl_fsc0
, &cl
->cl_fsc
, "fsc");
611 if (cl
->cl_flags
& HFCF_USC
) {
612 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
613 &cl
->cl_usc0
, &cl
->cl_usc
, "usc");
620 if (cl
->cl_qalg
.ptr
!= NULL
) {
622 if (q_is_rio(&cl
->cl_q
))
623 rio_destroy(cl
->cl_rio
);
624 #endif /* CLASSQ_RIO */
626 if (q_is_red(&cl
->cl_q
))
627 red_destroy(cl
->cl_red
);
628 #endif /* CLASSQ_RED */
630 if (q_is_blue(&cl
->cl_q
))
631 blue_destroy(cl
->cl_blue
);
632 #endif /* CLASSQ_BLUE */
633 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
634 sfb_destroy(cl
->cl_sfb
);
635 cl
->cl_qalg
.ptr
= NULL
;
636 qtype(&cl
->cl_q
) = Q_DROPTAIL
;
637 qstate(&cl
->cl_q
) = QS_RUNNING
;
639 zfree(hfsc_cl_zone
, cl
);
644 hfsc_remove_queue(struct hfsc_if
*hif
, u_int32_t qid
)
646 struct hfsc_class
*cl
;
648 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
650 if ((cl
= hfsc_clh_to_clp(hif
, qid
)) == NULL
)
653 return (hfsc_class_destroy(hif
, cl
));
657 hfsc_class_destroy(struct hfsc_if
*hif
, struct hfsc_class
*cl
)
664 if (HFSC_IS_A_PARENT_CLASS(cl
))
667 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
669 if (!qempty(&cl
->cl_q
))
670 hfsc_purgeq(hif
, cl
, 0, NULL
, NULL
);
672 if (cl
->cl_parent
== NULL
) {
673 /* this is root class */
675 struct hfsc_class
*p
= cl
->cl_parent
->cl_children
;
678 cl
->cl_parent
->cl_children
= cl
->cl_siblings
;
680 if (p
->cl_siblings
== cl
) {
681 p
->cl_siblings
= cl
->cl_siblings
;
684 } while ((p
= p
->cl_siblings
) != NULL
);
688 for (i
= 0; i
< hif
->hif_maxclasses
; i
++)
689 if (hif
->hif_class_tbl
[i
] == cl
) {
690 hif
->hif_class_tbl
[i
] = NULL
;
696 if (cl
->cl_qalg
.ptr
!= NULL
) {
698 if (q_is_rio(&cl
->cl_q
))
699 rio_destroy(cl
->cl_rio
);
700 #endif /* CLASSQ_RIO */
702 if (q_is_red(&cl
->cl_q
))
703 red_destroy(cl
->cl_red
);
704 #endif /* CLASSQ_RED */
706 if (q_is_blue(&cl
->cl_q
))
707 blue_destroy(cl
->cl_blue
);
708 #endif /* CLASSQ_BLUE */
709 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
710 sfb_destroy(cl
->cl_sfb
);
711 cl
->cl_qalg
.ptr
= NULL
;
712 qtype(&cl
->cl_q
) = Q_DROPTAIL
;
713 qstate(&cl
->cl_q
) = QS_RUNNING
;
716 if (cl
== hif
->hif_rootclass
)
717 hif
->hif_rootclass
= NULL
;
718 if (cl
== hif
->hif_defaultclass
)
719 hif
->hif_defaultclass
= NULL
;
721 if (pktsched_verbose
) {
722 log(LOG_DEBUG
, "%s: %s destroyed qid=%d slot=%d\n",
723 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
),
724 cl
->cl_handle
, cl
->cl_id
);
727 zfree(hfsc_cl_zone
, cl
);
733 * hfsc_nextclass returns the next class in the tree.
735 * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
738 static struct hfsc_class
*
739 hfsc_nextclass(struct hfsc_class
*cl
)
741 IFCQ_LOCK_ASSERT_HELD(cl
->cl_hif
->hif_ifq
);
743 if (cl
->cl_children
!= NULL
)
744 cl
= cl
->cl_children
;
745 else if (cl
->cl_siblings
!= NULL
)
746 cl
= cl
->cl_siblings
;
748 while ((cl
= cl
->cl_parent
) != NULL
)
749 if (cl
->cl_siblings
) {
750 cl
= cl
->cl_siblings
;
759 hfsc_enqueue(struct hfsc_if
*hif
, struct hfsc_class
*cl
, struct mbuf
*m
,
762 struct ifclassq
*ifq
= hif
->hif_ifq
;
766 IFCQ_LOCK_ASSERT_HELD(ifq
);
767 VERIFY(cl
== NULL
|| cl
->cl_hif
== hif
);
771 cl
= hfsc_clh_to_clp(hif
, t
->pftag_qid
);
773 cl
= hfsc_clh_to_clp(hif
, 0);
774 #endif /* !PF_ALTQ */
775 if (cl
== NULL
|| HFSC_IS_A_PARENT_CLASS(cl
)) {
776 cl
= hif
->hif_defaultclass
;
778 IFCQ_CONVERT_LOCK(ifq
);
787 ret
= hfsc_addq(cl
, m
, t
);
789 if (ret
== CLASSQEQ_SUCCESS_FC
) {
790 /* packet enqueued, return advisory feedback */
793 VERIFY(ret
== CLASSQEQ_DROPPED
||
794 ret
== CLASSQEQ_DROPPED_FC
||
795 ret
== CLASSQEQ_DROPPED_SP
);
796 /* packet has been freed in hfsc_addq */
797 PKTCNTR_ADD(&cl
->cl_stats
.drop_cnt
, 1, len
);
798 IFCQ_DROP_ADD(ifq
, 1, len
);
800 case CLASSQEQ_DROPPED
:
802 case CLASSQEQ_DROPPED_FC
:
804 case CLASSQEQ_DROPPED_SP
:
805 return (EQSUSPENDED
);
811 IFCQ_INC_BYTES(ifq
, len
);
812 cl
->cl_hif
->hif_packets
++;
814 /* successfully queued. */
815 if (qlen(&cl
->cl_q
) == 1)
822 * note: CLASSQDQ_POLL returns the next packet without removing the packet
823 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
824 * CLASSQDQ_REMOVE must return the same packet if called immediately
825 * after CLASSQDQ_POLL.
828 hfsc_dequeue(struct hfsc_if
*hif
, cqdq_op_t op
)
830 struct ifclassq
*ifq
= hif
->hif_ifq
;
831 struct hfsc_class
*cl
;
833 u_int32_t len
, next_len
;
837 IFCQ_LOCK_ASSERT_HELD(ifq
);
839 if (hif
->hif_packets
== 0)
840 /* no packet in the tree */
843 cur_time
= read_machclk();
845 if (op
== CLASSQDQ_REMOVE
&& hif
->hif_pollcache
!= NULL
) {
847 cl
= hif
->hif_pollcache
;
848 hif
->hif_pollcache
= NULL
;
849 /* check if the class was scheduled by real-time criteria */
850 if (cl
->cl_flags
& HFCF_RSC
)
851 realtime
= (cl
->cl_e
<= cur_time
);
854 * if there are eligible classes, use real-time criteria.
855 * find the class with the minimum deadline among
856 * the eligible classes.
858 if ((cl
= ellist_get_mindl(&hif
->hif_eligible
, cur_time
))
864 * use link-sharing criteria
865 * get the class with the minimum vt in the hierarchy
867 cl
= hif
->hif_rootclass
;
868 while (HFSC_IS_A_PARENT_CLASS(cl
)) {
870 cl
= actlist_firstfit(cl
, cur_time
);
873 log(LOG_ERR
, "%s: %s "
874 "%d fit but none found\n",
875 if_name(HFSCIF_IFP(hif
)),
876 hfsc_style(hif
), fits
);
880 * update parent's cl_cvtmin.
881 * don't update if the new vt is smaller.
883 if (cl
->cl_parent
->cl_cvtmin
< cl
->cl_vt
)
884 cl
->cl_parent
->cl_cvtmin
= cl
->cl_vt
;
889 if (op
== CLASSQDQ_POLL
) {
890 hif
->hif_pollcache
= cl
;
899 cl
->cl_hif
->hif_packets
--;
901 IFCQ_DEC_BYTES(ifq
, len
);
902 IFCQ_XMIT_ADD(ifq
, 1, len
);
903 PKTCNTR_ADD(&cl
->cl_stats
.xmit_cnt
, 1, len
);
905 update_vf(cl
, len
, cur_time
);
909 if (!qempty(&cl
->cl_q
)) {
910 if (cl
->cl_flags
& HFCF_RSC
) {
912 next_len
= m_pktlen(qhead(&cl
->cl_q
));
915 update_ed(cl
, next_len
);
917 update_d(cl
, next_len
);
920 /* the class becomes passive */
929 hfsc_addq(struct hfsc_class
*cl
, struct mbuf
*m
, struct pf_mtag
*t
)
931 struct ifclassq
*ifq
= cl
->cl_hif
->hif_ifq
;
933 IFCQ_LOCK_ASSERT_HELD(ifq
);
936 if (q_is_rio(&cl
->cl_q
))
937 return (rio_addq(cl
->cl_rio
, &cl
->cl_q
, m
, t
));
939 #endif /* CLASSQ_RIO */
941 if (q_is_red(&cl
->cl_q
))
942 return (red_addq(cl
->cl_red
, &cl
->cl_q
, m
, t
));
944 #endif /* CLASSQ_RED */
946 if (q_is_blue(&cl
->cl_q
))
947 return (blue_addq(cl
->cl_blue
, &cl
->cl_q
, m
, t
));
949 #endif /* CLASSQ_BLUE */
950 if (q_is_sfb(&cl
->cl_q
)) {
951 if (cl
->cl_sfb
== NULL
) {
952 struct ifnet
*ifp
= HFSCIF_IFP(cl
->cl_hif
);
954 VERIFY(cl
->cl_flags
& HFCF_LAZY
);
955 IFCQ_CONVERT_LOCK(ifq
);
957 cl
->cl_sfb
= sfb_alloc(ifp
, cl
->cl_handle
,
958 qlimit(&cl
->cl_q
), cl
->cl_qflags
);
959 if (cl
->cl_sfb
== NULL
) {
960 /* fall back to droptail */
961 qtype(&cl
->cl_q
) = Q_DROPTAIL
;
962 cl
->cl_flags
&= ~HFCF_SFB
;
963 cl
->cl_qflags
&= ~(SFBF_ECN
| SFBF_FLOWCTL
);
965 log(LOG_ERR
, "%s: %s SFB lazy allocation "
966 "failed for qid=%d slot=%d, falling back "
967 "to DROPTAIL\n", if_name(ifp
),
968 hfsc_style(cl
->cl_hif
), cl
->cl_handle
,
972 if (cl
->cl_sfb
!= NULL
)
973 return (sfb_addq(cl
->cl_sfb
, &cl
->cl_q
, m
, t
));
974 } else if (qlen(&cl
->cl_q
) >= qlimit(&cl
->cl_q
)) {
975 IFCQ_CONVERT_LOCK(ifq
);
977 return (CLASSQEQ_DROPPED
);
981 if (cl
->cl_flags
& HFCF_CLEARDSCP
)
982 write_dsfield(m
, t
, 0);
991 hfsc_getq(struct hfsc_class
*cl
)
993 IFCQ_LOCK_ASSERT_HELD(cl
->cl_hif
->hif_ifq
);
996 if (q_is_rio(&cl
->cl_q
))
997 return (rio_getq(cl
->cl_rio
, &cl
->cl_q
));
999 #endif /* CLASSQ_RIO */
1001 if (q_is_red(&cl
->cl_q
))
1002 return (red_getq(cl
->cl_red
, &cl
->cl_q
));
1004 #endif /* CLASSQ_RED */
1006 if (q_is_blue(&cl
->cl_q
))
1007 return (blue_getq(cl
->cl_blue
, &cl
->cl_q
));
1009 #endif /* CLASSQ_BLUE */
1010 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
1011 return (sfb_getq(cl
->cl_sfb
, &cl
->cl_q
));
1013 return (_getq(&cl
->cl_q
));
1016 static struct mbuf
*
1017 hfsc_pollq(struct hfsc_class
*cl
)
1019 IFCQ_LOCK_ASSERT_HELD(cl
->cl_hif
->hif_ifq
);
1021 return (qhead(&cl
->cl_q
));
1025 hfsc_purgeq(struct hfsc_if
*hif
, struct hfsc_class
*cl
, u_int32_t flow
,
1026 u_int32_t
*packets
, u_int32_t
*bytes
)
1028 struct ifclassq
*ifq
= hif
->hif_ifq
;
1029 u_int32_t cnt
= 0, len
= 0, qlen
;
1031 IFCQ_LOCK_ASSERT_HELD(ifq
);
1033 if ((qlen
= qlen(&cl
->cl_q
)) == 0) {
1034 VERIFY(hif
->hif_packets
== 0);
1038 /* become regular mutex before freeing mbufs */
1039 IFCQ_CONVERT_LOCK(ifq
);
1042 if (q_is_rio(&cl
->cl_q
))
1043 rio_purgeq(cl
->cl_rio
, &cl
->cl_q
, flow
, &cnt
, &len
);
1045 #endif /* CLASSQ_RIO */
1047 if (q_is_red(&cl
->cl_q
))
1048 red_purgeq(cl
->cl_red
, &cl
->cl_q
, flow
, &cnt
, &len
);
1050 #endif /* CLASSQ_RED */
1052 if (q_is_blue(&cl
->cl_q
))
1053 blue_purgeq(cl
->cl_blue
, &cl
->cl_q
, flow
, &cnt
, &len
);
1055 #endif /* CLASSQ_BLUE */
1056 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
1057 sfb_purgeq(cl
->cl_sfb
, &cl
->cl_q
, flow
, &cnt
, &len
);
1059 _flushq_flow(&cl
->cl_q
, flow
, &cnt
, &len
);
1062 VERIFY(qlen(&cl
->cl_q
) == (qlen
- cnt
));
1064 PKTCNTR_ADD(&cl
->cl_stats
.drop_cnt
, cnt
, len
);
1065 IFCQ_DROP_ADD(ifq
, cnt
, len
);
1067 VERIFY(hif
->hif_packets
>= cnt
);
1068 hif
->hif_packets
-= cnt
;
1070 VERIFY(((signed)IFCQ_LEN(ifq
) - cnt
) >= 0);
1071 IFCQ_LEN(ifq
) -= cnt
;
1073 if (qempty(&cl
->cl_q
)) {
1074 update_vf(cl
, 0, 0); /* remove cl from the actlist */
1078 if (pktsched_verbose
) {
1079 log(LOG_DEBUG
, "%s: %s purge qid=%d slot=%d "
1080 "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
1081 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
),
1082 cl
->cl_handle
, cl
->cl_id
, qlen
, qlen(&cl
->cl_q
),
1087 if (packets
!= NULL
)
1094 hfsc_print_sc(struct hfsc_if
*hif
, u_int32_t qid
, u_int64_t eff_rate
,
1095 struct service_curve
*sc
, struct internal_sc
*isc
, const char *which
)
1097 struct ifnet
*ifp
= HFSCIF_IFP(hif
);
1099 log(LOG_DEBUG
, "%s: %s qid=%d {%s_m1=%llu%s [%llu], "
1100 "%s_d=%u msec, %s_m2=%llu%s [%llu]} linkrate=%llu bps\n",
1101 if_name(ifp
), hfsc_style(hif
), qid
,
1102 which
, sc
->m1
, (sc
->fl
& HFSCF_M1_PCT
) ? "%" : " bps", isc
->sm1
,
1104 which
, sc
->m2
, (sc
->fl
& HFSCF_M2_PCT
) ? "%" : " bps", isc
->sm2
,
1109 hfsc_updateq_linkrate(struct hfsc_if
*hif
, struct hfsc_class
*cl
)
1111 u_int64_t eff_rate
= ifnet_output_linkrate(HFSCIF_IFP(hif
));
1112 struct service_curve
*sc
;
1113 struct internal_sc
*isc
;
1115 /* Update parameters only if rate has changed */
1116 if (eff_rate
== hif
->hif_eff_rate
)
1121 if ((cl
->cl_flags
& HFCF_RSC
) && sc2isc(cl
, sc
, isc
, eff_rate
)) {
1122 rtsc_init(&cl
->cl_deadline
, isc
, 0, 0);
1123 rtsc_init(&cl
->cl_eligible
, isc
, 0, 0);
1124 if (pktsched_verbose
) {
1125 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
1131 if ((cl
->cl_flags
& HFCF_FSC
) && sc2isc(cl
, sc
, isc
, eff_rate
)) {
1132 rtsc_init(&cl
->cl_virtual
, isc
, 0, 0);
1133 if (pktsched_verbose
) {
1134 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
1140 if ((cl
->cl_flags
& HFCF_USC
) && sc2isc(cl
, sc
, isc
, eff_rate
)) {
1141 rtsc_init(&cl
->cl_ulimit
, isc
, 0, 0);
1142 if (pktsched_verbose
) {
1143 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
1150 hfsc_updateq(struct hfsc_if
*hif
, struct hfsc_class
*cl
, cqev_t ev
)
1152 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
1154 if (pktsched_verbose
) {
1155 log(LOG_DEBUG
, "%s: %s update qid=%d slot=%d event=%s\n",
1156 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
),
1157 cl
->cl_handle
, cl
->cl_id
, ifclassq_ev2str(ev
));
1160 if (ev
== CLASSQ_EV_LINK_BANDWIDTH
)
1161 hfsc_updateq_linkrate(hif
, cl
);
1164 if (q_is_rio(&cl
->cl_q
))
1165 return (rio_updateq(cl
->cl_rio
, ev
));
1166 #endif /* CLASSQ_RIO */
1168 if (q_is_red(&cl
->cl_q
))
1169 return (red_updateq(cl
->cl_red
, ev
));
1170 #endif /* CLASSQ_RED */
1172 if (q_is_blue(&cl
->cl_q
))
1173 return (blue_updateq(cl
->cl_blue
, ev
));
1174 #endif /* CLASSQ_BLUE */
1175 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
1176 return (sfb_updateq(cl
->cl_sfb
, ev
));
1180 set_active(struct hfsc_class
*cl
, u_int32_t len
)
1182 if (cl
->cl_flags
& HFCF_RSC
)
1184 if (cl
->cl_flags
& HFCF_FSC
)
1187 cl
->cl_stats
.period
++;
1191 set_passive(struct hfsc_class
*cl
)
1193 if (cl
->cl_flags
& HFCF_RSC
)
1197 * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
1198 * needs to be called explicitly to remove a class from actlist
1203 init_ed(struct hfsc_class
*cl
, u_int32_t next_len
)
1207 cur_time
= read_machclk();
1209 /* update the deadline curve */
1210 rtsc_min(&cl
->cl_deadline
, &cl
->cl_rsc
, cur_time
, cl
->cl_cumul
);
1213 * update the eligible curve.
1214 * for concave, it is equal to the deadline curve.
1215 * for convex, it is a linear curve with slope m2.
1217 cl
->cl_eligible
= cl
->cl_deadline
;
1218 if (cl
->cl_rsc
.sm1
<= cl
->cl_rsc
.sm2
) {
1219 cl
->cl_eligible
.dx
= 0;
1220 cl
->cl_eligible
.dy
= 0;
1223 /* compute e and d */
1224 cl
->cl_e
= rtsc_y2x(&cl
->cl_eligible
, cl
->cl_cumul
);
1225 cl
->cl_d
= rtsc_y2x(&cl
->cl_deadline
, cl
->cl_cumul
+ next_len
);
1231 update_ed(struct hfsc_class
*cl
, u_int32_t next_len
)
1233 cl
->cl_e
= rtsc_y2x(&cl
->cl_eligible
, cl
->cl_cumul
);
1234 cl
->cl_d
= rtsc_y2x(&cl
->cl_deadline
, cl
->cl_cumul
+ next_len
);
1240 update_d(struct hfsc_class
*cl
, u_int32_t next_len
)
1242 cl
->cl_d
= rtsc_y2x(&cl
->cl_deadline
, cl
->cl_cumul
+ next_len
);
1246 init_vf(struct hfsc_class
*cl
, u_int32_t len
)
1249 struct hfsc_class
*max_cl
, *p
;
1250 u_int64_t vt
, f
, cur_time
;
1255 for (; cl
->cl_parent
!= NULL
; cl
= cl
->cl_parent
) {
1257 if (go_active
&& cl
->cl_nactive
++ == 0)
1263 max_cl
= actlist_last(&cl
->cl_parent
->cl_actc
);
1264 if (max_cl
!= NULL
) {
1266 * set vt to the average of the min and max
1267 * classes. if the parent's period didn't
1268 * change, don't decrease vt of the class.
1271 if (cl
->cl_parent
->cl_cvtmin
!= 0)
1272 vt
= (cl
->cl_parent
->cl_cvtmin
+ vt
)/2;
1274 if (cl
->cl_parent
->cl_vtperiod
!=
1275 cl
->cl_parentperiod
|| vt
> cl
->cl_vt
)
1279 * first child for a new parent backlog period.
1280 * add parent's cvtmax to vtoff of children
1281 * to make a new vt (vtoff + vt) larger than
1282 * the vt in the last period for all children.
1284 vt
= cl
->cl_parent
->cl_cvtmax
;
1285 for (p
= cl
->cl_parent
->cl_children
; p
!= NULL
;
1289 cl
->cl_parent
->cl_cvtmax
= 0;
1290 cl
->cl_parent
->cl_cvtmin
= 0;
1292 cl
->cl_initvt
= cl
->cl_vt
;
1294 /* update the virtual curve */
1295 vt
= cl
->cl_vt
+ cl
->cl_vtoff
;
1296 rtsc_min(&cl
->cl_virtual
, &cl
->cl_fsc
,
1298 if (cl
->cl_virtual
.x
== vt
) {
1299 cl
->cl_virtual
.x
-= cl
->cl_vtoff
;
1304 cl
->cl_vtperiod
++; /* increment vt period */
1305 cl
->cl_parentperiod
= cl
->cl_parent
->cl_vtperiod
;
1306 if (cl
->cl_parent
->cl_nactive
== 0)
1307 cl
->cl_parentperiod
++;
1312 if (cl
->cl_flags
& HFCF_USC
) {
1313 /* class has upper limit curve */
1315 cur_time
= read_machclk();
1317 /* update the ulimit curve */
1318 rtsc_min(&cl
->cl_ulimit
, &cl
->cl_usc
, cur_time
,
1321 cl
->cl_myf
= rtsc_y2x(&cl
->cl_ulimit
,
1327 if (cl
->cl_myf
> cl
->cl_cfmin
)
1331 if (f
!= cl
->cl_f
) {
1333 update_cfmin(cl
->cl_parent
);
1339 update_vf(struct hfsc_class
*cl
, u_int32_t len
, u_int64_t cur_time
)
1341 #pragma unused(cur_time)
1343 u_int64_t myf_bound
, delta
;
1348 go_passive
= (qempty(&cl
->cl_q
) && (cl
->cl_flags
& HFCF_FSC
));
1350 for (; cl
->cl_parent
!= NULL
; cl
= cl
->cl_parent
) {
1352 cl
->cl_total
+= len
;
1354 if (!(cl
->cl_flags
& HFCF_FSC
) || cl
->cl_nactive
== 0)
1357 if (go_passive
&& --cl
->cl_nactive
== 0)
1363 /* no more active child, going passive */
1365 /* update cvtmax of the parent class */
1366 if (cl
->cl_vt
> cl
->cl_parent
->cl_cvtmax
)
1367 cl
->cl_parent
->cl_cvtmax
= cl
->cl_vt
;
1369 /* remove this class from the vt list */
1372 update_cfmin(cl
->cl_parent
);
1380 cl
->cl_vt
= rtsc_y2x(&cl
->cl_virtual
, cl
->cl_total
)
1381 - cl
->cl_vtoff
+ cl
->cl_vtadj
;
1384 * if vt of the class is smaller than cvtmin,
1385 * the class was skipped in the past due to non-fit.
1386 * if so, we need to adjust vtadj.
1388 if (cl
->cl_vt
< cl
->cl_parent
->cl_cvtmin
) {
1389 cl
->cl_vtadj
+= cl
->cl_parent
->cl_cvtmin
- cl
->cl_vt
;
1390 cl
->cl_vt
= cl
->cl_parent
->cl_cvtmin
;
1393 /* update the vt list */
1396 if (cl
->cl_flags
& HFCF_USC
) {
1397 cl
->cl_myf
= cl
->cl_myfadj
+
1398 rtsc_y2x(&cl
->cl_ulimit
, cl
->cl_total
);
1401 * if myf lags behind by more than one clock tick
1402 * from the current time, adjust myfadj to prevent
1403 * a rate-limited class from going greedy.
1404 * in a steady state under rate-limiting, myf
1405 * fluctuates within one clock tick.
1407 myf_bound
= cur_time
- machclk_per_tick
;
1408 if (cl
->cl_myf
< myf_bound
) {
1409 delta
= cur_time
- cl
->cl_myf
;
1410 cl
->cl_myfadj
+= delta
;
1411 cl
->cl_myf
+= delta
;
1416 /* cl_f is max(cl_myf, cl_cfmin) */
1417 if (cl
->cl_myf
> cl
->cl_cfmin
)
1421 if (f
!= cl
->cl_f
) {
1423 update_cfmin(cl
->cl_parent
);
1429 update_cfmin(struct hfsc_class
*cl
)
1431 struct hfsc_class
*p
;
1434 if (TAILQ_EMPTY(&cl
->cl_actc
)) {
1438 cfmin
= HT_INFINITY
;
1439 TAILQ_FOREACH(p
, &cl
->cl_actc
, cl_actlist
) {
1444 if (p
->cl_f
< cfmin
)
1447 cl
->cl_cfmin
= cfmin
;
1451 * TAILQ based ellist and actlist implementation
1452 * (ion wanted to make a calendar queue based implementation)
1455 * eligible list holds backlogged classes being sorted by their eligible times.
1456 * there is one eligible list per interface.
1460 ellist_insert(struct hfsc_class
*cl
)
1462 struct hfsc_if
*hif
= cl
->cl_hif
;
1463 struct hfsc_class
*p
;
1465 /* check the last entry first */
1466 if ((p
= TAILQ_LAST(&hif
->hif_eligible
, _eligible
)) == NULL
||
1467 p
->cl_e
<= cl
->cl_e
) {
1468 TAILQ_INSERT_TAIL(&hif
->hif_eligible
, cl
, cl_ellist
);
1472 TAILQ_FOREACH(p
, &hif
->hif_eligible
, cl_ellist
) {
1473 if (cl
->cl_e
< p
->cl_e
) {
1474 TAILQ_INSERT_BEFORE(p
, cl
, cl_ellist
);
1478 VERIFY(0); /* should not reach here */
1482 ellist_remove(struct hfsc_class
*cl
)
1484 struct hfsc_if
*hif
= cl
->cl_hif
;
1486 TAILQ_REMOVE(&hif
->hif_eligible
, cl
, cl_ellist
);
1490 ellist_update(struct hfsc_class
*cl
)
1492 struct hfsc_if
*hif
= cl
->cl_hif
;
1493 struct hfsc_class
*p
, *last
;
1496 * the eligible time of a class increases monotonically.
1497 * if the next entry has a larger eligible time, nothing to do.
1499 p
= TAILQ_NEXT(cl
, cl_ellist
);
1500 if (p
== NULL
|| cl
->cl_e
<= p
->cl_e
)
1503 /* check the last entry */
1504 last
= TAILQ_LAST(&hif
->hif_eligible
, _eligible
);
1505 VERIFY(last
!= NULL
);
1506 if (last
->cl_e
<= cl
->cl_e
) {
1507 TAILQ_REMOVE(&hif
->hif_eligible
, cl
, cl_ellist
);
1508 TAILQ_INSERT_TAIL(&hif
->hif_eligible
, cl
, cl_ellist
);
1513 * the new position must be between the next entry
1514 * and the last entry
1516 while ((p
= TAILQ_NEXT(p
, cl_ellist
)) != NULL
) {
1517 if (cl
->cl_e
< p
->cl_e
) {
1518 TAILQ_REMOVE(&hif
->hif_eligible
, cl
, cl_ellist
);
1519 TAILQ_INSERT_BEFORE(p
, cl
, cl_ellist
);
1523 VERIFY(0); /* should not reach here */
1526 /* find the class with the minimum deadline among the eligible classes */
1527 static struct hfsc_class
*
1528 ellist_get_mindl(ellist_t
*head
, u_int64_t cur_time
)
1530 struct hfsc_class
*p
, *cl
= NULL
;
1532 TAILQ_FOREACH(p
, head
, cl_ellist
) {
1533 if (p
->cl_e
> cur_time
)
1535 if (cl
== NULL
|| p
->cl_d
< cl
->cl_d
)
1542 * active children list holds backlogged child classes being sorted
1543 * by their virtual time.
1544 * each intermediate class has one active children list.
1548 actlist_insert(struct hfsc_class
*cl
)
1550 struct hfsc_class
*p
;
1552 /* check the last entry first */
1553 if ((p
= TAILQ_LAST(&cl
->cl_parent
->cl_actc
, _active
)) == NULL
||
1554 p
->cl_vt
<= cl
->cl_vt
) {
1555 TAILQ_INSERT_TAIL(&cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1559 TAILQ_FOREACH(p
, &cl
->cl_parent
->cl_actc
, cl_actlist
) {
1560 if (cl
->cl_vt
< p
->cl_vt
) {
1561 TAILQ_INSERT_BEFORE(p
, cl
, cl_actlist
);
1565 VERIFY(0); /* should not reach here */
1569 actlist_remove(struct hfsc_class
*cl
)
1571 TAILQ_REMOVE(&cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1575 actlist_update(struct hfsc_class
*cl
)
1577 struct hfsc_class
*p
, *last
;
1580 * the virtual time of a class increases monotonically during its
1581 * backlogged period.
1582 * if the next entry has a larger virtual time, nothing to do.
1584 p
= TAILQ_NEXT(cl
, cl_actlist
);
1585 if (p
== NULL
|| cl
->cl_vt
< p
->cl_vt
)
1588 /* check the last entry */
1589 last
= TAILQ_LAST(&cl
->cl_parent
->cl_actc
, _active
);
1590 VERIFY(last
!= NULL
);
1591 if (last
->cl_vt
<= cl
->cl_vt
) {
1592 TAILQ_REMOVE(&cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1593 TAILQ_INSERT_TAIL(&cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1598 * the new position must be between the next entry
1599 * and the last entry
1601 while ((p
= TAILQ_NEXT(p
, cl_actlist
)) != NULL
) {
1602 if (cl
->cl_vt
< p
->cl_vt
) {
1603 TAILQ_REMOVE(&cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1604 TAILQ_INSERT_BEFORE(p
, cl
, cl_actlist
);
1608 VERIFY(0); /* should not reach here */
1611 static struct hfsc_class
*
1612 actlist_firstfit(struct hfsc_class
*cl
, u_int64_t cur_time
)
1614 struct hfsc_class
*p
;
1616 TAILQ_FOREACH(p
, &cl
->cl_actc
, cl_actlist
) {
1617 if (p
->cl_f
<= cur_time
)
1624 * service curve support functions
1626 * external service curve parameters
1629 * internal service curve parameters
1630 * sm: (bytes/tsc_interval) << SM_SHIFT
1631 * ism: (tsc_count/byte) << ISM_SHIFT
1634 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
1635 * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
1636 * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
1637 * digits in decimal using the following table.
1639 * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
1640 * ----------+-------------------------------------------------------
1641 * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
1642 * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
1643 * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
1645 * nsec/byte 80000 8000 800 80 8
1646 * ism(500MHz) 40000 4000 400 40 4
1647 * ism(200MHz) 16000 1600 160 16 1.6
/*
 * Fixed-point scaling for internal service curves (see the table in the
 * comment block above): sm is (bytes/tsc_interval) << SM_SHIFT, ism is
 * (tsc_count/byte) << ISM_SHIFT.
 * NOTE(review): the SM_SHIFT definition was lost in extraction and is
 * restored from upstream (24) — confirm against tree.
 */
#define	SM_SHIFT	24
#define	ISM_SHIFT	10

#define	SM_MASK		((1LL << SM_SHIFT) - 1)
#define	ISM_MASK	((1LL << ISM_SHIFT) - 1)
1655 static inline u_int64_t
1656 seg_x2y(u_int64_t x
, u_int64_t sm
)
1662 * y = x * sm >> SM_SHIFT
1663 * but divide it for the upper and lower bits to avoid overflow
1665 y
= (x
>> SM_SHIFT
) * sm
+ (((x
& SM_MASK
) * sm
) >> SM_SHIFT
);
1669 static inline u_int64_t
1670 seg_y2x(u_int64_t y
, u_int64_t ism
)
1676 else if (ism
== HT_INFINITY
)
1679 x
= (y
>> ISM_SHIFT
) * ism
1680 + (((y
& ISM_MASK
) * ism
) >> ISM_SHIFT
);
1685 static inline u_int64_t
1690 sm
= (m
<< SM_SHIFT
) / 8 / machclk_freq
;
1694 static inline u_int64_t
1702 ism
= ((u_int64_t
)machclk_freq
<< ISM_SHIFT
) * 8 / m
;
1706 static inline u_int64_t
1711 dx
= (d
* machclk_freq
) / 1000;
1720 m
= (sm
* 8 * machclk_freq
) >> SM_SHIFT
;
1729 d
= dx
* 1000 / machclk_freq
;
1734 sc2isc(struct hfsc_class
*cl
, struct service_curve
*sc
, struct internal_sc
*isc
,
1737 struct hfsc_if
*hif
= cl
->cl_hif
;
1738 struct internal_sc oisc
= *isc
;
1741 if (eff_rate
== 0 && (sc
->fl
& (HFSCF_M1_PCT
| HFSCF_M2_PCT
))) {
1743 * If service curve is configured with percentage and the
1744 * effective uplink rate is not known, assume this is a
1745 * transient case, and that the rate will be updated in
1746 * the near future via CLASSQ_EV_LINK_SPEED. Pick a
1747 * reasonable number for now, e.g. 10 Mbps.
1749 eff_rate
= (10 * 1000 * 1000);
1751 log(LOG_WARNING
, "%s: %s qid=%d slot=%d eff_rate unknown; "
1752 "using temporary rate %llu bps\n", if_name(HFSCIF_IFP(hif
)),
1753 hfsc_style(hif
), cl
->cl_handle
, cl
->cl_id
, eff_rate
);
1757 if (sc
->fl
& HFSCF_M1_PCT
) {
1758 VERIFY(m1
> 0 && m1
<= 100);
1759 m1
= (eff_rate
* m1
) / 100;
1763 if (sc
->fl
& HFSCF_M2_PCT
) {
1764 VERIFY(m2
> 0 && m2
<= 100);
1765 m2
= (eff_rate
* m2
) / 100;
1768 isc
->sm1
= m2sm(m1
);
1769 isc
->ism1
= m2ism(m1
);
1770 isc
->dx
= d2dx(sc
->d
);
1771 isc
->dy
= seg_x2y(isc
->dx
, isc
->sm1
);
1772 isc
->sm2
= m2sm(m2
);
1773 isc
->ism2
= m2ism(m2
);
1775 /* return non-zero if there's any change */
1776 return (bcmp(&oisc
, isc
, sizeof (*isc
)));
1780 * initialize the runtime service curve with the given internal
1781 * service curve starting at (x, y).
1784 rtsc_init(struct runtime_sc
*rtsc
, struct internal_sc
*isc
, u_int64_t x
,
1789 rtsc
->sm1
= isc
->sm1
;
1790 rtsc
->ism1
= isc
->ism1
;
1793 rtsc
->sm2
= isc
->sm2
;
1794 rtsc
->ism2
= isc
->ism2
;
1798 * calculate the y-projection of the runtime service curve by the
1799 * given x-projection value
1802 rtsc_y2x(struct runtime_sc
*rtsc
, u_int64_t y
)
1808 else if (y
<= rtsc
->y
+ rtsc
->dy
) {
1809 /* x belongs to the 1st segment */
1811 x
= rtsc
->x
+ rtsc
->dx
;
1813 x
= rtsc
->x
+ seg_y2x(y
- rtsc
->y
, rtsc
->ism1
);
1815 /* x belongs to the 2nd segment */
1816 x
= rtsc
->x
+ rtsc
->dx
1817 + seg_y2x(y
- rtsc
->y
- rtsc
->dy
, rtsc
->ism2
);
1823 rtsc_x2y(struct runtime_sc
*rtsc
, u_int64_t x
)
1829 else if (x
<= rtsc
->x
+ rtsc
->dx
)
1830 /* y belongs to the 1st segment */
1831 y
= rtsc
->y
+ seg_x2y(x
- rtsc
->x
, rtsc
->sm1
);
1833 /* y belongs to the 2nd segment */
1834 y
= rtsc
->y
+ rtsc
->dy
1835 + seg_x2y(x
- rtsc
->x
- rtsc
->dx
, rtsc
->sm2
);
1840 * update the runtime service curve by taking the minimum of the current
1841 * runtime service curve and the service curve starting at (x, y).
1844 rtsc_min(struct runtime_sc
*rtsc
, struct internal_sc
*isc
, u_int64_t x
,
1847 u_int64_t y1
, y2
, dx
, dy
;
1849 if (isc
->sm1
<= isc
->sm2
) {
1850 /* service curve is convex */
1851 y1
= rtsc_x2y(rtsc
, x
);
1853 /* the current rtsc is smaller */
1861 * service curve is concave
1862 * compute the two y values of the current rtsc
1866 y1
= rtsc_x2y(rtsc
, x
);
1868 /* rtsc is below isc, no change to rtsc */
1872 y2
= rtsc_x2y(rtsc
, x
+ isc
->dx
);
1873 if (y2
>= y
+ isc
->dy
) {
1874 /* rtsc is above isc, replace rtsc by isc */
1883 * the two curves intersect
1884 * compute the offsets (dx, dy) using the reverse
1885 * function of seg_x2y()
1886 * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
1888 dx
= ((y1
- y
) << SM_SHIFT
) / (isc
->sm1
- isc
->sm2
);
1890 * check if (x, y1) belongs to the 1st segment of rtsc.
1891 * if so, add the offset.
1893 if (rtsc
->x
+ rtsc
->dx
> x
)
1894 dx
+= rtsc
->x
+ rtsc
->dx
- x
;
1895 dy
= seg_x2y(dx
, isc
->sm1
);
1904 hfsc_get_class_stats(struct hfsc_if
*hif
, u_int32_t qid
,
1905 struct hfsc_classstats
*sp
)
1907 struct hfsc_class
*cl
;
1909 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
1911 if ((cl
= hfsc_clh_to_clp(hif
, qid
)) == NULL
)
1914 sp
->class_id
= cl
->cl_id
;
1915 sp
->class_handle
= cl
->cl_handle
;
1917 if (cl
->cl_flags
& HFCF_RSC
) {
1918 sp
->rsc
.m1
= sm2m(cl
->cl_rsc
.sm1
);
1919 sp
->rsc
.d
= dx2d(cl
->cl_rsc
.dx
);
1920 sp
->rsc
.m2
= sm2m(cl
->cl_rsc
.sm2
);
1926 if (cl
->cl_flags
& HFCF_FSC
) {
1927 sp
->fsc
.m1
= sm2m(cl
->cl_fsc
.sm1
);
1928 sp
->fsc
.d
= dx2d(cl
->cl_fsc
.dx
);
1929 sp
->fsc
.m2
= sm2m(cl
->cl_fsc
.sm2
);
1935 if (cl
->cl_flags
& HFCF_USC
) {
1936 sp
->usc
.m1
= sm2m(cl
->cl_usc
.sm1
);
1937 sp
->usc
.d
= dx2d(cl
->cl_usc
.dx
);
1938 sp
->usc
.m2
= sm2m(cl
->cl_usc
.sm2
);
1945 sp
->total
= cl
->cl_total
;
1946 sp
->cumul
= cl
->cl_cumul
;
1953 sp
->initvt
= cl
->cl_initvt
;
1954 sp
->vtperiod
= cl
->cl_vtperiod
;
1955 sp
->parentperiod
= cl
->cl_parentperiod
;
1956 sp
->nactive
= cl
->cl_nactive
;
1957 sp
->vtoff
= cl
->cl_vtoff
;
1958 sp
->cvtmax
= cl
->cl_cvtmax
;
1959 sp
->myf
= cl
->cl_myf
;
1960 sp
->cfmin
= cl
->cl_cfmin
;
1961 sp
->cvtmin
= cl
->cl_cvtmin
;
1962 sp
->myfadj
= cl
->cl_myfadj
;
1963 sp
->vtadj
= cl
->cl_vtadj
;
1965 sp
->cur_time
= read_machclk();
1966 sp
->machclk_freq
= machclk_freq
;
1968 sp
->qlength
= qlen(&cl
->cl_q
);
1969 sp
->qlimit
= qlimit(&cl
->cl_q
);
1970 sp
->xmit_cnt
= cl
->cl_stats
.xmit_cnt
;
1971 sp
->drop_cnt
= cl
->cl_stats
.drop_cnt
;
1972 sp
->period
= cl
->cl_stats
.period
;
1974 sp
->qtype
= qtype(&cl
->cl_q
);
1975 sp
->qstate
= qstate(&cl
->cl_q
);
1977 if (q_is_red(&cl
->cl_q
))
1978 red_getstats(cl
->cl_red
, &sp
->red
[0]);
1979 #endif /* CLASSQ_RED */
1981 if (q_is_rio(&cl
->cl_q
))
1982 rio_getstats(cl
->cl_rio
, &sp
->red
[0]);
1983 #endif /* CLASSQ_RIO */
1985 if (q_is_blue(&cl
->cl_q
))
1986 blue_getstats(cl
->cl_blue
, &sp
->blue
);
1987 #endif /* CLASSQ_BLUE */
1988 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
1989 sfb_getstats(cl
->cl_sfb
, &sp
->sfb
);
1994 /* convert a class handle to the corresponding class pointer */
1995 static struct hfsc_class
*
1996 hfsc_clh_to_clp(struct hfsc_if
*hif
, u_int32_t chandle
)
1999 struct hfsc_class
*cl
;
2001 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
2004 * first, try optimistically the slot matching the lower bits of
2005 * the handle. if it fails, do the linear table search.
2007 i
= chandle
% hif
->hif_maxclasses
;
2008 if ((cl
= hif
->hif_class_tbl
[i
]) != NULL
&& cl
->cl_handle
== chandle
)
2010 for (i
= 0; i
< hif
->hif_maxclasses
; i
++)
2011 if ((cl
= hif
->hif_class_tbl
[i
]) != NULL
&&
2012 cl
->cl_handle
== chandle
)
2018 hfsc_style(struct hfsc_if
*hif
)
2020 return ((hif
->hif_flags
& HFSCIFF_ALTQ
) ? "ALTQ_HFSC" : "HFSC");
2024 hfsc_setup_ifclassq(struct ifclassq
*ifq
, u_int32_t flags
)
2026 #pragma unused(ifq, flags)
2027 return (ENXIO
); /* not yet */
2031 hfsc_teardown_ifclassq(struct ifclassq
*ifq
)
2033 struct hfsc_if
*hif
= ifq
->ifcq_disc
;
2036 IFCQ_LOCK_ASSERT_HELD(ifq
);
2037 VERIFY(hif
!= NULL
&& ifq
->ifcq_type
== PKTSCHEDT_HFSC
);
2039 (void) hfsc_destroy_locked(hif
);
2041 ifq
->ifcq_disc
= NULL
;
2042 for (i
= 0; i
< IFCQ_SC_MAX
; i
++) {
2043 ifq
->ifcq_disc_slots
[i
].qid
= 0;
2044 ifq
->ifcq_disc_slots
[i
].cl
= NULL
;
2047 return (ifclassq_detach(ifq
));
2051 hfsc_getqstats_ifclassq(struct ifclassq
*ifq
, u_int32_t slot
,
2052 struct if_ifclassq_stats
*ifqs
)
2054 struct hfsc_if
*hif
= ifq
->ifcq_disc
;
2056 IFCQ_LOCK_ASSERT_HELD(ifq
);
2057 VERIFY(ifq
->ifcq_type
== PKTSCHEDT_HFSC
);
2059 if (slot
>= IFCQ_SC_MAX
)
2062 return (hfsc_get_class_stats(hif
, ifq
->ifcq_disc_slots
[slot
].qid
,
2063 &ifqs
->ifqs_hfsc_stats
));
2065 #endif /* PKTSCHED_HFSC */