/*
 * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* $OpenBSD: altq_hfsc.c,v 1.25 2007/09/13 20:40:02 chl Exp $ */
/* $KAME: altq_hfsc.c,v 1.17 2002/11/29 07:48:33 kjc Exp $ */

/*
 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation is hereby granted (including for commercial or
 * for-profit use), provided that both the copyright notice and this
 * permission notice appear in all copies of the software, derivative
 * works, or modified versions, and any portions thereof.
 *
 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
 * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * Carnegie Mellon encourages (but does not require) users of this
 * software to return any improvements or extensions that they make,
 * and to grant Carnegie Mellon the rights to redistribute these
 * changes without encumbrance.
 */
/*
 * H-FSC is described in Proceedings of SIGCOMM'97,
 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
 * Real-Time and Priority Service"
 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
 *
 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
 * when a class has an upperlimit, the fit-time is computed from the
 * upperlimit service curve.  the link-sharing scheduler does not schedule
 * a class whose fit-time exceeds the current time.
 */
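/*
 * Illustrative note (not part of the original source): with an upperlimit
 * curve of, say, m2 = 1 Mbps, a class that has already received more than
 * 1 Mbps worth of service since going active has a fit-time in the future;
 * actlist_firstfit() below will pass over it until the wall clock catches
 * up, which is what caps its long-term rate.
 */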
#if PKTSCHED_HFSC

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>

#include <net/pktsched/pktsched_hfsc.h>
#include <netinet/in.h>

/*
 * function prototypes
 */
static int hfsc_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
static struct mbuf *hfsc_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
static int hfsc_request_ifclassq(struct ifclassq *, cqrq_t, void *);

static int hfsc_addq(struct hfsc_class *, struct mbuf *, struct pf_mtag *);
static struct mbuf *hfsc_getq(struct hfsc_class *);
static struct mbuf *hfsc_pollq(struct hfsc_class *);
static void hfsc_purgeq(struct hfsc_if *, struct hfsc_class *, u_int32_t,
    u_int32_t *, u_int32_t *);
static void hfsc_print_sc(struct hfsc_if *, u_int32_t, u_int64_t,
    struct service_curve *, struct internal_sc *, const char *);
static void hfsc_updateq_linkrate(struct hfsc_if *, struct hfsc_class *);
static void hfsc_updateq(struct hfsc_if *, struct hfsc_class *, cqev_t);

static int hfsc_clear_interface(struct hfsc_if *);
static struct hfsc_class *hfsc_class_create(struct hfsc_if *,
    struct service_curve *, struct service_curve *, struct service_curve *,
    struct hfsc_class *, u_int32_t, int, u_int32_t);
static int hfsc_class_destroy(struct hfsc_if *, struct hfsc_class *);
static int hfsc_destroy_locked(struct hfsc_if *);
static struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
static struct hfsc_class *hfsc_clh_to_clp(struct hfsc_if *, u_int32_t);
static const char *hfsc_style(struct hfsc_if *);

static void set_active(struct hfsc_class *, u_int32_t);
static void set_passive(struct hfsc_class *);

static void init_ed(struct hfsc_class *, u_int32_t);
static void update_ed(struct hfsc_class *, u_int32_t);
static void update_d(struct hfsc_class *, u_int32_t);
static void init_vf(struct hfsc_class *, u_int32_t);
static void update_vf(struct hfsc_class *, u_int32_t, u_int64_t);
static void update_cfmin(struct hfsc_class *);
static void ellist_insert(struct hfsc_class *);
static void ellist_remove(struct hfsc_class *);
static void ellist_update(struct hfsc_class *);
static struct hfsc_class *ellist_get_mindl(ellist_t *, u_int64_t);
static void actlist_insert(struct hfsc_class *);
static void actlist_remove(struct hfsc_class *);
static void actlist_update(struct hfsc_class *);
static struct hfsc_class *actlist_firstfit(struct hfsc_class *, u_int64_t);

static inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
static inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
static inline u_int64_t m2sm(u_int64_t);
static inline u_int64_t m2ism(u_int64_t);
static inline u_int64_t d2dx(u_int64_t);
static u_int64_t sm2m(u_int64_t);
static u_int64_t dx2d(u_int64_t);

static boolean_t sc2isc(struct hfsc_class *, struct service_curve *,
    struct internal_sc *, u_int64_t);
static void rtsc_init(struct runtime_sc *, struct internal_sc *,
    u_int64_t, u_int64_t);
static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t);
static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t);
static void rtsc_min(struct runtime_sc *, struct internal_sc *,
    u_int64_t, u_int64_t);
#define HFSC_ZONE_MAX   32              /* maximum elements in zone */
#define HFSC_ZONE_NAME  "pktsched_hfsc" /* zone name */

static unsigned int hfsc_size;          /* size of zone element */
static struct zone *hfsc_zone;          /* zone for hfsc_if */

#define HFSC_CL_ZONE_MAX        32      /* maximum elements in zone */
#define HFSC_CL_ZONE_NAME       "pktsched_hfsc_cl" /* zone name */

static unsigned int hfsc_cl_size;       /* size of zone element */
static struct zone *hfsc_cl_zone;       /* zone for hfsc_class */

#define HFSC_IS_A_PARENT_CLASS(cl)      ((cl)->cl_children != NULL)

#define HT_INFINITY     0xffffffffffffffffLL    /* infinite time value */
void
hfsc_init(void)
{
        hfsc_size = sizeof (struct hfsc_if);
        hfsc_zone = zinit(hfsc_size, HFSC_ZONE_MAX * hfsc_size,
            0, HFSC_ZONE_NAME);
        if (hfsc_zone == NULL) {
                panic("%s: failed allocating %s", __func__, HFSC_ZONE_NAME);
                /* NOTREACHED */
        }
        zone_change(hfsc_zone, Z_EXPAND, TRUE);
        zone_change(hfsc_zone, Z_CALLERACCT, TRUE);

        hfsc_cl_size = sizeof (struct hfsc_class);
        hfsc_cl_zone = zinit(hfsc_cl_size, HFSC_CL_ZONE_MAX * hfsc_cl_size,
            0, HFSC_CL_ZONE_NAME);
        if (hfsc_cl_zone == NULL) {
                panic("%s: failed allocating %s", __func__, HFSC_CL_ZONE_NAME);
                /* NOTREACHED */
        }
        zone_change(hfsc_cl_zone, Z_EXPAND, TRUE);
        zone_change(hfsc_cl_zone, Z_CALLERACCT, TRUE);
}
struct hfsc_if *
hfsc_alloc(struct ifnet *ifp, int how, boolean_t altq)
{
        struct hfsc_if *hif;

        hif = (how == M_WAITOK) ? zalloc(hfsc_zone) : zalloc_noblock(hfsc_zone);
        if (hif == NULL)
                return (NULL);

        bzero(hif, hfsc_size);
        TAILQ_INIT(&hif->hif_eligible);
        hif->hif_ifq = &ifp->if_snd;
        if (altq) {
                hif->hif_maxclasses = HFSC_MAX_CLASSES;
                hif->hif_flags |= HFSCIFF_ALTQ;
        } else {
                hif->hif_maxclasses = IFCQ_SC_MAX + 1;  /* incl. root class */
        }

        if ((hif->hif_class_tbl = _MALLOC(sizeof (struct hfsc_class *) *
            hif->hif_maxclasses, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
                log(LOG_ERR, "%s: %s unable to allocate class table array\n",
                    if_name(ifp), hfsc_style(hif));
                goto error;
        }

        if (pktsched_verbose) {
                log(LOG_DEBUG, "%s: %s scheduler allocated\n",
                    if_name(ifp), hfsc_style(hif));
        }

        return (hif);

error:
        if (hif->hif_class_tbl != NULL) {
                _FREE(hif->hif_class_tbl, M_DEVBUF);
                hif->hif_class_tbl = NULL;
        }
        zfree(hfsc_zone, hif);

        return (NULL);
}
int
hfsc_destroy(struct hfsc_if *hif)
{
        struct ifclassq *ifq = hif->hif_ifq;
        int err;

        IFCQ_LOCK(ifq);
        err = hfsc_destroy_locked(hif);
        IFCQ_UNLOCK(ifq);

        return (err);
}
static int
hfsc_destroy_locked(struct hfsc_if *hif)
{
        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        (void) hfsc_clear_interface(hif);
        (void) hfsc_class_destroy(hif, hif->hif_rootclass);

        VERIFY(hif->hif_class_tbl != NULL);
        _FREE(hif->hif_class_tbl, M_DEVBUF);
        hif->hif_class_tbl = NULL;

        if (pktsched_verbose) {
                log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
                    if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
        }

        zfree(hfsc_zone, hif);

        return (0);
}
/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes except the root class.
 */
static int
hfsc_clear_interface(struct hfsc_if *hif)
{
        struct hfsc_class *cl;

        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        /* clear out the classes */
        while (hif->hif_rootclass != NULL &&
            (cl = hif->hif_rootclass->cl_children) != NULL) {
                /*
                 * remove the first leaf class found in the hierarchy
                 * then start over
                 */
                for (; cl != NULL; cl = hfsc_nextclass(cl)) {
                        if (!HFSC_IS_A_PARENT_CLASS(cl)) {
                                (void) hfsc_class_destroy(hif, cl);
                                break;
                        }
                }
        }

        return (0);
}
/* discard all the queued packets on the interface */
void
hfsc_purge(struct hfsc_if *hif)
{
        struct hfsc_class *cl;

        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) {
                if (!qempty(&cl->cl_q))
                        hfsc_purgeq(hif, cl, 0, NULL, NULL);
        }
#if !PF_ALTQ
        /*
         * This assertion is safe to be made only when PF_ALTQ is not
         * configured; otherwise, IFCQ_LEN represents the sum of the
         * packets managed by ifcq_disc and altq_disc instances, which
         * is possible when transitioning between the two.
         */
        VERIFY(IFCQ_LEN(hif->hif_ifq) == 0);
#endif /* !PF_ALTQ */
}
void
hfsc_event(struct hfsc_if *hif, cqev_t ev)
{
        struct hfsc_class *cl;

        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
                hfsc_updateq(hif, cl, ev);
}
int
hfsc_add_queue(struct hfsc_if *hif, struct service_curve *rtsc,
    struct service_curve *lssc, struct service_curve *ulsc,
    u_int32_t qlimit, int flags, u_int32_t parent_qid, u_int32_t qid,
    struct hfsc_class **clp)
{
        struct hfsc_class *cl = NULL, *parent;

        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        if (parent_qid == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL)
                parent = NULL;
        else if ((parent = hfsc_clh_to_clp(hif, parent_qid)) == NULL)
                return (EINVAL);

        if (hfsc_clh_to_clp(hif, qid) != NULL)
                return (EBUSY);

        cl = hfsc_class_create(hif, rtsc, lssc, ulsc, parent,
            qlimit, flags, qid);
        if (cl == NULL)
                return (ENOMEM);

        if (clp != NULL)
                *clp = cl;

        return (0);
}
*
369 hfsc_class_create(struct hfsc_if
*hif
, struct service_curve
*rsc
,
370 struct service_curve
*fsc
, struct service_curve
*usc
,
371 struct hfsc_class
*parent
, u_int32_t qlimit
, int flags
, u_int32_t qid
)
374 struct ifclassq
*ifq
;
375 struct hfsc_class
*cl
, *p
;
379 IFCQ_LOCK_ASSERT_HELD(hif
->hif_ifq
);
381 /* Sanitize flags unless internally configured */
382 if (hif
->hif_flags
& HFSCIFF_ALTQ
)
383 flags
&= HFCF_USERFLAGS
;
385 if (hif
->hif_classes
>= hif
->hif_maxclasses
) {
386 log(LOG_ERR
, "%s: %s out of classes! (max %d)\n",
387 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
),
388 hif
->hif_maxclasses
);
393 if (flags
& HFCF_RED
) {
394 log(LOG_ERR
, "%s: %s RED not available!\n",
395 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
398 #endif /* !CLASSQ_RED */
401 if (flags
& HFCF_RIO
) {
402 log(LOG_ERR
, "%s: %s RIO not available!\n",
403 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
406 #endif /* CLASSQ_RIO */
409 if (flags
& HFCF_BLUE
) {
410 log(LOG_ERR
, "%s: %s BLUE not available!\n",
411 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
414 #endif /* CLASSQ_BLUE */
416 /* These are mutually exclusive */
417 if ((flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) &&
418 (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) != HFCF_RED
&&
419 (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) != HFCF_RIO
&&
420 (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) != HFCF_BLUE
&&
421 (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) != HFCF_SFB
) {
422 log(LOG_ERR
, "%s: %s more than one RED|RIO|BLUE|SFB\n",
423 if_name(HFSCIF_IFP(hif
)), hfsc_style(hif
));
427 cl
= zalloc(hfsc_cl_zone
);
431 bzero(cl
, hfsc_cl_size
);
432 TAILQ_INIT(&cl
->cl_actc
);
434 ifp
= HFSCIF_IFP(hif
);
436 if (qlimit
== 0 || qlimit
> IFCQ_MAXLEN(ifq
)) {
437 qlimit
= IFCQ_MAXLEN(ifq
);
439 qlimit
= DEFAULT_QLIMIT
; /* use default */
441 _qinit(&cl
->cl_q
, Q_DROPTAIL
, qlimit
);
443 cl
->cl_flags
= flags
;
444 if (flags
& (HFCF_RED
|HFCF_RIO
|HFCF_BLUE
|HFCF_SFB
)) {
445 #if CLASSQ_RED || CLASSQ_RIO
447 #endif /* CLASSQ_RED || CLASSQ_RIO */
451 if (rsc
!= NULL
&& rsc
->m2
> m2
)
453 if (fsc
!= NULL
&& fsc
->m2
> m2
)
455 if (usc
!= NULL
&& usc
->m2
> m2
)
459 if (flags
& HFCF_ECN
) {
460 if (flags
& HFCF_BLUE
)
461 cl
->cl_qflags
|= BLUEF_ECN
;
462 else if (flags
& HFCF_SFB
)
463 cl
->cl_qflags
|= SFBF_ECN
;
464 else if (flags
& HFCF_RED
)
465 cl
->cl_qflags
|= REDF_ECN
;
466 else if (flags
& HFCF_RIO
)
467 cl
->cl_qflags
|= RIOF_ECN
;
469 if (flags
& HFCF_FLOWCTL
) {
470 if (flags
& HFCF_SFB
)
471 cl
->cl_qflags
|= SFBF_FLOWCTL
;
473 if (flags
& HFCF_CLEARDSCP
) {
474 if (flags
& HFCF_RIO
)
475 cl
->cl_qflags
|= RIOF_CLEARDSCP
;
477 #if CLASSQ_RED || CLASSQ_RIO
479 * XXX: RED & RIO should be watching link speed and MTU
480 * events and recompute pkttime accordingly.
483 pkttime
= 1000 * 1000 * 1000; /* 1 sec */
485 pkttime
= (int64_t)ifp
->if_mtu
* 1000 * 1000 * 1000 /
488 /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
490 if (flags
& HFCF_RED
) {
491 cl
->cl_red
= red_alloc(ifp
, 0, 0,
492 qlimit(&cl
->cl_q
) * 10/100,
493 qlimit(&cl
->cl_q
) * 30/100,
494 cl
->cl_qflags
, pkttime
);
495 if (cl
->cl_red
!= NULL
)
496 qtype(&cl
->cl_q
) = Q_RED
;
498 #endif /* CLASSQ_RED */
500 if (flags
& HFCF_RIO
) {
502 rio_alloc(ifp
, 0, NULL
, cl
->cl_qflags
, pkttime
);
503 if (cl
->cl_rio
!= NULL
)
504 qtype(&cl
->cl_q
) = Q_RIO
;
506 #endif /* CLASSQ_RIO */
507 #endif /* CLASSQ_RED || CLASSQ_RIO */
509 if (flags
& HFCF_BLUE
) {
510 cl
->cl_blue
= blue_alloc(ifp
, 0, 0, cl
->cl_qflags
);
511 if (cl
->cl_blue
!= NULL
)
512 qtype(&cl
->cl_q
) = Q_BLUE
;
514 #endif /* CLASSQ_BLUE */
515 if (flags
& HFCF_SFB
) {
516 if (!(cl
->cl_flags
& HFCF_LAZY
))
517 cl
->cl_sfb
= sfb_alloc(ifp
, qid
,
518 qlimit(&cl
->cl_q
), cl
->cl_qflags
);
519 if (cl
->cl_sfb
!= NULL
|| (cl
->cl_flags
& HFCF_LAZY
))
520 qtype(&cl
->cl_q
) = Q_SFB
;
524 cl
->cl_id
= hif
->hif_classid
++;
527 cl
->cl_parent
= parent
;
529 eff_rate
= ifnet_output_linkrate(HFSCIF_IFP(hif
));
530 hif
->hif_eff_rate
= eff_rate
;
532 if (rsc
!= NULL
&& (rsc
->m1
!= 0 || rsc
->m2
!= 0) &&
533 (!(rsc
->fl
& HFSCF_M1_PCT
) || (rsc
->m1
> 0 && rsc
->m1
<= 100)) &&
534 (!(rsc
->fl
& HFSCF_M2_PCT
) || (rsc
->m2
> 0 && rsc
->m2
<= 100))) {
535 rsc
->fl
&= HFSCF_USERFLAGS
;
536 cl
->cl_flags
|= HFCF_RSC
;
538 (void) sc2isc(cl
, &cl
->cl_rsc0
, &cl
->cl_rsc
, eff_rate
);
539 rtsc_init(&cl
->cl_deadline
, &cl
->cl_rsc
, 0, 0);
540 rtsc_init(&cl
->cl_eligible
, &cl
->cl_rsc
, 0, 0);
542 if (fsc
!= NULL
&& (fsc
->m1
!= 0 || fsc
->m2
!= 0) &&
543 (!(fsc
->fl
& HFSCF_M1_PCT
) || (fsc
->m1
> 0 && fsc
->m1
<= 100)) &&
544 (!(fsc
->fl
& HFSCF_M2_PCT
) || (fsc
->m2
> 0 && fsc
->m2
<= 100))) {
545 fsc
->fl
&= HFSCF_USERFLAGS
;
546 cl
->cl_flags
|= HFCF_FSC
;
548 (void) sc2isc(cl
, &cl
->cl_fsc0
, &cl
->cl_fsc
, eff_rate
);
549 rtsc_init(&cl
->cl_virtual
, &cl
->cl_fsc
, 0, 0);
551 if (usc
!= NULL
&& (usc
->m1
!= 0 || usc
->m2
!= 0) &&
552 (!(usc
->fl
& HFSCF_M1_PCT
) || (usc
->m1
> 0 && usc
->m1
<= 100)) &&
553 (!(usc
->fl
& HFSCF_M2_PCT
) || (usc
->m2
> 0 && usc
->m2
<= 100))) {
554 usc
->fl
&= HFSCF_USERFLAGS
;
555 cl
->cl_flags
|= HFCF_USC
;
557 (void) sc2isc(cl
, &cl
->cl_usc0
, &cl
->cl_usc
, eff_rate
);
558 rtsc_init(&cl
->cl_ulimit
, &cl
->cl_usc
, 0, 0);
562 * find a free slot in the class table. if the slot matching
563 * the lower bits of qid is free, use this slot. otherwise,
564 * use the first free slot.
566 i
= qid
% hif
->hif_maxclasses
;
567 if (hif
->hif_class_tbl
[i
] == NULL
) {
568 hif
->hif_class_tbl
[i
] = cl
;
570 for (i
= 0; i
< hif
->hif_maxclasses
; i
++)
571 if (hif
->hif_class_tbl
[i
] == NULL
) {
572 hif
->hif_class_tbl
[i
] = cl
;
575 if (i
== hif
->hif_maxclasses
) {
581 if (flags
& HFCF_DEFAULTCLASS
)
582 hif
->hif_defaultclass
= cl
;
584 if (parent
== NULL
) {
585 /* this is root class */
586 hif
->hif_rootclass
= cl
;
588 /* add this class to the children list of the parent */
589 if ((p
= parent
->cl_children
) == NULL
)
590 parent
->cl_children
= cl
;
592 while (p
->cl_siblings
!= NULL
)
598 if (pktsched_verbose
) {
599 log(LOG_DEBUG
, "%s: %s created qid=%d pqid=%d qlimit=%d "
600 "flags=%b\n", if_name(ifp
), hfsc_style(hif
), cl
->cl_handle
,
601 (cl
->cl_parent
!= NULL
) ? cl
->cl_parent
->cl_handle
: 0,
602 qlimit(&cl
->cl_q
), cl
->cl_flags
, HFCF_BITS
);
603 if (cl
->cl_flags
& HFCF_RSC
) {
604 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
605 &cl
->cl_rsc0
, &cl
->cl_rsc
, "rsc");
607 if (cl
->cl_flags
& HFCF_FSC
) {
608 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
609 &cl
->cl_fsc0
, &cl
->cl_fsc
, "fsc");
611 if (cl
->cl_flags
& HFCF_USC
) {
612 hfsc_print_sc(hif
, cl
->cl_handle
, eff_rate
,
613 &cl
->cl_usc0
, &cl
->cl_usc
, "usc");
620 if (cl
->cl_qalg
.ptr
!= NULL
) {
622 if (q_is_rio(&cl
->cl_q
))
623 rio_destroy(cl
->cl_rio
);
624 #endif /* CLASSQ_RIO */
626 if (q_is_red(&cl
->cl_q
))
627 red_destroy(cl
->cl_red
);
628 #endif /* CLASSQ_RED */
630 if (q_is_blue(&cl
->cl_q
))
631 blue_destroy(cl
->cl_blue
);
632 #endif /* CLASSQ_BLUE */
633 if (q_is_sfb(&cl
->cl_q
) && cl
->cl_sfb
!= NULL
)
634 sfb_destroy(cl
->cl_sfb
);
635 cl
->cl_qalg
.ptr
= NULL
;
636 qtype(&cl
->cl_q
) = Q_DROPTAIL
;
637 qstate(&cl
->cl_q
) = QS_RUNNING
;
639 zfree(hfsc_cl_zone
, cl
);
int
hfsc_remove_queue(struct hfsc_if *hif, u_int32_t qid)
{
        struct hfsc_class *cl;

        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        if ((cl = hfsc_clh_to_clp(hif, qid)) == NULL)
                return (EINVAL);

        return (hfsc_class_destroy(hif, cl));
}
static int
hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl)
{
        u_int32_t i;

        if (cl == NULL)
                return (0);

        if (HFSC_IS_A_PARENT_CLASS(cl))
                return (EBUSY);

        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        if (!qempty(&cl->cl_q))
                hfsc_purgeq(hif, cl, 0, NULL, NULL);

        if (cl->cl_parent == NULL) {
                /* this is root class */
        } else {
                struct hfsc_class *p = cl->cl_parent->cl_children;

                if (p == cl)
                        cl->cl_parent->cl_children = cl->cl_siblings;
                else do {
                        if (p->cl_siblings == cl) {
                                p->cl_siblings = cl->cl_siblings;
                                break;
                        }
                } while ((p = p->cl_siblings) != NULL);
                VERIFY(p != NULL);
        }

        for (i = 0; i < hif->hif_maxclasses; i++)
                if (hif->hif_class_tbl[i] == cl) {
                        hif->hif_class_tbl[i] = NULL;
                        break;
                }

        hif->hif_classes--;

        if (cl->cl_qalg.ptr != NULL) {
#if CLASSQ_RIO
                if (q_is_rio(&cl->cl_q))
                        rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
                if (q_is_red(&cl->cl_q))
                        red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
                if (q_is_blue(&cl->cl_q))
                        blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
                if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
                        sfb_destroy(cl->cl_sfb);
                cl->cl_qalg.ptr = NULL;
                qtype(&cl->cl_q) = Q_DROPTAIL;
                qstate(&cl->cl_q) = QS_RUNNING;
        }

        if (cl == hif->hif_rootclass)
                hif->hif_rootclass = NULL;
        if (cl == hif->hif_defaultclass)
                hif->hif_defaultclass = NULL;

        if (pktsched_verbose) {
                log(LOG_DEBUG, "%s: %s destroyed qid=%d slot=%d\n",
                    if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
                    cl->cl_handle, cl->cl_id);
        }

        zfree(hfsc_cl_zone, cl);

        return (0);
}
/*
 * hfsc_nextclass returns the next class in the tree.
 *   usage:
 *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
 *		do_something;
 */
static struct hfsc_class *
hfsc_nextclass(struct hfsc_class *cl)
{
        IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);

        if (cl->cl_children != NULL)
                cl = cl->cl_children;
        else if (cl->cl_siblings != NULL)
                cl = cl->cl_siblings;
        else {
                while ((cl = cl->cl_parent) != NULL)
                        if (cl->cl_siblings) {
                                cl = cl->cl_siblings;
                                break;
                        }
        }

        return (cl);
}
int
hfsc_enqueue(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf *m,
    struct pf_mtag *t)
{
        struct ifclassq *ifq = hif->hif_ifq;
        u_int32_t len;
        int ret;

        IFCQ_LOCK_ASSERT_HELD(ifq);
        VERIFY(cl == NULL || cl->cl_hif == hif);

        if (cl == NULL) {
                cl = hfsc_clh_to_clp(hif, t->pftag_qid);
                if (cl == NULL || HFSC_IS_A_PARENT_CLASS(cl)) {
                        cl = hif->hif_defaultclass;
                        if (cl == NULL) {
                                IFCQ_CONVERT_LOCK(ifq);
                                m_freem(m);
                                return (ENOBUFS);
                        }
                }
        }

        len = m_pktlen(m);

        ret = hfsc_addq(cl, m, t);
        if (ret != 0) {
                if (ret == CLASSQEQ_SUCCESS_FC) {
                        /* packet enqueued, return advisory feedback */
                        ret = EQFULL;
                } else {
                        VERIFY(ret == CLASSQEQ_DROPPED ||
                            ret == CLASSQEQ_DROPPED_FC ||
                            ret == CLASSQEQ_DROPPED_SP);
                        /* packet has been freed in hfsc_addq */
                        PKTCNTR_ADD(&cl->cl_stats.drop_cnt, 1, len);
                        IFCQ_DROP_ADD(ifq, 1, len);
                        switch (ret) {
                        case CLASSQEQ_DROPPED:
                                return (ENOBUFS);
                        case CLASSQEQ_DROPPED_FC:
                                return (EQFULL);
                        case CLASSQEQ_DROPPED_SP:
                                return (EQSUSPENDED);
                        }
                        /* NOT REACHED */
                }
        }
        IFCQ_INC_LEN(ifq);
        cl->cl_hif->hif_packets++;

        /* successfully queued. */
        if (qlen(&cl->cl_q) == 1)
                set_active(cl, len);

        return (ret);
}
/*
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
struct mbuf *
hfsc_dequeue(struct hfsc_if *hif, cqdq_op_t op)
{
        struct ifclassq *ifq = hif->hif_ifq;
        struct hfsc_class *cl;
        struct mbuf *m;
        u_int32_t len, next_len;
        u_int64_t cur_time;
        int realtime = 0;

        IFCQ_LOCK_ASSERT_HELD(ifq);

        if (hif->hif_packets == 0)
                /* no packet in the tree */
                return (NULL);

        cur_time = read_machclk();

        if (op == CLASSQDQ_REMOVE && hif->hif_pollcache != NULL) {
                cl = hif->hif_pollcache;
                hif->hif_pollcache = NULL;
                /* check if the class was scheduled by real-time criteria */
                if (cl->cl_flags & HFCF_RSC)
                        realtime = (cl->cl_e <= cur_time);
        } else {
                /*
                 * if there are eligible classes, use real-time criteria.
                 * find the class with the minimum deadline among
                 * the eligible classes.
                 */
                if ((cl = ellist_get_mindl(&hif->hif_eligible, cur_time))
                    != NULL) {
                        realtime = 1;
                } else {
                        int fits = 0;
                        /*
                         * use link-sharing criteria
                         * get the class with the minimum vt in the hierarchy
                         */
                        cl = hif->hif_rootclass;
                        while (HFSC_IS_A_PARENT_CLASS(cl)) {
                                cl = actlist_firstfit(cl, cur_time);
                                if (cl == NULL) {
                                        if (fits > 0)
                                                log(LOG_ERR, "%s: %s "
                                                    "%d fit but none found\n",
                                                    if_name(HFSCIF_IFP(hif)),
                                                    hfsc_style(hif), fits);
                                        return (NULL);
                                }
                                /*
                                 * update parent's cl_cvtmin.
                                 * don't update if the new vt is smaller.
                                 */
                                if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
                                        cl->cl_parent->cl_cvtmin = cl->cl_vt;
                                fits++;
                        }
                }

                if (op == CLASSQDQ_POLL) {
                        hif->hif_pollcache = cl;
                        return (hfsc_pollq(cl));
                }
        }

        m = hfsc_getq(cl);
        VERIFY(m != NULL);
        len = m_pktlen(m);

        cl->cl_hif->hif_packets--;
        IFCQ_DEC_LEN(ifq);
        IFCQ_XMIT_ADD(ifq, 1, len);
        PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, 1, len);

        update_vf(cl, len, cur_time);
        if (realtime)
                cl->cl_cumul += len;

        if (!qempty(&cl->cl_q)) {
                if (cl->cl_flags & HFCF_RSC) {
                        /* update ed */
                        next_len = m_pktlen(qhead(&cl->cl_q));

                        if (realtime)
                                update_ed(cl, next_len);
                        else
                                update_d(cl, next_len);
                }
        } else {
                /* the class becomes passive */
                set_passive(cl);
        }

        return (m);
}
static int
hfsc_addq(struct hfsc_class *cl, struct mbuf *m, struct pf_mtag *t)
{
        struct ifclassq *ifq = cl->cl_hif->hif_ifq;

        IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
        if (q_is_rio(&cl->cl_q))
                return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
        else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
        if (q_is_red(&cl->cl_q))
                return (red_addq(cl->cl_red, &cl->cl_q, m, t));
        else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
        if (q_is_blue(&cl->cl_q))
                return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
        else
#endif /* CLASSQ_BLUE */
        if (q_is_sfb(&cl->cl_q)) {
                if (cl->cl_sfb == NULL) {
                        struct ifnet *ifp = HFSCIF_IFP(cl->cl_hif);

                        VERIFY(cl->cl_flags & HFCF_LAZY);
                        IFCQ_CONVERT_LOCK(ifq);

                        cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
                            qlimit(&cl->cl_q), cl->cl_qflags);
                        if (cl->cl_sfb == NULL) {
                                /* fall back to droptail */
                                qtype(&cl->cl_q) = Q_DROPTAIL;
                                cl->cl_flags &= ~HFCF_SFB;
                                cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

                                log(LOG_ERR, "%s: %s SFB lazy allocation "
                                    "failed for qid=%d slot=%d, falling back "
                                    "to DROPTAIL\n", if_name(ifp),
                                    hfsc_style(cl->cl_hif), cl->cl_handle,
                                    cl->cl_id);
                        }
                }
                if (cl->cl_sfb != NULL)
                        return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
        } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
                IFCQ_CONVERT_LOCK(ifq);
                m_freem(m);
                return (CLASSQEQ_DROPPED);
        }

        if (cl->cl_flags & HFCF_CLEARDSCP)
                write_dsfield(m, t, 0);

        _addq(&cl->cl_q, m);

        return (0);
}
static struct mbuf *
hfsc_getq(struct hfsc_class *cl)
{
        IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);

#if CLASSQ_RIO
        if (q_is_rio(&cl->cl_q))
                return (rio_getq(cl->cl_rio, &cl->cl_q));
        else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
        if (q_is_red(&cl->cl_q))
                return (red_getq(cl->cl_red, &cl->cl_q));
        else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
        if (q_is_blue(&cl->cl_q))
                return (blue_getq(cl->cl_blue, &cl->cl_q));
        else
#endif /* CLASSQ_BLUE */
        if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
                return (sfb_getq(cl->cl_sfb, &cl->cl_q));

        return (_getq(&cl->cl_q));
}
static struct mbuf *
hfsc_pollq(struct hfsc_class *cl)
{
        IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);

        return (qhead(&cl->cl_q));
}
static void
hfsc_purgeq(struct hfsc_if *hif, struct hfsc_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
        struct ifclassq *ifq = hif->hif_ifq;
        u_int32_t cnt = 0, len = 0, qlen;

        IFCQ_LOCK_ASSERT_HELD(ifq);

        if ((qlen = qlen(&cl->cl_q)) == 0) {
                VERIFY(hif->hif_packets == 0);
                goto done;
        }

        /* become regular mutex before freeing mbufs */
        IFCQ_CONVERT_LOCK(ifq);

#if CLASSQ_RIO
        if (q_is_rio(&cl->cl_q))
                rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
        else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
        if (q_is_red(&cl->cl_q))
                red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
        else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
        if (q_is_blue(&cl->cl_q))
                blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
        else
#endif /* CLASSQ_BLUE */
        if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
                sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
        else
                _flushq_flow(&cl->cl_q, flow, &cnt, &len);

        if (cnt > 0) {
                VERIFY(qlen(&cl->cl_q) == (qlen - cnt));

                PKTCNTR_ADD(&cl->cl_stats.drop_cnt, cnt, len);
                IFCQ_DROP_ADD(ifq, cnt, len);

                VERIFY(hif->hif_packets >= cnt);
                hif->hif_packets -= cnt;

                VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
                IFCQ_LEN(ifq) -= cnt;

                if (qempty(&cl->cl_q)) {
                        update_vf(cl, 0, 0);    /* remove cl from the actlist */
                        set_passive(cl);
                }

                if (pktsched_verbose) {
                        log(LOG_DEBUG, "%s: %s purge qid=%d slot=%d "
                            "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
                            if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
                            cl->cl_handle, cl->cl_id, qlen, qlen(&cl->cl_q),
                            cnt, len, flow);
                }
        }
done:
        if (packets != NULL)
                *packets = cnt;
        if (bytes != NULL)
                *bytes = len;
}
static void
hfsc_print_sc(struct hfsc_if *hif, u_int32_t qid, u_int64_t eff_rate,
    struct service_curve *sc, struct internal_sc *isc, const char *which)
{
        struct ifnet *ifp = HFSCIF_IFP(hif);

        log(LOG_DEBUG, "%s: %s qid=%d {%s_m1=%llu%s [%llu], "
            "%s_d=%u msec, %s_m2=%llu%s [%llu]} linkrate=%llu bps\n",
            if_name(ifp), hfsc_style(hif), qid,
            which, sc->m1, (sc->fl & HFSCF_M1_PCT) ? "%" : " bps", isc->sm1,
            which, sc->d,
            which, sc->m2, (sc->fl & HFSCF_M2_PCT) ? "%" : " bps", isc->sm2,
            eff_rate);
}
static void
hfsc_updateq_linkrate(struct hfsc_if *hif, struct hfsc_class *cl)
{
        u_int64_t eff_rate = ifnet_output_linkrate(HFSCIF_IFP(hif));
        struct service_curve *sc;
        struct internal_sc *isc;

        /* Update parameters only if rate has changed */
        if (eff_rate == hif->hif_eff_rate)
                return;

        sc = &cl->cl_rsc0;
        isc = &cl->cl_rsc;
        if ((cl->cl_flags & HFCF_RSC) && sc2isc(cl, sc, isc, eff_rate)) {
                rtsc_init(&cl->cl_deadline, isc, 0, 0);
                rtsc_init(&cl->cl_eligible, isc, 0, 0);
                if (pktsched_verbose) {
                        hfsc_print_sc(hif, cl->cl_handle, eff_rate,
                            sc, isc, "rsc");
                }
        }
        sc = &cl->cl_fsc0;
        isc = &cl->cl_fsc;
        if ((cl->cl_flags & HFCF_FSC) && sc2isc(cl, sc, isc, eff_rate)) {
                rtsc_init(&cl->cl_virtual, isc, 0, 0);
                if (pktsched_verbose) {
                        hfsc_print_sc(hif, cl->cl_handle, eff_rate,
                            sc, isc, "fsc");
                }
        }
        sc = &cl->cl_usc0;
        isc = &cl->cl_usc;
        if ((cl->cl_flags & HFCF_USC) && sc2isc(cl, sc, isc, eff_rate)) {
                rtsc_init(&cl->cl_ulimit, isc, 0, 0);
                if (pktsched_verbose) {
                        hfsc_print_sc(hif, cl->cl_handle, eff_rate,
                            sc, isc, "usc");
                }
        }
}
static void
hfsc_updateq(struct hfsc_if *hif, struct hfsc_class *cl, cqev_t ev)
{
        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        if (pktsched_verbose) {
                log(LOG_DEBUG, "%s: %s update qid=%d slot=%d event=%s\n",
                    if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
                    cl->cl_handle, cl->cl_id, ifclassq_ev2str(ev));
        }

        if (ev == CLASSQ_EV_LINK_SPEED)
                hfsc_updateq_linkrate(hif, cl);

#if CLASSQ_RIO
        if (q_is_rio(&cl->cl_q))
                return (rio_updateq(cl->cl_rio, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
        if (q_is_red(&cl->cl_q))
                return (red_updateq(cl->cl_red, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
        if (q_is_blue(&cl->cl_q))
                return (blue_updateq(cl->cl_blue, ev));
#endif /* CLASSQ_BLUE */
        if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
                return (sfb_updateq(cl->cl_sfb, ev));
}
static void
set_active(struct hfsc_class *cl, u_int32_t len)
{
        if (cl->cl_flags & HFCF_RSC)
                init_ed(cl, len);
        if (cl->cl_flags & HFCF_FSC)
                init_vf(cl, len);

        cl->cl_stats.period++;
}

static void
set_passive(struct hfsc_class *cl)
{
        if (cl->cl_flags & HFCF_RSC)
                ellist_remove(cl);

        /*
         * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
         * needs to be called explicitly to remove a class from actlist
         */
}
static void
init_ed(struct hfsc_class *cl, u_int32_t next_len)
{
        u_int64_t cur_time;

        cur_time = read_machclk();

        /* update the deadline curve */
        rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);

        /*
         * update the eligible curve.
         * for concave, it is equal to the deadline curve.
         * for convex, it is a linear curve with slope m2.
         */
        cl->cl_eligible = cl->cl_deadline;
        if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
                cl->cl_eligible.dx = 0;
                cl->cl_eligible.dy = 0;
        }

        /* compute e and d */
        cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
        cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

        ellist_insert(cl);
}
static void
update_ed(struct hfsc_class *cl, u_int32_t next_len)
{
        cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
        cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

        ellist_update(cl);
}

static void
update_d(struct hfsc_class *cl, u_int32_t next_len)
{
        cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}
static void
init_vf(struct hfsc_class *cl, u_int32_t len)
{
#pragma unused(len)
        struct hfsc_class *max_cl, *p;
        u_int64_t vt, f, cur_time;
        int go_active;

        cur_time = 0;
        go_active = 1;
        for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
                if (go_active && cl->cl_nactive++ == 0)
                        go_active = 1;
                else
                        go_active = 0;

                if (go_active) {
                        max_cl = actlist_last(&cl->cl_parent->cl_actc);
                        if (max_cl != NULL) {
                                /*
                                 * set vt to the average of the min and max
                                 * classes.  if the parent's period didn't
                                 * change, don't decrease vt of the class.
                                 */
                                vt = max_cl->cl_vt;
                                if (cl->cl_parent->cl_cvtmin != 0)
                                        vt = (cl->cl_parent->cl_cvtmin + vt)/2;

                                if (cl->cl_parent->cl_vtperiod !=
                                    cl->cl_parentperiod || vt > cl->cl_vt)
                                        cl->cl_vt = vt;
                        } else {
                                /*
                                 * first child for a new parent backlog period.
                                 * add parent's cvtmax to vtoff of children
                                 * to make a new vt (vtoff + vt) larger than
                                 * the vt in the last period for all children.
                                 */
                                vt = cl->cl_parent->cl_cvtmax;
                                for (p = cl->cl_parent->cl_children; p != NULL;
                                    p = p->cl_siblings)
                                        p->cl_vtoff += vt;
                                cl->cl_vt = 0;
                                cl->cl_parent->cl_cvtmax = 0;
                                cl->cl_parent->cl_cvtmin = 0;
                        }
                        cl->cl_initvt = cl->cl_vt;

                        /* update the virtual curve */
                        vt = cl->cl_vt + cl->cl_vtoff;
                        rtsc_min(&cl->cl_virtual, &cl->cl_fsc,
                            vt, cl->cl_total);
                        if (cl->cl_virtual.x == vt) {
                                cl->cl_virtual.x -= cl->cl_vtoff;
                                cl->cl_vtoff = 0;
                        }
                        cl->cl_vtadj = 0;

                        cl->cl_vtperiod++;  /* increment vt period */
                        cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
                        if (cl->cl_parent->cl_nactive == 0)
                                cl->cl_parentperiod++;
                        cl->cl_f = 0;

                        actlist_insert(cl);

                        if (cl->cl_flags & HFCF_USC) {
                                /* class has upper limit curve */
                                if (cur_time == 0)
                                        cur_time = read_machclk();

                                /* update the ulimit curve */
                                rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
                                    cl->cl_total);
                                /* compute myf */
                                cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
                                    cl->cl_total);
                                cl->cl_myfadj = 0;
                        }
                }

                if (cl->cl_myf > cl->cl_cfmin)
                        f = cl->cl_myf;
                else
                        f = cl->cl_cfmin;
                if (f != cl->cl_f) {
                        cl->cl_f = f;
                        update_cfmin(cl->cl_parent);
                }
        }
}
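/*
 * Illustrative note (not part of the original source): the averaging above
 * means a newly backlogged child joins mid-pack.  E.g. if the parent's
 * cvtmin is 100 and the largest vt among its active children is 300, the
 * newcomer starts at vt = (100 + 300) / 2 = 200, so it neither monopolizes
 * the link (as vt = 0 would) nor starves behind long-running siblings
 * (as vt = 300 would).
 */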
static void
update_vf(struct hfsc_class *cl, u_int32_t len, u_int64_t cur_time)
{
#pragma unused(cur_time)
#if 0
        u_int64_t myf_bound, delta;
#endif
        u_int64_t f;
        int go_passive;

        go_passive = (qempty(&cl->cl_q) && (cl->cl_flags & HFCF_FSC));

        for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
                cl->cl_total += len;

                if (!(cl->cl_flags & HFCF_FSC) || cl->cl_nactive == 0)
                        continue;

                if (go_passive && --cl->cl_nactive == 0)
                        go_passive = 1;
                else
                        go_passive = 0;

                if (go_passive) {
                        /* no more active child, going passive */

                        /* update cvtmax of the parent class */
                        if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
                                cl->cl_parent->cl_cvtmax = cl->cl_vt;

                        /* remove this class from the vt list */
                        actlist_remove(cl);

                        update_cfmin(cl->cl_parent);

                        continue;
                }

                /*
                 * update vt and f
                 */
                cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
                    - cl->cl_vtoff + cl->cl_vtadj;

                /*
                 * if vt of the class is smaller than cvtmin,
                 * the class was skipped in the past due to non-fit.
                 * if so, we need to adjust vtadj.
                 */
                if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
                        cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
                        cl->cl_vt = cl->cl_parent->cl_cvtmin;
                }

                /* update the vt list */
                actlist_update(cl);

                if (cl->cl_flags & HFCF_USC) {
                        cl->cl_myf = cl->cl_myfadj +
                            rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
#if 0
                        /*
                         * if myf lags behind by more than one clock tick
                         * from the current time, adjust myfadj to prevent
                         * a rate-limited class from going greedy.
                         * in a steady state under rate-limiting, myf
                         * fluctuates within one clock tick.
                         */
                        myf_bound = cur_time - machclk_per_tick;
                        if (cl->cl_myf < myf_bound) {
                                delta = cur_time - cl->cl_myf;
                                cl->cl_myfadj += delta;
                                cl->cl_myf += delta;
                        }
#endif
                }

                /* cl_f is max(cl_myf, cl_cfmin) */
                if (cl->cl_myf > cl->cl_cfmin)
                        f = cl->cl_myf;
                else
                        f = cl->cl_cfmin;
                if (f != cl->cl_f) {
                        cl->cl_f = f;
                        update_cfmin(cl->cl_parent);
                }
        }
}
static void
update_cfmin(struct hfsc_class *cl)
{
        struct hfsc_class *p;
        u_int64_t cfmin;

        if (TAILQ_EMPTY(&cl->cl_actc)) {
                cl->cl_cfmin = 0;
                return;
        }
        cfmin = HT_INFINITY;
        TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
                if (p->cl_f == 0) {
                        cl->cl_cfmin = 0;
                        return;
                }
                if (p->cl_f < cfmin)
                        cfmin = p->cl_f;
        }
        cl->cl_cfmin = cfmin;
}
/*
 * TAILQ based ellist and actlist implementation
 * (ion wanted to make a calendar queue based implementation)
 */
/*
 * eligible list holds backlogged classes being sorted by their eligible times.
 * there is one eligible list per interface.
 */
static void
ellist_insert(struct hfsc_class *cl)
{
        struct hfsc_if *hif = cl->cl_hif;
        struct hfsc_class *p;

        /* check the last entry first */
        if ((p = TAILQ_LAST(&hif->hif_eligible, _eligible)) == NULL ||
            p->cl_e <= cl->cl_e) {
                TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
                return;
        }

        TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
                if (cl->cl_e < p->cl_e) {
                        TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
                        return;
                }
        }
        VERIFY(0); /* should not reach here */
}

static void
ellist_remove(struct hfsc_class *cl)
{
        struct hfsc_if *hif = cl->cl_hif;

        TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
}
static void
ellist_update(struct hfsc_class *cl)
{
        struct hfsc_if *hif = cl->cl_hif;
        struct hfsc_class *p, *last;

        /*
         * the eligible time of a class increases monotonically.
         * if the next entry has a larger eligible time, nothing to do.
         */
        p = TAILQ_NEXT(cl, cl_ellist);
        if (p == NULL || cl->cl_e <= p->cl_e)
                return;

        /* check the last entry */
        last = TAILQ_LAST(&hif->hif_eligible, _eligible);
        VERIFY(last != NULL);
        if (last->cl_e <= cl->cl_e) {
                TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
                TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
                return;
        }

        /*
         * the new position must be between the next entry
         * and the last entry
         */
        while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
                if (cl->cl_e < p->cl_e) {
                        TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
                        TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
                        return;
                }
        }
        VERIFY(0); /* should not reach here */
}
/* find the class with the minimum deadline among the eligible classes */
static struct hfsc_class *
ellist_get_mindl(ellist_t *head, u_int64_t cur_time)
{
        struct hfsc_class *p, *cl = NULL;

        TAILQ_FOREACH(p, head, cl_ellist) {
                if (p->cl_e > cur_time)
                        break;
                if (cl == NULL || p->cl_d < cl->cl_d)
                        cl = p;
        }
        return (cl);
}
/*
 * active children list holds backlogged child classes being sorted
 * by their virtual time.
 * each intermediate class has one active children list.
 */
static void
actlist_insert(struct hfsc_class *cl)
{
        struct hfsc_class *p;

        /* check the last entry first */
        if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, _active)) == NULL ||
            p->cl_vt <= cl->cl_vt) {
                TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
                return;
        }

        TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
                if (cl->cl_vt < p->cl_vt) {
                        TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
                        return;
                }
        }
        VERIFY(0); /* should not reach here */
}

static void
actlist_remove(struct hfsc_class *cl)
{
        TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
}
static void
actlist_update(struct hfsc_class *cl)
{
        struct hfsc_class *p, *last;

        /*
         * the virtual time of a class increases monotonically during its
         * backlogged period.
         * if the next entry has a larger virtual time, nothing to do.
         */
        p = TAILQ_NEXT(cl, cl_actlist);
        if (p == NULL || cl->cl_vt < p->cl_vt)
                return;

        /* check the last entry */
        last = TAILQ_LAST(&cl->cl_parent->cl_actc, _active);
        VERIFY(last != NULL);
        if (last->cl_vt <= cl->cl_vt) {
                TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
                TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
                return;
        }

        /*
         * the new position must be between the next entry
         * and the last entry
         */
        while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
                if (cl->cl_vt < p->cl_vt) {
                        TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
                        TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
                        return;
                }
        }
        VERIFY(0); /* should not reach here */
}
static struct hfsc_class *
actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
{
        struct hfsc_class *p;

        TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
                if (p->cl_f <= cur_time)
                        return (p);
        }
        return (NULL);
}
/*
 * service curve support functions
 *
 *  external service curve parameters
 *	m: bits/sec
 *	d: msec
 *  internal service curve parameters
 *	sm: (bytes/tsc_interval) << SM_SHIFT
 *	ism: (tsc_count/byte) << ISM_SHIFT
 *	dx: tsc count
 *
 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
 * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
 * speed.  SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
 * digits in decimal using the following table.
 *
 *  bits/sec    100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
 *  ----------+-------------------------------------------------------
 *  bytes/nsec  12.5e-6    125e-6     1250e-6    12500e-6   125000e-6
 *  sm(500MHz)  25.0e-6    250e-6     2500e-6    25000e-6   250000e-6
 *  sm(200MHz)  62.5e-6    625e-6     6250e-6    62500e-6   625000e-6
 *
 *  nsec/byte   80000      8000       800        80         8
 *  ism(500MHz) 40000      4000       400        40         4
 *  ism(200MHz) 16000      1600       160        16         1.6
 */
#define SM_SHIFT        24
#define ISM_SHIFT       10

#define SM_MASK         ((1LL << SM_SHIFT) - 1)
#define ISM_MASK        ((1LL << ISM_SHIFT) - 1)
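/*
 * Illustrative check of the table above (not part of the original source),
 * for a 1 Mbps curve on a 500 MHz clock:
 *
 *	bytes/sec  = 1,000,000 / 8          = 125,000
 *	bytes/tick = 125,000 / 500,000,000  = 250e-6   (row "sm(500MHz)")
 *	ticks/byte = 500,000,000 / 125,000  = 4,000    (row "ism(500MHz)")
 *
 * After scaling, m2sm() stores 250e-6 << SM_SHIFT ~= 4194 and m2ism()
 * stores 4000 << ISM_SHIFT = 4,096,000, both with ample effective digits.
 */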
static inline u_int64_t
seg_x2y(u_int64_t x, u_int64_t sm)
{
        u_int64_t y;

        /*
         * compute
         *	y = x * sm >> SM_SHIFT
         * but divide it for the upper and lower bits to avoid overflow
         */
        y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
        return (y);
}
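/*
 * Illustrative derivation (not part of the original source): writing
 * x = (x >> SM_SHIFT) * 2^SM_SHIFT + (x & SM_MASK) gives
 *
 *	(x * sm) >> SM_SHIFT
 *	    == (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT)
 *
 * modulo the fractional bits discarded by the second shift.  A direct
 * x * sm product would wrap 64 bits once x * sm >= 2^64, which is easy to
 * hit with tick counts in x and Gbps-scale slopes in sm; splitting keeps
 * both partial products within range.
 */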
static inline u_int64_t
seg_y2x(u_int64_t y, u_int64_t ism)
{
        u_int64_t x;

        if (y == 0)
                x = 0;
        else if (ism == HT_INFINITY)
                x = HT_INFINITY;
        else {
                x = (y >> ISM_SHIFT) * ism
                    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
        }
        return (x);
}
static inline u_int64_t
m2sm(u_int64_t m)
{
        u_int64_t sm;

        sm = (m << SM_SHIFT) / 8 / machclk_freq;
        return (sm);
}

static inline u_int64_t
m2ism(u_int64_t m)
{
        u_int64_t ism;

        if (m == 0)
                ism = HT_INFINITY;
        else
                ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m;
        return (ism);
}

static inline u_int64_t
d2dx(u_int64_t d)
{
        u_int64_t dx;

        dx = (d * machclk_freq) / 1000;
        return (dx);
}

static u_int64_t
sm2m(u_int64_t sm)
{
        u_int64_t m;

        m = (sm * 8 * machclk_freq) >> SM_SHIFT;
        return (m);
}

static u_int64_t
dx2d(u_int64_t dx)
{
        u_int64_t d;

        d = dx * 1000 / machclk_freq;
        return (d);
}
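/*
 * Illustrative worked example (not part of the original source), assuming
 * machclk_freq = 1 GHz, for m = 100 Mbps:
 *
 *	m2sm:  (100e6 << 24) / 8 / 1e9  = 209,715  (~0.0125 bytes/tick)
 *	m2ism: (1e9 << 10) * 8 / 100e6  = 81,920   (80 ticks/byte << 10)
 *	d2dx:  d = 30 msec -> 30 * 1e9 / 1000 = 30,000,000 ticks
 *
 * sm2m() and dx2d() invert the first and third conversions for export via
 * hfsc_get_class_stats().
 */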
static boolean_t
sc2isc(struct hfsc_class *cl, struct service_curve *sc, struct internal_sc *isc,
    u_int64_t eff_rate)
{
        struct hfsc_if *hif = cl->cl_hif;
        struct internal_sc oisc = *isc;
        u_int64_t m1, m2;

        if (eff_rate == 0 && (sc->fl & (HFSCF_M1_PCT | HFSCF_M2_PCT))) {
                /*
                 * If service curve is configured with percentage and the
                 * effective uplink rate is not known, assume this is a
                 * transient case, and that the rate will be updated in
                 * the near future via CLASSQ_EV_LINK_SPEED.  Pick a
                 * reasonable number for now, e.g. 10 Mbps.
                 */
                eff_rate = (10 * 1000 * 1000);

                log(LOG_WARNING, "%s: %s qid=%d slot=%d eff_rate unknown; "
                    "using temporary rate %llu bps\n", if_name(HFSCIF_IFP(hif)),
                    hfsc_style(hif), cl->cl_handle, cl->cl_id, eff_rate);
        }

        m1 = sc->m1;
        if (sc->fl & HFSCF_M1_PCT) {
                VERIFY(m1 > 0 && m1 <= 100);
                m1 = (eff_rate * m1) / 100;
        }

        m2 = sc->m2;
        if (sc->fl & HFSCF_M2_PCT) {
                VERIFY(m2 > 0 && m2 <= 100);
                m2 = (eff_rate * m2) / 100;
        }

        isc->sm1 = m2sm(m1);
        isc->ism1 = m2ism(m1);
        isc->dx = d2dx(sc->d);
        isc->dy = seg_x2y(isc->dx, isc->sm1);
        isc->sm2 = m2sm(m2);
        isc->ism2 = m2ism(m2);

        /* return non-zero if there's any change */
        return (bcmp(&oisc, isc, sizeof (*isc)));
}
/*
 * initialize the runtime service curve with the given internal
 * service curve starting at (x, y).
 */
static void
rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
    u_int64_t y)
{
        rtsc->x = x;
        rtsc->y = y;
        rtsc->sm1 = isc->sm1;
        rtsc->ism1 = isc->ism1;
        rtsc->dx = isc->dx;
        rtsc->dy = isc->dy;
        rtsc->sm2 = isc->sm2;
        rtsc->ism2 = isc->ism2;
}
/*
 * calculate the y-projection of the runtime service curve by the
 * given x-projection value
 */
static u_int64_t
rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
{
        u_int64_t x;

        if (y < rtsc->y)
                x = rtsc->x;
        else if (y <= rtsc->y + rtsc->dy) {
                /* x belongs to the 1st segment */
                if (rtsc->dy == 0)
                        x = rtsc->x + rtsc->dx;
                else
                        x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
        } else {
                /* x belongs to the 2nd segment */
                x = rtsc->x + rtsc->dx
                    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
        }
        return (x);
}
static u_int64_t
rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
{
        u_int64_t y;

        if (x <= rtsc->x)
                y = rtsc->y;
        else if (x <= rtsc->x + rtsc->dx)
                /* y belongs to the 1st segment */
                y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
        else
                /* y belongs to the 2nd segment */
                y = rtsc->y + rtsc->dy
                    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
        return (y);
}
/*
 * update the runtime service curve by taking the minimum of the current
 * runtime service curve and the service curve starting at (x, y).
 */
static void
rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
    u_int64_t y)
{
        u_int64_t y1, y2, dx, dy;

        if (isc->sm1 <= isc->sm2) {
                /* service curve is convex */
                y1 = rtsc_x2y(rtsc, x);
                if (y1 < y)
                        /* the current rtsc is smaller */
                        return;
                rtsc->x = x;
                rtsc->y = y;
                return;
        }

        /*
         * service curve is concave
         * compute the two y values of the current rtsc
         *	y1: at x
         *	y2: at (x + dx)
         */
        y1 = rtsc_x2y(rtsc, x);
        if (y1 <= y) {
                /* rtsc is below isc, no change to rtsc */
                return;
        }

        y2 = rtsc_x2y(rtsc, x + isc->dx);
        if (y2 >= y + isc->dy) {
                /* rtsc is above isc, replace rtsc by isc */
                rtsc->x = x;
                rtsc->y = y;
                rtsc->dx = isc->dx;
                rtsc->dy = isc->dy;
                return;
        }

        /*
         * the two curves intersect
         * compute the offsets (dx, dy) using the reverse
         * function of seg_x2y()
         *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
         */
        dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
        /*
         * check if (x, y1) belongs to the 1st segment of rtsc.
         * if so, add the offset.
         */
        if (rtsc->x + rtsc->dx > x)
                dx += rtsc->x + rtsc->dx - x;
        dy = seg_x2y(dx, isc->sm1);

        rtsc->x = x;
        rtsc->y = y;
        rtsc->dx = dx;
        rtsc->dy = dy;
}
int
hfsc_get_class_stats(struct hfsc_if *hif, u_int32_t qid,
    struct hfsc_classstats *sp)
{
        struct hfsc_class *cl;

        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        if ((cl = hfsc_clh_to_clp(hif, qid)) == NULL)
                return (EINVAL);

        sp->class_id = cl->cl_id;
        sp->class_handle = cl->cl_handle;

        if (cl->cl_flags & HFCF_RSC) {
                sp->rsc.m1 = sm2m(cl->cl_rsc.sm1);
                sp->rsc.d = dx2d(cl->cl_rsc.dx);
                sp->rsc.m2 = sm2m(cl->cl_rsc.sm2);
        } else {
                sp->rsc.m1 = 0;
                sp->rsc.d = 0;
                sp->rsc.m2 = 0;
        }
        if (cl->cl_flags & HFCF_FSC) {
                sp->fsc.m1 = sm2m(cl->cl_fsc.sm1);
                sp->fsc.d = dx2d(cl->cl_fsc.dx);
                sp->fsc.m2 = sm2m(cl->cl_fsc.sm2);
        } else {
                sp->fsc.m1 = 0;
                sp->fsc.d = 0;
                sp->fsc.m2 = 0;
        }
        if (cl->cl_flags & HFCF_USC) {
                sp->usc.m1 = sm2m(cl->cl_usc.sm1);
                sp->usc.d = dx2d(cl->cl_usc.dx);
                sp->usc.m2 = sm2m(cl->cl_usc.sm2);
        } else {
                sp->usc.m1 = 0;
                sp->usc.d = 0;
                sp->usc.m2 = 0;
        }

        sp->total = cl->cl_total;
        sp->cumul = cl->cl_cumul;

        sp->d = cl->cl_d;
        sp->e = cl->cl_e;
        sp->vt = cl->cl_vt;
        sp->f = cl->cl_f;

        sp->initvt = cl->cl_initvt;
        sp->vtperiod = cl->cl_vtperiod;
        sp->parentperiod = cl->cl_parentperiod;
        sp->nactive = cl->cl_nactive;
        sp->vtoff = cl->cl_vtoff;
        sp->cvtmax = cl->cl_cvtmax;
        sp->myf = cl->cl_myf;
        sp->cfmin = cl->cl_cfmin;
        sp->cvtmin = cl->cl_cvtmin;
        sp->myfadj = cl->cl_myfadj;
        sp->vtadj = cl->cl_vtadj;

        sp->cur_time = read_machclk();
        sp->machclk_freq = machclk_freq;

        sp->qlength = qlen(&cl->cl_q);
        sp->qlimit = qlimit(&cl->cl_q);
        sp->xmit_cnt = cl->cl_stats.xmit_cnt;
        sp->drop_cnt = cl->cl_stats.drop_cnt;
        sp->period = cl->cl_stats.period;

        sp->qtype = qtype(&cl->cl_q);
        sp->qstate = qstate(&cl->cl_q);

#if CLASSQ_RED
        if (q_is_red(&cl->cl_q))
                red_getstats(cl->cl_red, &sp->red[0]);
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
        if (q_is_rio(&cl->cl_q))
                rio_getstats(cl->cl_rio, &sp->red[0]);
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
        if (q_is_blue(&cl->cl_q))
                blue_getstats(cl->cl_blue, &sp->blue);
#endif /* CLASSQ_BLUE */
        if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
                sfb_getstats(cl->cl_sfb, &sp->sfb);

        return (0);
}
/* convert a class handle to the corresponding class pointer */
static struct hfsc_class *
hfsc_clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
{
        u_int32_t i;
        struct hfsc_class *cl;

        IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

        /*
         * first, try optimistically the slot matching the lower bits of
         * the handle.  if it fails, do the linear table search.
         */
        i = chandle % hif->hif_maxclasses;
        if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
                return (cl);
        for (i = 0; i < hif->hif_maxclasses; i++)
                if ((cl = hif->hif_class_tbl[i]) != NULL &&
                    cl->cl_handle == chandle)
                        return (cl);
        return (NULL);
}
static const char *
hfsc_style(struct hfsc_if *hif)
{
        return ((hif->hif_flags & HFSCIFF_ALTQ) ? "ALTQ_HFSC" : "HFSC");
}
int
hfsc_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
#pragma unused(ifq, flags)
        return (ENXIO);         /* not yet */
}
int
hfsc_teardown_ifclassq(struct ifclassq *ifq)
{
        struct hfsc_if *hif = ifq->ifcq_disc;
        int i;

        IFCQ_LOCK_ASSERT_HELD(ifq);
        VERIFY(hif != NULL && ifq->ifcq_type == PKTSCHEDT_HFSC);

        (void) hfsc_destroy_locked(hif);

        ifq->ifcq_disc = NULL;
        for (i = 0; i < IFCQ_SC_MAX; i++) {
                ifq->ifcq_disc_slots[i].qid = 0;
                ifq->ifcq_disc_slots[i].cl = NULL;
        }

        return (ifclassq_detach(ifq));
}
int
hfsc_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
    struct if_ifclassq_stats *ifqs)
{
        struct hfsc_if *hif = ifq->ifcq_disc;

        IFCQ_LOCK_ASSERT_HELD(ifq);
        VERIFY(ifq->ifcq_type == PKTSCHEDT_HFSC);

        if (slot >= IFCQ_SC_MAX)
                return (EINVAL);

        return (hfsc_get_class_stats(hif, ifq->ifcq_disc_slots[slot].qid,
            &ifqs->ifqs_hfsc_stats));
}
#endif /* PKTSCHED_HFSC */