/*
 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* $OpenBSD: altq_hfsc.c,v 1.25 2007/09/13 20:40:02 chl Exp $ */
/* $KAME: altq_hfsc.c,v 1.17 2002/11/29 07:48:33 kjc Exp $ */
/*
 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation is hereby granted (including for commercial or
 * for-profit use), provided that both the copyright notice and this
 * permission notice appear in all copies of the software, derivative
 * works, or modified versions, and any portions thereof.
 *
 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
 * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * Carnegie Mellon encourages (but does not require) users of this
 * software to return any improvements or extensions that they make,
 * and to grant Carnegie Mellon the rights to redistribute these
 * changes without encumbrance.
 */
/*
 * H-FSC is described in Proceedings of SIGCOMM'97,
 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
 * Real-Time and Priority Service"
 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
 *
 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
 * when a class has an upperlimit, the fit-time is computed from the
 * upperlimit service curve.  the link-sharing scheduler does not schedule
 * a class whose fit-time exceeds the current time.
 */
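
/*
 * Illustrative sketch, not part of the original file: a service curve is
 * the two-piece linear function (m1, d, m2) -- slope m1 for the first d
 * milliseconds of a backlogged period, slope m2 afterwards.  Field names
 * follow struct service_curve as used throughout this file (m1/d/m2/fl);
 * the example values are arbitrary.
 */
#if 0
	struct service_curve example_sc = {
		.fl = 0,			/* no percentage flags */
		.m1 = 2 * 1000 * 1000,		/* 2 Mbps burst rate... */
		.d  = 50,			/* ...for the first 50 msec */
		.m2 = 1 * 1000 * 1000,		/* 1 Mbps sustained rate */
	};
#endif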
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>

#include <net/pktsched/pktsched_hfsc.h>
#include <netinet/in.h>
#if PKTSCHED_HFSC

static int hfsc_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
static struct mbuf *hfsc_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
static int hfsc_request_ifclassq(struct ifclassq *, cqrq_t, void *);

static int hfsc_addq(struct hfsc_class *, struct mbuf *, struct pf_mtag *);
static struct mbuf *hfsc_getq(struct hfsc_class *);
static struct mbuf *hfsc_pollq(struct hfsc_class *);
static void hfsc_purgeq(struct hfsc_if *, struct hfsc_class *, u_int32_t,
    u_int32_t *, u_int32_t *);
static void hfsc_print_sc(struct hfsc_if *, u_int32_t, u_int64_t,
    struct service_curve *, struct internal_sc *, const char *);
static void hfsc_updateq_linkrate(struct hfsc_if *, struct hfsc_class *);
static void hfsc_updateq(struct hfsc_if *, struct hfsc_class *, cqev_t);

static int hfsc_clear_interface(struct hfsc_if *);
static struct hfsc_class *hfsc_class_create(struct hfsc_if *,
    struct service_curve *, struct service_curve *, struct service_curve *,
    struct hfsc_class *, u_int32_t, int, u_int32_t);
static int hfsc_class_destroy(struct hfsc_if *, struct hfsc_class *);
static int hfsc_destroy_locked(struct hfsc_if *);
static struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
static struct hfsc_class *hfsc_clh_to_clp(struct hfsc_if *, u_int32_t);
static const char *hfsc_style(struct hfsc_if *);

static void set_active(struct hfsc_class *, u_int32_t);
static void set_passive(struct hfsc_class *);

static void init_ed(struct hfsc_class *, u_int32_t);
static void update_ed(struct hfsc_class *, u_int32_t);
static void update_d(struct hfsc_class *, u_int32_t);
static void init_vf(struct hfsc_class *, u_int32_t);
static void update_vf(struct hfsc_class *, u_int32_t, u_int64_t);
static void update_cfmin(struct hfsc_class *);
static void ellist_insert(struct hfsc_class *);
static void ellist_remove(struct hfsc_class *);
static void ellist_update(struct hfsc_class *);
static struct hfsc_class *ellist_get_mindl(ellist_t *, u_int64_t);
static void actlist_insert(struct hfsc_class *);
static void actlist_remove(struct hfsc_class *);
static void actlist_update(struct hfsc_class *);
static struct hfsc_class *actlist_firstfit(struct hfsc_class *, u_int64_t);

static inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
static inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
static inline u_int64_t m2sm(u_int64_t);
static inline u_int64_t m2ism(u_int64_t);
static inline u_int64_t d2dx(u_int64_t);
static u_int64_t sm2m(u_int64_t);
static u_int64_t dx2d(u_int64_t);

static boolean_t sc2isc(struct hfsc_class *, struct service_curve *,
    struct internal_sc *, u_int64_t);
static void rtsc_init(struct runtime_sc *, struct internal_sc *,
    u_int64_t, u_int64_t);
static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t);
static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t);
static void rtsc_min(struct runtime_sc *, struct internal_sc *,
    u_int64_t, u_int64_t);
#define	HFSC_ZONE_MAX	32		/* maximum elements in zone */
#define	HFSC_ZONE_NAME	"pktsched_hfsc"	/* zone name */

static unsigned int hfsc_size;		/* size of zone element */
static struct zone *hfsc_zone;		/* zone for hfsc_if */

#define	HFSC_CL_ZONE_MAX	32	/* maximum elements in zone */
#define	HFSC_CL_ZONE_NAME	"pktsched_hfsc_cl" /* zone name */

static unsigned int hfsc_cl_size;	/* size of zone element */
static struct zone *hfsc_cl_zone;	/* zone for hfsc_class */
#define	HFSC_IS_A_PARENT_CLASS(cl)	((cl)->cl_children != NULL)

#define	HT_INFINITY	0xffffffffffffffffLL	/* infinite time value */
void
hfsc_init(void)
{
	hfsc_size = sizeof (struct hfsc_if);
	hfsc_zone = zinit(hfsc_size, HFSC_ZONE_MAX * hfsc_size,
	    0, HFSC_ZONE_NAME);
	if (hfsc_zone == NULL) {
		panic("%s: failed allocating %s", __func__, HFSC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(hfsc_zone, Z_EXPAND, TRUE);
	zone_change(hfsc_zone, Z_CALLERACCT, TRUE);

	hfsc_cl_size = sizeof (struct hfsc_class);
	hfsc_cl_zone = zinit(hfsc_cl_size, HFSC_CL_ZONE_MAX * hfsc_cl_size,
	    0, HFSC_CL_ZONE_NAME);
	if (hfsc_cl_zone == NULL) {
		panic("%s: failed allocating %s", __func__, HFSC_CL_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(hfsc_cl_zone, Z_EXPAND, TRUE);
	zone_change(hfsc_cl_zone, Z_CALLERACCT, TRUE);
}
struct hfsc_if *
hfsc_alloc(struct ifnet *ifp, int how, boolean_t altq)
{
	struct hfsc_if *hif;

	hif = (how == M_WAITOK) ? zalloc(hfsc_zone) : zalloc_noblock(hfsc_zone);
	if (hif == NULL)
		return (NULL);

	bzero(hif, hfsc_size);
	TAILQ_INIT(&hif->hif_eligible);
	hif->hif_ifq = &ifp->if_snd;
	if (altq) {
		hif->hif_maxclasses = HFSC_MAX_CLASSES;
		hif->hif_flags |= HFSCIFF_ALTQ;
	} else {
		hif->hif_maxclasses = IFCQ_SC_MAX + 1;	/* incl. root class */
	}

	if ((hif->hif_class_tbl = _MALLOC(sizeof (struct hfsc_class *) *
	    hif->hif_maxclasses, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
		log(LOG_ERR, "%s: %s unable to allocate class table array\n",
		    if_name(ifp), hfsc_style(hif));
		goto error;
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler allocated\n",
		    if_name(ifp), hfsc_style(hif));
	}

	return (hif);

error:
	if (hif->hif_class_tbl != NULL) {
		_FREE(hif->hif_class_tbl, M_DEVBUF);
		hif->hif_class_tbl = NULL;
	}
	zfree(hfsc_zone, hif);

	return (NULL);
}
int
hfsc_destroy(struct hfsc_if *hif)
{
	struct ifclassq *ifq = hif->hif_ifq;
	int err;

	IFCQ_LOCK(ifq);
	err = hfsc_destroy_locked(hif);
	IFCQ_UNLOCK(ifq);

	return (err);
}
static int
hfsc_destroy_locked(struct hfsc_if *hif)
{
	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	(void) hfsc_clear_interface(hif);
	(void) hfsc_class_destroy(hif, hif->hif_rootclass);

	VERIFY(hif->hif_class_tbl != NULL);
	_FREE(hif->hif_class_tbl, M_DEVBUF);
	hif->hif_class_tbl = NULL;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
		    if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
	}

	zfree(hfsc_zone, hif);

	return (0);
}
/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes except the root class.
 */
static int
hfsc_clear_interface(struct hfsc_if *hif)
{
	struct hfsc_class *cl;

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	/* clear out the classes */
	while (hif->hif_rootclass != NULL &&
	    (cl = hif->hif_rootclass->cl_children) != NULL) {
		/*
		 * remove the first leaf class found in the hierarchy
		 * then start over
		 */
		for (; cl != NULL; cl = hfsc_nextclass(cl)) {
			if (!HFSC_IS_A_PARENT_CLASS(cl)) {
				(void) hfsc_class_destroy(hif, cl);
				break;
			}
		}
	}

	return (0);
}
/* discard all the queued packets on the interface */
void
hfsc_purge(struct hfsc_if *hif)
{
	struct hfsc_class *cl;

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) {
		if (!qempty(&cl->cl_q))
			hfsc_purgeq(hif, cl, 0, NULL, NULL);
	}
#if !PF_ALTQ
	/*
	 * This assertion is safe to be made only when PF_ALTQ is not
	 * configured; otherwise, IFCQ_LEN represents the sum of the
	 * packets managed by ifcq_disc and altq_disc instances, which
	 * is possible when transitioning between the two.
	 */
	VERIFY(IFCQ_LEN(hif->hif_ifq) == 0);
#endif /* !PF_ALTQ */
}
void
hfsc_event(struct hfsc_if *hif, cqev_t ev)
{
	struct hfsc_class *cl;

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
		hfsc_updateq(hif, cl, ev);
}
int
hfsc_add_queue(struct hfsc_if *hif, struct service_curve *rtsc,
    struct service_curve *lssc, struct service_curve *ulsc,
    u_int32_t qlimit, int flags, u_int32_t parent_qid, u_int32_t qid,
    struct hfsc_class **clp)
{
	struct hfsc_class *cl = NULL, *parent;

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	if (parent_qid == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL)
		parent = NULL;
	else if ((parent = hfsc_clh_to_clp(hif, parent_qid)) == NULL)
		return (EINVAL);

	if (hfsc_clh_to_clp(hif, qid) != NULL)
		return (EBUSY);

	cl = hfsc_class_create(hif, rtsc, lssc, ulsc, parent,
	    qlimit, flags, qid);
	if (cl == NULL)
		return (ENOMEM);

	if (clp != NULL)
		*clp = cl;

	return (0);
}
static struct hfsc_class *
hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
    struct service_curve *fsc, struct service_curve *usc,
    struct hfsc_class *parent, u_int32_t qlimit, int flags, u_int32_t qid)
{
	struct ifnet *ifp;
	struct ifclassq *ifq;
	struct hfsc_class *cl, *p;
	u_int64_t eff_rate;
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	/* Sanitize flags unless internally configured */
	if (hif->hif_flags & HFSCIFF_ALTQ)
		flags &= HFCF_USERFLAGS;

	if (hif->hif_classes >= hif->hif_maxclasses) {
		log(LOG_ERR, "%s: %s out of classes! (max %d)\n",
		    if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
		    hif->hif_maxclasses);
		return (NULL);
	}

#if !CLASSQ_RED
	if (flags & HFCF_RED) {
		log(LOG_ERR, "%s: %s RED not available!\n",
		    if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
		return (NULL);
	}
#endif /* !CLASSQ_RED */

#if !CLASSQ_RIO
	if (flags & HFCF_RIO) {
		log(LOG_ERR, "%s: %s RIO not available!\n",
		    if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
		return (NULL);
	}
#endif /* CLASSQ_RIO */

#if !CLASSQ_BLUE
	if (flags & HFCF_BLUE) {
		log(LOG_ERR, "%s: %s BLUE not available!\n",
		    if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
		return (NULL);
	}
#endif /* CLASSQ_BLUE */

	/* These are mutually exclusive */
	if ((flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) &&
	    (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) != HFCF_RED &&
	    (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) != HFCF_RIO &&
	    (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) != HFCF_BLUE &&
	    (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) != HFCF_SFB) {
		log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
		    if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
		return (NULL);
	}
	cl = zalloc(hfsc_cl_zone);
	if (cl == NULL)
		return (NULL);

	bzero(cl, hfsc_cl_size);
	TAILQ_INIT(&cl->cl_actc);
	ifq = hif->hif_ifq;
	ifp = HFSCIF_IFP(hif);

	if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
		qlimit = IFCQ_MAXLEN(ifq);
		if (qlimit == 0)
			qlimit = DEFAULT_QLIMIT;	/* use default */
	}
	_qinit(&cl->cl_q, Q_DROPTAIL, qlimit);

	cl->cl_flags = flags;
	if (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) {
#if CLASSQ_RED || CLASSQ_RIO
		int pkttime;
#endif /* CLASSQ_RED || CLASSQ_RIO */
		u_int64_t m2;

		m2 = 0;
		if (rsc != NULL && rsc->m2 > m2)
			m2 = rsc->m2;
		if (fsc != NULL && fsc->m2 > m2)
			m2 = fsc->m2;
		if (usc != NULL && usc->m2 > m2)
			m2 = usc->m2;

		cl->cl_qflags = 0;
		if (flags & HFCF_ECN) {
			if (flags & HFCF_BLUE)
				cl->cl_qflags |= BLUEF_ECN;
			else if (flags & HFCF_SFB)
				cl->cl_qflags |= SFBF_ECN;
			else if (flags & HFCF_RED)
				cl->cl_qflags |= REDF_ECN;
			else if (flags & HFCF_RIO)
				cl->cl_qflags |= RIOF_ECN;
		}
		if (flags & HFCF_FLOWCTL) {
			if (flags & HFCF_SFB)
				cl->cl_qflags |= SFBF_FLOWCTL;
		}
		if (flags & HFCF_CLEARDSCP) {
			if (flags & HFCF_RIO)
				cl->cl_qflags |= RIOF_CLEARDSCP;
		}
#if CLASSQ_RED || CLASSQ_RIO
		/*
		 * XXX: RED & RIO should be watching link speed and MTU
		 * events and recompute pkttime accordingly.
		 */
		if (m2 < 8)
			pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
			    (m2 / 8);

		/* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
#if CLASSQ_RED
		if (flags & HFCF_RED) {
			cl->cl_red = red_alloc(ifp, 0, 0,
			    qlimit(&cl->cl_q) * 10/100,
			    qlimit(&cl->cl_q) * 30/100,
			    cl->cl_qflags, pkttime);
			if (cl->cl_red != NULL)
				qtype(&cl->cl_q) = Q_RED;
		}
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
		if (flags & HFCF_RIO) {
			cl->cl_rio =
			    rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
			if (cl->cl_rio != NULL)
				qtype(&cl->cl_q) = Q_RIO;
		}
#endif /* CLASSQ_RIO */
#endif /* CLASSQ_RED || CLASSQ_RIO */
#if CLASSQ_BLUE
		if (flags & HFCF_BLUE) {
			cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
			if (cl->cl_blue != NULL)
				qtype(&cl->cl_q) = Q_BLUE;
		}
#endif /* CLASSQ_BLUE */
		if (flags & HFCF_SFB) {
			if (!(cl->cl_flags & HFCF_LAZY))
				cl->cl_sfb = sfb_alloc(ifp, qid,
				    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb != NULL || (cl->cl_flags & HFCF_LAZY))
				qtype(&cl->cl_q) = Q_SFB;
		}
	}
	cl->cl_id = hif->hif_classid++;
	cl->cl_handle = qid;
	cl->cl_hif = hif;
	cl->cl_parent = parent;

	eff_rate = ifnet_output_linkrate(HFSCIF_IFP(hif));
	hif->hif_eff_rate = eff_rate;

	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) &&
	    (!(rsc->fl & HFSCF_M1_PCT) || (rsc->m1 > 0 && rsc->m1 <= 100)) &&
	    (!(rsc->fl & HFSCF_M2_PCT) || (rsc->m2 > 0 && rsc->m2 <= 100))) {
		rsc->fl &= HFSCF_USERFLAGS;
		cl->cl_flags |= HFCF_RSC;
		cl->cl_rsc0 = *rsc;
		(void) sc2isc(cl, &cl->cl_rsc0, &cl->cl_rsc, eff_rate);
		rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0);
		rtsc_init(&cl->cl_eligible, &cl->cl_rsc, 0, 0);
	}
	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) &&
	    (!(fsc->fl & HFSCF_M1_PCT) || (fsc->m1 > 0 && fsc->m1 <= 100)) &&
	    (!(fsc->fl & HFSCF_M2_PCT) || (fsc->m2 > 0 && fsc->m2 <= 100))) {
		fsc->fl &= HFSCF_USERFLAGS;
		cl->cl_flags |= HFCF_FSC;
		cl->cl_fsc0 = *fsc;
		(void) sc2isc(cl, &cl->cl_fsc0, &cl->cl_fsc, eff_rate);
		rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0);
	}
	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) &&
	    (!(usc->fl & HFSCF_M1_PCT) || (usc->m1 > 0 && usc->m1 <= 100)) &&
	    (!(usc->fl & HFSCF_M2_PCT) || (usc->m2 > 0 && usc->m2 <= 100))) {
		usc->fl &= HFSCF_USERFLAGS;
		cl->cl_flags |= HFCF_USC;
		cl->cl_usc0 = *usc;
		(void) sc2isc(cl, &cl->cl_usc0, &cl->cl_usc, eff_rate);
		rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0);
	}
	/*
	 * find a free slot in the class table.  if the slot matching
	 * the lower bits of qid is free, use this slot.  otherwise,
	 * use the first free slot.
	 */
	i = qid % hif->hif_maxclasses;
	if (hif->hif_class_tbl[i] == NULL) {
		hif->hif_class_tbl[i] = cl;
	} else {
		for (i = 0; i < hif->hif_maxclasses; i++)
			if (hif->hif_class_tbl[i] == NULL) {
				hif->hif_class_tbl[i] = cl;
				break;
			}
		if (i == hif->hif_maxclasses) {
			goto err_ret;
		}
	}
	hif->hif_classes++;

	if (flags & HFCF_DEFAULTCLASS)
		hif->hif_defaultclass = cl;

	if (parent == NULL) {
		/* this is root class */
		hif->hif_rootclass = cl;
	} else {
		/* add this class to the children list of the parent */
		if ((p = parent->cl_children) == NULL)
			parent->cl_children = cl;
		else {
			while (p->cl_siblings != NULL)
				p = p->cl_siblings;
			p->cl_siblings = cl;
		}
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s created qid=%d pqid=%d qlimit=%d "
		    "flags=%b\n", if_name(ifp), hfsc_style(hif), cl->cl_handle,
		    (cl->cl_parent != NULL) ? cl->cl_parent->cl_handle : 0,
		    qlimit(&cl->cl_q), cl->cl_flags, HFCF_BITS);
		if (cl->cl_flags & HFCF_RSC) {
			hfsc_print_sc(hif, cl->cl_handle, eff_rate,
			    &cl->cl_rsc0, &cl->cl_rsc, "rsc");
		}
		if (cl->cl_flags & HFCF_FSC) {
			hfsc_print_sc(hif, cl->cl_handle, eff_rate,
			    &cl->cl_fsc0, &cl->cl_fsc, "fsc");
		}
		if (cl->cl_flags & HFCF_USC) {
			hfsc_print_sc(hif, cl->cl_handle, eff_rate,
			    &cl->cl_usc0, &cl->cl_usc, "usc");
		}
	}

	return (cl);
err_ret:
	if (cl->cl_qalg.ptr != NULL) {
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	}
	zfree(hfsc_cl_zone, cl);

	return (NULL);
}
int
hfsc_remove_queue(struct hfsc_if *hif, u_int32_t qid)
{
	struct hfsc_class *cl;

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	if ((cl = hfsc_clh_to_clp(hif, qid)) == NULL)
		return (EINVAL);

	return (hfsc_class_destroy(hif, cl));
}
static int
hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl)
{
	u_int32_t i;

	if (cl == NULL)
		return (0);

	if (HFSC_IS_A_PARENT_CLASS(cl))
		return (EBUSY);

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	if (!qempty(&cl->cl_q))
		hfsc_purgeq(hif, cl, 0, NULL, NULL);

	if (cl->cl_parent == NULL) {
		/* this is root class */
	} else {
		struct hfsc_class *p = cl->cl_parent->cl_children;

		if (p == cl)
			cl->cl_parent->cl_children = cl->cl_siblings;
		else do {
			if (p->cl_siblings == cl) {
				p->cl_siblings = cl->cl_siblings;
				break;
			}
		} while ((p = p->cl_siblings) != NULL);
		VERIFY(p != NULL);
	}

	for (i = 0; i < hif->hif_maxclasses; i++)
		if (hif->hif_class_tbl[i] == cl) {
			hif->hif_class_tbl[i] = NULL;
			break;
		}

	hif->hif_classes--;

	if (cl->cl_qalg.ptr != NULL) {
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	}

	if (cl == hif->hif_rootclass)
		hif->hif_rootclass = NULL;
	if (cl == hif->hif_defaultclass)
		hif->hif_defaultclass = NULL;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s destroyed qid=%d slot=%d\n",
		    if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
		    cl->cl_handle, cl->cl_id);
	}

	zfree(hfsc_cl_zone, cl);

	return (0);
}
/*
 * hfsc_nextclass returns the next class in the tree.
 *   usage:
 *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
 *		do_something;
 */
static struct hfsc_class *
hfsc_nextclass(struct hfsc_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);

	if (cl->cl_children != NULL)
		cl = cl->cl_children;
	else if (cl->cl_siblings != NULL)
		cl = cl->cl_siblings;
	else {
		while ((cl = cl->cl_parent) != NULL)
			if (cl->cl_siblings) {
				cl = cl->cl_siblings;
				break;
			}
	}

	return (cl);
}
int
hfsc_enqueue(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf *m,
    struct pf_mtag *t)
{
	struct ifclassq *ifq = hif->hif_ifq;
	u_int32_t len;
	int ret;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(cl == NULL || cl->cl_hif == hif);

	if (cl == NULL) {
#if PF_ALTQ
		cl = hfsc_clh_to_clp(hif, t->pftag_qid);
#else /* !PF_ALTQ */
		cl = hfsc_clh_to_clp(hif, 0);
#endif /* !PF_ALTQ */
		if (cl == NULL || HFSC_IS_A_PARENT_CLASS(cl)) {
			cl = hif->hif_defaultclass;
			if (cl == NULL) {
				IFCQ_CONVERT_LOCK(ifq);
				m_freem(m);
				return (ENOBUFS);
			}
		}
	}

	len = m_pktlen(m);

	ret = hfsc_addq(cl, m, t);
	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
		} else {
			VERIFY(ret == CLASSQEQ_DROPPED ||
			    ret == CLASSQEQ_DROPPED_FC ||
			    ret == CLASSQEQ_DROPPED_SP);
			/* packet has been freed in hfsc_addq */
			PKTCNTR_ADD(&cl->cl_stats.drop_cnt, 1, len);
			IFCQ_DROP_ADD(ifq, 1, len);
			switch (ret) {
			case CLASSQEQ_DROPPED:
				return (ENOBUFS);
			case CLASSQEQ_DROPPED_FC:
				return (EQFULL);
			case CLASSQEQ_DROPPED_SP:
				return (EQSUSPENDED);
			}
			/* NOT REACHED */
		}
	}
	IFCQ_INC_LEN(ifq);
	cl->cl_hif->hif_packets++;

	/* successfully queued. */
	if (qlen(&cl->cl_q) == 1)
		set_active(cl, len);

	return (ret);
}
/*
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
struct mbuf *
hfsc_dequeue(struct hfsc_if *hif, cqdq_op_t op)
{
	struct ifclassq *ifq = hif->hif_ifq;
	struct hfsc_class *cl;
	struct mbuf *m;
	u_int32_t len, next_len;
	u_int64_t cur_time;
	int realtime = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (hif->hif_packets == 0)
		/* no packet in the tree */
		return (NULL);

	cur_time = read_machclk();

	if (op == CLASSQDQ_REMOVE && hif->hif_pollcache != NULL) {
		cl = hif->hif_pollcache;
		hif->hif_pollcache = NULL;
		/* check if the class was scheduled by real-time criteria */
		if (cl->cl_flags & HFCF_RSC)
			realtime = (cl->cl_e <= cur_time);
	} else {
		/*
		 * if there are eligible classes, use real-time criteria.
		 * find the class with the minimum deadline among
		 * the eligible classes.
		 */
		if ((cl = ellist_get_mindl(&hif->hif_eligible, cur_time))
		    != NULL) {
			realtime = 1;
		} else {
			int fits = 0;
			/*
			 * use link-sharing criteria
			 * get the class with the minimum vt in the hierarchy
			 */
			cl = hif->hif_rootclass;
			while (HFSC_IS_A_PARENT_CLASS(cl)) {
				cl = actlist_firstfit(cl, cur_time);
				if (cl == NULL) {
					if (fits > 0)
						log(LOG_ERR, "%s: %s "
						    "%d fit but none found\n",
						    if_name(HFSCIF_IFP(hif)),
						    hfsc_style(hif), fits);
					return (NULL);
				}
				/*
				 * update parent's cl_cvtmin.
				 * don't update if the new vt is smaller.
				 */
				if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
					cl->cl_parent->cl_cvtmin = cl->cl_vt;
				fits++;
			}
		}

		if (op == CLASSQDQ_POLL) {
			hif->hif_pollcache = cl;
			m = hfsc_pollq(cl);
			return (m);
		}
	}

	m = hfsc_getq(cl);
	VERIFY(m != NULL);
	len = m_pktlen(m);
	cl->cl_hif->hif_packets--;
	IFCQ_DEC_LEN(ifq);
	IFCQ_XMIT_ADD(ifq, 1, len);
	PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, 1, len);

	update_vf(cl, len, cur_time);
	if (realtime)
		cl->cl_cumul += len;

	if (!qempty(&cl->cl_q)) {
		if (cl->cl_flags & HFCF_RSC) {
			/* update ed */
			next_len = m_pktlen(qhead(&cl->cl_q));

			if (realtime)
				update_ed(cl, next_len);
			else
				update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		set_passive(cl);
	}

	return (m);
}
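
/*
 * Illustrative sketch, not part of the original file: the poll/remove
 * contract above lets a caller peek at the head packet and then commit to
 * dequeueing the very same packet.  `driver_can_transmit' is a
 * hypothetical check, shown only to motivate the two-step pattern.
 */
#if 0
	struct mbuf *m = hfsc_dequeue(hif, CLASSQDQ_POLL);
	if (m != NULL && driver_can_transmit(m))
		m = hfsc_dequeue(hif, CLASSQDQ_REMOVE);	/* same packet */
#endif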
static int
hfsc_addq(struct hfsc_class *cl, struct mbuf *m, struct pf_mtag *t)
{
	struct ifclassq *ifq = cl->cl_hif->hif_ifq;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_addq(cl->cl_red, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb == NULL) {
			struct ifnet *ifp = HFSCIF_IFP(cl->cl_hif);

			VERIFY(cl->cl_flags & HFCF_LAZY);
			IFCQ_CONVERT_LOCK(ifq);

			cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
			    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb == NULL) {
				/* fall back to droptail */
				qtype(&cl->cl_q) = Q_DROPTAIL;
				cl->cl_flags &= ~HFCF_SFB;
				cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

				log(LOG_ERR, "%s: %s SFB lazy allocation "
				    "failed for qid=%d slot=%d, falling back "
				    "to DROPTAIL\n", if_name(ifp),
				    hfsc_style(cl->cl_hif), cl->cl_handle,
				    cl->cl_id);
			}
		}
		if (cl->cl_sfb != NULL)
			return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
	} else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (CLASSQEQ_DROPPED);
	}

	if (cl->cl_flags & HFCF_CLEARDSCP)
		write_dsfield(m, t, 0);

	_addq(&cl->cl_q, m);

	return (0);
}
static struct mbuf *
hfsc_getq(struct hfsc_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_getq(cl->cl_rio, &cl->cl_q));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_getq(cl->cl_red, &cl->cl_q));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_getq(cl->cl_blue, &cl->cl_q));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_getq(cl->cl_sfb, &cl->cl_q));

	return (_getq(&cl->cl_q));
}
static struct mbuf *
hfsc_pollq(struct hfsc_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);

	return (qhead(&cl->cl_q));
}
static void
hfsc_purgeq(struct hfsc_if *hif, struct hfsc_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
	struct ifclassq *ifq = hif->hif_ifq;
	u_int32_t cnt = 0, len = 0, qlen;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if ((qlen = qlen(&cl->cl_q)) == 0) {
		VERIFY(hif->hif_packets == 0);
		goto done;
	}

	/* become regular mutex before freeing mbufs */
	IFCQ_CONVERT_LOCK(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
	else
		_flushq_flow(&cl->cl_q, flow, &cnt, &len);

	if (cnt > 0) {
		VERIFY(qlen(&cl->cl_q) == (qlen - cnt));

		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, cnt, len);
		IFCQ_DROP_ADD(ifq, cnt, len);

		VERIFY(hif->hif_packets >= cnt);
		hif->hif_packets -= cnt;

		VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
		IFCQ_LEN(ifq) -= cnt;

		if (qempty(&cl->cl_q)) {
			update_vf(cl, 0, 0);	/* remove cl from the actlist */
			set_passive(cl);
		}

		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s purge qid=%d slot=%d "
			    "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
			    if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
			    cl->cl_handle, cl->cl_id, qlen, qlen(&cl->cl_q),
			    cnt, len, flow);
		}
	}
done:
	if (packets != NULL)
		*packets = cnt;
	if (bytes != NULL)
		*bytes = len;
}
static void
hfsc_print_sc(struct hfsc_if *hif, u_int32_t qid, u_int64_t eff_rate,
    struct service_curve *sc, struct internal_sc *isc, const char *which)
{
	struct ifnet *ifp = HFSCIF_IFP(hif);

	log(LOG_DEBUG, "%s: %s qid=%d {%s_m1=%llu%s [%llu], "
	    "%s_d=%u msec, %s_m2=%llu%s [%llu]} linkrate=%llu bps\n",
	    if_name(ifp), hfsc_style(hif), qid,
	    which, sc->m1, (sc->fl & HFSCF_M1_PCT) ? "%" : " bps", isc->sm1,
	    which, sc->d,
	    which, sc->m2, (sc->fl & HFSCF_M2_PCT) ? "%" : " bps", isc->sm2,
	    eff_rate);
}
static void
hfsc_updateq_linkrate(struct hfsc_if *hif, struct hfsc_class *cl)
{
	u_int64_t eff_rate = ifnet_output_linkrate(HFSCIF_IFP(hif));
	struct service_curve *sc;
	struct internal_sc *isc;

	/* Update parameters only if rate has changed */
	if (eff_rate == hif->hif_eff_rate)
		return;

	sc = &cl->cl_rsc0;
	isc = &cl->cl_rsc;
	if ((cl->cl_flags & HFCF_RSC) && sc2isc(cl, sc, isc, eff_rate)) {
		rtsc_init(&cl->cl_deadline, isc, 0, 0);
		rtsc_init(&cl->cl_eligible, isc, 0, 0);
		if (pktsched_verbose) {
			hfsc_print_sc(hif, cl->cl_handle, eff_rate,
			    sc, isc, "rsc");
		}
	}
	sc = &cl->cl_fsc0;
	isc = &cl->cl_fsc;
	if ((cl->cl_flags & HFCF_FSC) && sc2isc(cl, sc, isc, eff_rate)) {
		rtsc_init(&cl->cl_virtual, isc, 0, 0);
		if (pktsched_verbose) {
			hfsc_print_sc(hif, cl->cl_handle, eff_rate,
			    sc, isc, "fsc");
		}
	}
	sc = &cl->cl_usc0;
	isc = &cl->cl_usc;
	if ((cl->cl_flags & HFCF_USC) && sc2isc(cl, sc, isc, eff_rate)) {
		rtsc_init(&cl->cl_ulimit, isc, 0, 0);
		if (pktsched_verbose) {
			hfsc_print_sc(hif, cl->cl_handle, eff_rate,
			    sc, isc, "usc");
		}
	}
}
static void
hfsc_updateq(struct hfsc_if *hif, struct hfsc_class *cl, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s update qid=%d slot=%d event=%s\n",
		    if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
		    cl->cl_handle, cl->cl_id, ifclassq_ev2str(ev));
	}

	if (ev == CLASSQ_EV_LINK_BANDWIDTH)
		hfsc_updateq_linkrate(hif, cl);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_updateq(cl->cl_rio, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_updateq(cl->cl_red, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_updateq(cl->cl_blue, ev));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_updateq(cl->cl_sfb, ev));
}
static void
set_active(struct hfsc_class *cl, u_int32_t len)
{
	if (cl->cl_flags & HFCF_RSC)
		init_ed(cl, len);
	if (cl->cl_flags & HFCF_FSC)
		init_vf(cl, len);

	cl->cl_stats.period++;
}

static void
set_passive(struct hfsc_class *cl)
{
	if (cl->cl_flags & HFCF_RSC)
		ellist_remove(cl);

	/*
	 * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
	 * needs to be called explicitly to remove a class from actlist
	 */
}
static void
init_ed(struct hfsc_class *cl, u_int32_t next_len)
{
	u_int64_t cur_time;

	cur_time = read_machclk();

	/* update the deadline curve */
	rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);

	/*
	 * update the eligible curve.
	 * for concave, it is equal to the deadline curve.
	 * for convex, it is a linear curve with slope m2.
	 */
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}

	/* compute e and d */
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	ellist_insert(cl);
}
static void
update_ed(struct hfsc_class *cl, u_int32_t next_len)
{
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	ellist_update(cl);
}

static void
update_d(struct hfsc_class *cl, u_int32_t next_len)
{
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}
static void
init_vf(struct hfsc_class *cl, u_int32_t len)
{
#pragma unused(len)
	struct hfsc_class *max_cl, *p;
	u_int64_t vt, f, cur_time;
	int go_active;

	cur_time = 0;
	go_active = 1;
	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
		if (go_active && cl->cl_nactive++ == 0)
			go_active = 1;
		else
			go_active = 0;

		if (go_active) {
			max_cl = actlist_last(&cl->cl_parent->cl_actc);
			if (max_cl != NULL) {
				/*
				 * set vt to the average of the min and max
				 * classes.  if the parent's period didn't
				 * change, don't decrease vt of the class.
				 */
				vt = max_cl->cl_vt;
				if (cl->cl_parent->cl_cvtmin != 0)
					vt = (cl->cl_parent->cl_cvtmin + vt)/2;

				if (cl->cl_parent->cl_vtperiod !=
				    cl->cl_parentperiod || vt > cl->cl_vt)
					cl->cl_vt = vt;
			} else {
				/*
				 * first child for a new parent backlog period.
				 * add parent's cvtmax to vtoff of children
				 * to make a new vt (vtoff + vt) larger than
				 * the vt in the last period for all children.
				 */
				vt = cl->cl_parent->cl_cvtmax;
				for (p = cl->cl_parent->cl_children; p != NULL;
				    p = p->cl_siblings)
					p->cl_vtoff += vt;
				cl->cl_vt = 0;
				cl->cl_parent->cl_cvtmax = 0;
				cl->cl_parent->cl_cvtmin = 0;
			}
			cl->cl_initvt = cl->cl_vt;

			/* update the virtual curve */
			vt = cl->cl_vt + cl->cl_vtoff;
			rtsc_min(&cl->cl_virtual, &cl->cl_fsc,
			    vt, cl->cl_total);
			if (cl->cl_virtual.x == vt) {
				cl->cl_virtual.x -= cl->cl_vtoff;
				cl->cl_vtoff = 0;
			}
			cl->cl_vtadj = 0;

			cl->cl_vtperiod++;  /* increment vt period */
			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
			if (cl->cl_parent->cl_nactive == 0)
				cl->cl_parentperiod++;
			cl->cl_f = 0;

			actlist_insert(cl);

			if (cl->cl_flags & HFCF_USC) {
				/* class has upper limit curve */
				if (cur_time == 0)
					cur_time = read_machclk();

				/* update the ulimit curve */
				rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
				    cl->cl_total);
				/* compute myf */
				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
				    cl->cl_total);
				cl->cl_myfadj = 0;
			}
		}

		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			update_cfmin(cl->cl_parent);
		}
	}
}
static void
update_vf(struct hfsc_class *cl, u_int32_t len, u_int64_t cur_time)
{
#pragma unused(cur_time)
#if 0
	u_int64_t myf_bound, delta;
#endif
	u_int64_t f;
	int go_passive;

	go_passive = (qempty(&cl->cl_q) && (cl->cl_flags & HFCF_FSC));

	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
		cl->cl_total += len;

		if (!(cl->cl_flags & HFCF_FSC) || cl->cl_nactive == 0)
			continue;

		if (go_passive && --cl->cl_nactive == 0)
			go_passive = 1;
		else
			go_passive = 0;

		if (go_passive) {
			/* no more active child, going passive */

			/* update cvtmax of the parent class */
			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
				cl->cl_parent->cl_cvtmax = cl->cl_vt;

			/* remove this class from the vt list */
			actlist_remove(cl);

			update_cfmin(cl->cl_parent);

			continue;
		}

		/*
		 * update vt and f
		 */
		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
		    - cl->cl_vtoff + cl->cl_vtadj;

		/*
		 * if vt of the class is smaller than cvtmin,
		 * the class was skipped in the past due to non-fit.
		 * if so, we need to adjust vtadj.
		 */
		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
			cl->cl_vt = cl->cl_parent->cl_cvtmin;
		}

		/* update the vt list */
		actlist_update(cl);

		if (cl->cl_flags & HFCF_USC) {
			cl->cl_myf = cl->cl_myfadj +
			    rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
#if 0
			/*
			 * if myf lags behind by more than one clock tick
			 * from the current time, adjust myfadj to prevent
			 * a rate-limited class from going greedy.
			 * in a steady state under rate-limiting, myf
			 * fluctuates within one clock tick.
			 */
			myf_bound = cur_time - machclk_per_tick;
			if (cl->cl_myf < myf_bound) {
				delta = cur_time - cl->cl_myf;
				cl->cl_myfadj += delta;
				cl->cl_myf += delta;
			}
#endif
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			update_cfmin(cl->cl_parent);
		}
	}
}
static void
update_cfmin(struct hfsc_class *cl)
{
	struct hfsc_class *p;
	u_int64_t cfmin;

	if (TAILQ_EMPTY(&cl->cl_actc)) {
		cl->cl_cfmin = 0;
		return;
	}
	cfmin = HT_INFINITY;
	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		if (p->cl_f == 0) {
			cl->cl_cfmin = 0;
			return;
		}
		if (p->cl_f < cfmin)
			cfmin = p->cl_f;
	}
	cl->cl_cfmin = cfmin;
}
/*
 * TAILQ based ellist and actlist implementation
 * (ion wanted to make a calendar queue based implementation)
 */
/*
 * eligible list holds backlogged classes being sorted by their eligible times.
 * there is one eligible list per interface.
 */
static void
ellist_insert(struct hfsc_class *cl)
{
	struct hfsc_if *hif = cl->cl_hif;
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&hif->hif_eligible, _eligible)) == NULL ||
	    p->cl_e <= cl->cl_e) {
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
	VERIFY(0); /* should not reach here */
}
static void
ellist_remove(struct hfsc_class *cl)
{
	struct hfsc_if *hif = cl->cl_hif;

	TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
}
static void
ellist_update(struct hfsc_class *cl)
{
	struct hfsc_if *hif = cl->cl_hif;
	struct hfsc_class *p, *last;

	/*
	 * the eligible time of a class increases monotonically.
	 * if the next entry has a larger eligible time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_ellist);
	if (p == NULL || cl->cl_e <= p->cl_e)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&hif->hif_eligible, _eligible);
	VERIFY(last != NULL);
	if (last->cl_e <= cl->cl_e) {
		TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
	VERIFY(0); /* should not reach here */
}
/* find the class with the minimum deadline among the eligible classes */
static struct hfsc_class *
ellist_get_mindl(ellist_t *head, u_int64_t cur_time)
{
	struct hfsc_class *p, *cl = NULL;

	TAILQ_FOREACH(p, head, cl_ellist) {
		if (p->cl_e > cur_time)
			break;
		if (cl == NULL || p->cl_d < cl->cl_d)
			cl = p;
	}
	return (cl);
}
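
/*
 * Illustrative example, not part of the original file: with three
 * backlogged classes A(e=10, d=40), B(e=20, d=30), C(e=50, d=25) and
 * cur_time = 25, only A and B are eligible (e <= cur_time); B is chosen
 * because it has the smaller deadline, even though A became eligible
 * first.  C is ignored until cur_time reaches 50.
 */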
/*
 * active children list holds backlogged child classes being sorted
 * by their virtual time.
 * each intermediate class has one active children list.
 */
static void
actlist_insert(struct hfsc_class *cl)
{
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, _active)) == NULL ||
	    p->cl_vt <= cl->cl_vt) {
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
	VERIFY(0); /* should not reach here */
}
static void
actlist_remove(struct hfsc_class *cl)
{
	TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
}
static void
actlist_update(struct hfsc_class *cl)
{
	struct hfsc_class *p, *last;

	/*
	 * the virtual time of a class increases monotonically during its
	 * backlogged period.
	 * if the next entry has a larger virtual time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_actlist);
	if (p == NULL || cl->cl_vt < p->cl_vt)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&cl->cl_parent->cl_actc, _active);
	VERIFY(last != NULL);
	if (last->cl_vt <= cl->cl_vt) {
		TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
	VERIFY(0); /* should not reach here */
}
static struct hfsc_class *
actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
{
	struct hfsc_class *p;

	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		if (p->cl_f <= cur_time)
			return (p);
	}
	return (NULL);
}
/*
 * service curve support functions
 *
 *  external service curve parameters
 *	m: bits/sec
 *	d: msec
 *  internal service curve parameters
 *	sm: (bytes/tsc_interval) << SM_SHIFT
 *	ism: (tsc_count/byte) << ISM_SHIFT
 *	dx: tsc count
 *
 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
 * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
 * speed.  SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
 * digits in decimal using the following table.
 *
 *  bits/sec    100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
 *  ----------+-------------------------------------------------------
 *  bytes/nsec  12.5e-6    125e-6     1250e-6    12500e-6   125000e-6
 *  sm(500MHz)  25.0e-6    250e-6     2500e-6    25000e-6   250000e-6
 *  sm(200MHz)  62.5e-6    625e-6     6250e-6    62500e-6   625000e-6
 *
 *  nsec/byte   80000      8000       800        80         8
 *  ism(500MHz) 40000      4000       400        40         4
 *  ism(200MHz) 16000      1600       160        16         1.6
 */
#define	SM_SHIFT	24
#define	ISM_SHIFT	10

#define	SM_MASK		((1LL << SM_SHIFT) - 1)
#define	ISM_MASK	((1LL << ISM_SHIFT) - 1)
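
/*
 * Illustrative arithmetic, not part of the original file, assuming a
 * 1 GHz machclk_freq: for m = 10 Mbps, m2sm() below computes
 *	sm = (1e7 << 24) / 8 / 1e9 ~= 20971	(1.25e-3 bytes/tick << 24)
 * and m2ism() computes
 *	ism = (1e9 << 10) * 8 / 1e7 = 819200	(800 ticks/byte << 10).
 * The shifts preserve roughly 3 significant decimal digits across the
 * 100 Kbps - 1 Gbps range shown in the table above.
 */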
static inline u_int64_t
seg_x2y(u_int64_t x, u_int64_t sm)
{
	u_int64_t y;

	/*
	 * compute
	 *	y = x * sm >> SM_SHIFT
	 * but divide it for the upper and lower bits to avoid overflow
	 */
	y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
	return (y);
}
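
/*
 * Illustrative note, not part of the original file: splitting x keeps the
 * intermediate products within 64 bits.  With h = x >> SM_SHIFT and
 * l = x & SM_MASK, x = (h << SM_SHIFT) + l, so
 *	(x * sm) >> SM_SHIFT == h * sm + ((l * sm) >> SM_SHIFT)
 * and only the low product l * sm (l < 2^SM_SHIFT) needs headroom, rather
 * than the full x * sm, which could overflow for large x.
 */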
static inline u_int64_t
seg_y2x(u_int64_t y, u_int64_t ism)
{
	u_int64_t x;

	if (y == 0)
		x = 0;
	else if (ism == HT_INFINITY)
		x = HT_INFINITY;
	else {
		x = (y >> ISM_SHIFT) * ism
		    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
	}
	return (x);
}
static inline u_int64_t
m2sm(u_int64_t m)
{
	u_int64_t sm;

	sm = (m << SM_SHIFT) / 8 / machclk_freq;
	return (sm);
}

static inline u_int64_t
m2ism(u_int64_t m)
{
	u_int64_t ism;

	if (m == 0)
		ism = HT_INFINITY;
	else
		ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m;
	return (ism);
}

static inline u_int64_t
d2dx(u_int64_t d)
{
	u_int64_t dx;

	dx = (d * machclk_freq) / 1000;
	return (dx);
}

static u_int64_t
sm2m(u_int64_t sm)
{
	u_int64_t m;

	m = (sm * 8 * machclk_freq) >> SM_SHIFT;
	return (m);
}

static u_int64_t
dx2d(u_int64_t dx)
{
	u_int64_t d;

	d = dx * 1000 / machclk_freq;
	return (d);
}
static boolean_t
sc2isc(struct hfsc_class *cl, struct service_curve *sc,
    struct internal_sc *isc, u_int64_t eff_rate)
{
	struct hfsc_if *hif = cl->cl_hif;
	struct internal_sc oisc = *isc;
	u_int64_t m1, m2;

	if (eff_rate == 0 && (sc->fl & (HFSCF_M1_PCT | HFSCF_M2_PCT))) {
		/*
		 * If service curve is configured with percentage and the
		 * effective uplink rate is not known, assume this is a
		 * transient case, and that the rate will be updated in
		 * the near future via CLASSQ_EV_LINK_SPEED.  Pick a
		 * reasonable number for now, e.g. 10 Mbps.
		 */
		eff_rate = (10 * 1000 * 1000);

		log(LOG_WARNING, "%s: %s qid=%d slot=%d eff_rate unknown; "
		    "using temporary rate %llu bps\n", if_name(HFSCIF_IFP(hif)),
		    hfsc_style(hif), cl->cl_handle, cl->cl_id, eff_rate);
	}

	m1 = sc->m1;
	if (sc->fl & HFSCF_M1_PCT) {
		VERIFY(m1 > 0 && m1 <= 100);
		m1 = (eff_rate * m1) / 100;
	}

	m2 = sc->m2;
	if (sc->fl & HFSCF_M2_PCT) {
		VERIFY(m2 > 0 && m2 <= 100);
		m2 = (eff_rate * m2) / 100;
	}

	isc->sm1 = m2sm(m1);
	isc->ism1 = m2ism(m1);
	isc->dx = d2dx(sc->d);
	isc->dy = seg_x2y(isc->dx, isc->sm1);
	isc->sm2 = m2sm(m2);
	isc->ism2 = m2ism(m2);

	/* return non-zero if there's any change */
	return (bcmp(&oisc, isc, sizeof (*isc)));
}
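
/*
 * Illustrative example, not part of the original file: a curve given in
 * percentages, e.g. m1 = 20 with HFSCF_M1_PCT set on a 100 Mbps uplink,
 * is converted above to the absolute rate (100e6 * 20) / 100 = 20 Mbps
 * before being scaled into the internal sm/ism representation.
 */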
/*
 * initialize the runtime service curve with the given internal
 * service curve starting at (x, y).
 */
static void
rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
    u_int64_t y)
{
	rtsc->x = x;
	rtsc->y = y;
	rtsc->sm1 = isc->sm1;
	rtsc->ism1 = isc->ism1;
	rtsc->dx = isc->dx;
	rtsc->dy = isc->dy;
	rtsc->sm2 = isc->sm2;
	rtsc->ism2 = isc->ism2;
}
/*
 * calculate the y-projection of the runtime service curve by the
 * given x-projection value
 */
static u_int64_t
rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
{
	u_int64_t x;

	if (y < rtsc->y)
		x = rtsc->x;
	else if (y <= rtsc->y + rtsc->dy) {
		/* x belongs to the 1st segment */
		if (rtsc->dy == 0)
			x = rtsc->x + rtsc->dx;
		else
			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
	} else {
		/* x belongs to the 2nd segment */
		x = rtsc->x + rtsc->dx
		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
	}
	return (x);
}
static u_int64_t
rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
{
	u_int64_t y;

	if (x <= rtsc->x)
		y = rtsc->y;
	else if (x <= rtsc->x + rtsc->dx)
		/* y belongs to the 1st segment */
		y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
	else
		/* y belongs to the 2nd segment */
		y = rtsc->y + rtsc->dy
		    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
	return (y);
}
/*
 * update the runtime service curve by taking the minimum of the current
 * runtime service curve and the service curve starting at (x, y).
 */
static void
rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
    u_int64_t y)
{
	u_int64_t y1, y2, dx, dy;

	if (isc->sm1 <= isc->sm2) {
		/* service curve is convex */
		y1 = rtsc_x2y(rtsc, x);
		if (y1 < y)
			/* the current rtsc is smaller */
			return;
		rtsc->x = x;
		rtsc->y = y;
		return;
	}

	/*
	 * service curve is concave
	 * compute the two y values of the current rtsc
	 *	y1: at x
	 *	y2: at (x + dx)
	 */
	y1 = rtsc_x2y(rtsc, x);
	if (y1 <= y) {
		/* rtsc is below isc, no change to rtsc */
		return;
	}

	y2 = rtsc_x2y(rtsc, x + isc->dx);
	if (y2 >= y + isc->dy) {
		/* rtsc is above isc, replace rtsc by isc */
		rtsc->x = x;
		rtsc->y = y;
		rtsc->dx = isc->dx;
		rtsc->dy = isc->dy;
		return;
	}

	/*
	 * the two curves intersect
	 * compute the offsets (dx, dy) using the reverse
	 * function of seg_x2y()
	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
	 */
	dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
	/*
	 * check if (x, y1) belongs to the 1st segment of rtsc.
	 * if so, add the offset.
	 */
	if (rtsc->x + rtsc->dx > x)
		dx += rtsc->x + rtsc->dx - x;
	dy = seg_x2y(dx, isc->sm1);

	rtsc->x = x;
	rtsc->y = y;
	rtsc->dx = dx;
	rtsc->dy = dy;
}
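
/*
 * Illustrative derivation, not part of the original file: the offset dx
 * above solves seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y).
 * Ignoring the fixed-point scaling, that is
 *	dx * sm1 = dx * sm2 + (y1 - y)
 *	dx = (y1 - y) / (sm1 - sm2)
 * and re-applying the SM_SHIFT scaling carried by sm1/sm2 gives
 *	dx = ((y1 - y) << SM_SHIFT) / (sm1 - sm2)
 * which is well defined here because the concave case implies sm1 > sm2.
 */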
int
hfsc_get_class_stats(struct hfsc_if *hif, u_int32_t qid,
    struct hfsc_classstats *sp)
{
	struct hfsc_class *cl;

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	if ((cl = hfsc_clh_to_clp(hif, qid)) == NULL)
		return (EINVAL);

	sp->class_id = cl->cl_id;
	sp->class_handle = cl->cl_handle;

	if (cl->cl_flags & HFCF_RSC) {
		sp->rsc.m1 = sm2m(cl->cl_rsc.sm1);
		sp->rsc.d = dx2d(cl->cl_rsc.dx);
		sp->rsc.m2 = sm2m(cl->cl_rsc.sm2);
	} else {
		sp->rsc.m1 = 0;
		sp->rsc.d = 0;
		sp->rsc.m2 = 0;
	}
	if (cl->cl_flags & HFCF_FSC) {
		sp->fsc.m1 = sm2m(cl->cl_fsc.sm1);
		sp->fsc.d = dx2d(cl->cl_fsc.dx);
		sp->fsc.m2 = sm2m(cl->cl_fsc.sm2);
	} else {
		sp->fsc.m1 = 0;
		sp->fsc.d = 0;
		sp->fsc.m2 = 0;
	}
	if (cl->cl_flags & HFCF_USC) {
		sp->usc.m1 = sm2m(cl->cl_usc.sm1);
		sp->usc.d = dx2d(cl->cl_usc.dx);
		sp->usc.m2 = sm2m(cl->cl_usc.sm2);
	} else {
		sp->usc.m1 = 0;
		sp->usc.d = 0;
		sp->usc.m2 = 0;
	}

	sp->total = cl->cl_total;
	sp->cumul = cl->cl_cumul;

	sp->d = cl->cl_d;
	sp->e = cl->cl_e;
	sp->vt = cl->cl_vt;
	sp->f = cl->cl_f;

	sp->initvt = cl->cl_initvt;
	sp->vtperiod = cl->cl_vtperiod;
	sp->parentperiod = cl->cl_parentperiod;
	sp->nactive = cl->cl_nactive;
	sp->vtoff = cl->cl_vtoff;
	sp->cvtmax = cl->cl_cvtmax;
	sp->myf = cl->cl_myf;
	sp->cfmin = cl->cl_cfmin;
	sp->cvtmin = cl->cl_cvtmin;
	sp->myfadj = cl->cl_myfadj;
	sp->vtadj = cl->cl_vtadj;

	sp->cur_time = read_machclk();
	sp->machclk_freq = machclk_freq;

	sp->qlength = qlen(&cl->cl_q);
	sp->qlimit = qlimit(&cl->cl_q);
	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
	sp->drop_cnt = cl->cl_stats.drop_cnt;
	sp->period = cl->cl_stats.period;

	sp->qtype = qtype(&cl->cl_q);
	sp->qstate = qstate(&cl->cl_q);

#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_getstats(cl->cl_red, &sp->red[0]);
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_getstats(cl->cl_rio, &sp->red[0]);
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_getstats(cl->cl_blue, &sp->blue);
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_getstats(cl->cl_sfb, &sp->sfb);

	return (0);
}
/* convert a class handle to the corresponding class pointer */
static struct hfsc_class *
hfsc_clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
{
	u_int32_t i;
	struct hfsc_class *cl;

	IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);

	/*
	 * first, try optimistically the slot matching the lower bits of
	 * the handle.  if it fails, do the linear table search.
	 */
	i = chandle % hif->hif_maxclasses;
	if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
		return (cl);
	for (i = 0; i < hif->hif_maxclasses; i++)
		if ((cl = hif->hif_class_tbl[i]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);
	return (NULL);
}
static const char *
hfsc_style(struct hfsc_if *hif)
{
	return ((hif->hif_flags & HFSCIFF_ALTQ) ? "ALTQ_HFSC" : "HFSC");
}
int
hfsc_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
#pragma unused(ifq, flags)
	return (ENXIO);		/* not yet */
}
int
hfsc_teardown_ifclassq(struct ifclassq *ifq)
{
	struct hfsc_if *hif = ifq->ifcq_disc;
	int i;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(hif != NULL && ifq->ifcq_type == PKTSCHEDT_HFSC);

	(void) hfsc_destroy_locked(hif);

	ifq->ifcq_disc = NULL;
	for (i = 0; i < IFCQ_SC_MAX; i++) {
		ifq->ifcq_disc_slots[i].qid = 0;
		ifq->ifcq_disc_slots[i].cl = NULL;
	}

	return (ifclassq_detach(ifq));
}
int
hfsc_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
    struct if_ifclassq_stats *ifqs)
{
	struct hfsc_if *hif = ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_HFSC);

	if (slot >= IFCQ_SC_MAX)
		return (EINVAL);

	return (hfsc_get_class_stats(hif, ifq->ifcq_disc_slots[slot].qid,
	    &ifqs->ifqs_hfsc_stats));
}
#endif /* PKTSCHED_HFSC */