]>
git.saurik.com Git - apple/xnu.git/blob - bsd/net/pktsched/pktsched_fairq.c
2 * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
32 * This code is derived from software contributed to The DragonFly Project
33 * by Matthew Dillon <dillon@backplane.com>
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in
43 * the documentation and/or other materials provided with the
45 * 3. Neither the name of The DragonFly Project nor the names of its
46 * contributors may be used to endorse or promote products derived
47 * from this software without specific, prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
51 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
52 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
53 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
54 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
55 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
56 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
57 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
58 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
59 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
65 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
66 * fairq. The fairq algorithm is completely different than priq, of course,
67 * but because I used priq's skeleton I believe I should include priq's
70 * Copyright (C) 2000-2003
71 * Sony Computer Science Laboratories Inc. All rights reserved.
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
76 * 1. Redistributions of source code must retain the above copyright
77 * notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 * notice, this list of conditions and the following disclaimer in the
80 * documentation and/or other materials provided with the distribution.
82 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
83 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
84 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
85 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
86 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
87 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
88 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
89 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
90 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
91 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
96 * FAIRQ - take traffic classified by keep state (hashed into
97 * pf->pftag_flowhash) and bucketize it. Fairly extract
98 * the first packet from each bucket in a round-robin fashion.
100 * TODO - better overall qlimit support (right now it is per-bucket).
101 * - NOTE: red etc is per bucket, not overall.
102 * - better service curve support.
106 * altq on em0 fairq bandwidth 650Kb queue { std, bulk }
107 * queue std priority 3 bandwidth 200Kb \
108 * fairq (buckets 64, default, hogs 1Kb) qlimit 50
109 * queue bulk priority 2 bandwidth 100Kb \
110 * fairq (buckets 64, hogs 1Kb) qlimit 50
112 * NOTE: When the aggregate bandwidth is less than the link bandwidth
113 * any remaining bandwidth is dynamically assigned using the
114 * existing bandwidth specs as weightings.
116 * pass out on em0 from any to any keep state queue std
117 * pass out on em0 inet proto tcp ..... port ... keep state queue bulk
122 #include <sys/cdefs.h>
123 #include <sys/param.h>
124 #include <sys/malloc.h>
125 #include <sys/mbuf.h>
126 #include <sys/systm.h>
127 #include <sys/errno.h>
128 #include <sys/kernel.h>
129 #include <sys/syslog.h>
131 #include <kern/zalloc.h>
134 #include <net/net_osdep.h>
136 #include <net/pktsched/pktsched_fairq.h>
137 #include <netinet/in.h>
140 * function prototypes
143 static int fairq_enqueue_ifclassq(struct ifclassq
*, struct mbuf
*);
144 static struct mbuf
*fairq_dequeue_ifclassq(struct ifclassq
*, cqdq_op_t
);
145 static int fairq_request_ifclassq(struct ifclassq
*, cqrq_t
, void *);
147 static int fairq_clear_interface(struct fairq_if
*);
148 static inline int fairq_addq(struct fairq_class
*, struct mbuf
*,
150 static inline struct mbuf
*fairq_getq(struct fairq_class
*, u_int64_t
);
151 static inline struct mbuf
*fairq_pollq(struct fairq_class
*, u_int64_t
, int *);
152 static fairq_bucket_t
*fairq_selectq(struct fairq_class
*, int);
153 static void fairq_purgeq(struct fairq_if
*, struct fairq_class
*, u_int32_t
,
154 u_int32_t
*, u_int32_t
*);
155 static void fairq_updateq(struct fairq_if
*, struct fairq_class
*, cqev_t
);
156 static struct fairq_class
*fairq_class_create(struct fairq_if
*, int, u_int32_t
,
157 u_int64_t
, u_int32_t
, int, u_int64_t
, u_int64_t
, u_int64_t
, u_int64_t
,
159 static int fairq_class_destroy(struct fairq_if
*, struct fairq_class
*);
160 static int fairq_destroy_locked(struct fairq_if
*);
161 static inline struct fairq_class
*fairq_clh_to_clp(struct fairq_if
*,
163 static const char *fairq_style(struct fairq_if
*);
165 #define FAIRQ_ZONE_MAX 32 /* maximum elements in zone (multiplied by fairq_size for zinit) */
166 #define FAIRQ_ZONE_NAME "pktsched_fairq" /* zone name; also printed if zone allocation panics */
168 static unsigned int fairq_size
; /* size of zone element: sizeof (struct fairq_if) */
169 static struct zone
*fairq_zone
; /* zone for struct fairq_if instances */
171 #define FAIRQ_CL_ZONE_MAX 32 /* maximum elements in zone (multiplied by fairq_cl_size for zinit) */
172 #define FAIRQ_CL_ZONE_NAME "pktsched_fairq_cl" /* zone name; also printed if zone allocation panics */
174 static unsigned int fairq_cl_size
; /* size of zone element: sizeof (struct fairq_class) */
175 static struct zone
*fairq_cl_zone
; /* zone for struct fairq_class instances */
180 fairq_size
= sizeof (struct fairq_if
);
181 fairq_zone
= zinit(fairq_size
, FAIRQ_ZONE_MAX
* fairq_size
,
183 if (fairq_zone
== NULL
) {
184 panic("%s: failed allocating %s", __func__
, FAIRQ_ZONE_NAME
);
187 zone_change(fairq_zone
, Z_EXPAND
, TRUE
);
188 zone_change(fairq_zone
, Z_CALLERACCT
, TRUE
);
190 fairq_cl_size
= sizeof (struct fairq_class
);
191 fairq_cl_zone
= zinit(fairq_cl_size
, FAIRQ_CL_ZONE_MAX
* fairq_cl_size
,
192 0, FAIRQ_CL_ZONE_NAME
);
193 if (fairq_cl_zone
== NULL
) {
194 panic("%s: failed allocating %s", __func__
, FAIRQ_CL_ZONE_NAME
);
197 zone_change(fairq_cl_zone
, Z_EXPAND
, TRUE
);
198 zone_change(fairq_cl_zone
, Z_CALLERACCT
, TRUE
);
202 fairq_alloc(struct ifnet
*ifp
, int how
, boolean_t altq
)
204 struct fairq_if
*fif
;
206 fif
= (how
== M_WAITOK
) ?
207 zalloc(fairq_zone
) : zalloc_noblock(fairq_zone
);
211 bzero(fif
, fairq_size
);
212 fif
->fif_maxpri
= -1;
213 fif
->fif_ifq
= &ifp
->if_snd
;
215 fif
->fif_flags
|= FAIRQIFF_ALTQ
;
217 if (pktsched_verbose
) {
218 log(LOG_DEBUG
, "%s: %s scheduler allocated\n",
219 if_name(ifp
), fairq_style(fif
));
226 fairq_destroy(struct fairq_if
*fif
)
228 struct ifclassq
*ifq
= fif
->fif_ifq
;
232 err
= fairq_destroy_locked(fif
);
/*
 * Tear down a FAIRQ scheduler instance.  IFCQ_LOCK_ASSERT_HELD is applied
 * to fif->fif_ifq on entry.  Every class is destroyed through
 * fairq_clear_interface() (its result is deliberately ignored), a debug
 * message is logged when pktsched_verbose is set, and fif itself is then
 * handed back to fairq_zone, so the caller must not touch fif afterwards.
 */
239 fairq_destroy_locked(struct fairq_if
*fif
)
241 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
243 (void) fairq_clear_interface(fif
);
245 if (pktsched_verbose
) {
246 log(LOG_DEBUG
, "%s: %s scheduler destroyed\n",
247 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
));
/* release the fairq_if back to the zone it was allocated from */
250 zfree(fairq_zone
, fif
);
256 * bring the interface back to the initial state by discarding
257 * all the filters and classes.
/*
 * Destroy every class currently installed in fif->fif_classes[].
 * IFCQ_LOCK_ASSERT_HELD is applied to fif->fif_ifq on entry.
 */
260 fairq_clear_interface(struct fairq_if
*fif
)
262 struct fairq_class
*cl
;
265 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
267 /*
 * clear out the classes, lowest priority slot first; fif_maxpri is
 * re-read on every iteration and fairq_class_destroy() lowers it once
 * the highest-priority class is gone, which ends the walk
 */
268 for (pri
= 0; pri
<= fif
->fif_maxpri
; pri
++)
269 if ((cl
= fif
->fif_classes
[pri
]) != NULL
)
270 fairq_class_destroy(fif
, cl
);
275 /* discard all the queued packets on the interface */
/*
 * Drop the packets queued on every class of this scheduler instance.
 * IFCQ_LOCK_ASSERT_HELD is applied to fif->fif_ifq on entry.  Only classes
 * with a non-NULL cl_head are handed to fairq_purgeq(); the call passes
 * flow 0 and NULL for both the packets and bytes out-parameters.
 * NOTE(review): flow 0 presumably means "all flows" - confirm against
 * the purge helpers.
 */
277 fairq_purge(struct fairq_if
*fif
)
279 struct fairq_class
*cl
;
282 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
/* visit each priority slot from 0 up to the highest one in use */
284 for (pri
= 0; pri
<= fif
->fif_maxpri
; pri
++) {
285 if ((cl
= fif
->fif_classes
[pri
]) != NULL
&& cl
->cl_head
)
286 fairq_purgeq(fif
, cl
, 0, NULL
, NULL
);
290 * This assertion is safe to be made only when PF_ALTQ is not
291 * configured; otherwise, IFCQ_LEN represents the sum of the
292 * packets managed by ifcq_disc and altq_disc instances, which
293 * is possible when transitioning between the two.
295 VERIFY(IFCQ_LEN(fif
->fif_ifq
) == 0);
296 #endif /* !PF_ALTQ */
/*
 * Forward a classq event to every class of this scheduler instance by
 * calling fairq_updateq() once per non-NULL entry of fif->fif_classes[].
 * IFCQ_LOCK_ASSERT_HELD is applied to fif->fif_ifq on entry.
 */
300 fairq_event(struct fairq_if
*fif
, cqev_t ev
)
302 struct fairq_class
*cl
;
305 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
/* walk the priority slots from 0 up to the highest one in use */
307 for (pri
= 0; pri
<= fif
->fif_maxpri
; pri
++)
308 if ((cl
= fif
->fif_classes
[pri
]) != NULL
)
309 fairq_updateq(fif
, cl
, ev
);
313 fairq_add_queue(struct fairq_if
*fif
, int priority
, u_int32_t qlimit
,
314 u_int64_t bandwidth
, u_int32_t nbuckets
, int flags
, u_int64_t hogs_m1
,
315 u_int64_t lssc_m1
, u_int64_t lssc_d
, u_int64_t lssc_m2
, u_int32_t qid
,
316 struct fairq_class
**clp
)
318 struct fairq_class
*cl
;
320 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
322 /* check parameters */
323 if (priority
>= FAIRQ_MAXPRI
)
325 if (bandwidth
== 0 || (bandwidth
/ 8) == 0)
327 if (fif
->fif_classes
[priority
] != NULL
)
329 if (fairq_clh_to_clp(fif
, qid
) != NULL
)
332 cl
= fairq_class_create(fif
, priority
, qlimit
, bandwidth
,
333 nbuckets
, flags
, hogs_m1
, lssc_m1
, lssc_d
, lssc_m2
, qid
);
343 static struct fairq_class
*
344 fairq_class_create(struct fairq_if
*fif
, int pri
, u_int32_t qlimit
,
345 u_int64_t bandwidth
, u_int32_t nbuckets
, int flags
, u_int64_t hogs_m1
,
346 u_int64_t lssc_m1
, u_int64_t lssc_d
, u_int64_t lssc_m2
, u_int32_t qid
)
348 #pragma unused(lssc_d, lssc_m2)
350 struct ifclassq
*ifq
;
351 struct fairq_class
*cl
;
354 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
356 /* Sanitize flags unless internally configured */
357 if (fif
->fif_flags
& FAIRQIFF_ALTQ
)
358 flags
&= FARF_USERFLAGS
;
361 if (flags
& FARF_RED
) {
362 log(LOG_ERR
, "%s: %s RED not available!\n",
363 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
));
366 #endif /* !CLASSQ_RED */
369 if (flags
& FARF_RIO
) {
370 log(LOG_ERR
, "%s: %s RIO not available!\n",
371 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
));
374 #endif /* CLASSQ_RIO */
377 if (flags
& FARF_BLUE
) {
378 log(LOG_ERR
, "%s: %s BLUE not available!\n",
379 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
));
382 #endif /* CLASSQ_BLUE */
384 /* These are mutually exclusive */
385 if ((flags
& (FARF_RED
|FARF_RIO
|FARF_BLUE
|FARF_SFB
)) &&
386 (flags
& (FARF_RED
|FARF_RIO
|FARF_BLUE
|FARF_SFB
)) != FARF_RED
&&
387 (flags
& (FARF_RED
|FARF_RIO
|FARF_BLUE
|FARF_SFB
)) != FARF_RIO
&&
388 (flags
& (FARF_RED
|FARF_RIO
|FARF_BLUE
|FARF_SFB
)) != FARF_BLUE
&&
389 (flags
& (FARF_RED
|FARF_RIO
|FARF_BLUE
|FARF_SFB
)) != FARF_SFB
) {
390 log(LOG_ERR
, "%s: %s more than one RED|RIO|BLUE|SFB\n",
391 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
));
395 if (bandwidth
== 0 || (bandwidth
/ 8) == 0) {
396 log(LOG_ERR
, "%s: %s invalid data rate %llu\n",
397 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
), bandwidth
);
403 if (nbuckets
> FAIRQ_MAX_BUCKETS
)
404 nbuckets
= FAIRQ_MAX_BUCKETS
;
405 /* enforce power-of-2 size */
406 while ((nbuckets
^ (nbuckets
- 1)) != ((nbuckets
<< 1) - 1))
410 ifp
= FAIRQIF_IFP(fif
);
412 if ((cl
= fif
->fif_classes
[pri
]) != NULL
) {
413 /* modify the class instead of creating a new one */
415 fairq_purgeq(fif
, cl
, 0, NULL
, NULL
);
417 if (cl
->cl_qtype
== Q_RIO
)
418 rio_destroy(cl
->cl_rio
);
419 #endif /* CLASSQ_RIO */
421 if (cl
->cl_qtype
== Q_RED
)
422 red_destroy(cl
->cl_red
);
423 #endif /* CLASSQ_RED */
425 if (cl
->cl_qtype
== Q_BLUE
)
426 blue_destroy(cl
->cl_blue
);
427 #endif /* CLASSQ_BLUE */
428 if (cl
->cl_qtype
== Q_SFB
&& cl
->cl_sfb
!= NULL
)
429 sfb_destroy(cl
->cl_sfb
);
430 cl
->cl_qalg
.ptr
= NULL
;
431 cl
->cl_qtype
= Q_DROPTAIL
;
432 cl
->cl_qstate
= QS_RUNNING
;
434 cl
= zalloc(fairq_cl_zone
);
437 bzero(cl
, fairq_cl_size
);
438 cl
->cl_nbuckets
= nbuckets
;
439 cl
->cl_nbucket_mask
= nbuckets
- 1;
441 cl
->cl_buckets
= _MALLOC(sizeof (struct fairq_bucket
) *
442 cl
->cl_nbuckets
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
443 if (cl
->cl_buckets
== NULL
)
448 fif
->fif_classes
[pri
] = cl
;
449 if (flags
& FARF_DEFAULTCLASS
)
450 fif
->fif_default
= cl
;
451 if (qlimit
== 0 || qlimit
> IFCQ_MAXLEN(ifq
)) {
452 qlimit
= IFCQ_MAXLEN(ifq
);
454 qlimit
= DEFAULT_QLIMIT
; /* use default */
456 cl
->cl_qlimit
= qlimit
;
457 for (i
= 0; i
< cl
->cl_nbuckets
; ++i
) {
458 _qinit(&cl
->cl_buckets
[i
].queue
, Q_DROPTAIL
, qlimit
);
460 cl
->cl_bandwidth
= bandwidth
/ 8; /* cvt to bytes per second */
461 cl
->cl_qtype
= Q_DROPTAIL
;
462 cl
->cl_qstate
= QS_RUNNING
;
463 cl
->cl_flags
= flags
;
465 if (pri
> fif
->fif_maxpri
)
466 fif
->fif_maxpri
= pri
;
469 cl
->cl_hogs_m1
= hogs_m1
/ 8;
470 cl
->cl_lssc_m1
= lssc_m1
/ 8; /* NOT YET USED */
471 cl
->cl_bw_current
= 0;
473 if (flags
& (FARF_RED
|FARF_RIO
|FARF_BLUE
|FARF_SFB
)) {
474 #if CLASSQ_RED || CLASSQ_RIO
475 u_int64_t ifbandwidth
= ifnet_output_linkrate(ifp
);
477 #endif /* CLASSQ_RED || CLASSQ_RIO */
480 if (flags
& FARF_ECN
) {
481 if (flags
& FARF_BLUE
)
482 cl
->cl_qflags
|= BLUEF_ECN
;
483 else if (flags
& FARF_SFB
)
484 cl
->cl_qflags
|= SFBF_ECN
;
485 else if (flags
& FARF_RED
)
486 cl
->cl_qflags
|= REDF_ECN
;
487 else if (flags
& FARF_RIO
)
488 cl
->cl_qflags
|= RIOF_ECN
;
490 if (flags
& FARF_FLOWCTL
) {
491 if (flags
& FARF_SFB
)
492 cl
->cl_qflags
|= SFBF_FLOWCTL
;
494 if (flags
& FARF_CLEARDSCP
) {
495 if (flags
& FARF_RIO
)
496 cl
->cl_qflags
|= RIOF_CLEARDSCP
;
498 #if CLASSQ_RED || CLASSQ_RIO
500 * XXX: RED & RIO should be watching link speed and MTU
501 * events and recompute pkttime accordingly.
504 pkttime
= 1000 * 1000 * 1000; /* 1 sec */
506 pkttime
= (int64_t)ifp
->if_mtu
* 1000 * 1000 * 1000 /
509 /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
511 if (flags
& FARF_RIO
) {
513 rio_alloc(ifp
, 0, NULL
, cl
->cl_qflags
, pkttime
);
514 if (cl
->cl_rio
!= NULL
)
515 cl
->cl_qtype
= Q_RIO
;
517 #endif /* CLASSQ_RIO */
519 if (flags
& FARF_RED
) {
520 cl
->cl_red
= red_alloc(ifp
, 0, 0,
521 cl
->cl_qlimit
* 10/100,
522 cl
->cl_qlimit
* 30/100,
523 cl
->cl_qflags
, pkttime
);
524 if (cl
->cl_red
!= NULL
)
525 cl
->cl_qtype
= Q_RED
;
527 #endif /* CLASSQ_RED */
528 #endif /* CLASSQ_RED || CLASSQ_RIO */
530 if (flags
& FARF_BLUE
) {
531 cl
->cl_blue
= blue_alloc(ifp
, 0, 0, cl
->cl_qflags
);
532 if (cl
->cl_blue
!= NULL
)
533 cl
->cl_qtype
= Q_BLUE
;
535 #endif /* CLASSQ_BLUE */
536 if (flags
& FARF_SFB
) {
537 if (!(cl
->cl_flags
& FARF_LAZY
))
538 cl
->cl_sfb
= sfb_alloc(ifp
, cl
->cl_handle
,
539 cl
->cl_qlimit
, cl
->cl_qflags
);
540 if (cl
->cl_sfb
!= NULL
|| (cl
->cl_flags
& FARF_LAZY
))
541 cl
->cl_qtype
= Q_SFB
;
545 if (pktsched_verbose
) {
546 log(LOG_DEBUG
, "%s: %s created qid=%d pri=%d qlimit=%d "
547 "flags=%b\n", if_name(ifp
), fairq_style(fif
),
548 cl
->cl_handle
, cl
->cl_pri
, cl
->cl_qlimit
, flags
, FARF_BITS
);
554 if (cl
->cl_buckets
!= NULL
)
555 _FREE(cl
->cl_buckets
, M_DEVBUF
);
558 if (cl
->cl_qalg
.ptr
!= NULL
) {
560 if (cl
->cl_qtype
== Q_RIO
)
561 rio_destroy(cl
->cl_rio
);
562 #endif /* CLASSQ_RIO */
564 if (cl
->cl_qtype
== Q_RED
)
565 red_destroy(cl
->cl_red
);
566 #endif /* CLASSQ_RED */
568 if (cl
->cl_qtype
== Q_BLUE
)
569 blue_destroy(cl
->cl_blue
);
570 #endif /* CLASSQ_BLUE */
571 if (cl
->cl_qtype
== Q_SFB
&& cl
->cl_sfb
!= NULL
)
572 sfb_destroy(cl
->cl_sfb
);
573 cl
->cl_qalg
.ptr
= NULL
;
574 cl
->cl_qtype
= Q_DROPTAIL
;
575 cl
->cl_qstate
= QS_RUNNING
;
577 zfree(fairq_cl_zone
, cl
);
/*
 * Remove the class whose handle is qid.  The handle is resolved with
 * fairq_clh_to_clp(); when a class is found the result of
 * fairq_class_destroy() is returned to the caller.
 * IFCQ_LOCK_ASSERT_HELD is applied to fif->fif_ifq on entry.
 */
583 fairq_remove_queue(struct fairq_if
*fif
, u_int32_t qid
)
585 struct fairq_class
*cl
;
587 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
/* no class carries this handle: bail out before touching anything */
589 if ((cl
= fairq_clh_to_clp(fif
, qid
)) == NULL
)
592 return (fairq_class_destroy(fif
, cl
));
596 fairq_class_destroy(struct fairq_if
*fif
, struct fairq_class
*cl
)
598 struct ifclassq
*ifq
= fif
->fif_ifq
;
601 IFCQ_LOCK_ASSERT_HELD(ifq
);
604 fairq_purgeq(fif
, cl
, 0, NULL
, NULL
);
606 fif
->fif_classes
[cl
->cl_pri
] = NULL
;
607 if (fif
->fif_poll_cache
== cl
)
608 fif
->fif_poll_cache
= NULL
;
609 if (fif
->fif_maxpri
== cl
->cl_pri
) {
610 for (pri
= cl
->cl_pri
; pri
>= 0; pri
--)
611 if (fif
->fif_classes
[pri
] != NULL
) {
612 fif
->fif_maxpri
= pri
;
616 fif
->fif_maxpri
= -1;
619 if (cl
->cl_qalg
.ptr
!= NULL
) {
621 if (cl
->cl_qtype
== Q_RIO
)
622 rio_destroy(cl
->cl_rio
);
623 #endif /* CLASSQ_RIO */
625 if (cl
->cl_qtype
== Q_RED
)
626 red_destroy(cl
->cl_red
);
627 #endif /* CLASSQ_RED */
629 if (cl
->cl_qtype
== Q_BLUE
)
630 blue_destroy(cl
->cl_blue
);
631 #endif /* CLASSQ_BLUE */
632 if (cl
->cl_qtype
== Q_SFB
&& cl
->cl_sfb
!= NULL
)
633 sfb_destroy(cl
->cl_sfb
);
634 cl
->cl_qalg
.ptr
= NULL
;
635 cl
->cl_qtype
= Q_DROPTAIL
;
636 cl
->cl_qstate
= QS_RUNNING
;
639 if (fif
->fif_default
== cl
)
640 fif
->fif_default
= NULL
;
642 if (pktsched_verbose
) {
643 log(LOG_DEBUG
, "%s: %s destroyed qid=%d pri=%d\n",
644 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
),
645 cl
->cl_handle
, cl
->cl_pri
);
648 _FREE(cl
->cl_buckets
, M_DEVBUF
);
649 cl
->cl_head
= NULL
; /* sanity */
650 cl
->cl_polled
= NULL
; /* sanity */
651 cl
->cl_buckets
= NULL
; /* sanity */
653 zfree(fairq_cl_zone
, cl
);
659 fairq_enqueue(struct fairq_if
*fif
, struct fairq_class
*cl
, struct mbuf
*m
,
662 struct ifclassq
*ifq
= fif
->fif_ifq
;
665 IFCQ_LOCK_ASSERT_HELD(ifq
);
666 VERIFY(cl
== NULL
|| cl
->cl_fif
== fif
);
670 cl
= fairq_clh_to_clp(fif
, t
->pftag_qid
);
672 cl
= fairq_clh_to_clp(fif
, 0);
673 #endif /* !PF_ALTQ */
675 cl
= fif
->fif_default
;
677 IFCQ_CONVERT_LOCK(ifq
);
684 cl
->cl_flags
|= FARF_HAS_PACKETS
;
687 ret
= fairq_addq(cl
, m
, t
);
689 if (ret
== CLASSQEQ_SUCCESS_FC
) {
690 /* packet enqueued, return advisory feedback */
693 VERIFY(ret
== CLASSQEQ_DROPPED
||
694 ret
== CLASSQEQ_DROPPED_FC
||
695 ret
== CLASSQEQ_DROPPED_SP
);
697 /* packet has been freed in fairq_addq */
698 PKTCNTR_ADD(&cl
->cl_dropcnt
, 1, len
);
699 IFCQ_DROP_ADD(ifq
, 1, len
);
701 case CLASSQEQ_DROPPED
:
703 case CLASSQEQ_DROPPED_FC
:
705 case CLASSQEQ_DROPPED_SP
:
706 return (EQSUSPENDED
);
713 /* successfully queued. */
718 * note: CLASSQDQ_POLL returns the next packet without removing the packet
719 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
720 * CLASSQDQ_REMOVE must return the same packet if called immediately
721 * after CLASSQDQ_POLL.
724 fairq_dequeue(struct fairq_if
*fif
, cqdq_op_t op
)
726 struct ifclassq
*ifq
= fif
->fif_ifq
;
727 struct fairq_class
*cl
;
728 struct fairq_class
*best_cl
;
731 u_int64_t cur_time
= read_machclk();
732 u_int32_t best_scale
;
737 IFCQ_LOCK_ASSERT_HELD(ifq
);
739 if (IFCQ_IS_EMPTY(ifq
)) {
740 /* no packet in the queue */
744 if (fif
->fif_poll_cache
&& op
== CLASSQDQ_REMOVE
) {
745 best_cl
= fif
->fif_poll_cache
;
746 m
= fairq_getq(best_cl
, cur_time
);
747 fif
->fif_poll_cache
= NULL
;
750 IFCQ_XMIT_ADD(ifq
, 1, m_pktlen(m
));
751 PKTCNTR_ADD(&best_cl
->cl_xmitcnt
, 1, m_pktlen(m
));
756 best_scale
= 0xFFFFFFFFU
;
758 for (pri
= fif
->fif_maxpri
; pri
>= 0; pri
--) {
759 if ((cl
= fif
->fif_classes
[pri
]) == NULL
)
761 if ((cl
->cl_flags
& FARF_HAS_PACKETS
) == 0)
763 m
= fairq_pollq(cl
, cur_time
, &hit_limit
);
765 cl
->cl_flags
&= ~FARF_HAS_PACKETS
;
770 * We can halt the search immediately if the queue
771 * did not hit its bandwidth limit.
773 if (hit_limit
== 0) {
780 * Otherwise calculate the scale factor and select
781 * the queue with the lowest scale factor. This
782 * apportions any unused bandwidth weighted by
783 * the relative bandwidth specification.
785 scale
= cl
->cl_bw_current
* 100 / cl
->cl_bandwidth
;
786 if (scale
< best_scale
) {
793 if (op
== CLASSQDQ_POLL
) {
794 fif
->fif_poll_cache
= best_cl
;
796 } else if (best_cl
!= NULL
) {
797 m
= fairq_getq(best_cl
, cur_time
);
800 IFCQ_XMIT_ADD(ifq
, 1, m_pktlen(m
));
801 PKTCNTR_ADD(&best_cl
->cl_xmitcnt
, 1,
812 fairq_addq(struct fairq_class
*cl
, struct mbuf
*m
, struct pf_mtag
*t
)
814 struct ifclassq
*ifq
= cl
->cl_fif
->fif_ifq
;
816 u_int32_t hash
= m
->m_pkthdr
.pkt_flowid
;
820 IFCQ_LOCK_ASSERT_HELD(ifq
);
823 * If the packet doesn't have any keep state put it on the end of
824 * our queue. XXX this can result in out of order delivery.
828 b
= cl
->cl_head
->prev
;
830 b
= &cl
->cl_buckets
[0];
832 hindex
= (hash
& cl
->cl_nbucket_mask
);
833 b
= &cl
->cl_buckets
[hindex
];
837 * Add the bucket to the end of the circular list of active buckets.
839 * As a special case we add the bucket to the beginning of the list
840 * instead of the end if it was not previously on the list and if
841 * its traffic is less than the hog level.
843 if (b
->in_use
== 0) {
845 if (cl
->cl_head
== NULL
) {
850 b
->next
= cl
->cl_head
;
851 b
->prev
= cl
->cl_head
->prev
;
855 if (b
->bw_delta
&& cl
->cl_hogs_m1
) {
856 bw
= b
->bw_bytes
* machclk_freq
/ b
->bw_delta
;
857 if (bw
< cl
->cl_hogs_m1
)
864 if (cl
->cl_qtype
== Q_RIO
)
865 return (rio_addq(cl
->cl_rio
, &b
->queue
, m
, t
));
867 #endif /* CLASSQ_RIO */
869 if (cl
->cl_qtype
== Q_RED
)
870 return (red_addq(cl
->cl_red
, &b
->queue
, m
, t
));
872 #endif /* CLASSQ_RED */
874 if (cl
->cl_qtype
== Q_BLUE
)
875 return (blue_addq(cl
->cl_blue
, &b
->queue
, m
, t
));
877 #endif /* CLASSQ_BLUE */
878 if (cl
->cl_qtype
== Q_SFB
) {
879 if (cl
->cl_sfb
== NULL
) {
880 struct ifnet
*ifp
= FAIRQIF_IFP(cl
->cl_fif
);
882 VERIFY(cl
->cl_flags
& FARF_LAZY
);
883 IFCQ_CONVERT_LOCK(ifq
);
885 cl
->cl_sfb
= sfb_alloc(ifp
, cl
->cl_handle
,
886 cl
->cl_qlimit
, cl
->cl_qflags
);
887 if (cl
->cl_sfb
== NULL
) {
888 /* fall back to droptail */
889 cl
->cl_qtype
= Q_DROPTAIL
;
890 cl
->cl_flags
&= ~FARF_SFB
;
891 cl
->cl_qflags
&= ~(SFBF_ECN
| SFBF_FLOWCTL
);
893 log(LOG_ERR
, "%s: %s SFB lazy allocation "
894 "failed for qid=%d pri=%d, falling back "
895 "to DROPTAIL\n", if_name(ifp
),
896 fairq_style(cl
->cl_fif
), cl
->cl_handle
,
900 if (cl
->cl_sfb
!= NULL
)
901 return (sfb_addq(cl
->cl_sfb
, &b
->queue
, m
, t
));
902 } else if (qlen(&b
->queue
) >= qlimit(&b
->queue
)) {
903 IFCQ_CONVERT_LOCK(ifq
);
905 return (CLASSQEQ_DROPPED
);
909 if (cl
->cl_flags
& FARF_CLEARDSCP
)
910 write_dsfield(m
, t
, 0);
918 static inline struct mbuf
*
919 fairq_getq(struct fairq_class
*cl
, u_int64_t cur_time
)
924 IFCQ_LOCK_ASSERT_HELD(cl
->cl_fif
->fif_ifq
);
926 b
= fairq_selectq(cl
, 0);
930 else if (cl
->cl_qtype
== Q_RIO
)
931 m
= rio_getq(cl
->cl_rio
, &b
->queue
);
932 #endif /* CLASSQ_RIO */
934 else if (cl
->cl_qtype
== Q_RED
)
935 m
= red_getq(cl
->cl_red
, &b
->queue
);
936 #endif /* CLASSQ_RED */
938 else if (cl
->cl_qtype
== Q_BLUE
)
939 m
= blue_getq(cl
->cl_blue
, &b
->queue
);
940 #endif /* CLASSQ_BLUE */
941 else if (cl
->cl_qtype
== Q_SFB
&& cl
->cl_sfb
!= NULL
)
942 m
= sfb_getq(cl
->cl_sfb
, &b
->queue
);
944 m
= _getq(&b
->queue
);
947 * Calculate the BW change
953 * Per-class bandwidth calculation
955 delta
= (cur_time
- cl
->cl_last_time
);
956 if (delta
> machclk_freq
* 8)
957 delta
= machclk_freq
* 8;
958 cl
->cl_bw_delta
+= delta
;
959 cl
->cl_bw_bytes
+= m
->m_pkthdr
.len
;
960 cl
->cl_last_time
= cur_time
;
961 if (cl
->cl_bw_delta
> machclk_freq
) {
962 cl
->cl_bw_delta
-= cl
->cl_bw_delta
>> 2;
963 cl
->cl_bw_bytes
-= cl
->cl_bw_bytes
>> 2;
967 * Per-bucket bandwidth calculation
969 delta
= (cur_time
- b
->last_time
);
970 if (delta
> machclk_freq
* 8)
971 delta
= machclk_freq
* 8;
972 b
->bw_delta
+= delta
;
973 b
->bw_bytes
+= m
->m_pkthdr
.len
;
974 b
->last_time
= cur_time
;
975 if (b
->bw_delta
> machclk_freq
) {
976 b
->bw_delta
-= b
->bw_delta
>> 2;
977 b
->bw_bytes
-= b
->bw_bytes
>> 2;
984 * Figure out what the next packet would be if there were no limits. If
985 * this class hits its bandwidth limit *hit_limit is set to non-zero, otherwise
986 * it is set to 0. A non-NULL mbuf is returned either way.
988 static inline struct mbuf
*
989 fairq_pollq(struct fairq_class
*cl
, u_int64_t cur_time
, int *hit_limit
)
996 IFCQ_LOCK_ASSERT_HELD(cl
->cl_fif
->fif_ifq
);
999 b
= fairq_selectq(cl
, 1);
1002 m
= qhead(&b
->queue
);
1005 * Did this packet exceed the class bandwidth? Calculate the
1006 * bandwidth component of the packet.
1008 * - Calculate bytes per second
1010 delta
= cur_time
- cl
->cl_last_time
;
1011 if (delta
> machclk_freq
* 8)
1012 delta
= machclk_freq
* 8;
1013 cl
->cl_bw_delta
+= delta
;
1014 cl
->cl_last_time
= cur_time
;
1015 if (cl
->cl_bw_delta
) {
1016 bw
= cl
->cl_bw_bytes
* machclk_freq
/ cl
->cl_bw_delta
;
1018 if (bw
> cl
->cl_bandwidth
)
1020 cl
->cl_bw_current
= bw
;
1022 printf("BW %6lld relative to %6u %d queue 0x%llx\n",
1023 bw
, cl
->cl_bandwidth
, *hit_limit
,
1024 (uint64_t)VM_KERNEL_ADDRPERM(b
));
1031 * Locate the next queue we want to pull a packet out of. This code
1032 * is also responsible for removing empty buckets from the circular list.
1034 static fairq_bucket_t
*
1035 fairq_selectq(struct fairq_class
*cl
, int ispoll
)
1040 IFCQ_LOCK_ASSERT_HELD(cl
->cl_fif
->fif_ifq
);
1042 if (ispoll
== 0 && cl
->cl_polled
) {
1044 cl
->cl_polled
= NULL
;
1048 while ((b
= cl
->cl_head
) != NULL
) {
1050 * Remove empty queues from consideration
1052 if (qempty(&b
->queue
)) {
1054 cl
->cl_head
= b
->next
;
1055 if (cl
->cl_head
== b
) {
1058 b
->next
->prev
= b
->prev
;
1059 b
->prev
->next
= b
->next
;
1065 * Advance the round robin. Queues with bandwidths less
1066 * than the hog bandwidth are allowed to burst.
1068 if (cl
->cl_hogs_m1
== 0) {
1069 cl
->cl_head
= b
->next
;
1070 } else if (b
->bw_delta
) {
1071 bw
= b
->bw_bytes
* machclk_freq
/ b
->bw_delta
;
1072 if (bw
>= cl
->cl_hogs_m1
) {
1073 cl
->cl_head
= b
->next
;
1091 fairq_purgeq(struct fairq_if
*fif
, struct fairq_class
*cl
, u_int32_t flow
,
1092 u_int32_t
*packets
, u_int32_t
*bytes
)
1094 struct ifclassq
*ifq
= fif
->fif_ifq
;
1095 u_int32_t _cnt
= 0, _len
= 0;
1098 IFCQ_LOCK_ASSERT_HELD(ifq
);
1100 /* become regular mutex before freeing mbufs */
1101 IFCQ_CONVERT_LOCK(ifq
);
1103 while ((b
= fairq_selectq(cl
, 0)) != NULL
) {
1104 u_int32_t cnt
, len
, qlen
;
1106 if ((qlen
= qlen(&b
->queue
)) == 0)
1110 if (cl
->cl_qtype
== Q_RIO
)
1111 rio_purgeq(cl
->cl_rio
, &b
->queue
, flow
, &cnt
, &len
);
1113 #endif /* CLASSQ_RIO */
1115 if (cl
->cl_qtype
== Q_RED
)
1116 red_purgeq(cl
->cl_red
, &b
->queue
, flow
, &cnt
, &len
);
1118 #endif /* CLASSQ_RED */
1120 if (cl
->cl_qtype
== Q_BLUE
)
1121 blue_purgeq(cl
->cl_blue
, &b
->queue
, flow
, &cnt
, &len
);
1123 #endif /* CLASSQ_BLUE */
1124 if (cl
->cl_qtype
== Q_SFB
&& cl
->cl_sfb
!= NULL
)
1125 sfb_purgeq(cl
->cl_sfb
, &b
->queue
, flow
, &cnt
, &len
);
1127 _flushq_flow(&b
->queue
, flow
, &cnt
, &len
);
1132 VERIFY(qlen(&b
->queue
) == (qlen
- cnt
));
1134 PKTCNTR_ADD(&cl
->cl_dropcnt
, cnt
, len
);
1135 IFCQ_DROP_ADD(ifq
, cnt
, len
);
1137 VERIFY(((signed)IFCQ_LEN(ifq
) - cnt
) >= 0);
1138 IFCQ_LEN(ifq
) -= cnt
;
1143 if (pktsched_verbose
) {
1144 log(LOG_DEBUG
, "%s: %s purge qid=%d pri=%d "
1145 "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
1146 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
),
1147 cl
->cl_handle
, cl
->cl_pri
, qlen
, qlen(&b
->queue
),
1152 if (packets
!= NULL
)
/*
 * Deliver event ev to the queueing algorithm attached to class cl.
 * IFCQ_LOCK_ASSERT_HELD is applied to fif->fif_ifq on entry and the event
 * is logged when pktsched_verbose is set.  The event is then dispatched on
 * cl->cl_qtype to the matching rio/red/blue/sfb update routine; the SFB
 * branch is additionally skipped while cl->cl_sfb is still NULL.  No
 * update routine is called for any other queue type.
 */
1159 fairq_updateq(struct fairq_if
*fif
, struct fairq_class
*cl
, cqev_t ev
)
1161 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
1163 if (pktsched_verbose
) {
1164 log(LOG_DEBUG
, "%s: %s update qid=%d pri=%d event=%s\n",
1165 if_name(FAIRQIF_IFP(fif
)), fairq_style(fif
),
1166 cl
->cl_handle
, cl
->cl_pri
, ifclassq_ev2str(ev
));
/* hand the event to whichever queue algorithm this class is using */
1170 if (cl
->cl_qtype
== Q_RIO
)
1171 return (rio_updateq(cl
->cl_rio
, ev
));
1172 #endif /* CLASSQ_RIO */
1174 if (cl
->cl_qtype
== Q_RED
)
1175 return (red_updateq(cl
->cl_red
, ev
));
1176 #endif /* CLASSQ_RED */
1178 if (cl
->cl_qtype
== Q_BLUE
)
1179 return (blue_updateq(cl
->cl_blue
, ev
));
1180 #endif /* CLASSQ_BLUE */
1181 if (cl
->cl_qtype
== Q_SFB
&& cl
->cl_sfb
!= NULL
)
1182 return (sfb_updateq(cl
->cl_sfb
, ev
));
1186 fairq_get_class_stats(struct fairq_if
*fif
, u_int32_t qid
,
1187 struct fairq_classstats
*sp
)
1189 struct fairq_class
*cl
;
1192 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
1194 if ((cl
= fairq_clh_to_clp(fif
, qid
)) == NULL
)
1197 sp
->class_handle
= cl
->cl_handle
;
1198 sp
->priority
= cl
->cl_pri
;
1199 sp
->qlimit
= cl
->cl_qlimit
;
1200 sp
->xmit_cnt
= cl
->cl_xmitcnt
;
1201 sp
->drop_cnt
= cl
->cl_dropcnt
;
1202 sp
->qtype
= cl
->cl_qtype
;
1203 sp
->qstate
= cl
->cl_qstate
;
1209 sp
->qlength
+= qlen(&b
->queue
);
1211 } while (b
!= cl
->cl_head
);
1215 if (cl
->cl_qtype
== Q_RED
)
1216 red_getstats(cl
->cl_red
, &sp
->red
[0]);
1217 #endif /* CLASSQ_RED */
1219 if (cl
->cl_qtype
== Q_RIO
)
1220 rio_getstats(cl
->cl_rio
, &sp
->red
[0]);
1221 #endif /* CLASSQ_RIO */
1223 if (cl
->cl_qtype
== Q_BLUE
)
1224 blue_getstats(cl
->cl_blue
, &sp
->blue
);
1225 #endif /* CLASSQ_BLUE */
1226 if (cl
->cl_qtype
== Q_SFB
&& cl
->cl_sfb
!= NULL
)
1227 sfb_getstats(cl
->cl_sfb
, &sp
->sfb
);
1232 /* convert a class handle to the corresponding class pointer */
/*
 * Look up a class by handle: scan fif->fif_classes[] from fif_maxpri down
 * to slot 0 for the first non-NULL class whose cl_handle equals chandle.
 * IFCQ_LOCK_ASSERT_HELD is applied to fif->fif_ifq on entry.  Callers in
 * this file compare the result against NULL to detect "no such class".
 */
1233 static inline struct fairq_class
*
1234 fairq_clh_to_clp(struct fairq_if
*fif
, u_int32_t chandle
)
1236 struct fairq_class
*cl
;
1239 IFCQ_LOCK_ASSERT_HELD(fif
->fif_ifq
);
/* highest priority slot first */
1241 for (idx
= fif
->fif_maxpri
; idx
>= 0; idx
--)
1242 if ((cl
= fif
->fif_classes
[idx
]) != NULL
&&
1243 cl
->cl_handle
== chandle
)
/*
 * Return the scheduler name used as a prefix in this file's log messages:
 * "ALTQ_FAIRQ" when FAIRQIFF_ALTQ is set in fif->fif_flags, "FAIRQ"
 * otherwise.  The result is a string literal and must not be freed.
 */
1250 fairq_style(struct fairq_if
*fif
)
1252 return ((fif
->fif_flags
& FAIRQIFF_ALTQ
) ? "ALTQ_FAIRQ" : "FAIRQ");
/*
 * Placeholder for attaching FAIRQ to an ifclassq.  Both arguments are
 * unused and the function unconditionally fails with ENXIO.
 */
1256 fairq_setup_ifclassq(struct ifclassq
*ifq
, u_int32_t flags
)
1258 #pragma unused(ifq, flags)
1259 return (ENXIO
); /* not yet supported */
/*
 * Detach the FAIRQ scheduler stored in ifq->ifcq_disc.
 * IFCQ_LOCK_ASSERT_HELD is applied to ifq, and VERIFY requires a non-NULL
 * discipline pointer with ifcq_type == PKTSCHEDT_FAIRQ.  The scheduler is
 * destroyed with fairq_destroy_locked() (result ignored), ifcq_disc and
 * all IFCQ_SC_MAX discipline slots are reset, and the value of
 * ifclassq_detach() is returned.
 */
1263 fairq_teardown_ifclassq(struct ifclassq
*ifq
)
1265 struct fairq_if
*fif
= ifq
->ifcq_disc
;
1268 IFCQ_LOCK_ASSERT_HELD(ifq
);
1269 VERIFY(fif
!= NULL
&& ifq
->ifcq_type
== PKTSCHEDT_FAIRQ
);
/* fif is returned to its zone by this call; do not use it afterwards */
1271 (void) fairq_destroy_locked(fif
);
1273 ifq
->ifcq_disc
= NULL
;
/* forget every per-slot queue id and class pointer */
1274 for (i
= 0; i
< IFCQ_SC_MAX
; i
++) {
1275 ifq
->ifcq_disc_slots
[i
].qid
= 0;
1276 ifq
->ifcq_disc_slots
[i
].cl
= NULL
;
1279 return (ifclassq_detach(ifq
));
/*
 * Fetch the statistics of the class bound to discipline slot `slot'.
 * IFCQ_LOCK_ASSERT_HELD is applied to ifq and VERIFY requires
 * ifcq_type == PKTSCHEDT_FAIRQ.  For a slot below IFCQ_SC_MAX the queue id
 * recorded in ifq->ifcq_disc_slots[slot] is passed to
 * fairq_get_class_stats(), which fills ifqs->ifqs_fairq_stats and whose
 * result is returned.
 */
1283 fairq_getqstats_ifclassq(struct ifclassq
*ifq
, u_int32_t slot
,
1284 struct if_ifclassq_stats
*ifqs
)
1286 struct fairq_if
*fif
= ifq
->ifcq_disc
;
1288 IFCQ_LOCK_ASSERT_HELD(ifq
);
1289 VERIFY(ifq
->ifcq_type
== PKTSCHEDT_FAIRQ
);
/* reject slot numbers outside the ifcq_disc_slots[] table */
1291 if (slot
>= IFCQ_SC_MAX
)
1294 return (fairq_get_class_stats(fif
, ifq
->ifcq_disc_slots
[slot
].qid
,
1295 &ifqs
->ifqs_fairq_stats
));
1297 #endif /* PKTSCHED_FAIRQ */