]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2011-2012 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | /* | |
30 | * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente | |
31 | * All rights reserved | |
32 | * | |
33 | * Redistribution and use in source and binary forms, with or without | |
34 | * modification, are permitted provided that the following conditions | |
35 | * are met: | |
36 | * 1. Redistributions of source code must retain the above copyright | |
37 | * notice, this list of conditions and the following disclaimer. | |
38 | * 2. Redistributions in binary form must reproduce the above copyright | |
39 | * notice, this list of conditions and the following disclaimer in the | |
40 | * documentation and/or other materials provided with the distribution. | |
41 | * | |
42 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
43 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
44 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
45 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
46 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
47 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
48 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
49 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
50 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
51 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
52 | * SUCH DAMAGE. | |
53 | */ | |
54 | ||
55 | #ifndef _NET_PKTSCHED_PKTSCHED_QFQ_H_ | |
56 | #define _NET_PKTSCHED_PKTSCHED_QFQ_H_ | |
57 | ||
58 | #ifdef PRIVATE | |
59 | #include <net/pktsched/pktsched.h> | |
60 | #include <net/classq/classq.h> | |
61 | #include <net/classq/classq_red.h> | |
62 | #include <net/classq/classq_rio.h> | |
63 | #include <net/classq/classq_blue.h> | |
64 | #include <net/classq/classq_sfb.h> | |
65 | ||
66 | #ifdef __cplusplus | |
67 | extern "C" { | |
68 | #endif | |
69 | ||
70 | /* qfq class flags */ | |
71 | #define QFCF_RED 0x0001 /* use RED */ | |
72 | #define QFCF_ECN 0x0002 /* use ECN with RED/BLUE/SFB */ | |
73 | #define QFCF_RIO 0x0004 /* use RIO */ | |
74 | #define QFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ | |
75 | #define QFCF_BLUE 0x0100 /* use BLUE */ | |
76 | #define QFCF_SFB 0x0200 /* use SFB */ | |
77 | #define QFCF_FLOWCTL 0x0400 /* enable flow control advisories */ | |
78 | #define QFCF_DEFAULTCLASS 0x1000 /* default class */ | |
79 | #define QFCF_DELAYBASED 0x2000 /* queue sizing is delay based */ | |
80 | #ifdef BSD_KERNEL_PRIVATE | |
81 | #define QFCF_LAZY 0x10000000 /* on-demand resource allocation */ | |
82 | #endif /* BSD_KERNEL_PRIVATE */ | |
83 | ||
84 | #define QFCF_USERFLAGS \ | |
85 | (QFCF_RED | QFCF_ECN | QFCF_RIO | QFCF_CLEARDSCP | QFCF_BLUE | \ | |
86 | QFCF_SFB | QFCF_FLOWCTL | QFCF_DEFAULTCLASS) | |
87 | ||
88 | #ifdef BSD_KERNEL_PRIVATE | |
89 | #define QFCF_BITS \ | |
90 | "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \ | |
91 | "\35LAZY" | |
92 | #else | |
93 | #define QFCF_BITS \ | |
94 | "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" | |
95 | #endif /* !BSD_KERNEL_PRIVATE */ | |
96 | ||
97 | #define QFQ_MAX_CLASSES 32 | |
98 | #define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */ | |
99 | #define QFQ_MAX_WEIGHT (1 << QFQ_MAX_WSHIFT) | |
100 | ||
101 | struct qfq_classstats { | |
102 | u_int32_t class_handle; | |
103 | u_int32_t index; | |
104 | u_int32_t weight; | |
105 | u_int32_t lmax; | |
106 | ||
107 | u_int32_t qlength; | |
108 | u_int32_t qlimit; | |
109 | u_int32_t period; | |
110 | struct pktcntr xmitcnt; /* transmitted packet counter */ | |
111 | struct pktcntr dropcnt; /* dropped packet counter */ | |
112 | ||
113 | /* RED, RIO, BLUE, SFB related info */ | |
114 | classq_type_t qtype; | |
115 | union { | |
116 | /* RIO has 3 red stats */ | |
117 | struct red_stats red[RIO_NDROPPREC]; | |
118 | struct blue_stats blue; | |
119 | struct sfb_stats sfb; | |
120 | }; | |
121 | classq_state_t qstate; | |
122 | }; | |
123 | ||
124 | #ifdef BSD_KERNEL_PRIVATE | |
125 | #define QFQ_DEBUG 1 /* enable extra debugging */ | |
126 | ||
127 | /* | |
128 | * Virtual time computations. | |
129 | * | |
130 | * S, F and V are all computed in fixed point arithmetic with | |
131 | * FRAC_BITS decimal bits. | |
132 | * | |
133 | * QFQ_MAX_INDEX is the maximum index allowed for a group. We need | |
134 | * one bit per index. | |
135 | * | |
136 | * QFQ_MAX_WSHIFT is the maximum power of two supported as a weight. | |
137 | * The layout of the bits is as below: | |
138 | * | |
139 | * [ MTU_SHIFT ][ FRAC_BITS ] | |
140 | * [ MAX_INDEX ][ MIN_SLOT_SHIFT ] | |
141 | * ^.__grp->index = 0 | |
142 | * *.__grp->slot_shift | |
143 | * | |
144 | * where MIN_SLOT_SHIFT is derived by difference from the others. | |
145 | * | |
146 | * The max group index corresponds to Lmax/w_min, where | |
147 | * Lmax=1<<MTU_SHIFT, w_min = 1 . | |
148 | * From this, and knowing how many groups (MAX_INDEX) we want, | |
149 | * we can derive the shift corresponding to each group. | |
150 | * | |
151 | * Because we often need to compute | |
152 | * F = S + len/w_i and V = V + len/wsum | |
153 | * instead of storing w_i store the value | |
154 | * inv_w = (1<<FRAC_BITS)/w_i | |
155 | * so we can do F = S + len * inv_w * wsum. | |
156 | * We use W_TOT in the formulas so we can easily move between | |
157 | * static and adaptive weight sum. | |
158 | * | |
159 | * The per-scheduler-instance data contain all the data structures | |
160 | * for the scheduler: bitmaps and bucket lists. | |
161 | */ | |
162 | ||
163 | /* | |
164 | * Shifts used for class<->group mapping. Class weights are in the | |
165 | * range [1, QFQ_MAX_WEIGHT], we need to map each class i to the | |
166 | * group with the smallest index that can support the L_i / r_i | |
167 | * configured for the class. | |
168 | * | |
169 | * grp->qfg_index is the index of the group; and grp->qfg_slot_shift | |
170 | * is the shift for the corresponding (scaled) sigma_i. | |
171 | * | |
172 | * When computing the group index, we do (len<<FP_SHIFT)/weight, | |
173 | * then compute an FLS (which is like a log2()), and if the result | |
174 | * is below the MAX_INDEX region we use 0 (which is the same as | |
175 | * using a larger len). | |
176 | */ | |
177 | #define QFQ_MAX_INDEX 19 | |
178 | #define QFQ_MAX_WSUM (2 * QFQ_MAX_WEIGHT) | |
179 | ||
180 | #define QFQ_FRAC_BITS 30 /* fixed point arithmetic */ | |
181 | #define QFQ_ONE_FP (1UL << QFQ_FRAC_BITS) | |
182 | #define QFQ_IWSUM (QFQ_ONE_FP / QFQ_MAX_WSUM) | |
183 | ||
184 | #define QFQ_MTU_SHIFT 11 /* log2(max_len) */ | |
185 | #define QFQ_MIN_SLOT_SHIFT (QFQ_FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX) | |
186 | ||
187 | /* | |
188 | * Possible group states, also indexes for the bitmaps array in | |
189 | * struct qfq_if. We rely on ER, IR, EB, IB being numbered 0..3 | |
190 | */ | |
191 | enum qfq_state { | |
192 | ER = 0, /* eligible, ready */ | |
193 | IR = 1, /* ineligible, ready */ | |
194 | EB = 2, /* eligible, backlogged */ | |
195 | IB = 3, /* ineligible, backlogged */ | |
196 | QFQ_MAX_STATE | |
197 | }; | |
198 | ||
199 | struct qfq_group; | |
200 | ||
201 | struct qfq_class { | |
202 | u_int32_t cl_handle; /* class handle */ | |
203 | class_queue_t cl_q; /* class queue structure */ | |
204 | u_int32_t cl_qflags; /* class queue flags */ | |
205 | union { | |
206 | void *ptr; | |
207 | struct red *red; /* RED state */ | |
208 | struct rio *rio; /* RIO state */ | |
209 | struct blue *blue; /* BLUE state */ | |
210 | struct sfb *sfb; /* SFB state */ | |
211 | } cl_qalg; | |
212 | struct qfq_if *cl_qif; /* back pointer to qif */ | |
213 | u_int32_t cl_flags; /* class flags */ | |
214 | ||
215 | u_int64_t cl_S, cl_F; /* flow timestamps (exact) */ | |
216 | struct qfq_class *cl_next; /* link for the slot list */ | |
217 | /* | |
218 | * Group we belong to. In principle we would need the index, | |
219 | * which is log_2(lmax/weight), but we never reference it | |
220 | * directly, only the group. | |
221 | */ | |
222 | struct qfq_group *cl_grp; | |
223 | u_int32_t cl_inv_w; /* QFQ_ONE_FP/weight */ | |
224 | u_int32_t cl_lmax; /* max packet size for this flow */ | |
225 | ||
226 | /* statistics */ | |
227 | u_int32_t cl_period; /* backlog period */ | |
228 | struct pktcntr cl_xmitcnt; /* transmitted packet counter */ | |
229 | struct pktcntr cl_dropcnt; /* dropped packet counter */ | |
230 | }; | |
231 | ||
232 | #define cl_red cl_qalg.red | |
233 | #define cl_rio cl_qalg.rio | |
234 | #define cl_blue cl_qalg.blue | |
235 | #define cl_sfb cl_qalg.sfb | |
236 | ||
237 | /* | |
238 | * Group descriptor, see the paper for details. | |
239 | * Basically this contains the bucket lists. | |
240 | */ | |
241 | struct qfq_group { | |
242 | u_int64_t qfg_S, qfg_F; /* group timestamps (approx) */ | |
243 | u_int8_t qfg_slot_shift; /* slot shift */ | |
244 | u_int8_t qfg_index; /* group index */ | |
245 | u_int8_t qfg_front; /* index of the front slot */ | |
246 | pktsched_bitmap_t qfg_full_slots; /* non-empty slots */ | |
247 | ||
248 | /* array of lists of active classes */ | |
249 | struct qfq_class **qfg_slots; | |
250 | }; | |
251 | ||
252 | /* qfq_if flags */ | |
253 | #define QFQIFF_ALTQ 0x1 /* configured via PF/ALTQ */ | |
254 | ||
255 | /* | |
256 | * qfq interface state | |
257 | */ | |
258 | struct qfq_if { | |
259 | struct ifclassq *qif_ifq; /* backpointer to ifclassq */ | |
260 | u_int32_t qif_flags; /* flags */ | |
261 | u_int32_t qif_throttle; /* throttling level */ | |
262 | u_int8_t qif_classes; /* # of classes in table */ | |
263 | u_int8_t qif_maxclasses; /* max # of classes in table */ | |
264 | u_int8_t qif_maxslots; /* max # of slots */ | |
265 | struct qfq_class *qif_default; /* default class */ | |
266 | struct qfq_class **qif_class_tbl; | |
267 | ||
268 | u_int64_t qif_V; /* precise virtual time */ | |
269 | u_int32_t qif_wsum; /* weight sum */ | |
270 | #if QFQ_DEBUG | |
271 | u_int32_t qif_i_wsum; /* QFQ_ONE_FP/w_sum */ | |
272 | u_int32_t qif_queued; /* debugging */ | |
273 | u_int32_t qif_emptygrp; /* debugging */ | |
274 | #endif /* QFQ_DEBUG */ | |
275 | pktsched_bitmap_t qif_bitmaps[QFQ_MAX_STATE]; /* group bitmaps */ | |
276 | struct qfq_group **qif_groups; /* the groups */ | |
277 | }; | |
278 | ||
279 | #define QFQIF_IFP(_qif) ((_qif)->qif_ifq->ifcq_ifp) | |
280 | ||
281 | struct if_ifclassq_stats; | |
282 | ||
283 | extern void qfq_init(void); | |
284 | extern struct qfq_if *qfq_alloc(struct ifnet *, int, boolean_t); | |
285 | extern int qfq_destroy(struct qfq_if *); | |
286 | extern void qfq_purge(struct qfq_if *); | |
287 | extern void qfq_event(struct qfq_if *, cqev_t); | |
288 | extern int qfq_add_queue(struct qfq_if *, u_int32_t, u_int32_t, u_int32_t, | |
289 | u_int32_t, u_int32_t, struct qfq_class **); | |
290 | extern int qfq_remove_queue(struct qfq_if *, u_int32_t); | |
291 | extern int qfq_get_class_stats(struct qfq_if *, u_int32_t, | |
292 | struct qfq_classstats *); | |
293 | extern int qfq_enqueue(struct qfq_if *, struct qfq_class *, struct mbuf *, | |
294 | struct pf_mtag *); | |
295 | extern struct mbuf *qfq_dequeue(struct qfq_if *, cqdq_op_t); | |
296 | extern int qfq_setup_ifclassq(struct ifclassq *, u_int32_t); | |
297 | extern int qfq_teardown_ifclassq(struct ifclassq *ifq); | |
298 | extern int qfq_getqstats_ifclassq(struct ifclassq *, u_int32_t, | |
299 | struct if_ifclassq_stats *); | |
300 | #endif /* BSD_KERNEL_PRIVATE */ | |
301 | #ifdef __cplusplus | |
302 | } | |
303 | #endif | |
304 | #endif /* PRIVATE */ | |
305 | #endif /* _NET_PKTSCHED_PKTSCHED_QFQ_H_ */ |