]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/pktsched/pktsched_qfq.h
xnu-2050.9.2.tar.gz
[apple/xnu.git] / bsd / net / pktsched / pktsched_qfq.h
1 /*
2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
31 * All rights reserved
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 */
54
55 #ifndef _NET_PKTSCHED_PKTSCHED_QFQ_H_
56 #define _NET_PKTSCHED_PKTSCHED_QFQ_H_
57
58 #ifdef PRIVATE
59 #include <net/pktsched/pktsched.h>
60 #include <net/classq/classq.h>
61 #include <net/classq/classq_red.h>
62 #include <net/classq/classq_rio.h>
63 #include <net/classq/classq_blue.h>
64 #include <net/classq/classq_sfb.h>
65
66 #ifdef __cplusplus
67 extern "C" {
68 #endif
69
70 /* qfq class flags */
71 #define QFCF_RED 0x0001 /* use RED */
72 #define QFCF_ECN 0x0002 /* use ECN with RED/BLUE/SFB */
73 #define QFCF_RIO 0x0004 /* use RIO */
74 #define QFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
75 #define QFCF_BLUE 0x0100 /* use BLUE */
76 #define QFCF_SFB 0x0200 /* use SFB */
77 #define QFCF_FLOWCTL 0x0400 /* enable flow control advisories */
78 #define QFCF_DEFAULTCLASS 0x1000 /* default class */
79 #ifdef BSD_KERNEL_PRIVATE
80 #define QFCF_LAZY 0x10000000 /* on-demand resource allocation */
81 #endif /* BSD_KERNEL_PRIVATE */
82
83 #define QFCF_USERFLAGS \
84 (QFCF_RED | QFCF_ECN | QFCF_RIO | QFCF_CLEARDSCP | QFCF_BLUE | \
85 QFCF_SFB | QFCF_FLOWCTL | QFCF_DEFAULTCLASS)
86
87 #ifdef BSD_KERNEL_PRIVATE
88 #define QFCF_BITS \
89 "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
90 "\35LAZY"
91 #else
92 #define QFCF_BITS \
93 "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT"
94 #endif /* !BSD_KERNEL_PRIVATE */
95
96 #define QFQ_MAX_CLASSES 32
97 #define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */
98 #define QFQ_MAX_WEIGHT (1 << QFQ_MAX_WSHIFT)
99
100 struct qfq_classstats {
101 u_int32_t class_handle;
102 u_int32_t index;
103 u_int32_t weight;
104 u_int32_t lmax;
105
106 u_int32_t qlength;
107 u_int32_t qlimit;
108 u_int32_t period;
109 struct pktcntr xmitcnt; /* transmitted packet counter */
110 struct pktcntr dropcnt; /* dropped packet counter */
111
112 /* RED, RIO, BLUE, SFB related info */
113 classq_type_t qtype;
114 union {
115 /* RIO has 3 red stats */
116 struct red_stats red[RIO_NDROPPREC];
117 struct blue_stats blue;
118 struct sfb_stats sfb;
119 };
120 classq_state_t qstate;
121 };
122
123 #ifdef BSD_KERNEL_PRIVATE
124 #define QFQ_DEBUG 1 /* enable extra debugging */
125
126 /*
127 * Virtual time computations.
128 *
129 * S, F and V are all computed in fixed point arithmetic with
130 * FRAC_BITS decimal bits.
131 *
132 * QFQ_MAX_INDEX is the maximum index allowed for a group. We need
133 * one bit per index.
134 *
135 * QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
136 * The layout of the bits is as below:
137 *
138 * [ MTU_SHIFT ][ FRAC_BITS ]
139 * [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
140 * ^.__grp->index = 0
141 * *.__grp->slot_shift
142 *
143 * where MIN_SLOT_SHIFT is derived by difference from the others.
144 *
145 * The max group index corresponds to Lmax/w_min, where
146 * Lmax=1<<MTU_SHIFT, w_min = 1 .
147 * From this, and knowing how many groups (MAX_INDEX) we want,
148 * we can derive the shift corresponding to each group.
149 *
150 * Because we often need to compute
151 * F = S + len/w_i and V = V + len/wsum
152 * instead of storing w_i store the value
153 * inv_w = (1<<FRAC_BITS)/w_i
154 * so we can do F = S + len * inv_w * wsum.
155 * We use W_TOT in the formulas so we can easily move between
156 * static and adaptive weight sum.
157 *
158 * The per-scheduler-instance data contain all the data structures
159 * for the scheduler: bitmaps and bucket lists.
160 */
161
162 /*
163 * Shifts used for class<->group mapping. Class weights are in the
164 * range [1, QFQ_MAX_WEIGHT], we need to map each class i to the
165 * group with the smallest index that can support the L_i / r_i
166 * configured for the class.
167 *
168 * grp->qfg_index is the index of the group; and grp->qfg_slot_shift
169 * is the shift for the corresponding (scaled) sigma_i.
170 *
171 * When computing the group index, we do (len<<FP_SHIFT)/weight,
172 * then compute an FLS (which is like a log2()), and if the result
173 * is below the MAX_INDEX region we use 0 (which is the same as
174 * using a larger len).
175 */
176 #define QFQ_MAX_INDEX 19
177 #define QFQ_MAX_WSUM (2 * QFQ_MAX_WEIGHT)
178
179 #define QFQ_FRAC_BITS 30 /* fixed point arithmetic */
180 #define QFQ_ONE_FP (1UL << QFQ_FRAC_BITS)
181 #define QFQ_IWSUM (QFQ_ONE_FP / QFQ_MAX_WSUM)
182
183 #define QFQ_MTU_SHIFT 11 /* log2(max_len) */
184 #define QFQ_MIN_SLOT_SHIFT (QFQ_FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
185
186 /*
187 * Possible group states, also indexes for the bitmaps array in
188 * struct qfq_if. We rely on ER, IR, EB, IB being numbered 0..3
189 */
190 enum qfq_state {
191 ER = 0, /* eligible, ready */
192 IR = 1, /* ineligible, ready */
193 EB = 2, /* eligible, backlogged */
194 IB = 3, /* ineligible, backlogged */
195 QFQ_MAX_STATE
196 };
197
198 struct qfq_group;
199
200 struct qfq_class {
201 u_int32_t cl_handle; /* class handle */
202 class_queue_t cl_q; /* class queue structure */
203 u_int32_t cl_qflags; /* class queue flags */
204 union {
205 void *ptr;
206 struct red *red; /* RED state */
207 struct rio *rio; /* RIO state */
208 struct blue *blue; /* BLUE state */
209 struct sfb *sfb; /* SFB state */
210 } cl_qalg;
211 struct qfq_if *cl_qif; /* back pointer to qif */
212 u_int32_t cl_flags; /* class flags */
213
214 u_int64_t cl_S, cl_F; /* flow timestamps (exact) */
215 struct qfq_class *cl_next; /* link for the slot list */
216 /*
217 * Group we belong to. In principle we would need the index,
218 * which is log_2(lmax/weight), but we never reference it
219 * directly, only the group.
220 */
221 struct qfq_group *cl_grp;
222 u_int32_t cl_inv_w; /* QFQ_ONE_FP/weight */
223 u_int32_t cl_lmax; /* max packet size for this flow */
224
225 /* statistics */
226 u_int32_t cl_period; /* backlog period */
227 struct pktcntr cl_xmitcnt; /* transmitted packet counter */
228 struct pktcntr cl_dropcnt; /* dropped packet counter */
229 };
230
231 #define cl_red cl_qalg.red
232 #define cl_rio cl_qalg.rio
233 #define cl_blue cl_qalg.blue
234 #define cl_sfb cl_qalg.sfb
235
236 /*
237 * Group descriptor, see the paper for details.
238 * Basically this contains the bucket lists.
239 */
240 struct qfq_group {
241 u_int64_t qfg_S, qfg_F; /* group timestamps (approx) */
242 u_int8_t qfg_slot_shift; /* slot shift */
243 u_int8_t qfg_index; /* group index */
244 u_int8_t qfg_front; /* index of the front slot */
245 pktsched_bitmap_t qfg_full_slots; /* non-empty slots */
246
247 /* array of lists of active classes */
248 struct qfq_class **qfg_slots;
249 };
250
251 /* qfq_if flags */
252 #define QFQIFF_ALTQ 0x1 /* configured via PF/ALTQ */
253
254 /*
255 * qfq interface state
256 */
257 struct qfq_if {
258 struct ifclassq *qif_ifq; /* backpointer to ifclassq */
259 u_int32_t qif_flags; /* flags */
260 u_int32_t qif_throttle; /* throttling level */
261 u_int8_t qif_classes; /* # of classes in table */
262 u_int8_t qif_maxclasses; /* max # of classes in table */
263 u_int8_t qif_maxslots; /* max # of slots */
264 struct qfq_class *qif_default; /* default class */
265 struct qfq_class **qif_class_tbl;
266
267 u_int64_t qif_V; /* precise virtual time */
268 u_int32_t qif_wsum; /* weight sum */
269 #if QFQ_DEBUG
270 u_int32_t qif_i_wsum; /* QFQ_ONE_FP/w_sum */
271 u_int32_t qif_queued; /* debugging */
272 u_int32_t qif_emptygrp; /* debugging */
273 #endif /* QFQ_DEBUG */
274 pktsched_bitmap_t qif_bitmaps[QFQ_MAX_STATE]; /* group bitmaps */
275 struct qfq_group **qif_groups; /* the groups */
276 };
277
278 #define QFQIF_IFP(_qif) ((_qif)->qif_ifq->ifcq_ifp)
279
280 struct if_ifclassq_stats;
281
282 extern void qfq_init(void);
283 extern struct qfq_if *qfq_alloc(struct ifnet *, int, boolean_t);
284 extern int qfq_destroy(struct qfq_if *);
285 extern void qfq_purge(struct qfq_if *);
286 extern void qfq_event(struct qfq_if *, cqev_t);
287 extern int qfq_add_queue(struct qfq_if *, u_int32_t, u_int32_t, u_int32_t,
288 u_int32_t, u_int32_t, struct qfq_class **);
289 extern int qfq_remove_queue(struct qfq_if *, u_int32_t);
290 extern int qfq_get_class_stats(struct qfq_if *, u_int32_t,
291 struct qfq_classstats *);
292 extern int qfq_enqueue(struct qfq_if *, struct qfq_class *, struct mbuf *,
293 struct pf_mtag *);
294 extern struct mbuf *qfq_dequeue(struct qfq_if *, cqdq_op_t);
295 extern int qfq_setup_ifclassq(struct ifclassq *, u_int32_t);
296 extern int qfq_teardown_ifclassq(struct ifclassq *ifq);
297 extern int qfq_getqstats_ifclassq(struct ifclassq *, u_int32_t,
298 struct if_ifclassq_stats *);
299 #endif /* BSD_KERNEL_PRIVATE */
300 #ifdef __cplusplus
301 }
302 #endif
303 #endif /* PRIVATE */
304 #endif /* _NET_PKTSCHED_PKTSCHED_QFQ_H_ */