]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/net/pktsched/pktsched_qfq.h
xnu-2782.20.48.tar.gz
[apple/xnu.git] / bsd / net / pktsched / pktsched_qfq.h
... / ...
CommitLineData
1/*
2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
31 * All rights reserved
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 */
54
55#ifndef _NET_PKTSCHED_PKTSCHED_QFQ_H_
56#define _NET_PKTSCHED_PKTSCHED_QFQ_H_
57
58#ifdef PRIVATE
59#include <net/pktsched/pktsched.h>
60#include <net/classq/classq.h>
61#include <net/classq/classq_red.h>
62#include <net/classq/classq_rio.h>
63#include <net/classq/classq_blue.h>
64#include <net/classq/classq_sfb.h>
65
66#ifdef __cplusplus
67extern "C" {
68#endif
69
70/*
 * qfq class flags
 *
 * Option bits for a QFQ class.  All of them are visible under PRIVATE
 * except QFCF_LAZY, which is only defined under BSD_KERNEL_PRIVATE.
 */
71#define QFCF_RED 0x0001 /* use RED */
72#define QFCF_ECN 0x0002 /* use ECN with RED/BLUE/SFB */
73#define QFCF_RIO 0x0004 /* use RIO */
74#define QFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
75#define QFCF_BLUE 0x0100 /* use BLUE */
76#define QFCF_SFB 0x0200 /* use SFB */
77#define QFCF_FLOWCTL 0x0400 /* enable flow control advisories */
78#define QFCF_DEFAULTCLASS 0x1000 /* default class */
79#define QFCF_DELAYBASED 0x2000 /* queue sizing is delay based */
80#ifdef BSD_KERNEL_PRIVATE
81#define QFCF_LAZY 0x10000000 /* on-demand resource allocation */
82#endif /* BSD_KERNEL_PRIVATE */
83
/*
 * Mask of the class flags above, excluding QFCF_DELAYBASED and the
 * kernel-only QFCF_LAZY.
 * NOTE(review): the name suggests this is the set of bits accepted from
 * user configuration -- confirm against the users of this mask.
 * QFCF_DELAYBASED appears in neither this mask nor QFCF_BITS; confirm
 * that the omission is intentional.
 */
84#define QFCF_USERFLAGS \
85 (QFCF_RED | QFCF_ECN | QFCF_RIO | QFCF_CLEARDSCP | QFCF_BLUE | \
86 QFCF_SFB | QFCF_FLOWCTL | QFCF_DEFAULTCLASS)
87
/*
 * Bit-name string for the class flags: a leading "\020" followed by
 * (octal bit number, name) pairs.  The bit numbers are 1-based and match
 * the flag values above, e.g. \11 = bit 9 = 0x0100 (BLUE) and
 * \35 = bit 29 = 0x10000000 (LAZY, kernel-private build only).
 * NOTE(review): this looks like the BSD printf "%b" format, where the
 * first byte (\020 = 16) selects the output base -- confirm against the
 * code that consumes QFCF_BITS.
 */
88#ifdef BSD_KERNEL_PRIVATE
89#define QFCF_BITS \
90 "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
91 "\35LAZY"
92#else
93#define QFCF_BITS \
94 "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT"
95#endif /* !BSD_KERNEL_PRIVATE */
96
97#define QFQ_MAX_CLASSES 32 /* NOTE(review): presumably the cap on classes per instance -- confirm */
98#define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */
99#define QFQ_MAX_WEIGHT (1 << QFQ_MAX_WSHIFT) /* largest class weight: 2^16 = 65536 */
100
/*
 * Per-class statistics record.  Declared under PRIVATE, outside the
 * BSD_KERNEL_PRIVATE section, so it is not restricted to kernel-private
 * builds.  qfq_get_class_stats() takes a pointer to one of these.
 * NOTE(review): several fields appear to mirror members of
 * struct qfq_class (cl_handle, cl_lmax, cl_period, cl_xmitcnt,
 * cl_dropcnt) -- confirm against the code that fills the record in.
 */
101struct qfq_classstats {
102 u_int32_t class_handle; /* class handle */
103 u_int32_t index; /* NOTE(review): presumably the group index (cf. qfg_index) -- confirm */
104 u_int32_t weight; /* class weight */
105 u_int32_t lmax; /* presumably max packet size (cf. cl_lmax) -- confirm */
106
107 u_int32_t qlength; /* queue length */
108 u_int32_t qlimit; /* queue limit */
109 u_int32_t period; /* presumably the backlog period (cf. cl_period) -- confirm */
110 struct pktcntr xmitcnt; /* transmitted packet counter */
111 struct pktcntr dropcnt; /* dropped packet counter */
112
113 /* RED, RIO, BLUE, SFB related info */
114 classq_type_t qtype; /* NOTE(review): presumably selects the valid union member below -- confirm */
115 union {
116 /* RIO has 3 red stats */
117 struct red_stats red[RIO_NDROPPREC];
118 struct blue_stats blue;
119 struct sfb_stats sfb;
120 }; /* anonymous union: members are accessed directly as red/blue/sfb */
121 classq_state_t qstate;
122};
123
124#ifdef BSD_KERNEL_PRIVATE
125#define QFQ_DEBUG 1 /* enable extra debugging; also compiles in the "#if QFQ_DEBUG" members of struct qfq_if */
126
127/*
128 * Virtual time computations.
129 *
130 * S, F and V are all computed in fixed point arithmetic with
131 * FRAC_BITS fractional bits.
132 *
133 * QFQ_MAX_INDEX is the maximum index allowed for a group. We need
134 * one bit per index.
135 *
136 * QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
137 * The layout of the bits is as below:
138 *
139 *   [ MTU_SHIFT ][      FRAC_BITS    ]
140 *   [ MAX_INDEX    ][ MIN_SLOT_SHIFT ]
141 *                  ^.__grp->index = 0
142 *                  *.__grp->slot_shift
143 *
144 * where MIN_SLOT_SHIFT is derived by difference from the others.
145 *
146 * The max group index corresponds to Lmax/w_min, where
147 * Lmax=1<<MTU_SHIFT, w_min = 1 .
148 * From this, and knowing how many groups (MAX_INDEX) we want,
149 * we can derive the shift corresponding to each group.
150 *
151 * Because we often need to compute
152 * F = S + len/w_i and V = V + len/wsum
153 * instead of storing w_i store the value
154 * inv_w = (1<<FRAC_BITS)/w_i
155 * so we can do F = S + len * inv_w * wsum.
156 * We use W_TOT in the formulas so we can easily move between
157 * static and adaptive weight sum.
158 *
159 * The per-scheduler-instance data contain all the data structures
160 * for the scheduler: bitmaps and bucket lists.
161 */
162
163/*
164 * Shifts used for class<->group mapping. Class weights are in the
165 * range [1, QFQ_MAX_WEIGHT], we need to map each class i to the
166 * group with the smallest index that can support the L_i / r_i
167 * configured for the class.
168 *
169 * grp->qfg_index is the index of the group; and grp->qfg_slot_shift
170 * is the shift for the corresponding (scaled) sigma_i.
171 *
172 * When computing the group index, we do (len<<FP_SHIFT)/weight,
173 * then compute an FLS (which is like a log2()), and if the result
174 * is below the MAX_INDEX region we use 0 (which is the same as
175 * using a larger len).
176 */
177#define QFQ_MAX_INDEX 19 /* maximum index allowed for a group */
178#define QFQ_MAX_WSUM (2 * QFQ_MAX_WEIGHT) /* max weight sum: 2 * 2^16 = 2^17 */
179
180#define QFQ_FRAC_BITS 30 /* fixed point arithmetic */
181#define QFQ_ONE_FP (1UL << QFQ_FRAC_BITS) /* 1.0 in fixed point (2^30) */
182#define QFQ_IWSUM (QFQ_ONE_FP / QFQ_MAX_WSUM) /* fixed-point 1/QFQ_MAX_WSUM: 2^30 / 2^17 = 2^13 */
183
184#define QFQ_MTU_SHIFT 11 /* log2(max_len) */
185#define QFQ_MIN_SLOT_SHIFT (QFQ_FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX) /* 30 + 11 - 19 = 22 */
186
187/*
188 * Possible group states, also indexes for the bitmaps array
 * (qif_bitmaps[]) in
189 * struct qfq_if. We rely on ER, IR, EB, IB being numbered 0..3
 *
 * NOTE(review): the QFQ paper names the "B" states "blocked" rather
 * than "backlogged" -- confirm which wording is intended below.
190 */
191enum qfq_state {
192 ER = 0, /* eligible, ready */
193 IR = 1, /* ineligible, ready */
194 EB = 2, /* eligible, backlogged */
195 IB = 3, /* ineligible, backlogged */
196 QFQ_MAX_STATE /* number of states (4); dimension of qif_bitmaps[] */
197};
198
/* Forward declaration: struct qfq_class only holds a pointer to its group. */
199struct qfq_group;
200
/*
 * Per-class scheduler state: the class queue and its queueing-algorithm
 * state, the flow timestamps, the link to the owning group, and the
 * per-class counters.
 */
201struct qfq_class {
202 u_int32_t cl_handle; /* class handle */
203 class_queue_t cl_q; /* class queue structure */
204 u_int32_t cl_qflags; /* class queue flags */
	/* One pointer, viewed with the type of whichever algorithm is in use. */
205 union {
206 void *ptr; /* untyped view of the same pointer */
207 struct red *red; /* RED state */
208 struct rio *rio; /* RIO state */
209 struct blue *blue; /* BLUE state */
210 struct sfb *sfb; /* SFB state */
211 } cl_qalg;
212 struct qfq_if *cl_qif; /* back pointer to qif */
213 u_int32_t cl_flags; /* class flags */
214
215 u_int64_t cl_S, cl_F; /* flow timestamps (exact) */
216 struct qfq_class *cl_next; /* link for the slot list */
217 /*
218 * Group we belong to. In principle we would need the index,
219 * which is log_2(lmax/weight), but we never reference it
220 * directly, only the group.
221 */
222 struct qfq_group *cl_grp;
223 u_int32_t cl_inv_w; /* QFQ_ONE_FP/weight */
224 u_int32_t cl_lmax; /* max packet size for this flow */
225
226 /* statistics */
227 u_int32_t cl_period; /* backlog period */
228 struct pktcntr cl_xmitcnt; /* transmitted packet counter */
229 struct pktcntr cl_dropcnt; /* dropped packet counter */
230};
231
/* Shorthand names for the typed members of the cl_qalg union. */
232#define cl_red cl_qalg.red
233#define cl_rio cl_qalg.rio
234#define cl_blue cl_qalg.blue
235#define cl_sfb cl_qalg.sfb
236
237/*
238 * Group descriptor, see the paper for details.
239 * Basically this contains the bucket lists.
 *
 * Each slot is a list of classes; struct qfq_class carries the list
 * link (cl_next) and a pointer back to its group (cl_grp).
240 */
241struct qfq_group {
242 u_int64_t qfg_S, qfg_F; /* group timestamps (approx) */
243 u_int8_t qfg_slot_shift; /* slot shift */
244 u_int8_t qfg_index; /* group index */
245 u_int8_t qfg_front; /* index of the front slot */
246 pktsched_bitmap_t qfg_full_slots; /* bitmap of non-empty slots */
247
248 /*
 * array of lists of active classes
 * NOTE(review): the array length is not visible in this header;
 * presumably related to qif_maxslots -- confirm at the allocation site.
 */
249 struct qfq_class **qfg_slots;
250};
251
252/* qfq_if flags (values for qif_flags) */
253#define QFQIFF_ALTQ 0x1 /* configured via PF/ALTQ */
254
255/*
256 * qfq interface state
 *
 * One scheduler instance: the class table, the virtual time and weight
 * sum, and the groups with one bitmap per enum qfq_state value.
 * The members between "#if QFQ_DEBUG" and "#endif" exist only when
 * QFQ_DEBUG is non-zero, so the structure layout depends on that macro.
257 */
258struct qfq_if {
259 struct ifclassq *qif_ifq; /* backpointer to ifclassq */
260 u_int32_t qif_flags; /* flags */
261 u_int32_t qif_throttle; /* throttling level */
262 u_int8_t qif_classes; /* # of classes in table */
263 u_int8_t qif_maxclasses; /* max # of classes in table */
264 u_int8_t qif_maxslots; /* max # of slots */
265 struct qfq_class *qif_default; /* default class */
266 struct qfq_class **qif_class_tbl; /* class table (see qif_classes, qif_maxclasses) */
267
268 u_int64_t qif_V; /* precise virtual time */
269 u_int32_t qif_wsum; /* weight sum */
270#if QFQ_DEBUG
271 u_int32_t qif_i_wsum; /* QFQ_ONE_FP/w_sum */
272 u_int32_t qif_queued; /* debugging */
273 u_int32_t qif_emptygrp; /* debugging */
274#endif /* QFQ_DEBUG */
275 pktsched_bitmap_t qif_bitmaps[QFQ_MAX_STATE]; /* group bitmaps, indexed by enum qfq_state */
276 struct qfq_group **qif_groups; /* the groups */
277};
278
/* Follows the qif_ifq back pointer and yields that ifclassq's ifcq_ifp member. */
279#define QFQIF_IFP(_qif) ((_qif)->qif_ifq->ifcq_ifp)
280
/* Forward declaration: only used as a pointer type in the prototypes below. */
281struct if_ifclassq_stats;
282
/*
 * QFQ scheduler entry points, declared only under BSD_KERNEL_PRIVATE.
 * The definitions are not part of this header and the prototypes omit
 * parameter names, so argument meanings and return-value conventions
 * must be taken from the implementation.
 */
283extern void qfq_init(void);
284extern struct qfq_if *qfq_alloc(struct ifnet *, int, boolean_t);
285extern int qfq_destroy(struct qfq_if *);
286extern void qfq_purge(struct qfq_if *);
287extern void qfq_event(struct qfq_if *, cqev_t);
288extern int qfq_add_queue(struct qfq_if *, u_int32_t, u_int32_t, u_int32_t,
289 u_int32_t, u_int32_t, struct qfq_class **);
290extern int qfq_remove_queue(struct qfq_if *, u_int32_t);
291extern int qfq_get_class_stats(struct qfq_if *, u_int32_t,
292 struct qfq_classstats *);
293extern int qfq_enqueue(struct qfq_if *, struct qfq_class *, struct mbuf *,
294 struct pf_mtag *);
295extern struct mbuf *qfq_dequeue(struct qfq_if *, cqdq_op_t);
296extern int qfq_setup_ifclassq(struct ifclassq *, u_int32_t);
297extern int qfq_teardown_ifclassq(struct ifclassq *ifq);
298extern int qfq_getqstats_ifclassq(struct ifclassq *, u_int32_t,
299 struct if_ifclassq_stats *);
300#endif /* BSD_KERNEL_PRIVATE */
301#ifdef __cplusplus
302}
303#endif
304#endif /* PRIVATE */
305#endif /* _NET_PKTSCHED_PKTSCHED_QFQ_H_ */