/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
 * Portions Copyright (c) 2000 Akamba Corp.
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.84 2004/08/25 09:31:30 pjd Exp $
 */

#define DUMMYNET_DEBUG

/*
 * This module implements IP dummynet, a bandwidth limiter/delay emulator
 * used in conjunction with the ipfw package.
 * Description of the data structures used is in ip_dummynet.h
 * Here you mainly find the following blocks of code:
 *  + variable declarations;
 *  + heap management functions;
 *  + scheduler and dummynet functions;
 *  + configuration and initialization.
 *
 * NOTA BENE: critical sections are protected by the "dummynet lock".
 *
 * Most important Changes:
 *
 * 010124: Fixed WF2Q behaviour
 * 010122: Fixed spl protection.
 * 000601: WF2Q support
 * 000106: large rewrite, use heaps to handle very many pipes.
 * 980513: initial release
 *
 * include files marked with XXX are probably not needed
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>			/* XXX */
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/kpi_protocol.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <netinet/ip_var.h>

/*
 * We keep a private variable for the simulation time, but we could
 * probably use an existing one ("softticks" in sys/kern/kern_timer.c)
 */
static dn_key curr_time = 0 ; /* current simulation time */

/* this is for the timer that fires to call dummynet() - we only enable
 * the timer when there are packets to process, otherwise it's disabled */
static int timer_enabled = 0;

static int dn_hash_size = 64 ;	/* default hash size */

/* statistics on number of queue searches and search steps */
static int searches, search_steps ;
static int pipe_expire = 1 ;	/* expire queue if empty */
static int dn_max_ratio = 16 ;	/* max queues/buckets ratio */

static int red_lookup_depth = 256;	/* RED - default lookup table depth */
static int red_avg_pkt_size = 512;	/* RED - default medium packet size */
static int red_max_pkt_size = 1500;	/* RED - default max packet size */

/*
 * Three heaps contain queues and pipes that the scheduler handles:
 *
 * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
 *
 * wfq_ready_heap contains the pipes associated with WF2Q flows
 *
 * extract_heap contains pipes associated with delay lines.
 *
 */
static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ;

static int heap_init(struct dn_heap *h, int size) ;
static int heap_insert (struct dn_heap *h, dn_key key1, void *p);
static void heap_extract(struct dn_heap *h, void *obj);

static void transmit_event(struct dn_pipe *pipe, struct mbuf **head,
    struct mbuf **tail);
static void ready_event(struct dn_flow_queue *q, struct mbuf **head,
    struct mbuf **tail);
static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head,
    struct mbuf **tail);

/*
 * Packets are retrieved from queues in Dummynet in chains instead of
 * packet-by-packet. The entire list of packets is first dequeued and
 * sent out by the following function.
 */
static void dummynet_send(struct mbuf *m);

/* Flag to signify the existence of a dequeued packet chain */
static int serialize = 0;

#define HASHSIZE	16
#define HASH(num)	((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f)
static struct dn_pipe_head	pipehash[HASHSIZE];	/* all pipes */
static struct dn_flow_set_head	flowsethash[HASHSIZE];	/* all flowsets */

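/*
 * Illustrative example (not part of the original source): HASH() folds a
 * pipe or flow-set number into one of the HASHSIZE buckets above. For
 * num = 0x123: (0x123>>8) ^ (0x123>>4) ^ 0x123 == 0x1 ^ 0x12 ^ 0x123
 * == 0x130, and 0x130 & 0x0f == 0, so pipe 0x123 lands in bucket 0.
 */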

#ifdef SYSCTL_NODE
SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
	CTLFLAG_RW, 0, "Dummynet");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
	CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
SYSCTL_QUAD(_net_inet_ip_dummynet, OID_AUTO, curr_time,
	CTLFLAG_RD, &curr_time, "Current tick");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
	CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
	CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
	CTLFLAG_RD, &searches, 0, "Number of queue searches");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
	CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
	CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
	CTLFLAG_RW, &dn_max_ratio, 0,
	"Max ratio between dynamic queues and buckets");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
	CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
	CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
	CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
#endif

#ifdef DUMMYNET_DEBUG
int	dummynet_debug = 0;
#ifdef SYSCTL_NODE
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug,
	0, "control debugging printfs");
#endif
#define	DPRINTF(X)	if (dummynet_debug) printf X
#else
#define	DPRINTF(X)
#endif

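/*
 * Illustrative usage (not part of the original source): DPRINTF takes a
 * double-parenthesized argument so that a full printf argument list can
 * pass through the single macro parameter X, e.g.
 *	DPRINTF(("dummynet: pipe %d ready\n", p->pipe_nr));
 */
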
/* contrary to the comment above random(), it does not actually
 * return a value in [0, 2^31 - 1], which breaks plr amongst other
 * things. Masking it should work even if the behavior of
 * the function is fixed.
 */
#define MY_RANDOM (random() & 0x7FFFFFFF)

/* dummynet lock */
static lck_grp_t	*dn_mutex_grp;
static lck_grp_attr_t	*dn_mutex_grp_attr;
static lck_attr_t	*dn_mutex_attr;
static lck_mtx_t	*dn_mutex;

static int config_pipe(struct dn_pipe *p);
static int ip_dn_ctl(struct sockopt *sopt);

static void dummynet(void *);
static void dummynet_flush(void);
void dummynet_drain(void);
static ip_dn_io_t dummynet_io;
static void dn_rule_delete(void *);

int if_tx_rdy(struct ifnet *ifp);

static void cp_flow_set_to_64_user(struct dn_flow_set *set, struct dn_flow_set_64 *fs_bp);
static void cp_queue_to_64_user(struct dn_flow_queue *q, struct dn_flow_queue_64 *qp);
static char *cp_pipe_to_64_user(struct dn_pipe *p, struct dn_pipe_64 *pipe_bp);
static char *dn_copy_set_64(struct dn_flow_set *set, char *bp);
static int cp_pipe_from_user_64(struct sockopt *sopt, struct dn_pipe *p);

static void cp_flow_set_to_32_user(struct dn_flow_set *set, struct dn_flow_set_32 *fs_bp);
static void cp_queue_to_32_user(struct dn_flow_queue *q, struct dn_flow_queue_32 *qp);
static char *cp_pipe_to_32_user(struct dn_pipe *p, struct dn_pipe_32 *pipe_bp);
static char *dn_copy_set_32(struct dn_flow_set *set, char *bp);
static int cp_pipe_from_user_32(struct sockopt *sopt, struct dn_pipe *p);

/*
 * Heap management functions.
 *
 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
 * Some macros help finding parent/children so we can optimize them.
 *
 * heap_init() is called to expand the heap when needed.
 * Increment size in blocks of 16 entries.
 * XXX failure to allocate a new element is a pretty bad failure
 * as we basically stall a whole queue forever!!
 * Returns 1 on error, 0 on success
 */
#define HEAP_FATHER(x)	( ( (x) - 1 ) / 2 )
#define HEAP_LEFT(x)	( 2*(x) + 1 )
#define HEAP_IS_LEFT(x)	( (x) & 1 )
#define HEAP_RIGHT(x)	( 2*(x) + 2 )
#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
#define HEAP_INCREMENT	15

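/*
 * Illustrative example (not part of the original source): with the root
 * at index 0, HEAP_LEFT(3) == 7 and HEAP_RIGHT(3) == 8, and
 * HEAP_FATHER(7) == HEAP_FATHER(8) == 3; HEAP_IS_LEFT(7) is true since
 * left children always sit at odd indices.
 */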

int cp_pipe_from_user_32( struct sockopt *sopt, struct dn_pipe *p )
{
    struct dn_pipe_32 user_pipe_32;
    int error = 0;

    error = sooptcopyin(sopt, &user_pipe_32, sizeof(struct dn_pipe_32), sizeof(struct dn_pipe_32));
    if ( !error ){
	p->pipe_nr = user_pipe_32.pipe_nr;
	p->bandwidth = user_pipe_32.bandwidth;
	p->delay = user_pipe_32.delay;
	p->V = user_pipe_32.V;
	p->sum = user_pipe_32.sum;
	p->numbytes = user_pipe_32.numbytes;
	p->sched_time = user_pipe_32.sched_time;
	bcopy( user_pipe_32.if_name, p->if_name, IFNAMSIZ);
	p->ready = user_pipe_32.ready;

	p->fs.fs_nr = user_pipe_32.fs.fs_nr;
	p->fs.flags_fs = user_pipe_32.fs.flags_fs;
	p->fs.parent_nr = user_pipe_32.fs.parent_nr;
	p->fs.weight = user_pipe_32.fs.weight;
	p->fs.qsize = user_pipe_32.fs.qsize;
	p->fs.plr = user_pipe_32.fs.plr;
	p->fs.flow_mask = user_pipe_32.fs.flow_mask;
	p->fs.rq_size = user_pipe_32.fs.rq_size;
	p->fs.rq_elements = user_pipe_32.fs.rq_elements;
	p->fs.last_expired = user_pipe_32.fs.last_expired;
	p->fs.backlogged = user_pipe_32.fs.backlogged;
	p->fs.w_q = user_pipe_32.fs.w_q;
	p->fs.max_th = user_pipe_32.fs.max_th;
	p->fs.min_th = user_pipe_32.fs.min_th;
	p->fs.max_p = user_pipe_32.fs.max_p;
	p->fs.c_1 = user_pipe_32.fs.c_1;
	p->fs.c_2 = user_pipe_32.fs.c_2;
	p->fs.c_3 = user_pipe_32.fs.c_3;
	p->fs.c_4 = user_pipe_32.fs.c_4;
	p->fs.lookup_depth = user_pipe_32.fs.lookup_depth;
	p->fs.lookup_step = user_pipe_32.fs.lookup_step;
	p->fs.lookup_weight = user_pipe_32.fs.lookup_weight;
	p->fs.avg_pkt_size = user_pipe_32.fs.avg_pkt_size;
	p->fs.max_pkt_size = user_pipe_32.fs.max_pkt_size;
    }
    return error;
}

int cp_pipe_from_user_64( struct sockopt *sopt, struct dn_pipe *p )
{
    struct dn_pipe_64 user_pipe_64;
    int error = 0;

    error = sooptcopyin(sopt, &user_pipe_64, sizeof(struct dn_pipe_64), sizeof(struct dn_pipe_64));
    if ( !error ){
	p->pipe_nr = user_pipe_64.pipe_nr;
	p->bandwidth = user_pipe_64.bandwidth;
	p->delay = user_pipe_64.delay;
	p->V = user_pipe_64.V;
	p->sum = user_pipe_64.sum;
	p->numbytes = user_pipe_64.numbytes;
	p->sched_time = user_pipe_64.sched_time;
	bcopy( user_pipe_64.if_name, p->if_name, IFNAMSIZ);
	p->ready = user_pipe_64.ready;

	p->fs.fs_nr = user_pipe_64.fs.fs_nr;
	p->fs.flags_fs = user_pipe_64.fs.flags_fs;
	p->fs.parent_nr = user_pipe_64.fs.parent_nr;
	p->fs.weight = user_pipe_64.fs.weight;
	p->fs.qsize = user_pipe_64.fs.qsize;
	p->fs.plr = user_pipe_64.fs.plr;
	p->fs.flow_mask = user_pipe_64.fs.flow_mask;
	p->fs.rq_size = user_pipe_64.fs.rq_size;
	p->fs.rq_elements = user_pipe_64.fs.rq_elements;
	p->fs.last_expired = user_pipe_64.fs.last_expired;
	p->fs.backlogged = user_pipe_64.fs.backlogged;
	p->fs.w_q = user_pipe_64.fs.w_q;
	p->fs.max_th = user_pipe_64.fs.max_th;
	p->fs.min_th = user_pipe_64.fs.min_th;
	p->fs.max_p = user_pipe_64.fs.max_p;
	p->fs.c_1 = user_pipe_64.fs.c_1;
	p->fs.c_2 = user_pipe_64.fs.c_2;
	p->fs.c_3 = user_pipe_64.fs.c_3;
	p->fs.c_4 = user_pipe_64.fs.c_4;
	p->fs.lookup_depth = user_pipe_64.fs.lookup_depth;
	p->fs.lookup_step = user_pipe_64.fs.lookup_step;
	p->fs.lookup_weight = user_pipe_64.fs.lookup_weight;
	p->fs.avg_pkt_size = user_pipe_64.fs.avg_pkt_size;
	p->fs.max_pkt_size = user_pipe_64.fs.max_pkt_size;
    }
    return error;
}

static void
cp_flow_set_to_32_user(struct dn_flow_set *set, struct dn_flow_set_32 *fs_bp)
{
    fs_bp->fs_nr = set->fs_nr;
    fs_bp->flags_fs = set->flags_fs ;
    fs_bp->parent_nr = set->parent_nr ;
    fs_bp->weight = set->weight ;
    fs_bp->qsize = set->qsize ;
    fs_bp->plr = set->plr ;
    fs_bp->flow_mask = set->flow_mask ;
    fs_bp->rq_size = set->rq_size ;
    fs_bp->rq_elements = set->rq_elements ;
    fs_bp->last_expired = set->last_expired ;
    fs_bp->backlogged = set->backlogged ;
    fs_bp->w_q = set->w_q ;
    fs_bp->max_th = set->max_th ;
    fs_bp->min_th = set->min_th ;
    fs_bp->max_p = set->max_p ;
    fs_bp->c_1 = set->c_1 ;
    fs_bp->c_2 = set->c_2 ;
    fs_bp->c_3 = set->c_3 ;
    fs_bp->c_4 = set->c_4 ;
    fs_bp->w_q_lookup = CAST_DOWN_EXPLICIT(user32_addr_t, set->w_q_lookup) ;
    fs_bp->lookup_depth = set->lookup_depth ;
    fs_bp->lookup_step = set->lookup_step ;
    fs_bp->lookup_weight = set->lookup_weight ;
    fs_bp->avg_pkt_size = set->avg_pkt_size ;
    fs_bp->max_pkt_size = set->max_pkt_size ;
}

static void
cp_flow_set_to_64_user(struct dn_flow_set *set, struct dn_flow_set_64 *fs_bp)
{
    fs_bp->fs_nr = set->fs_nr;
    fs_bp->flags_fs = set->flags_fs ;
    fs_bp->parent_nr = set->parent_nr ;
    fs_bp->weight = set->weight ;
    fs_bp->qsize = set->qsize ;
    fs_bp->plr = set->plr ;
    fs_bp->flow_mask = set->flow_mask ;
    fs_bp->rq_size = set->rq_size ;
    fs_bp->rq_elements = set->rq_elements ;
    fs_bp->last_expired = set->last_expired ;
    fs_bp->backlogged = set->backlogged ;
    fs_bp->w_q = set->w_q ;
    fs_bp->max_th = set->max_th ;
    fs_bp->min_th = set->min_th ;
    fs_bp->max_p = set->max_p ;
    fs_bp->c_1 = set->c_1 ;
    fs_bp->c_2 = set->c_2 ;
    fs_bp->c_3 = set->c_3 ;
    fs_bp->c_4 = set->c_4 ;
    fs_bp->w_q_lookup = CAST_DOWN(user64_addr_t, set->w_q_lookup) ;
    fs_bp->lookup_depth = set->lookup_depth ;
    fs_bp->lookup_step = set->lookup_step ;
    fs_bp->lookup_weight = set->lookup_weight ;
    fs_bp->avg_pkt_size = set->avg_pkt_size ;
    fs_bp->max_pkt_size = set->max_pkt_size ;
}

static
void cp_queue_to_32_user( struct dn_flow_queue *q, struct dn_flow_queue_32 *qp)
{
    qp->id = q->id;
    qp->len = q->len;
    qp->len_bytes = q->len_bytes;
    qp->numbytes = q->numbytes;
    qp->tot_pkts = q->tot_pkts;
    qp->tot_bytes = q->tot_bytes;
    qp->drops = q->drops;
    qp->hash_slot = q->hash_slot;
    qp->avg = q->avg;
    qp->count = q->count;
    qp->random = q->random;
    qp->q_time = q->q_time;
    qp->heap_pos = q->heap_pos;
    qp->sched_time = q->sched_time;
    qp->S = q->S;
    qp->F = q->F;
}

static
void cp_queue_to_64_user( struct dn_flow_queue *q, struct dn_flow_queue_64 *qp)
{
    qp->id = q->id;
    qp->len = q->len;
    qp->len_bytes = q->len_bytes;
    qp->numbytes = q->numbytes;
    qp->tot_pkts = q->tot_pkts;
    qp->tot_bytes = q->tot_bytes;
    qp->drops = q->drops;
    qp->hash_slot = q->hash_slot;
    qp->avg = q->avg;
    qp->count = q->count;
    qp->random = q->random;
    qp->q_time = q->q_time;
    qp->heap_pos = q->heap_pos;
    qp->sched_time = q->sched_time;
    qp->S = q->S;
    qp->F = q->F;
}

static
char *cp_pipe_to_32_user(struct dn_pipe *p, struct dn_pipe_32 *pipe_bp)
{
    char *bp;

    pipe_bp->pipe_nr = p->pipe_nr;
    pipe_bp->bandwidth = p->bandwidth;
    bcopy( &(p->scheduler_heap), &(pipe_bp->scheduler_heap), sizeof(struct dn_heap_32));
    pipe_bp->scheduler_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->scheduler_heap.p);
    bcopy( &(p->not_eligible_heap), &(pipe_bp->not_eligible_heap), sizeof(struct dn_heap_32));
    pipe_bp->not_eligible_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->not_eligible_heap.p);
    bcopy( &(p->idle_heap), &(pipe_bp->idle_heap), sizeof(struct dn_heap_32));
    pipe_bp->idle_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->idle_heap.p);
    pipe_bp->V = p->V;
    pipe_bp->sum = p->sum;
    pipe_bp->numbytes = p->numbytes;
    pipe_bp->sched_time = p->sched_time;
    bcopy( p->if_name, pipe_bp->if_name, IFNAMSIZ);
    pipe_bp->ifp = CAST_DOWN_EXPLICIT(user32_addr_t, p->ifp);
    pipe_bp->ready = p->ready;

    cp_flow_set_to_32_user( &(p->fs), &(pipe_bp->fs));

    /* convert the internal tick count to ms for userland */
    pipe_bp->delay = (p->delay * 1000) / (hz*10) ;
    /*
     * XXX the following is a hack based on ->next being the
     * first field in dn_pipe and dn_flow_set. The correct
     * solution would be to move the dn_flow_set to the beginning
     * of struct dn_pipe.
     */
    pipe_bp->next = CAST_DOWN_EXPLICIT( user32_addr_t, DN_IS_PIPE );
    /* clean pointers */
    pipe_bp->head = pipe_bp->tail = (user32_addr_t) 0 ;
    pipe_bp->fs.next = (user32_addr_t)0 ;
    pipe_bp->fs.pipe = (user32_addr_t)0 ;
    pipe_bp->fs.rq = (user32_addr_t)0 ;
    bp = ((char *)pipe_bp) + sizeof(struct dn_pipe_32);
    return( dn_copy_set_32( &(p->fs), bp) );
}

static
char *cp_pipe_to_64_user(struct dn_pipe *p, struct dn_pipe_64 *pipe_bp)
{
    char *bp;

    pipe_bp->pipe_nr = p->pipe_nr;
    pipe_bp->bandwidth = p->bandwidth;
    bcopy( &(p->scheduler_heap), &(pipe_bp->scheduler_heap), sizeof(struct dn_heap_64));
    pipe_bp->scheduler_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->scheduler_heap.p);
    bcopy( &(p->not_eligible_heap), &(pipe_bp->not_eligible_heap), sizeof(struct dn_heap_64));
    pipe_bp->not_eligible_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->not_eligible_heap.p);
    bcopy( &(p->idle_heap), &(pipe_bp->idle_heap), sizeof(struct dn_heap_64));
    pipe_bp->idle_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->idle_heap.p);
    pipe_bp->V = p->V;
    pipe_bp->sum = p->sum;
    pipe_bp->numbytes = p->numbytes;
    pipe_bp->sched_time = p->sched_time;
    bcopy( p->if_name, pipe_bp->if_name, IFNAMSIZ);
    pipe_bp->ifp = CAST_DOWN(user64_addr_t, p->ifp);
    pipe_bp->ready = p->ready;

    cp_flow_set_to_64_user( &(p->fs), &(pipe_bp->fs));

    /* convert the internal tick count to ms for userland */
    pipe_bp->delay = (p->delay * 1000) / (hz*10) ;
    /*
     * XXX the following is a hack based on ->next being the
     * first field in dn_pipe and dn_flow_set. The correct
     * solution would be to move the dn_flow_set to the beginning
     * of struct dn_pipe.
     */
    pipe_bp->next = CAST_DOWN( user64_addr_t, DN_IS_PIPE );
    /* clean pointers */
    pipe_bp->head = pipe_bp->tail = USER_ADDR_NULL ;
    pipe_bp->fs.next = USER_ADDR_NULL ;
    pipe_bp->fs.pipe = USER_ADDR_NULL ;
    pipe_bp->fs.rq = USER_ADDR_NULL ;
    bp = ((char *)pipe_bp) + sizeof(struct dn_pipe_64);
    return( dn_copy_set_64( &(p->fs), bp) );
}

static int
heap_init(struct dn_heap *h, int new_size)
{
    struct dn_heap_entry *p;

    if (h->size >= new_size ) {
	printf("dummynet: heap_init, Bogus call, have %d want %d\n",
	    h->size, new_size);
	return 0 ;
    }
    new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
    p = _MALLOC(new_size * sizeof(*p), M_DUMMYNET, M_DONTWAIT );
    if (p == NULL) {
	printf("dummynet: heap_init, resize %d failed\n", new_size );
	return 1 ; /* error */
    }
    if (h->size > 0) {
	bcopy(h->p, p, h->size * sizeof(*p) );
	FREE(h->p, M_DUMMYNET);
    }
    h->p = p ;
    h->size = new_size ;
    return 0 ;
}

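/*
 * Illustrative example (not part of the original source): heap_init()
 * rounds the requested size up to a multiple of HEAP_INCREMENT+1 == 16,
 * so asking for 17 slots allocates (17 + 15) & ~15 == 32 entries.
 */
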
/*
 * Insert element in heap. Normally, p != NULL, we insert p in
 * a new position and bubble up. If p == NULL, then the element is
 * already in place, and key is the position where to start the
 * bubble-up.
 * Returns 1 on failure (cannot allocate new heap entry)
 *
 * If offset > 0 the position (index, int) of the element in the heap is
 * also stored in the element itself at the given offset in bytes.
 */
#define SET_OFFSET(heap, node) \
    if (heap->offset > 0) \
	*((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
/*
 * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value.
 */
#define RESET_OFFSET(heap, node) \
    if (heap->offset > 0) \
	*((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
static int
heap_insert(struct dn_heap *h, dn_key key1, void *p)
{
    int son = h->elements ;

    if (p == NULL)	/* data already there, set starting point */
	son = key1 ;
    else {		/* insert new element at the end, possibly resize */
	son = h->elements ;
	if (son == h->size) /* need resize... */
	    if (heap_init(h, h->elements+1) )
		return 1 ; /* failure... */
	h->p[son].object = p ;
	h->p[son].key = key1 ;
	h->elements++ ;
    }
    while (son > 0) {	/* bubble up */
	int father = HEAP_FATHER(son) ;
	struct dn_heap_entry tmp ;

	if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
	    break ; /* found right position */
	/* son smaller than father, swap and repeat */
	HEAP_SWAP(h->p[son], h->p[father], tmp) ;
	SET_OFFSET(h, son);
	son = father ;
    }
    SET_OFFSET(h, son);
    return 0 ;
}

/*
 * remove top element from heap, or obj if obj != NULL
 */
static void
heap_extract(struct dn_heap *h, void *obj)
{
    int child, father, maxelt = h->elements - 1 ;

    if (maxelt < 0) {
	printf("dummynet: warning, extract from empty heap 0x%p\n", h);
	return ;
    }
    father = 0 ; /* default: move up smallest child */
    if (obj != NULL) { /* extract specific element, index is at offset */
	if (h->offset <= 0)
	    panic("dummynet: heap_extract from middle not supported on this heap!!!\n");
	father = *((int *)((char *)obj + h->offset)) ;
	if (father < 0 || father >= h->elements) {
	    printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
		father, h->elements);
	    panic("dummynet: heap_extract");
	}
    }
    RESET_OFFSET(h, father);
    child = HEAP_LEFT(father) ;		/* left child */
    while (child <= maxelt) {		/* valid entry */
	if (child != maxelt && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
	    child = child+1 ;		/* take right child, otherwise left */
	h->p[father] = h->p[child] ;
	SET_OFFSET(h, father);
	father = child ;
	child = HEAP_LEFT(child) ;	/* left child for next loop */
    }
    h->elements-- ;
    if (father != maxelt) {
	/*
	 * Fill hole with last entry and bubble up, reusing the insert code
	 */
	h->p[father] = h->p[maxelt] ;
	heap_insert(h, father, NULL); /* this one cannot fail */
    }
}

#if 0
/*
 * change object position and update references
 * XXX this one is never used!
 */
static void
heap_move(struct dn_heap *h, dn_key new_key, void *object)
{
    int temp;
    int i ;
    int maxelt = h->elements-1 ;
    struct dn_heap_entry buf ;

    if (h->offset <= 0)
	panic("cannot move items on this heap");

    i = *((int *)((char *)object + h->offset));
    if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
	h->p[i].key = new_key ;
	for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
		i = temp ) { /* bubble up */
	    HEAP_SWAP(h->p[i], h->p[temp], buf) ;
	    SET_OFFSET(h, i);
	}
    } else {		/* must move down */
	h->p[i].key = new_key ;
	while ( (temp = HEAP_LEFT(i)) <= maxelt ) { /* found left child */
	    if ((temp != maxelt) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
		temp++ ; /* select child with min key */
	    if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
		HEAP_SWAP(h->p[i], h->p[temp], buf) ;
		SET_OFFSET(h, i);
	    } else
		break ;
	    i = temp ;
	}
    }
    SET_OFFSET(h, i);
}
#endif /* heap_move, unused */

/*
 * heapify() will reorganize data inside an array to maintain the
 * heap property. It is needed when we delete a bunch of entries.
 */
static void
heapify(struct dn_heap *h)
{
    int i ;

    for (i = 0 ; i < h->elements ; i++ )
	heap_insert(h, i , NULL) ;
}

/*
 * cleanup the heap and free data structure
 */
static void
heap_free(struct dn_heap *h)
{
    if (h->size >0 )
	FREE(h->p, M_DUMMYNET);
    bzero(h, sizeof(*h));
}

/*
 * --- end of heap management functions ---
 */

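/*
 * Usage sketch (illustrative only, kept out of the build): keys are
 * dn_key deadlines and the smallest key is always at p[0]. The names
 * heap_example, q1 and q2 are hypothetical.
 */
#if 0
static void
heap_example(void)
{
    struct dn_heap h = { 0 };	/* offset == 0: no back-pointers */
    struct dn_flow_queue q1, q2;

    heap_insert(&h, 10, &q1);	/* deadline 10 becomes the root */
    heap_insert(&h, 5, &q2);	/* smaller deadline 5 bubbles up */
    /* here h.p[0].object == &q2 and h.p[0].key == 5 */
    heap_extract(&h, NULL);	/* removes &q2; &q1 is the new root */
    heap_free(&h);
}
#endif /* heap usage sketch */
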
/*
 * Return the mbuf tag holding the dummynet state. As an optimization
 * this is assumed to be the first tag on the list. If this turns out
 * wrong we'll need to search the list.
 */
static struct dn_pkt_tag *
dn_tag_get(struct mbuf *m)
{
    struct m_tag *mtag = m_tag_first(m);
/*  KASSERT(mtag != NULL &&
	mtag->m_tag_id == KERNEL_MODULE_TAG_ID &&
	mtag->m_tag_type == KERNEL_TAG_TYPE_DUMMYNET,
	("packet on dummynet queue w/o dummynet tag!"));
*/
    return (struct dn_pkt_tag *)(mtag+1);
}

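/*
 * Note (illustrative, not from the original source): the (mtag+1)
 * arithmetic in dn_tag_get() above works because mbuf tags place the
 * tag payload immediately after the struct m_tag header, so stepping
 * one struct m_tag past the header yields the embedded dn_pkt_tag.
 */
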
/*
 * Scheduler functions:
 *
 * transmit_event() is called when the delay-line needs to enter
 * the scheduler, either because of existing pkts getting ready,
 * or new packets entering the queue. The event handled is the delivery
 * time of the packet.
 *
 * ready_event() does something similar with fixed-rate queues, and the
 * event handled is the finish time of the head pkt.
 *
 * ready_event_wfq() does something similar with WF2Q queues, and the
 * event handled is the start time of the head pkt.
 *
 * In all cases, we make sure that the data structures are consistent
 * before passing pkts out, because this might trigger recursive
 * invocations of the procedures.
 */
static void
transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
{
    struct mbuf *m ;
    struct dn_pkt_tag *pkt ;

    lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED);

    /* Extract packets only if no pending chain is being currently processed */
    if (serialize == 0) {
	while ((m = pipe->head) != NULL) {
	    pkt = dn_tag_get(m);
	    if (!DN_KEY_LEQ(pkt->output_time, curr_time))
		break;

	    pipe->head = m->m_nextpkt;
	    if (*tail != NULL)
		(*tail)->m_nextpkt = m;
	    else
		*head = m;
	    *tail = m;
	}
	if (*tail != NULL)
	    (*tail)->m_nextpkt = NULL;
    }

    /* if there are leftover packets, put the pipe into the heap for next ready event */
    if ((m = pipe->head) != NULL) {
	pkt = dn_tag_get(m);
	/* XXX should check errors on heap_insert, by draining the
	 * whole pipe p and hoping in the future we are more successful
	 */
	heap_insert(&extract_heap, pkt->output_time, pipe);
    }
}

/*
 * the following macro computes how many ticks we have to wait
 * before being able to transmit a packet. The credit is taken from
 * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
 */

/* hz is 100, which gives a granularity of 10ms in the old timer.
 * The timer has been changed to fire every 1ms, so the use of
 * hz has been modified here. All instances of hz have been left
 * in place but adjusted by a factor of 10 so that hz is functionally
 * equal to 1000.
 */
#define SET_TICKS(_m, q, p) \
    ((_m)->m_pkthdr.len*8*(hz*10) - (q)->numbytes + p->bandwidth - 1 ) / \
	p->bandwidth ;

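/*
 * Illustrative example (not part of the original source): with the 1ms
 * tick (hz*10 == 1000) and a pipe at bandwidth 8000 bit/s, a 100-byte
 * packet arriving at a queue with no credit (numbytes == 0) waits
 *	(100*8*1000 + 8000 - 1) / 8000 = 100 ticks,
 * i.e. 100ms -- the ceiling of the exact transmission time.
 */
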
/*
 * extract pkt from queue, compute output time (could be now)
 * and put into delay line (p_queue)
 */
static void
move_pkt(struct mbuf *pkt, struct dn_flow_queue *q,
	struct dn_pipe *p, int len)
{
    struct dn_pkt_tag *dt = dn_tag_get(pkt);

    q->head = pkt->m_nextpkt ;
    q->len-- ;
    q->len_bytes -= len ;

    dt->output_time = curr_time + p->delay ;

    if (p->head == NULL)
	p->head = pkt;
    else
	p->tail->m_nextpkt = pkt;
    p->tail = pkt;
    p->tail->m_nextpkt = NULL;
}

/*
 * ready_event() is invoked every time the queue must enter the
 * scheduler, either because the first packet arrives, or because
 * a previously scheduled event fired.
 * On invocation, drain as many pkts as possible (could be 0) and then
 * if there are leftover packets reinsert the pkt in the scheduler.
 */
static void
ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
{
    struct mbuf *pkt;
    struct dn_pipe *p = q->fs->pipe ;
    int p_was_empty ;

    lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED);

    if (p == NULL) {
	printf("dummynet: ready_event pipe is gone\n");
	return ;
    }
    p_was_empty = (p->head == NULL) ;

    /*
     * schedule fixed-rate queues linked to this pipe:
     * Account for the bw accumulated since last scheduling, then
     * drain as many pkts as allowed by q->numbytes and move to
     * the delay line (in p) computing output time.
     * bandwidth==0 (no limit) means we can drain the whole queue,
     * setting len_scaled = 0 does the job.
     */
    q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
    while ( (pkt = q->head) != NULL ) {
	int len = pkt->m_pkthdr.len;
	int len_scaled = p->bandwidth ? len*8*(hz*10) : 0 ;
	if (len_scaled > q->numbytes )
	    break ;
	q->numbytes -= len_scaled ;
	move_pkt(pkt, q, p, len);
    }
    /*
     * If we have more packets queued, schedule next ready event
     * (can only occur when bandwidth != 0, otherwise we would have
     * flushed the whole queue in the previous loop).
     * To this purpose we record the current time and compute how many
     * ticks to go for the finish time of the packet.
     */
    if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */
	dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
	q->sched_time = curr_time ;
	heap_insert(&ready_heap, curr_time + t, (void *)q );
	/* XXX should check errors on heap_insert, and drain the whole
	 * queue on error hoping next time we are luckier.
	 */
    } else { /* RED needs to know when the queue becomes empty */
	q->q_time = curr_time;
	q->numbytes = 0;
    }
    /*
     * If the delay line was empty call transmit_event(p) now.
     * Otherwise, the scheduler will take care of it.
     */
    if (p_was_empty)
	transmit_event(p, head, tail);
}

/*
 * Called when we can transmit packets on WF2Q queues. Take pkts out of
 * the queues at their start time, and enqueue into the delay line.
 * Packets are drained until p->numbytes < 0. As long as
 * len_scaled >= p->numbytes, the packet goes into the delay line
 * with a deadline p->delay. For the last packet, if p->numbytes<0,
 * there is an additional delay.
 */
static void
ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
{
    int p_was_empty = (p->head == NULL) ;
    struct dn_heap *sch = &(p->scheduler_heap);
    struct dn_heap *neh = &(p->not_eligible_heap) ;
    int64_t p_numbytes = p->numbytes;

    lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED);

    if (p->if_name[0] == 0) /* tx clock is simulated */
	p_numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
    else { /* tx clock is for real, the ifq must be empty or this is a NOP */
	if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
	    return ;
	else {
	    DPRINTF(("dummynet: pipe %d ready from %s --\n",
		p->pipe_nr, p->if_name));
	}
    }

    /*
     * While we have backlogged traffic AND credit, we need to do
     * something on the queue.
     */
    while ( p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0) ) {
	if (sch->elements > 0) { /* have some eligible pkts to send out */
	    struct dn_flow_queue *q = sch->p[0].object ;
	    struct mbuf *pkt = q->head;
	    struct dn_flow_set *fs = q->fs;
	    u_int64_t len = pkt->m_pkthdr.len;
	    int len_scaled = p->bandwidth ? len*8*(hz*10) : 0 ;

	    heap_extract(sch, NULL); /* remove queue from heap */
	    p_numbytes -= len_scaled ;
	    move_pkt(pkt, q, p, len);

	    p->V += (len<<MY_M) / p->sum ; /* update V */
	    q->S = q->F ; /* update start time */
	    if (q->len == 0) { /* Flow not backlogged any more */
		fs->backlogged-- ;
		heap_insert(&(p->idle_heap), q->F, q);
	    } else { /* still backlogged */
		/*
		 * update F and position in backlogged queue, then
		 * put flow in not_eligible_heap (we will fix this later).
		 */
		len = (q->head)->m_pkthdr.len;
		q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
		if (DN_KEY_LEQ(q->S, p->V))
		    heap_insert(neh, q->S, q);
		else
		    heap_insert(sch, q->F, q);
	    }
	}
	/*
	 * now compute V = max(V, min(S_i)). Remember that all elements in sch
	 * have by definition S_i <= V so if sch is not empty, V is surely
	 * the max and we must not update it. Conversely, if sch is empty
	 * we only need to look at neh.
	 */
	if (sch->elements == 0 && neh->elements > 0)
	    p->V = MAX64 ( p->V, neh->p[0].key );
	/* move from neh to sch any packets that have become eligible */
	while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) {
	    struct dn_flow_queue *q = neh->p[0].object ;
	    heap_extract(neh, NULL);
	    heap_insert(sch, q->F, q);
	}

	if (p->if_name[0] != '\0') {/* tx clock is from a real thing */
	    p_numbytes = -1 ; /* mark not ready for I/O */
	    break ;
	}
    }
    if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0
	    && p->idle_heap.elements > 0) {
	/*
	 * no traffic and no events scheduled. We can get rid of idle-heap.
	 */
	int i ;

	for (i = 0 ; i < p->idle_heap.elements ; i++) {
	    struct dn_flow_queue *q = p->idle_heap.p[i].object ;

	    q->F = 0 ;
	    q->S = q->F + 1 ;
	}
	p->sum = 0 ;
	p->V = 0 ;
	p->idle_heap.elements = 0 ;
    }
    /*
     * If we are getting clocks from dummynet (not a real interface) and
     * if we are under credit, schedule the next ready event.
     * Also fix the delivery time of the last packet.
     */
    if (p->if_name[0]==0 && p_numbytes < 0) { /* this implies bandwidth >0 */
	dn_key t=0 ; /* number of ticks i have to wait */

	if (p->bandwidth > 0)
	    t = ( p->bandwidth -1 - p_numbytes) / p->bandwidth ;
	dn_tag_get(p->tail)->output_time += t ;
	p->sched_time = curr_time ;
	heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
	/* XXX should check errors on heap_insert, and drain the whole
	 * queue on error hoping next time we are luckier.
	 */
    }

    /* Fit (adjust if necessary) 64bit result into 32bit variable. */
    if (p_numbytes > INT_MAX)
	p->numbytes = INT_MAX;
    else if (p_numbytes < INT_MIN)
	p->numbytes = INT_MIN;
    else
	p->numbytes = p_numbytes;

    /*
     * If the delay line was empty call transmit_event(p) now.
     * Otherwise, the scheduler will take care of it.
     */
    if (p_was_empty)
	transmit_event(p, head, tail);
}

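/*
 * Worked example for the WF2Q+ accounting above (illustrative, not from
 * the original source): with two backlogged flows of weights 2 and 1,
 * p->sum == 3. Sending a 300-byte packet advances the virtual time by
 * (300<<MY_M)/3, while the sending flow's finish time F grows by
 * (300<<MY_M)/weight -- the weight-2 flow is charged half as much per
 * byte, so over time it receives twice the bandwidth of the weight-1 flow.
 */
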
/*
 * This is called every 1ms. It is used to
 * increment the current tick counter and schedule expired events.
 */
static void
dummynet(__unused void * unused)
{
    void *p ; /* generic parameter to handler */
    struct dn_heap *h ;
    struct dn_heap *heaps[3];
    struct mbuf *head = NULL, *tail = NULL;
    int i;
    struct dn_pipe *pe ;
    struct timespec ts;
    struct timeval tv;

    heaps[0] = &ready_heap ;		/* fixed-rate queues */
    heaps[1] = &wfq_ready_heap ;	/* wfq queues */
    heaps[2] = &extract_heap ;		/* delay line */

    lck_mtx_lock(dn_mutex);

    /* make all time measurements in milliseconds (ms) -
     * here we convert secs and usecs to msecs (just divide the
     * usecs and take the closest whole number).
     */
    microuptime(&tv);
    curr_time = (tv.tv_sec * 1000) + (tv.tv_usec / 1000);

    for (i=0; i < 3 ; i++) {
	h = heaps[i];
	while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
	    if (h->p[0].key > curr_time)
		printf("dummynet: warning, heap %d is %d ticks late\n",
		    i, (int)(curr_time - h->p[0].key));
	    p = h->p[0].object ;	/* store a copy before heap_extract */
	    heap_extract(h, NULL);	/* need to extract before processing */
	    if (i == 0)
		ready_event(p, &head, &tail) ;
	    else if (i == 1) {
		struct dn_pipe *pipe = p;
		if (pipe->if_name[0] != '\0')
		    printf("dummynet: bad ready_event_wfq for pipe %s\n",
			pipe->if_name);
		else
		    ready_event_wfq(p, &head, &tail) ;
	    } else {
		transmit_event(p, &head, &tail);
	    }
	}
    }
    /* sweep pipes trying to expire idle flow_queues */
    for (i = 0; i < HASHSIZE; i++)
	SLIST_FOREACH(pe, &pipehash[i], next)
	    if (pe->idle_heap.elements > 0 &&
		    DN_KEY_LT(pe->idle_heap.p[0].key, pe->V) ) {
		struct dn_flow_queue *q = pe->idle_heap.p[0].object ;

		heap_extract(&(pe->idle_heap), NULL);
		q->S = q->F + 1 ; /* mark timestamp as invalid */
		pe->sum -= q->fs->weight ;
	    }

    /* check the heaps to see if there's still stuff in there, and
     * only set the timer if there are packets to process
     */
    timer_enabled = 0;
    for (i=0; i < 3 ; i++) {
	h = heaps[i];
	if (h->elements > 0) { // set the timer
	    ts.tv_sec = 0;
	    ts.tv_nsec = 1 * 1000000;	// 1ms
	    timer_enabled = 1;
	    bsd_timeout(dummynet, NULL, &ts);
	    break;
	}
    }

    /*
     * If a packet chain has been dequeued, set serialize=1 so that new
     * packets don't get dispatched out of turn
     */
    if (head != NULL)
	serialize = 1;

    lck_mtx_unlock(dn_mutex);

    /* Send out the de-queued list of ready-to-send packets */
    if (head != NULL) {
	dummynet_send(head);
	lck_mtx_lock(dn_mutex);
	serialize = 0;
	lck_mtx_unlock(dn_mutex);
    }
}

static void
dummynet_send(struct mbuf *m)
{
    struct dn_pkt_tag *pkt;
    struct mbuf *n;

    for (; m != NULL; m = n) {
	n = m->m_nextpkt;
	m->m_nextpkt = NULL;
	pkt = dn_tag_get(m);

	switch (pkt->dn_dir) {
	case DN_TO_IP_OUT: {
	    struct route tmp_rt = pkt->ro;
	    (void)ip_output(m, NULL, &tmp_rt, pkt->flags, NULL, NULL);
	    if (tmp_rt.ro_rt) {
		rtfree(tmp_rt.ro_rt);
		tmp_rt.ro_rt = NULL;
	    }
	    break ;
	}
	case DN_TO_IP_IN :
	    proto_inject(PF_INET, m);
	    break ;

	default:
	    printf("dummynet: bad switch %d!\n", pkt->dn_dir);
	    m_freem(m);
	    break ;
	}
    }
}

/*
 * called by an interface when tx_rdy occurs.
 */
int
if_tx_rdy(struct ifnet *ifp)
{
    struct dn_pipe *p;
    struct mbuf *head = NULL, *tail = NULL;
    int i;

    lck_mtx_lock(dn_mutex);

    for (i = 0; i < HASHSIZE; i++) {
	SLIST_FOREACH(p, &pipehash[i], next)
	    if (p->ifp == ifp)
		break ;
	if (p != NULL)	/* stop at the first matching pipe */
	    break ;
    }
    if (p == NULL) {
	char buf[32];
	snprintf(buf, sizeof(buf), "%s%d", ifp->if_name, ifp->if_unit);
	for (i = 0; i < HASHSIZE; i++) {
	    SLIST_FOREACH(p, &pipehash[i], next)
		if (!strcmp(p->if_name, buf) ) {
		    p->ifp = ifp ;
		    DPRINTF(("dummynet: ++ tx rdy from %s (now found)\n", buf));
		    break ;
		}
	    if (p != NULL)
		break ;
	}
    }
    if (p != NULL) {
	DPRINTF(("dummynet: ++ tx rdy from %s%d - qlen %d\n", ifp->if_name,
	    ifp->if_unit, ifp->if_snd.ifq_len));
	p->numbytes = 0 ; /* mark ready for I/O */
	ready_event_wfq(p, &head, &tail);
    }
    lck_mtx_unlock(dn_mutex);

    /* Send out the de-queued list of ready-to-send packets */
    if (head != NULL)
	dummynet_send(head);

    return 0;
}

/*
 * Unconditionally expire empty queues in case of shortage.
 * Returns the number of queues freed.
 */
static int
expire_queues(struct dn_flow_set *fs)
{
    struct dn_flow_queue *q, *prev ;
    int i, initial_elements = fs->rq_elements ;
    struct timeval timenow;

    getmicrotime(&timenow);

    if (fs->last_expired == timenow.tv_sec)
	return 0 ;
    fs->last_expired = timenow.tv_sec ;
    for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */
	for (prev=NULL, q = fs->rq[i] ; q != NULL ; )
	    if (q->head != NULL || q->S != q->F+1) {
		prev = q ;
		q = q->next ;
	    } else { /* entry is idle, expire it */
		struct dn_flow_queue *old_q = q ;

		if (prev != NULL)
		    prev->next = q = q->next ;
		else
		    fs->rq[i] = q = q->next ;
		fs->rq_elements-- ;
		FREE(old_q, M_DUMMYNET);
	    }
    return initial_elements - fs->rq_elements ;
}

/*
 * If room, create a new queue and put at head of slot i;
 * otherwise, create or use the default queue.
 */
static struct dn_flow_queue *
create_queue(struct dn_flow_set *fs, int i)
{
    struct dn_flow_queue *q ;

    if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
	    expire_queues(fs) == 0) {
	/*
	 * No way to get room, use or create overflow queue.
	 */
	i = fs->rq_size ;
	if ( fs->rq[i] != NULL )
	    return fs->rq[i] ;
    }
    q = _MALLOC(sizeof(*q), M_DUMMYNET, M_DONTWAIT | M_ZERO);
    if (q == NULL) {
	printf("dummynet: sorry, cannot allocate queue for new flow\n");
	return NULL ;
    }
    q->fs = fs ;
    q->hash_slot = i ;
    q->next = fs->rq[i] ;
    q->S = q->F + 1;   /* hack - mark timestamp as invalid */
    fs->rq[i] = q ;
    fs->rq_elements++ ;
    return q ;
}

9bccf70c A |
1272 | /* |
1273 | * Given a flow_set and a pkt in last_pkt, find a matching queue | |
1274 | * after appropriate masking. The queue is moved to front | |
1275 | * so that further searches take less time. | |
1276 | */ | |
1277 | static struct dn_flow_queue * | |
91447636 | 1278 | find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) |
9bccf70c A |
1279 | { |
1280 | int i = 0 ; /* we need i and q for new allocations */ | |
1281 | struct dn_flow_queue *q, *prev; | |
1c79356b | 1282 | |
9bccf70c A |
1283 | if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) ) |
1284 | q = fs->rq[0] ; | |
1285 | else { | |
1286 | /* first, do the masking */ | |
91447636 A |
1287 | id->dst_ip &= fs->flow_mask.dst_ip ; |
1288 | id->src_ip &= fs->flow_mask.src_ip ; | |
1289 | id->dst_port &= fs->flow_mask.dst_port ; | |
1290 | id->src_port &= fs->flow_mask.src_port ; | |
1291 | id->proto &= fs->flow_mask.proto ; | |
1292 | id->flags = 0 ; /* we don't care about this one */ | |
9bccf70c | 1293 | /* then, hash function */ |
91447636 A |
1294 | i = ( (id->dst_ip) & 0xffff ) ^ |
1295 | ( (id->dst_ip >> 15) & 0xffff ) ^ | |
1296 | ( (id->src_ip << 1) & 0xffff ) ^ | |
1297 | ( (id->src_ip >> 16 ) & 0xffff ) ^ | |
1298 | (id->dst_port << 1) ^ (id->src_port) ^ | |
1299 | (id->proto ); | |
9bccf70c A |
1300 | i = i % fs->rq_size ; |
1301 | /* finally, scan the current list for a match */ | |
1302 | searches++ ; | |
1303 | for (prev=NULL, q = fs->rq[i] ; q ; ) { | |
1304 | search_steps++; | |
91447636 A |
1305 | if (id->dst_ip == q->id.dst_ip && |
1306 | id->src_ip == q->id.src_ip && | |
1307 | id->dst_port == q->id.dst_port && | |
1308 | id->src_port == q->id.src_port && | |
1309 | id->proto == q->id.proto && | |
1310 | id->flags == q->id.flags) | |
9bccf70c A |
1311 | break ; /* found */ |
1312 | else if (pipe_expire && q->head == NULL && q->S == q->F+1 ) { | |
1313 | /* entry is idle and not in any heap, expire it */ | |
1314 | struct dn_flow_queue *old_q = q ; | |
1c79356b | 1315 | |
9bccf70c A |
1316 | if (prev != NULL) |
1317 | prev->next = q = q->next ; | |
1318 | else | |
1319 | fs->rq[i] = q = q->next ; | |
1320 | fs->rq_elements-- ; | |
91447636 | 1321 | FREE(old_q, M_DUMMYNET); |
9bccf70c A |
1322 | continue ; |
1323 | } | |
1324 | prev = q ; | |
1325 | q = q->next ; | |
1c79356b | 1326 | } |
9bccf70c A |
1327 | if (q && prev != NULL) { /* found and not in front */ |
1328 | prev->next = q->next ; | |
1329 | q->next = fs->rq[i] ; | |
1330 | fs->rq[i] = q ; | |
1c79356b | 1331 | } |
9bccf70c A |
1332 | } |
1333 | if (q == NULL) { /* no match, need to allocate a new entry */ | |
1334 | q = create_queue(fs, i); | |
1335 | if (q != NULL) | |
91447636 | 1336 | q->id = *id ; |
9bccf70c A |
1337 | } |
1338 | return q ; | |
1339 | } | |
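/*
 * For illustration, a self-contained user-space sketch of the masking
 * and hashing performed in find_queue() above.  The struct is a
 * stand-in for ipfw_flow_id and 'rq_size' for the bucket count
 * (assumed >= 1); the bit mixing mirrors the kernel code.
 */
#include <stdint.h>

struct flow_id_sketch {
	uint32_t dst_ip, src_ip;
	uint16_t dst_port, src_port;
	uint8_t proto;
};

static unsigned
flow_hash_sketch(const struct flow_id_sketch *id, unsigned rq_size)
{
	unsigned i;

	/* same mix of address, port and protocol bits as find_queue() */
	i = ( id->dst_ip        & 0xffff) ^
	    ((id->dst_ip >> 15) & 0xffff) ^
	    ((id->src_ip <<  1) & 0xffff) ^
	    ((id->src_ip >> 16) & 0xffff) ^
	    (id->dst_port << 1) ^ id->src_port ^
	    id->proto;
	return i % rq_size;
}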
1340 | ||
1341 | static int | |
1342 | red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) | |
1343 | { | |
1344 | /* | |
1345 | * RED algorithm | |
91447636 | 1346 | * |
9bccf70c A |
1347 | * RED calculates the average queue size (avg) using a low-pass filter |
1348 | * with an exponential weighted (w_q) moving average: | |
1349 | * avg <- (1-w_q) * avg + w_q * q_size | |
1350 | * where q_size is the queue length (measured in bytes or packets).
91447636 | 1351 | * |
9bccf70c A |
1352 | * If q_size == 0, we compute the idle time for the link, and set |
1353 | * avg = avg * (1 - w_q)^(idle/s)
1354 | * where s is the time needed for transmitting a medium-sized packet. | |
91447636 | 1355 | * |
9bccf70c A |
1356 | * Now, if avg < min_th the packet is enqueued. |
1357 | * If avg >= max_th the packet is dropped. Otherwise, the packet is
1358 | * dropped with a probability P that is a function of avg.
91447636 | 1359 | * |
9bccf70c A |
1360 | */ |
1361 | ||
1362 | int64_t p_b = 0; | |
1363 | /* queue in bytes or packets ? */ | |
1364 | u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len; | |
1365 | ||
91447636 | 1366 | DPRINTF(("\ndummynet: %d q: %2u ", (int) curr_time, q_size)); |
9bccf70c A |
1367 | |
1368 | /* average queue size estimation */ | |
1369 | if (q_size != 0) { | |
1370 | /* | |
1371 | * queue is not empty, avg <- avg + (q_size - avg) * w_q | |
1372 | */ | |
1373 | int diff = SCALE(q_size) - q->avg; | |
1374 | int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q); | |
1375 | ||
1376 | q->avg += (int) v; | |
1377 | } else { | |
1378 | /* | |
1379 | * queue is empty, find for how long the queue has been | |
1380 | * empty and use a lookup table for computing | |
1381 | * (1 - w_q)^(idle_time/s) where s is the time to send a
1382 | * (small) packet. | |
1383 | * XXX check wraps... | |
1384 | */ | |
1385 | if (q->avg) { | |
1386 | u_int t = (curr_time - q->q_time) / fs->lookup_step; | |
1387 | ||
1388 | q->avg = (t < fs->lookup_depth) ? | |
1389 | SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; | |
1390 | } | |
1391 | } | |
91447636 | 1392 | DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg))); |
9bccf70c A |
1393 | |
1394 | /* should i drop ? */ | |
1395 | ||
1396 | if (q->avg < fs->min_th) { | |
1397 | q->count = -1; | |
1398 | return 0; /* accept packet ; */ | |
1399 | } | |
1400 | if (q->avg >= fs->max_th) { /* average queue >= max threshold */ | |
1401 | if (fs->flags_fs & DN_IS_GENTLE_RED) { | |
1c79356b | 1402 | /* |
9bccf70c A |
1403 | * According to Gentle-RED, if avg is greater than max_th the |
1404 | * packet is dropped with a probability | |
1405 | * p_b = c_3 * avg - c_4 | |
1406 | * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p | |
1c79356b | 1407 | */ |
9bccf70c A |
1408 | p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4; |
1409 | } else { | |
1410 | q->count = -1; | |
91447636 | 1411 | DPRINTF(("dummynet: - drop")); |
9bccf70c A |
1412 | return 1 ; |
1413 | } | |
1414 | } else if (q->avg > fs->min_th) { | |
1415 | /* | |
1416 | * we compute p_b using the linear dropping function p_b = c_1 * | |
1417 | * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 = | |
1418 | * max_p * min_th / (max_th - min_th) | |
1419 | */ | |
1420 | p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2; | |
1421 | } | |
1422 | if (fs->flags_fs & DN_QSIZE_IS_BYTES) | |
1423 | p_b = (p_b * len) / fs->max_pkt_size; | |
1424 | if (++q->count == 0) | |
2d21ac55 | 1425 | q->random = MY_RANDOM & 0xffff; |
9bccf70c A |
1426 | else { |
1427 | /* | |
1428 | * q->count counts packets arrived since last drop, so a greater | |
1429 | * value of q->count means a greater packet drop probability. | |
1430 | */ | |
1431 | if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) { | |
1432 | q->count = 0; | |
91447636 | 1433 | DPRINTF(("dummynet: - red drop")); |
9bccf70c | 1434 | /* after a drop we calculate a new random value */ |
2d21ac55 | 1435 | q->random = MY_RANDOM & 0xffff; |
9bccf70c A |
1436 | return 1; /* drop */ |
1437 | } | |
1438 | } | |
1439 | /* end of RED algorithm */ | |
1440 | return 0 ; /* accept */ | |
1441 | } | |
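/*
 * A self-contained fixed-point sketch of the average estimator used
 * above, assuming the usual dummynet scaling of 16 fractional bits
 * (the role played by SCALE/SCALE_MUL).  'avg' and 'w_q' are kept
 * scaled; 'q_size' is a raw packet or byte count, as in red_drops().
 */
#include <stdint.h>

#define EWMA_SHIFT	16
#define EWMA_SCALE(x)	((int64_t)(x) << EWMA_SHIFT)
#define EWMA_MUL(a, b)	(((a) * (b)) >> EWMA_SHIFT)

static int64_t
red_avg_sketch(int64_t avg, int64_t w_q, unsigned q_size)
{
	/* avg <- avg + (q_size - avg) * w_q, all in fixed point */
	int64_t diff = EWMA_SCALE(q_size) - avg;
	return avg + EWMA_MUL(diff, w_q);
}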
1442 | ||
1443 | static __inline | |
1444 | struct dn_flow_set * | |
b0d623f7 | 1445 | locate_flowset(int fs_nr) |
9bccf70c | 1446 | { |
91447636 | 1447 | struct dn_flow_set *fs; |
b0d623f7 A |
1448 | SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next) |
1449 | if (fs->fs_nr == fs_nr) | |
1450 | return fs ; | |
1451 | ||
1452 | return (NULL); | |
1453 | } | |
9bccf70c | 1454 | |
b0d623f7 A |
1455 | static __inline struct dn_pipe * |
1456 | locate_pipe(int pipe_nr) | |
1457 | { | |
1458 | struct dn_pipe *pipe; | |
91447636 | 1459 | |
b0d623f7 A |
1460 | SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next) |
1461 | if (pipe->pipe_nr == pipe_nr) | |
1462 | return (pipe); | |
91447636 | 1463 | |
b0d623f7 A |
1464 | return (NULL); |
1465 | } | |
91447636 | 1466 | |
91447636 | 1467 | |
9bccf70c A |
1468 | |
1469 | /* | |
1470 | * dummynet hook for packets. Below 'pipe' is a pipe or a queue | |
1471 | * depending on whether WF2Q or fixed bw is used. | |
91447636 A |
1472 | * |
1473 | * pipe_nr pipe or queue the packet is destined for. | |
1474 | * dir where to send the packet after dummynet.
1475 | * m the mbuf with the packet | |
1476 | * ifp the 'ifp' parameter from the caller. | |
1477 | * NULL in ip_input, destination interface in ip_output, | |
1478 | * real_dst in bdg_forward | |
1479 | * ro route parameter (only used in ip_output, NULL otherwise) | |
1480 | * dst destination address, only used by ip_output | |
1481 | * rule matching rule, in case of multiple passes | |
1482 | * flags flags from the caller, only used in ip_output | |
1483 | * | |
9bccf70c | 1484 | */ |
91447636 A |
1485 | static int |
1486 | dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) | |
9bccf70c | 1487 | { |
b0d623f7 | 1488 | struct mbuf *head = NULL, *tail = NULL; |
91447636 A |
1489 | struct dn_pkt_tag *pkt; |
1490 | struct m_tag *mtag; | |
b0d623f7 | 1491 | struct dn_flow_set *fs = NULL; |
9bccf70c A |
1492 | struct dn_pipe *pipe ; |
1493 | u_int64_t len = m->m_pkthdr.len ; | |
1494 | struct dn_flow_queue *q = NULL ; | |
91447636 | 1495 | int is_pipe; |
0c530ab8 A |
1496 | struct timespec ts; |
1497 | struct timeval tv; | |
91447636 A |
1498 | |
1499 | #if IPFW2 | |
1500 | ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs; | |
1501 | ||
1502 | if (cmd->opcode == O_LOG) | |
1503 | cmd += F_LEN(cmd); | |
1504 | is_pipe = (cmd->opcode == O_PIPE); | |
1505 | #else | |
1506 | is_pipe = (fwa->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE; | |
1507 | #endif | |
9bccf70c A |
1508 | |
1509 | pipe_nr &= 0xffff ; | |
1510 | ||
91447636 | 1511 | lck_mtx_lock(dn_mutex); |
0c530ab8 A |
1512 | |
1513 | /* make all time measurements in milliseconds (ms) - | |
1514 | * here we convert secs and usecs to msecs (dividing the usecs
1515 | * by 1000 and truncating the remainder).
1516 | */ | |
1517 | microuptime(&tv); | |
1518 | curr_time = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); | |
91447636 A |
1519 | |
1520 | /* | |
1521 | * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. | |
1522 | */ | |
b0d623f7 A |
1523 | if (is_pipe) { |
1524 | pipe = locate_pipe(pipe_nr); | |
1525 | if (pipe != NULL) | |
1526 | fs = &(pipe->fs); | |
1527 | } else | |
1528 | fs = locate_flowset(pipe_nr); | |
1529 | ||
1530 | ||
1531 | if (fs == NULL){ | |
91447636 | 1532 | goto dropit ; /* this queue/pipe does not exist! */ |
b0d623f7 | 1533 | } |
9bccf70c A |
1534 | pipe = fs->pipe ; |
1535 | if (pipe == NULL) { /* must be a queue, try find a matching pipe */ | |
b0d623f7 A |
1536 | pipe = locate_pipe(fs->parent_nr); |
1537 | ||
9bccf70c A |
1538 | if (pipe != NULL) |
1539 | fs->pipe = pipe ; | |
1540 | else { | |
91447636 | 1541 | printf("dummynet: no pipe %d for queue %d, drop pkt\n", |
9bccf70c A |
1542 | fs->parent_nr, fs->fs_nr); |
1543 | goto dropit ; | |
1544 | } | |
1545 | } | |
91447636 | 1546 | q = find_queue(fs, &(fwa->f_id)); |
9bccf70c A |
1547 | if ( q == NULL ) |
1548 | goto dropit ; /* cannot allocate queue */ | |
1549 | /* | |
1550 | * update statistics, then check reasons to drop pkt | |
1551 | */ | |
1552 | q->tot_bytes += len ; | |
1553 | q->tot_pkts++ ; | |
2d21ac55 | 1554 | if ( fs->plr && (MY_RANDOM < fs->plr) ) |
9bccf70c A |
1555 | goto dropit ; /* random pkt drop */ |
1556 | if ( fs->flags_fs & DN_QSIZE_IS_BYTES) { | |
1557 | if (q->len_bytes > fs->qsize) | |
1558 | goto dropit ; /* queue size overflow */ | |
1559 | } else { | |
1560 | if (q->len >= fs->qsize) | |
1561 | goto dropit ; /* queue count overflow */ | |
1562 | } | |
1563 | if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) ) | |
1564 | goto dropit ; | |
1565 | ||
91447636 A |
1566 | /* XXX expensive to zero, see if we can remove it */
1567 | mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, | |
13fec989 | 1568 | sizeof(struct dn_pkt_tag), M_NOWAIT); |
91447636 A |
1569 | if ( mtag == NULL ) |
1570 | goto dropit ; /* cannot allocate packet header */ | |
1571 | m_tag_prepend(m, mtag); /* attach to mbuf chain */ | |
1572 | ||
1573 | pkt = (struct dn_pkt_tag *)(mtag+1); | |
13fec989 | 1574 | bzero(pkt, sizeof(struct dn_pkt_tag)); |
9bccf70c | 1575 | /* ok, i can handle the pkt now... */ |
9bccf70c | 1576 | /* build and enqueue packet + parameters */ |
91447636 | 1577 | pkt->rule = fwa->rule ; |
9bccf70c A |
1578 | pkt->dn_dir = dir ; |
1579 | ||
91447636 | 1580 | pkt->ifp = fwa->oif; |
9bccf70c A |
1581 | if (dir == DN_TO_IP_OUT) { |
1582 | /* | |
1583 | * We need to copy *ro because for ICMP pkts (and maybe others) | |
1584 | * the caller passed a pointer into the stack; dst might also be | |
1585 | * a pointer into *ro so it needs to be updated. | |
1586 | */ | |
91447636 A |
1587 | pkt->ro = *(fwa->ro); |
1588 | if (fwa->ro->ro_rt) | |
b0d623f7 A |
1589 | RT_ADDREF(fwa->ro->ro_rt); |
1590 | ||
91447636 A |
1591 | if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) /* dst points into ro */ |
1592 | fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ; | |
b0d623f7 | 1593 | |
91447636 A |
1594 | pkt->dn_dst = fwa->dst; |
1595 | pkt->flags = fwa->flags; | |
c910b4d9 A |
1596 | if (fwa->ipoa != NULL) |
1597 | pkt->ipoa = *(fwa->ipoa); | |
91447636 | 1598 | } |
9bccf70c | 1599 | if (q->head == NULL) |
91447636 | 1600 | q->head = m; |
9bccf70c | 1601 | else |
91447636 A |
1602 | q->tail->m_nextpkt = m; |
1603 | q->tail = m; | |
9bccf70c A |
1604 | q->len++; |
1605 | q->len_bytes += len ; | |
1606 | ||
91447636 | 1607 | if ( q->head != m ) /* flow was not idle, we are done */ |
9bccf70c A |
1608 | goto done; |
1609 | /* | |
1610 | * If we reach this point the flow was previously idle, so we need | |
1611 | * to schedule it. This involves different actions for fixed-rate or | |
1612 | * WF2Q queues. | |
1613 | */ | |
91447636 | 1614 | if (is_pipe) { |
9bccf70c A |
1615 | /* |
1616 | * Fixed-rate queue: just insert into the ready_heap. | |
1617 | */ | |
1618 | dn_key t = 0 ; | |
91447636 A |
1619 | if (pipe->bandwidth) |
1620 | t = SET_TICKS(m, q, pipe); | |
9bccf70c A |
1621 | q->sched_time = curr_time ; |
1622 | if (t == 0) /* must process it now */ | |
b0d623f7 | 1623 | ready_event( q , &head, &tail ); |
9bccf70c A |
1624 | else |
1625 | heap_insert(&ready_heap, curr_time + t , q ); | |
1626 | } else { | |
1627 | /* | |
1628 | * WF2Q. First, compute start time S: if the flow was idle (S=F+1) | |
1629 | * set S to the virtual time V for the controlling pipe, and update | |
1630 | * the sum of weights for the pipe; otherwise, remove flow from | |
1631 | * idle_heap and set S to max(F,V). | |
1632 | * Second, compute finish time F = S + len/weight. | |
1633 | * Third, if pipe was idle, update V=max(S, V). | |
1634 | * Fourth, count one more backlogged flow. | |
1635 | */ | |
1636 | if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */ | |
1637 | q->S = pipe->V ; | |
1638 | pipe->sum += fs->weight ; /* add weight of new queue */ | |
1639 | } else { | |
1640 | heap_extract(&(pipe->idle_heap), q); | |
1641 | q->S = MAX64(q->F, pipe->V ) ; | |
1642 | } | |
1643 | q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight; | |
1644 | ||
1645 | if (pipe->not_eligible_heap.elements == 0 && | |
1646 | pipe->scheduler_heap.elements == 0) | |
1647 | pipe->V = MAX64 ( q->S, pipe->V ); | |
1648 | fs->backlogged++ ; | |
1649 | /* | |
1650 | * Look at eligibility. A flow is not eligible if S>V (when
1651 | * this happens, it means that there is some other flow already | |
1652 | * scheduled for the same pipe, so the scheduler_heap cannot be | |
1653 | * empty). If the flow is not eligible we just store it in the | |
1654 | * not_eligible_heap. Otherwise, we store it in the scheduler_heap
1655 | * and possibly invoke ready_event_wfq() right now if there is | |
1656 | * leftover credit. | |
1657 | * Note that for all flows in scheduler_heap (SCH), S_i <= V, | |
1658 | * and for all flows in not_eligible_heap (NEH), S_i > V.
1659 | * So when we need to compute max( V, min(S_i) ) for all i in SCH+NEH,
1660 | * we only need to look into NEH. | |
1661 | */ | |
1662 | if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */ | |
1663 | if (pipe->scheduler_heap.elements == 0) | |
91447636 | 1664 | printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); |
9bccf70c A |
1665 | heap_insert(&(pipe->not_eligible_heap), q->S, q); |
1666 | } else { | |
1667 | heap_insert(&(pipe->scheduler_heap), q->F, q); | |
1668 | if (pipe->numbytes >= 0) { /* pipe is idle */ | |
1669 | if (pipe->scheduler_heap.elements != 1) | |
91447636 A |
1670 | printf("dummynet: OUCH! pipe should have been idle!\n"); |
1671 | DPRINTF(("dummynet: waking up pipe %d at %d\n", | |
1672 | pipe->pipe_nr, (int)(q->F >> MY_M))); | |
9bccf70c | 1673 | pipe->sched_time = curr_time ; |
b0d623f7 | 1674 | ready_event_wfq(pipe, &head, &tail); |
1c79356b | 1675 | } |
9bccf70c A |
1676 | } |
1677 | } | |
1678 | done: | |
0c530ab8 A |
1679 | /* start the timer and set global if not already set */ |
1680 | if (!timer_enabled) { | |
1681 | ts.tv_sec = 0; | |
1682 | ts.tv_nsec = 1 * 1000000; // 1ms | |
1683 | timer_enabled = 1; | |
1684 | bsd_timeout(dummynet, NULL, &ts); | |
1685 | } | |
1686 | ||
91447636 | 1687 | lck_mtx_unlock(dn_mutex); |
b0d623f7 A |
1688 | if (head != NULL) |
1689 | dummynet_send(head); | |
1690 | ||
9bccf70c A |
1691 | return 0; |
1692 | ||
1693 | dropit: | |
9bccf70c A |
1694 | if (q) |
1695 | q->drops++ ; | |
91447636 | 1696 | lck_mtx_unlock(dn_mutex); |
9bccf70c | 1697 | m_freem(m); |
91447636 | 1698 | return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); |
9bccf70c A |
1699 | } |
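/*
 * A compact sketch, in plain C, of the WF2Q+ timestamp rule applied
 * in the scheduling block above.  The 16 fractional bits stand in for
 * MY_M (an assumption of this sketch), and 'was_idle' corresponds to
 * the S > F invalid-timestamp test; V is the pipe's virtual time.
 */
#include <stdint.h>

#define WF2Q_SHIFT 16	/* fractional bits, standing in for MY_M */

static void
wf2q_stamps_sketch(uint64_t *S, uint64_t *F, uint64_t V,
    uint64_t len, uint64_t weight, int was_idle)
{
	if (was_idle)
		*S = V;			/* restart from virtual time */
	else
		*S = (*F > V) ? *F : V;	/* S = max(F, V) */
	/* finish time: start plus scaled length over weight */
	*F = *S + ((len << WF2Q_SHIFT) / weight);
}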
1700 | ||
1701 | /* | |
91447636 | 1702 | * Below, the rtfree is only needed when (pkt->dn_dir == DN_TO_IP_OUT),
9bccf70c A |
1703 | * since only those packets hold a route reference; checking dn_dir instead would probably save us the initial bzero of dn_pkt.
1704 | */ | |
b0d623f7 | 1705 | #define DN_FREE_PKT(_m) do { \ |
91447636 | 1706 | struct m_tag *tag = m_tag_locate(_m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL); /* use the macro argument, not the caller's 'm' */ \
b0d623f7 | 1707 | if (tag) { \ |
91447636 | 1708 | struct dn_pkt_tag *n = (struct dn_pkt_tag *)(tag+1); \ |
b0d623f7 A |
1709 | if (n->ro.ro_rt != NULL) { \ |
1710 | rtfree(n->ro.ro_rt); \ | |
1711 | n->ro.ro_rt = NULL; \ | |
1712 | } \ | |
1713 | m_tag_delete(_m, tag); /* only delete a tag we actually found */ \
1714 | } \
1715 | m_freem(_m); \ | |
91447636 | 1716 | } while (0) |
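/*
 * The same drain idiom recurs below (purge_flow_set, purge_pipe,
 * dummynet_drain): walk an m_nextpkt chain, saving the link before
 * each packet is freed.  Shown here once as a sketch:
 */
static void
dn_free_pkt_chain_sketch(struct mbuf *head)
{
	struct mbuf *m, *mnext = head;

	while ((m = mnext) != NULL) {
		mnext = m->m_nextpkt;	/* grab link before freeing */
		DN_FREE_PKT(m);
	}
}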
9bccf70c A |
1717 | |
1718 | /* | |
1719 | * Dispose all packets and flow_queues on a flow_set. | |
1720 | * If all=1, also remove red lookup table and other storage, | |
1721 | * including the descriptor itself. | |
1722 | * For the flow_set embedded in a dn_pipe, the caller MUST also clean up ready_heap...
1723 | */ | |
1724 | static void | |
1725 | purge_flow_set(struct dn_flow_set *fs, int all) | |
1726 | { | |
9bccf70c A |
1727 | struct dn_flow_queue *q, *qn ; |
1728 | int i ; | |
1729 | ||
91447636 A |
1730 | lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); |
1731 | ||
9bccf70c A |
1732 | for (i = 0 ; i <= fs->rq_size ; i++ ) { |
1733 | for (q = fs->rq[i] ; q ; q = qn ) { | |
91447636 A |
1734 | struct mbuf *m, *mnext; |
1735 | ||
1736 | mnext = q->head; | |
1737 | while ((m = mnext) != NULL) { | |
1738 | mnext = m->m_nextpkt; | |
1739 | DN_FREE_PKT(m); | |
1740 | } | |
9bccf70c | 1741 | qn = q->next ; |
91447636 | 1742 | FREE(q, M_DUMMYNET); |
9bccf70c A |
1743 | } |
1744 | fs->rq[i] = NULL ; | |
1745 | } | |
1746 | fs->rq_elements = 0 ; | |
1747 | if (all) { | |
1748 | /* RED - free lookup table */ | |
1749 | if (fs->w_q_lookup) | |
91447636 | 1750 | FREE(fs->w_q_lookup, M_DUMMYNET); |
9bccf70c | 1751 | if (fs->rq) |
91447636 | 1752 | FREE(fs->rq, M_DUMMYNET); |
9bccf70c A |
1753 | /* if this fs is not part of a pipe, free it */ |
1754 | if (fs->pipe && fs != &(fs->pipe->fs) ) | |
91447636 | 1755 | FREE(fs, M_DUMMYNET); |
9bccf70c A |
1756 | } |
1757 | } | |
1758 | ||
1759 | /* | |
1760 | * Dispose all packets queued on a pipe (not a flow_set). | |
1761 | * Also free all resources associated with a pipe, which is about
1762 | * to be deleted. | |
1763 | */ | |
1764 | static void | |
1765 | purge_pipe(struct dn_pipe *pipe) | |
1766 | { | |
91447636 | 1767 | struct mbuf *m, *mnext; |
9bccf70c A |
1768 | |
1769 | purge_flow_set( &(pipe->fs), 1 ); | |
1770 | ||
91447636 A |
1771 | mnext = pipe->head; |
1772 | while ((m = mnext) != NULL) { | |
1773 | mnext = m->m_nextpkt; | |
1774 | DN_FREE_PKT(m); | |
1775 | } | |
9bccf70c A |
1776 | |
1777 | heap_free( &(pipe->scheduler_heap) ); | |
1778 | heap_free( &(pipe->not_eligible_heap) ); | |
1779 | heap_free( &(pipe->idle_heap) ); | |
1780 | } | |
1781 | ||
1782 | /* | |
1783 | * Delete all pipes and heaps, returning their memory. Must also
1784 | * remove references from all ipfw rules to all pipes. | |
1785 | */ | |
1786 | static void | |
2d21ac55 | 1787 | dummynet_flush(void) |
9bccf70c | 1788 | { |
b0d623f7 A |
1789 | struct dn_pipe *pipe, *pipe1; |
1790 | struct dn_flow_set *fs, *fs1; | |
1791 | int i; | |
9bccf70c | 1792 | |
91447636 | 1793 | lck_mtx_lock(dn_mutex); |
9bccf70c A |
1794 | |
1795 | /* remove all references to pipes ...*/ | |
91447636 | 1796 | flush_pipe_ptrs(NULL); |
b0d623f7 A |
1797 | |
1798 | /* Free heaps so we don't have unwanted events. */ | |
1799 | heap_free(&ready_heap); | |
1800 | heap_free(&wfq_ready_heap); | |
1801 | heap_free(&extract_heap); | |
91447636 | 1802 | |
b0d623f7 A |
1803 | /* |
1804 | * Now purge all queued pkts and delete all pipes. | |
1805 | * | |
1806 | * XXXGL: can we merge the for(;;) cycles into one or not? | |
1807 | */ | |
1808 | for (i = 0; i < HASHSIZE; i++) | |
1809 | SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) { | |
1810 | SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next); | |
1811 | purge_flow_set(fs, 1); | |
1812 | } | |
1813 | for (i = 0; i < HASHSIZE; i++) | |
1814 | SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) { | |
1815 | SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next); | |
1816 | purge_pipe(pipe); | |
1817 | FREE(pipe, M_DUMMYNET); | |
1818 | } | |
91447636 | 1819 | lck_mtx_unlock(dn_mutex); |
9bccf70c A |
1820 | } |
1821 | ||
1822 | ||
91447636 | 1823 | extern struct ip_fw *ip_fw_default_rule ; |
9bccf70c A |
1824 | static void |
1825 | dn_rule_delete_fs(struct dn_flow_set *fs, void *r) | |
1826 | { | |
1827 | int i ; | |
1828 | struct dn_flow_queue *q ; | |
91447636 | 1829 | struct mbuf *m ; |
9bccf70c A |
1830 | |
1831 | for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ | |
1832 | for (q = fs->rq[i] ; q ; q = q->next ) | |
91447636 A |
1833 | for (m = q->head ; m ; m = m->m_nextpkt ) { |
1834 | struct dn_pkt_tag *pkt = dn_tag_get(m) ; | |
1835 | if (pkt->rule == r) | |
1836 | pkt->rule = ip_fw_default_rule ; | |
1837 | } | |
9bccf70c A |
1838 | } |
1839 | /* | |
1840 | * When a firewall rule is deleted, scan all queues and re-point any
1841 | * packet still referencing that rule to the default rule.
1842 | */ | |
1843 | void | |
1844 | dn_rule_delete(void *r) | |
1845 | { | |
1846 | struct dn_pipe *p ; | |
9bccf70c | 1847 | struct dn_flow_set *fs ; |
91447636 A |
1848 | struct dn_pkt_tag *pkt ; |
1849 | struct mbuf *m ; | |
b0d623f7 | 1850 | int i; |
91447636 A |
1851 | |
1852 | lck_mtx_lock(dn_mutex); | |
9bccf70c A |
1853 | |
1854 | /* | |
1855 | * If the rule references a queue (dn_flow_set), then scan | |
1856 | * the flow set, otherwise scan pipes. Doing either would suffice,
1857 | * but doing both does no harm.
1858 | */ | |
b0d623f7 A |
1859 | for (i = 0; i < HASHSIZE; i++) |
1860 | SLIST_FOREACH(fs, &flowsethash[i], next) | |
1861 | dn_rule_delete_fs(fs, r); | |
1862 | ||
1863 | for (i = 0; i < HASHSIZE; i++) | |
1864 | SLIST_FOREACH(p, &pipehash[i], next) { | |
1865 | fs = &(p->fs); | |
1866 | dn_rule_delete_fs(fs, r); | |
1867 | for (m = p->head ; m ; m = m->m_nextpkt ) { | |
1868 | pkt = dn_tag_get(m); | |
1869 | if (pkt->rule == r) | |
1870 | pkt->rule = ip_fw_default_rule; | |
1871 | } | |
91447636 | 1872 | } |
b0d623f7 | 1873 | lck_mtx_unlock(dn_mutex); |
9bccf70c A |
1874 | } |
1875 | ||
1876 | /* | |
1877 | * setup RED parameters | |
1878 | */ | |
1879 | static int | |
91447636 | 1880 | config_red(struct dn_flow_set *p, struct dn_flow_set * x) |
9bccf70c A |
1881 | { |
1882 | int i; | |
1883 | ||
1884 | x->w_q = p->w_q; | |
1885 | x->min_th = SCALE(p->min_th); | |
1886 | x->max_th = SCALE(p->max_th); | |
1887 | x->max_p = p->max_p; | |
1888 | ||
1889 | x->c_1 = p->max_p / (p->max_th - p->min_th); | |
1890 | x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th)); | |
1891 | if (x->flags_fs & DN_IS_GENTLE_RED) { | |
1892 | x->c_3 = (SCALE(1) - p->max_p) / p->max_th; | |
1893 | x->c_4 = (SCALE(1) - 2 * p->max_p); | |
1894 | } | |
1895 | ||
1896 | /* if the lookup table already exists, free it and create it again */
91447636 A |
1897 | if (x->w_q_lookup) { |
1898 | FREE(x->w_q_lookup, M_DUMMYNET); | |
1899 | x->w_q_lookup = NULL ; | |
1900 | } | |
9bccf70c | 1901 | if (red_lookup_depth == 0) { |
91447636 A |
1902 | printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth must be > 0\n"); |
1903 | FREE(x, M_DUMMYNET); | |
9bccf70c A |
1904 | return EINVAL; |
1905 | } | |
1906 | x->lookup_depth = red_lookup_depth; | |
1907 | x->w_q_lookup = (u_int *) _MALLOC(x->lookup_depth * sizeof(int), | |
91447636 | 1908 | M_DUMMYNET, M_DONTWAIT); |
9bccf70c | 1909 | if (x->w_q_lookup == NULL) { |
91447636 A |
1910 | printf("dummynet: sorry, cannot allocate red lookup table\n"); |
1911 | FREE(x, M_DUMMYNET); | |
9bccf70c A |
1912 | return ENOSPC; |
1913 | } | |
1914 | ||
1915 | /* fill the lookup table with (1 - w_q)^x */ | |
1916 | x->lookup_step = p->lookup_step ; | |
1917 | x->lookup_weight = p->lookup_weight ; | |
1918 | x->w_q_lookup[0] = SCALE(1) - x->w_q; | |
1919 | for (i = 1; i < x->lookup_depth; i++) | |
1920 | x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight); | |
1921 | if (red_avg_pkt_size < 1) | |
1922 | red_avg_pkt_size = 512 ; | |
1923 | x->avg_pkt_size = red_avg_pkt_size ; | |
1924 | if (red_max_pkt_size < 1) | |
1925 | red_max_pkt_size = 1500 ; | |
1926 | x->max_pkt_size = red_max_pkt_size ; | |
1927 | return 0 ; | |
1928 | } | |
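/*
 * Sketch of the table fill in config_red() above: entry t caches the
 * decay factor (1 - w_q)^(t+1), so that after an idle period the
 * average can be scaled down with a single multiply per lookup step.
 * 16 fixed-point fractional bits are assumed, and 'lookup_weight' is
 * the per-step factor supplied by the configuration.
 */
#include <stdint.h>

#define RED_SHIFT	16
#define RED_SCALE(x)	((int64_t)(x) << RED_SHIFT)
#define RED_MUL(a, b)	(((a) * (b)) >> RED_SHIFT)

static void
red_fill_table_sketch(int64_t *tbl, int depth,
    int64_t w_q, int64_t lookup_weight)
{
	int i;

	tbl[0] = RED_SCALE(1) - w_q;		/* (1 - w_q)^1 */
	for (i = 1; i < depth; i++)		/* multiply up the powers */
		tbl[i] = RED_MUL(tbl[i - 1], lookup_weight);
}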
1929 | ||
1930 | static int | |
1931 | alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) | |
1932 | { | |
1933 | if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */ | |
1934 | int l = pfs->rq_size; | |
1935 | ||
1936 | if (l == 0) | |
1937 | l = dn_hash_size; | |
1938 | if (l < 4) | |
1939 | l = 4; | |
91447636 A |
1940 | else if (l > DN_MAX_HASH_SIZE) |
1941 | l = DN_MAX_HASH_SIZE; | |
9bccf70c A |
1942 | x->rq_size = l; |
1943 | } else /* one is enough for null mask */ | |
1944 | x->rq_size = 1; | |
1945 | x->rq = _MALLOC((1 + x->rq_size) * sizeof(struct dn_flow_queue *), | |
91447636 | 1946 | M_DUMMYNET, M_DONTWAIT | M_ZERO); |
9bccf70c | 1947 | if (x->rq == NULL) { |
91447636 | 1948 | printf("dummynet: sorry, cannot allocate queue\n"); |
9bccf70c A |
1949 | return ENOSPC; |
1950 | } | |
9bccf70c A |
1951 | x->rq_elements = 0; |
1952 | return 0 ; | |
1953 | } | |
1954 | ||
1955 | static void | |
1956 | set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) | |
1957 | { | |
1958 | x->flags_fs = src->flags_fs; | |
1959 | x->qsize = src->qsize; | |
1960 | x->plr = src->plr; | |
1961 | x->flow_mask = src->flow_mask; | |
1962 | if (x->flags_fs & DN_QSIZE_IS_BYTES) { | |
1963 | if (x->qsize > 1024*1024) | |
1964 | x->qsize = 1024*1024 ; | |
1965 | } else { | |
1966 | if (x->qsize == 0) | |
1967 | x->qsize = 50 ; | |
1968 | if (x->qsize > 100) | |
1969 | x->qsize = 50 ; | |
1970 | } | |
1971 | /* configuring RED */ | |
1972 | if ( x->flags_fs & DN_IS_RED ) | |
1973 | config_red(src, x) ; /* XXX should check errors */ | |
1974 | } | |
1975 | ||
1976 | /* | |
1977 | * setup pipe or queue parameters. | |
1978 | */ | |
1979 | ||
91447636 | 1980 | static int |
9bccf70c A |
1981 | config_pipe(struct dn_pipe *p) |
1982 | { | |
91447636 | 1983 | int i, r; |
9bccf70c | 1984 | struct dn_flow_set *pfs = &(p->fs); |
91447636 | 1985 | struct dn_flow_queue *q; |
9bccf70c | 1986 | |
91447636 A |
1987 | /* |
1988 | * The config program passes parameters as follows: | |
9bccf70c A |
1989 | * bw = bits/second (0 means no limits), |
1990 | * delay = ms, must be translated into ticks. | |
1991 | * qsize = slots/bytes | |
91447636 | 1992 | */ |
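	/*
	 * Worked example of the conversion below, assuming the
	 * traditional hz == 100: the pipe clock runs at hz*10 = 1000
	 * ticks/sec, i.e. 1 ms per tick, so a configured delay of
	 * 30 ms becomes 30 * (100*10) / 1000 = 30 ticks.
	 */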
0c530ab8 | 1993 | p->delay = ( p->delay * (hz*10) ) / 1000 ; |
9bccf70c A |
1994 | /* We need either a pipe number or a flow_set number */ |
1995 | if (p->pipe_nr == 0 && pfs->fs_nr == 0) | |
1996 | return EINVAL ; | |
1997 | if (p->pipe_nr != 0 && pfs->fs_nr != 0) | |
1998 | return EINVAL ; | |
1999 | if (p->pipe_nr != 0) { /* this is a pipe */ | |
b0d623f7 | 2000 | struct dn_pipe *x, *b; |
91447636 A |
2001 | |
2002 | lck_mtx_lock(dn_mutex); | |
9bccf70c | 2003 | |
b0d623f7 A |
2004 | /* locate pipe */ |
2005 | b = locate_pipe(p->pipe_nr); | |
2006 | ||
9bccf70c | 2007 | if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */ |
91447636 | 2008 | x = _MALLOC(sizeof(struct dn_pipe), M_DUMMYNET, M_DONTWAIT | M_ZERO) ; |
9bccf70c | 2009 | if (x == NULL) { |
91447636 A |
2010 | lck_mtx_unlock(dn_mutex); |
2011 | printf("dummynet: no memory for new pipe\n"); | |
9bccf70c | 2012 | return ENOSPC; |
1c79356b | 2013 | } |
9bccf70c A |
2014 | x->pipe_nr = p->pipe_nr; |
2015 | x->fs.pipe = x ; | |
2016 | /* idle_heap is the only one from which we extract from the middle. | |
2017 | */ | |
2018 | x->idle_heap.size = x->idle_heap.elements = 0 ; | |
b0d623f7 | 2019 | x->idle_heap.offset=offsetof(struct dn_flow_queue, heap_pos); |
91447636 | 2020 | } else { |
9bccf70c | 2021 | x = b; |
91447636 A |
2022 | /* Flush accumulated credit for all queues */ |
2023 | for (i = 0; i <= x->fs.rq_size; i++) | |
2024 | for (q = x->fs.rq[i]; q; q = q->next) | |
2025 | q->numbytes = 0; | |
2026 | } | |
9bccf70c | 2027 | |
91447636 | 2028 | x->bandwidth = p->bandwidth ; |
9bccf70c A |
2029 | x->numbytes = 0; /* just in case... */ |
2030 | bcopy(p->if_name, x->if_name, sizeof(p->if_name) ); | |
2031 | x->ifp = NULL ; /* reset interface ptr */ | |
91447636 | 2032 | x->delay = p->delay ; |
9bccf70c | 2033 | set_fs_parms(&(x->fs), pfs); |
1c79356b | 2034 | |
1c79356b | 2035 | |
9bccf70c | 2036 | if ( x->fs.rq == NULL ) { /* a new pipe */ |
91447636 A |
2037 | r = alloc_hash(&(x->fs), pfs) ; |
2038 | if (r) { | |
2039 | lck_mtx_unlock(dn_mutex); | |
2040 | FREE(x, M_DUMMYNET); | |
2041 | return r ; | |
9bccf70c | 2042 | } |
b0d623f7 A |
2043 | SLIST_INSERT_HEAD(&pipehash[HASH(x->pipe_nr)], |
2044 | x, next); | |
9bccf70c | 2045 | } |
91447636 | 2046 | lck_mtx_unlock(dn_mutex); |
9bccf70c | 2047 | } else { /* config queue */ |
b0d623f7 | 2048 | struct dn_flow_set *x, *b ; |
9bccf70c | 2049 | |
91447636 | 2050 | lck_mtx_lock(dn_mutex); |
9bccf70c | 2051 | /* locate flow_set */ |
b0d623f7 | 2052 | b = locate_flowset(pfs->fs_nr); |
1c79356b | 2053 | |
9bccf70c | 2054 | if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new */ |
91447636 A |
2055 | if (pfs->parent_nr == 0) { /* need link to a pipe */ |
2056 | lck_mtx_unlock(dn_mutex); | |
2057 | return EINVAL ; | |
2058 | } | |
2059 | x = _MALLOC(sizeof(struct dn_flow_set), M_DUMMYNET, M_DONTWAIT | M_ZERO); | |
9bccf70c | 2060 | if (x == NULL) { |
91447636 A |
2061 | lck_mtx_unlock(dn_mutex); |
2062 | printf("dummynet: no memory for new flow_set\n"); | |
2063 | return ENOSPC; | |
1c79356b | 2064 | } |
9bccf70c A |
2065 | x->fs_nr = pfs->fs_nr; |
2066 | x->parent_nr = pfs->parent_nr; | |
2067 | x->weight = pfs->weight ; | |
2068 | if (x->weight == 0) | |
2069 | x->weight = 1 ; | |
2070 | else if (x->weight > 100) | |
2071 | x->weight = 100 ; | |
2072 | } else { | |
2073 | /* Change parent pipe not allowed; must delete and recreate */ | |
91447636 A |
2074 | if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr) { |
2075 | lck_mtx_unlock(dn_mutex); | |
2076 | return EINVAL ; | |
2077 | } | |
9bccf70c | 2078 | x = b; |
1c79356b | 2079 | } |
9bccf70c A |
2080 | set_fs_parms(x, pfs); |
2081 | ||
2082 | if ( x->rq == NULL ) { /* a new flow_set */ | |
91447636 A |
2083 | r = alloc_hash(x, pfs) ; |
2084 | if (r) { | |
2085 | lck_mtx_unlock(dn_mutex); | |
2086 | FREE(x, M_DUMMYNET); | |
2087 | return r ; | |
9bccf70c | 2088 | } |
b0d623f7 A |
2089 | SLIST_INSERT_HEAD(&flowsethash[HASH(x->fs_nr)], |
2090 | x, next); | |
9bccf70c | 2091 | } |
91447636 | 2092 | lck_mtx_unlock(dn_mutex); |
9bccf70c A |
2093 | } |
2094 | return 0 ; | |
2095 | } | |
2096 | ||
2097 | /* | |
2098 | * Helper function to remove from a heap all queues linked to
2099 | * a flow_set about to be deleted. | |
2100 | */ | |
2101 | static void | |
2102 | fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs) | |
2103 | { | |
2104 | int i = 0, found = 0 ; | |
2105 | for (; i < h->elements ;) | |
2106 | if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) { | |
2107 | h->elements-- ; | |
2108 | h->p[i] = h->p[h->elements] ; | |
2109 | found++ ; | |
2110 | } else | |
2111 | i++ ; | |
2112 | if (found) | |
2113 | heapify(h); | |
1c79356b A |
2114 | } |
2115 | ||
9bccf70c A |
2116 | /* |
2117 | * helper function to remove a pipe from a heap (can be there at most once) | |
2118 | */ | |
2119 | static void | |
2120 | pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p) | |
2121 | { | |
2122 | if (h->elements > 0) { | |
2123 | int i = 0 ; | |
2124 | for (i=0; i < h->elements ; i++ ) { | |
2125 | if (h->p[i].object == p) { /* found it */ | |
2126 | h->elements-- ; | |
2127 | h->p[i] = h->p[h->elements] ; | |
2128 | heapify(h); | |
2129 | break ; | |
2130 | } | |
2131 | } | |
2132 | } | |
2133 | } | |
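/*
 * Both helpers above delete from the array-backed heap by moving the
 * last element into the vacated slot and re-establishing heap order.
 * A generic min-heap sketch of that technique, with plain ints
 * standing in for dn_heap entries:
 */
static void
sift_down_sketch(int *h, int n, int i)
{
	for (;;) {
		int l = 2 * i + 1, r = l + 1, m = i, t;

		if (l < n && h[l] < h[m]) m = l;
		if (r < n && h[r] < h[m]) m = r;
		if (m == i) break;
		t = h[i]; h[i] = h[m]; h[m] = t;	/* swap down */
		i = m;
	}
}

static void
heapify_sketch(int *h, int n)
{
	int i;

	/* bottom-up rebuild, as the kernel's heapify() does after
	 * fs_remove_from_heap() punches holes in bulk */
	for (i = n / 2 - 1; i >= 0; i--)
		sift_down_sketch(h, n, i);
}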
2134 | ||
2135 | /* | |
2136 | * drain all queues. Called in case of severe mbuf shortage. | |
2137 | */ | |
1c79356b | 2138 | void |
2d21ac55 | 2139 | dummynet_drain(void) |
1c79356b | 2140 | { |
9bccf70c A |
2141 | struct dn_flow_set *fs; |
2142 | struct dn_pipe *p; | |
91447636 | 2143 | struct mbuf *m, *mnext; |
b0d623f7 | 2144 | int i; |
91447636 A |
2145 | |
2146 | lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); | |
9bccf70c A |
2147 | |
2148 | heap_free(&ready_heap); | |
2149 | heap_free(&wfq_ready_heap); | |
2150 | heap_free(&extract_heap); | |
2151 | /* remove all references to this pipe from flow_sets */ | |
b0d623f7 A |
2152 | for (i = 0; i < HASHSIZE; i++) |
2153 | SLIST_FOREACH(fs, &flowsethash[i], next) | |
2154 | purge_flow_set(fs, 0); | |
9bccf70c | 2155 | |
b0d623f7 A |
2156 | for (i = 0; i < HASHSIZE; i++) |
2157 | SLIST_FOREACH(p, &pipehash[i], next) { | |
2158 | purge_flow_set(&(p->fs), 0); | |
91447636 A |
2159 | |
2160 | mnext = p->head; | |
2161 | while ((m = mnext) != NULL) { | |
2162 | mnext = m->m_nextpkt; | |
2163 | DN_FREE_PKT(m); | |
2164 | } | |
9bccf70c A |
2165 | p->head = p->tail = NULL ; |
2166 | } | |
1c79356b A |
2167 | } |
2168 | ||
9bccf70c A |
2169 | /* |
2170 | * Fully delete a pipe or a queue, cleaning up associated info. | |
2171 | */ | |
91447636 | 2172 | static int |
9bccf70c A |
2173 | delete_pipe(struct dn_pipe *p) |
2174 | { | |
9bccf70c A |
2175 | if (p->pipe_nr == 0 && p->fs.fs_nr == 0) |
2176 | return EINVAL ; | |
2177 | if (p->pipe_nr != 0 && p->fs.fs_nr != 0) | |
2178 | return EINVAL ; | |
2179 | if (p->pipe_nr != 0) { /* this is an old-style pipe */ | |
b0d623f7 | 2180 | struct dn_pipe *b; |
9bccf70c | 2181 | struct dn_flow_set *fs; |
b0d623f7 | 2182 | int i; |
1c79356b | 2183 | |
91447636 | 2184 | lck_mtx_lock(dn_mutex); |
9bccf70c | 2185 | /* locate pipe */ |
b0d623f7 A |
2186 | b = locate_pipe(p->pipe_nr); |
2187 | if(b == NULL){ | |
91447636 | 2188 | lck_mtx_unlock(dn_mutex); |
9bccf70c | 2189 | return EINVAL ; /* not found */ |
91447636 | 2190 | } |
1c79356b | 2191 | |
b0d623f7 A |
2192 | /* Unlink from list of pipes. */ |
2193 | SLIST_REMOVE(&pipehash[HASH(b->pipe_nr)], b, dn_pipe, next); | |
2194 | ||
9bccf70c | 2195 | /* remove references to this pipe from the ip_fw rules. */ |
91447636 | 2196 | flush_pipe_ptrs(&(b->fs)); |
9bccf70c | 2197 | |
b0d623f7 A |
2198 | /* Remove all references to this pipe from flow_sets. */ |
2199 | for (i = 0; i < HASHSIZE; i++) | |
2200 | SLIST_FOREACH(fs, &flowsethash[i], next) | |
2201 | if (fs->pipe == b) { | |
2202 | printf("dummynet: ++ ref to pipe %d from fs %d\n", | |
2203 | p->pipe_nr, fs->fs_nr); | |
2204 | fs->pipe = NULL ; | |
2205 | purge_flow_set(fs, 0); | |
2206 | } | |
9bccf70c | 2207 | fs_remove_from_heap(&ready_heap, &(b->fs)); |
b0d623f7 | 2208 | |
9bccf70c A |
2209 | purge_pipe(b); /* remove all data associated to this pipe */ |
2210 | /* remove reference to here from extract_heap and wfq_ready_heap */ | |
2211 | pipe_remove_from_heap(&extract_heap, b); | |
2212 | pipe_remove_from_heap(&wfq_ready_heap, b); | |
91447636 A |
2213 | lck_mtx_unlock(dn_mutex); |
2214 | ||
2215 | FREE(b, M_DUMMYNET); | |
9bccf70c | 2216 | } else { /* this is a WF2Q queue (dn_flow_set) */ |
b0d623f7 | 2217 | struct dn_flow_set *b; |
9bccf70c | 2218 | |
91447636 | 2219 | lck_mtx_lock(dn_mutex); |
9bccf70c | 2220 | /* locate set */ |
b0d623f7 A |
2221 | b = locate_flowset(p->fs.fs_nr); |
2222 | if (b == NULL) { | |
91447636 | 2223 | lck_mtx_unlock(dn_mutex); |
9bccf70c | 2224 | return EINVAL ; /* not found */ |
91447636 | 2225 | } |
9bccf70c | 2226 | |
9bccf70c | 2227 | /* remove references to this flow_set from the ip_fw rules. */ |
91447636 | 2228 | flush_pipe_ptrs(b); |
9bccf70c | 2229 | |
b0d623f7 A |
2230 | /* Unlink from list of flowsets. */ |
2231 | SLIST_REMOVE( &flowsethash[HASH(b->fs_nr)], b, dn_flow_set, next); | |
2232 | ||
9bccf70c A |
2233 | if (b->pipe != NULL) { |
2234 | /* Update total weight on parent pipe and cleanup parent heaps */ | |
2235 | b->pipe->sum -= b->weight * b->backlogged ; | |
2236 | fs_remove_from_heap(&(b->pipe->not_eligible_heap), b); | |
2237 | fs_remove_from_heap(&(b->pipe->scheduler_heap), b); | |
2238 | #if 1 /* XXX should i remove from idle_heap as well ? */ | |
2239 | fs_remove_from_heap(&(b->pipe->idle_heap), b); | |
2240 | #endif | |
2241 | } | |
2242 | purge_flow_set(b, 1); | |
91447636 | 2243 | lck_mtx_unlock(dn_mutex); |
9bccf70c A |
2244 | } |
2245 | return 0 ; | |
2246 | } | |
2247 | ||
2248 | /* | |
2249 | * helper function used to copy data from kernel in DUMMYNET_GET | |
2250 | */ | |
b0d623f7 A |
2251 | static |
2252 | char* dn_copy_set_32(struct dn_flow_set *set, char *bp) | |
9bccf70c A |
2253 | { |
2254 | int i, copied = 0 ; | |
b0d623f7 A |
2255 | struct dn_flow_queue *q; |
2256 | struct dn_flow_queue_32 *qp = (struct dn_flow_queue_32 *)bp; | |
2257 | ||
91447636 | 2258 | lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); |
b0d623f7 A |
2259 | |
2260 | for (i = 0 ; i <= set->rq_size ; i++) | |
2261 | for (q = set->rq[i] ; q ; q = q->next, qp++ ) { | |
2262 | if (q->hash_slot != i) | |
2263 | printf("dummynet: ++ at %d: wrong slot (have %d, " | |
2264 | "should be %d)\n", copied, q->hash_slot, i); | |
2265 | if (q->fs != set) | |
2266 | printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", | |
2267 | i, q->fs, set); | |
2268 | copied++ ; | |
2269 | cp_queue_to_32_user( q, qp ); | |
2270 | /* cleanup pointers */ | |
2271 | qp->next = (user32_addr_t)0 ; | |
2272 | qp->head = qp->tail = (user32_addr_t)0 ; | |
2273 | qp->fs = (user32_addr_t)0 ; | |
2274 | } | |
2275 | if (copied != set->rq_elements) | |
2276 | printf("dummynet: ++ wrong count, have %d should be %d\n", | |
2277 | copied, set->rq_elements); | |
2278 | return (char *)qp ; | |
2279 | } | |
91447636 | 2280 | |
b0d623f7 A |
2281 | static |
2282 | char* dn_copy_set_64(struct dn_flow_set *set, char *bp) | |
2283 | { | |
2284 | int i, copied = 0 ; | |
2285 | struct dn_flow_queue *q; | |
2286 | struct dn_flow_queue_64 *qp = (struct dn_flow_queue_64 *)bp; | |
2287 | ||
2288 | lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); | |
2289 | ||
9bccf70c | 2290 | for (i = 0 ; i <= set->rq_size ; i++) |
b0d623f7 A |
2291 | for (q = set->rq[i] ; q ; q = q->next, qp++ ) { |
2292 | if (q->hash_slot != i) | |
2293 | printf("dummynet: ++ at %d: wrong slot (have %d, " | |
2294 | "should be %d)\n", copied, q->hash_slot, i); | |
2295 | if (q->fs != set) | |
2296 | printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", | |
2297 | i, q->fs, set); | |
2298 | copied++ ; | |
2299 | //bcopy(q, qp, sizeof(*q)); | |
2300 | cp_queue_to_64_user( q, qp ); | |
2301 | /* cleanup pointers */ | |
2302 | qp->next = USER_ADDR_NULL ; | |
2303 | qp->head = qp->tail = USER_ADDR_NULL ; | |
2304 | qp->fs = USER_ADDR_NULL ; | |
2305 | } | |
9bccf70c | 2306 | if (copied != set->rq_elements) |
b0d623f7 A |
2307 | printf("dummynet: ++ wrong count, have %d should be %d\n", |
2308 | copied, set->rq_elements); | |
9bccf70c A |
2309 | return (char *)qp ; |
2310 | } | |
1c79356b | 2311 | |
91447636 | 2312 | static size_t |
b0d623f7 | 2313 | dn_calc_size(int is64user) |
1c79356b | 2314 | { |
9bccf70c A |
2315 | struct dn_flow_set *set ; |
2316 | struct dn_pipe *p ; | |
b0d623f7 A |
2317 | size_t size = 0 ; |
2318 | size_t pipesize; | |
2319 | size_t queuesize; | |
2320 | size_t setsize; | |
2321 | int i; | |
91447636 A |
2322 | |
2323 | lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); | |
b0d623f7 A |
2324 | if ( is64user ){ |
2325 | pipesize = sizeof(struct dn_pipe_64); | |
2326 | queuesize = sizeof(struct dn_flow_queue_64); | |
2327 | setsize = sizeof(struct dn_flow_set_64); | |
2328 | } | |
2329 | else { | |
2330 | pipesize = sizeof(struct dn_pipe_32); | |
2331 | queuesize = sizeof( struct dn_flow_queue_32 ); | |
2332 | setsize = sizeof(struct dn_flow_set_32); | |
2333 | } | |
9bccf70c A |
2334 | /* |
2335 | * compute the size of the user-visible data: pipes use the 32/64-bit layouts chosen above; flow_sets are always emitted in the 64-bit layout (see dummynet_get).
2336 | */ | |
b0d623f7 A |
2337 | for (i = 0; i < HASHSIZE; i++) { |
2338 | SLIST_FOREACH(p, &pipehash[i], next) | |
2339 | size += pipesize +
2340 | p->fs.rq_elements * queuesize;
2341 | SLIST_FOREACH(set, &flowsethash[i], next)
2342 | size += sizeof(struct dn_flow_set_64) +
2343 | set->rq_elements * sizeof(struct dn_flow_queue_64);
2344 | } | |
2345 | return size; | |
91447636 A |
2346 | } |
2347 | ||
2348 | static int | |
2349 | dummynet_get(struct sockopt *sopt) | |
2350 | { | |
b0d623f7 | 2351 | char *buf, *bp=NULL; /* bp is the "copy-pointer" */ |
91447636 A |
2352 | size_t size ; |
2353 | struct dn_flow_set *set ; | |
2354 | struct dn_pipe *p ; | |
2355 | int error=0, i ; | |
b0d623f7 | 2356 | int is64user = 0; |
91447636 A |
2357 | |
2358 | /* XXX lock held too long */ | |
2359 | lck_mtx_lock(dn_mutex); | |
2360 | /* | |
2361 | * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we | |
2362 | * cannot use this flag while holding a mutex. | |
2363 | */ | |
b0d623f7 A |
2364 | if (proc_is64bit(sopt->sopt_p)) |
2365 | is64user = 1; | |
91447636 | 2366 | for (i = 0; i < 10; i++) { |
b0d623f7 | 2367 | size = dn_calc_size(is64user); |
91447636 A |
2368 | lck_mtx_unlock(dn_mutex); |
2369 | buf = _MALLOC(size, M_TEMP, M_WAITOK); | |
b0d623f7 A |
2370 | if (buf == NULL) |
2371 | return ENOBUFS; | |
91447636 | 2372 | lck_mtx_lock(dn_mutex); |
b0d623f7 | 2373 | if (size == dn_calc_size(is64user)) |
91447636 A |
2374 | break; |
2375 | FREE(buf, M_TEMP); | |
2376 | buf = NULL; | |
2377 | } | |
2378 | if (buf == NULL) { | |
2379 | lck_mtx_unlock(dn_mutex); | |
2380 | return ENOBUFS ; | |
9bccf70c | 2381 | } |
9bccf70c | 2382 | |
9bccf70c | 2383 | |
b0d623f7 A |
2384 | bp = buf; |
2385 | for (i = 0; i < HASHSIZE; i++) | |
2386 | SLIST_FOREACH(p, &pipehash[i], next) { | |
2387 | /* | |
2388 | * copy pipe descriptor into *bp, convert delay back to ms, | |
2389 | * then copy the flow_set descriptor(s) one at a time. | |
2390 | * After each flow_set, copy the queue descriptor it owns. | |
2391 | */ | |
2392 | if ( is64user ){ | |
2393 | bp = cp_pipe_to_64_user(p, (struct dn_pipe_64 *)bp); | |
2394 | } | |
2395 | else{ | |
2396 | bp = cp_pipe_to_32_user(p, (struct dn_pipe_32 *)bp); | |
2397 | } | |
9bccf70c | 2398 | } |
b0d623f7 A |
2399 | for (i = 0; i < HASHSIZE; i++) |
2400 | SLIST_FOREACH(set, &flowsethash[i], next) { | |
2401 | struct dn_flow_set_64 *fs_bp = (struct dn_flow_set_64 *)bp ; | |
2402 | cp_flow_set_to_64_user(set, fs_bp); | |
2403 | /* XXX same hack as above; this path emits the 64-bit layout even for 32-bit callers */
2404 | fs_bp->next = CAST_DOWN(user64_addr_t, DN_IS_QUEUE); | |
2405 | fs_bp->pipe = USER_ADDR_NULL; | |
2406 | fs_bp->rq = USER_ADDR_NULL ; | |
2407 | bp += sizeof(struct dn_flow_set_64); | |
2408 | bp = dn_copy_set_64( set, bp ); | |
9bccf70c | 2409 | } |
91447636 A |
2410 | lck_mtx_unlock(dn_mutex); |
2411 | ||
9bccf70c A |
2412 | error = sooptcopyout(sopt, buf, size); |
2413 | FREE(buf, M_TEMP); | |
2414 | return error ; | |
1c79356b A |
2415 | } |
2416 | ||
9bccf70c A |
2417 | /* |
2418 | * Handler for the various dummynet socket options (get, flush, config, del) | |
2419 | */ | |
1c79356b | 2420 | static int |
9bccf70c | 2421 | ip_dn_ctl(struct sockopt *sopt) |
1c79356b | 2422 | { |
9bccf70c A |
2423 | int error = 0 ; |
2424 | struct dn_pipe *p, tmp_pipe; | |
2425 | ||
2426 | /* Disallow sets in really-really secure mode. */ | |
2427 | if (sopt->sopt_dir == SOPT_SET && securelevel >= 3) | |
2428 | return (EPERM); | |
2429 | ||
2430 | switch (sopt->sopt_name) { | |
2431 | default : | |
91447636 | 2432 | printf("dummynet: -- unknown option %d\n", sopt->sopt_name);
9bccf70c A |
2433 | return EINVAL ; |
2434 | ||
2435 | case IP_DUMMYNET_GET : | |
2436 | error = dummynet_get(sopt); | |
2437 | break ; | |
2438 | ||
2439 | case IP_DUMMYNET_FLUSH : | |
2440 | dummynet_flush() ; | |
2441 | break ; | |
91447636 | 2442 | |
9bccf70c A |
2443 | case IP_DUMMYNET_CONFIGURE : |
2444 | p = &tmp_pipe ; | |
b0d623f7 A |
2445 | if (proc_is64bit(sopt->sopt_p)) |
2446 | error = cp_pipe_from_user_64( sopt, p ); | |
2447 | else | |
2448 | error = cp_pipe_from_user_32( sopt, p ); | |
2449 | ||
9bccf70c A |
2450 | if (error) |
2451 | break ; | |
2452 | error = config_pipe(p); | |
2453 | break ; | |
2454 | ||
2455 | case IP_DUMMYNET_DEL : /* remove a pipe or queue */ | |
2456 | p = &tmp_pipe ; | |
b0d623f7 A |
2457 | if (proc_is64bit(sopt->sopt_p)) |
2458 | error = cp_pipe_from_user_64( sopt, p ); | |
2459 | else | |
2460 | error = cp_pipe_from_user_32( sopt, p ); | |
9bccf70c A |
2461 | if (error) |
2462 | break ; | |
2463 | ||
2464 | error = delete_pipe(p); | |
2465 | break ; | |
2466 | } | |
2467 | return error ; | |
1c79356b A |
2468 | } |
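/*
 * For illustration, a hedged user-space sketch of driving this
 * handler: IP_DUMMYNET_FLUSH (from <netinet/in.h>) carries no
 * argument and is issued on a raw IP socket, which requires root.
 * Error handling is kept minimal.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>

static int
dn_flush_sketch(void)
{
	int ret = -1;
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

	if (s < 0)
		return -1;
	/* reaches the IP_DUMMYNET_FLUSH case in ip_dn_ctl() above */
	if (setsockopt(s, IPPROTO_IP, IP_DUMMYNET_FLUSH, NULL, 0) == 0)
		ret = 0;
	close(s);
	return ret;
}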
2469 | ||
91447636 | 2470 | void |
9bccf70c | 2471 | ip_dn_init(void) |
1c79356b | 2472 | { |
91447636 A |
2473 | /* setup locks */ |
2474 | dn_mutex_grp_attr = lck_grp_attr_alloc_init(); | |
2475 | dn_mutex_grp = lck_grp_alloc_init("dn", dn_mutex_grp_attr); | |
2476 | dn_mutex_attr = lck_attr_alloc_init(); | |
91447636 A |
2477 | |
2478 | if ((dn_mutex = lck_mtx_alloc_init(dn_mutex_grp, dn_mutex_attr)) == NULL) { | |
2479 | printf("ip_dn_init: can't alloc dn_mutex\n"); | |
2480 | return; | |
2481 | } | |
2482 | ||
b0d623f7 | 2483 | ready_heap.size = ready_heap.elements = 0 ; |
9bccf70c A |
2484 | ready_heap.offset = 0 ; |
2485 | ||
2486 | wfq_ready_heap.size = wfq_ready_heap.elements = 0 ; | |
2487 | wfq_ready_heap.offset = 0 ; | |
2488 | ||
2489 | extract_heap.size = extract_heap.elements = 0 ; | |
2490 | extract_heap.offset = 0 ; | |
2491 | ip_dn_ctl_ptr = ip_dn_ctl; | |
91447636 A |
2492 | ip_dn_io_ptr = dummynet_io; |
2493 | ip_dn_ruledel_ptr = dn_rule_delete; | |
1c79356b | 2494 | } |