2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
30 * Portions Copyright (c) 2000 Akamba Corp.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.84 2004/08/25 09:31:30 pjd Exp $
57 #define DUMMYNET_DEBUG
60 * This module implements IP dummynet, a bandwidth limiter/delay emulator
61 * used in conjunction with the ipfw package.
62 * Description of the data structures used is in ip_dummynet.h
63 * Here you mainly find the following blocks of code:
64 * + variable declarations;
65 * + heap management functions;
66 * + scheduler and dummynet functions;
67 * + configuration and initialization.
69 * NOTA BENE: critical sections are protected by the "dummynet lock".
71 * Most important Changes:
73 * 010124: Fixed WF2Q behaviour
74 * 010122: Fixed spl protection.
75 * 000601: WF2Q support
76 * 000106: large rewrite, use heaps to handle very many pipes.
77 * 980513: initial release
79 * include files marked with XXX are probably not needed
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/malloc.h>
86 #include <sys/queue.h> /* XXX */
87 #include <sys/kernel.h>
88 #include <sys/random.h>
89 #include <sys/socket.h>
90 #include <sys/socketvar.h>
92 #include <sys/sysctl.h>
94 #include <net/route.h>
95 #include <net/kpi_protocol.h>
97 #include <net/kpi_protocol.h>
99 #include <net/nwk_wq.h>
100 #include <net/pfvar.h>
101 #include <netinet/in.h>
102 #include <netinet/in_systm.h>
103 #include <netinet/in_var.h>
104 #include <netinet/ip.h>
105 #include <netinet/ip_fw.h>
106 #include <netinet/ip_dummynet.h>
107 #include <netinet/ip_var.h>
109 #include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
110 #include <netinet6/ip6_var.h>
112 static struct ip_fw default_rule
;
115 * We keep a private variable for the simulation time, but we could
116 * probably use an existing one ("softticks" in sys/kern/kern_timer.c)
118 static dn_key curr_time
= 0 ; /* current simulation time */
120 /* this is for the timer that fires to call dummynet() - we only enable the timer when
121 there are packets to process, otherwise it's disabled */
122 static int timer_enabled
= 0;
124 static int dn_hash_size
= 64 ; /* default hash size */
126 /* statistics on number of queue searches and search steps */
127 static int searches
, search_steps
;
128 static int pipe_expire
= 1 ; /* expire queue if empty */
129 static int dn_max_ratio
= 16 ; /* max queues/buckets ratio */
131 static int red_lookup_depth
= 256; /* RED - default lookup table depth */
132 static int red_avg_pkt_size
= 512; /* RED - default medium packet size */
133 static int red_max_pkt_size
= 1500; /* RED - default max packet size */
135 static int serialize
= 0;
138 * Three heaps contain queues and pipes that the scheduler handles:
140 * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
142 * wfq_ready_heap contains the pipes associated with WF2Q flows
144 * extract_heap contains pipes associated with delay lines.
147 static struct dn_heap ready_heap
, extract_heap
, wfq_ready_heap
;
149 static int heap_init(struct dn_heap
*h
, int size
) ;
150 static int heap_insert (struct dn_heap
*h
, dn_key key1
, void *p
);
151 static void heap_extract(struct dn_heap
*h
, void *obj
);
154 static void transmit_event(struct dn_pipe
*pipe
, struct mbuf
**head
,
156 static void ready_event(struct dn_flow_queue
*q
, struct mbuf
**head
,
158 static void ready_event_wfq(struct dn_pipe
*p
, struct mbuf
**head
,
162 * Packets are retrieved from queues in Dummynet in chains instead of
163 * packet-by-packet. The entire list of packets is first dequeued and
164 * sent out by the following function.
166 static void dummynet_send(struct mbuf
*m
);
169 #define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f)
170 static struct dn_pipe_head pipehash
[HASHSIZE
]; /* all pipes */
171 static struct dn_flow_set_head flowsethash
[HASHSIZE
]; /* all flowsets */
174 SYSCTL_NODE(_net_inet_ip
, OID_AUTO
, dummynet
,
175 CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "Dummynet");
176 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, hash_size
,
177 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dn_hash_size
, 0, "Default hash table size");
178 SYSCTL_QUAD(_net_inet_ip_dummynet
, OID_AUTO
, curr_time
,
179 CTLFLAG_RD
| CTLFLAG_LOCKED
, &curr_time
, "Current tick");
180 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, ready_heap
,
181 CTLFLAG_RD
| CTLFLAG_LOCKED
, &ready_heap
.size
, 0, "Size of ready heap");
182 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, extract_heap
,
183 CTLFLAG_RD
| CTLFLAG_LOCKED
, &extract_heap
.size
, 0, "Size of extract heap");
184 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, searches
,
185 CTLFLAG_RD
| CTLFLAG_LOCKED
, &searches
, 0, "Number of queue searches");
186 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, search_steps
,
187 CTLFLAG_RD
| CTLFLAG_LOCKED
, &search_steps
, 0, "Number of queue search steps");
188 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, expire
,
189 CTLFLAG_RW
| CTLFLAG_LOCKED
, &pipe_expire
, 0, "Expire queue if empty");
190 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, max_chain_len
,
191 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dn_max_ratio
, 0,
192 "Max ratio between dynamic queues and buckets");
193 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, red_lookup_depth
,
194 CTLFLAG_RD
| CTLFLAG_LOCKED
, &red_lookup_depth
, 0, "Depth of RED lookup table");
195 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, red_avg_pkt_size
,
196 CTLFLAG_RD
| CTLFLAG_LOCKED
, &red_avg_pkt_size
, 0, "RED Medium packet size");
197 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, red_max_pkt_size
,
198 CTLFLAG_RD
| CTLFLAG_LOCKED
, &red_max_pkt_size
, 0, "RED Max packet size");
201 #ifdef DUMMYNET_DEBUG
202 int dummynet_debug
= 0;
204 SYSCTL_INT(_net_inet_ip_dummynet
, OID_AUTO
, debug
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &dummynet_debug
,
205 0, "control debugging printfs");
207 #define DPRINTF(X) if (dummynet_debug) printf X
213 static lck_grp_t
*dn_mutex_grp
;
214 static lck_grp_attr_t
*dn_mutex_grp_attr
;
215 static lck_attr_t
*dn_mutex_attr
;
216 decl_lck_mtx_data(static, dn_mutex_data
);
217 static lck_mtx_t
*dn_mutex
= &dn_mutex_data
;
219 static int config_pipe(struct dn_pipe
*p
);
220 static int ip_dn_ctl(struct sockopt
*sopt
);
222 static void dummynet(void *);
223 static void dummynet_flush(void);
224 void dummynet_drain(void);
225 static ip_dn_io_t dummynet_io
;
227 static void cp_flow_set_to_64_user(struct dn_flow_set
*set
, struct dn_flow_set_64
*fs_bp
);
228 static void cp_queue_to_64_user( struct dn_flow_queue
*q
, struct dn_flow_queue_64
*qp
);
229 static char *cp_pipe_to_64_user(struct dn_pipe
*p
, struct dn_pipe_64
*pipe_bp
);
230 static char* dn_copy_set_64(struct dn_flow_set
*set
, char *bp
);
231 static int cp_pipe_from_user_64( struct sockopt
*sopt
, struct dn_pipe
*p
);
233 static void cp_flow_set_to_32_user(struct dn_flow_set
*set
, struct dn_flow_set_32
*fs_bp
);
234 static void cp_queue_to_32_user( struct dn_flow_queue
*q
, struct dn_flow_queue_32
*qp
);
235 static char *cp_pipe_to_32_user(struct dn_pipe
*p
, struct dn_pipe_32
*pipe_bp
);
236 static char* dn_copy_set_32(struct dn_flow_set
*set
, char *bp
);
237 static int cp_pipe_from_user_32( struct sockopt
*sopt
, struct dn_pipe
*p
);
239 struct eventhandler_lists_ctxt dummynet_evhdlr_ctxt
;
241 uint32_t my_random(void)
244 read_frandom(&val
, sizeof(val
));
251 * Heap management functions.
253 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
254 * Some macros help finding parent/children so we can optimize them.
256 * heap_init() is called to expand the heap when needed.
257 * Increment size in blocks of 16 entries.
258 * XXX failure to allocate a new element is a pretty bad failure
259 * as we basically stall a whole queue forever!!
260 * Returns 1 on error, 0 on success
262 #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
263 #define HEAP_LEFT(x) ( 2*(x) + 1 )
264 #define HEAP_IS_LEFT(x) ( (x) & 1 )
265 #define HEAP_RIGHT(x) ( 2*(x) + 2 )
266 #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
267 #define HEAP_INCREMENT 15
270 int cp_pipe_from_user_32( struct sockopt
*sopt
, struct dn_pipe
*p
)
272 struct dn_pipe_32 user_pipe_32
;
275 error
= sooptcopyin(sopt
, &user_pipe_32
, sizeof(struct dn_pipe_32
), sizeof(struct dn_pipe_32
));
277 p
->pipe_nr
= user_pipe_32
.pipe_nr
;
278 p
->bandwidth
= user_pipe_32
.bandwidth
;
279 p
->delay
= user_pipe_32
.delay
;
280 p
->V
= user_pipe_32
.V
;
281 p
->sum
= user_pipe_32
.sum
;
282 p
->numbytes
= user_pipe_32
.numbytes
;
283 p
->sched_time
= user_pipe_32
.sched_time
;
284 bcopy( user_pipe_32
.if_name
, p
->if_name
, IFNAMSIZ
);
285 p
->ready
= user_pipe_32
.ready
;
287 p
->fs
.fs_nr
= user_pipe_32
.fs
.fs_nr
;
288 p
->fs
.flags_fs
= user_pipe_32
.fs
.flags_fs
;
289 p
->fs
.parent_nr
= user_pipe_32
.fs
.parent_nr
;
290 p
->fs
.weight
= user_pipe_32
.fs
.weight
;
291 p
->fs
.qsize
= user_pipe_32
.fs
.qsize
;
292 p
->fs
.plr
= user_pipe_32
.fs
.plr
;
293 p
->fs
.flow_mask
= user_pipe_32
.fs
.flow_mask
;
294 p
->fs
.rq_size
= user_pipe_32
.fs
.rq_size
;
295 p
->fs
.rq_elements
= user_pipe_32
.fs
.rq_elements
;
296 p
->fs
.last_expired
= user_pipe_32
.fs
.last_expired
;
297 p
->fs
.backlogged
= user_pipe_32
.fs
.backlogged
;
298 p
->fs
.w_q
= user_pipe_32
.fs
.w_q
;
299 p
->fs
.max_th
= user_pipe_32
.fs
.max_th
;
300 p
->fs
.min_th
= user_pipe_32
.fs
.min_th
;
301 p
->fs
.max_p
= user_pipe_32
.fs
.max_p
;
302 p
->fs
.c_1
= user_pipe_32
.fs
.c_1
;
303 p
->fs
.c_2
= user_pipe_32
.fs
.c_2
;
304 p
->fs
.c_3
= user_pipe_32
.fs
.c_3
;
305 p
->fs
.c_4
= user_pipe_32
.fs
.c_4
;
306 p
->fs
.lookup_depth
= user_pipe_32
.fs
.lookup_depth
;
307 p
->fs
.lookup_step
= user_pipe_32
.fs
.lookup_step
;
308 p
->fs
.lookup_weight
= user_pipe_32
.fs
.lookup_weight
;
309 p
->fs
.avg_pkt_size
= user_pipe_32
.fs
.avg_pkt_size
;
310 p
->fs
.max_pkt_size
= user_pipe_32
.fs
.max_pkt_size
;
316 int cp_pipe_from_user_64( struct sockopt
*sopt
, struct dn_pipe
*p
)
318 struct dn_pipe_64 user_pipe_64
;
321 error
= sooptcopyin(sopt
, &user_pipe_64
, sizeof(struct dn_pipe_64
), sizeof(struct dn_pipe_64
));
323 p
->pipe_nr
= user_pipe_64
.pipe_nr
;
324 p
->bandwidth
= user_pipe_64
.bandwidth
;
325 p
->delay
= user_pipe_64
.delay
;
326 p
->V
= user_pipe_64
.V
;
327 p
->sum
= user_pipe_64
.sum
;
328 p
->numbytes
= user_pipe_64
.numbytes
;
329 p
->sched_time
= user_pipe_64
.sched_time
;
330 bcopy( user_pipe_64
.if_name
, p
->if_name
, IFNAMSIZ
);
331 p
->ready
= user_pipe_64
.ready
;
333 p
->fs
.fs_nr
= user_pipe_64
.fs
.fs_nr
;
334 p
->fs
.flags_fs
= user_pipe_64
.fs
.flags_fs
;
335 p
->fs
.parent_nr
= user_pipe_64
.fs
.parent_nr
;
336 p
->fs
.weight
= user_pipe_64
.fs
.weight
;
337 p
->fs
.qsize
= user_pipe_64
.fs
.qsize
;
338 p
->fs
.plr
= user_pipe_64
.fs
.plr
;
339 p
->fs
.flow_mask
= user_pipe_64
.fs
.flow_mask
;
340 p
->fs
.rq_size
= user_pipe_64
.fs
.rq_size
;
341 p
->fs
.rq_elements
= user_pipe_64
.fs
.rq_elements
;
342 p
->fs
.last_expired
= user_pipe_64
.fs
.last_expired
;
343 p
->fs
.backlogged
= user_pipe_64
.fs
.backlogged
;
344 p
->fs
.w_q
= user_pipe_64
.fs
.w_q
;
345 p
->fs
.max_th
= user_pipe_64
.fs
.max_th
;
346 p
->fs
.min_th
= user_pipe_64
.fs
.min_th
;
347 p
->fs
.max_p
= user_pipe_64
.fs
.max_p
;
348 p
->fs
.c_1
= user_pipe_64
.fs
.c_1
;
349 p
->fs
.c_2
= user_pipe_64
.fs
.c_2
;
350 p
->fs
.c_3
= user_pipe_64
.fs
.c_3
;
351 p
->fs
.c_4
= user_pipe_64
.fs
.c_4
;
352 p
->fs
.lookup_depth
= user_pipe_64
.fs
.lookup_depth
;
353 p
->fs
.lookup_step
= user_pipe_64
.fs
.lookup_step
;
354 p
->fs
.lookup_weight
= user_pipe_64
.fs
.lookup_weight
;
355 p
->fs
.avg_pkt_size
= user_pipe_64
.fs
.avg_pkt_size
;
356 p
->fs
.max_pkt_size
= user_pipe_64
.fs
.max_pkt_size
;
362 cp_flow_set_to_32_user(struct dn_flow_set
*set
, struct dn_flow_set_32
*fs_bp
)
364 fs_bp
->fs_nr
= set
->fs_nr
;
365 fs_bp
->flags_fs
= set
->flags_fs
;
366 fs_bp
->parent_nr
= set
->parent_nr
;
367 fs_bp
->weight
= set
->weight
;
368 fs_bp
->qsize
= set
->qsize
;
369 fs_bp
->plr
= set
->plr
;
370 fs_bp
->flow_mask
= set
->flow_mask
;
371 fs_bp
->rq_size
= set
->rq_size
;
372 fs_bp
->rq_elements
= set
->rq_elements
;
373 fs_bp
->last_expired
= set
->last_expired
;
374 fs_bp
->backlogged
= set
->backlogged
;
375 fs_bp
->w_q
= set
->w_q
;
376 fs_bp
->max_th
= set
->max_th
;
377 fs_bp
->min_th
= set
->min_th
;
378 fs_bp
->max_p
= set
->max_p
;
379 fs_bp
->c_1
= set
->c_1
;
380 fs_bp
->c_2
= set
->c_2
;
381 fs_bp
->c_3
= set
->c_3
;
382 fs_bp
->c_4
= set
->c_4
;
383 fs_bp
->w_q_lookup
= CAST_DOWN_EXPLICIT(user32_addr_t
, set
->w_q_lookup
) ;
384 fs_bp
->lookup_depth
= set
->lookup_depth
;
385 fs_bp
->lookup_step
= set
->lookup_step
;
386 fs_bp
->lookup_weight
= set
->lookup_weight
;
387 fs_bp
->avg_pkt_size
= set
->avg_pkt_size
;
388 fs_bp
->max_pkt_size
= set
->max_pkt_size
;
392 cp_flow_set_to_64_user(struct dn_flow_set
*set
, struct dn_flow_set_64
*fs_bp
)
394 fs_bp
->fs_nr
= set
->fs_nr
;
395 fs_bp
->flags_fs
= set
->flags_fs
;
396 fs_bp
->parent_nr
= set
->parent_nr
;
397 fs_bp
->weight
= set
->weight
;
398 fs_bp
->qsize
= set
->qsize
;
399 fs_bp
->plr
= set
->plr
;
400 fs_bp
->flow_mask
= set
->flow_mask
;
401 fs_bp
->rq_size
= set
->rq_size
;
402 fs_bp
->rq_elements
= set
->rq_elements
;
403 fs_bp
->last_expired
= set
->last_expired
;
404 fs_bp
->backlogged
= set
->backlogged
;
405 fs_bp
->w_q
= set
->w_q
;
406 fs_bp
->max_th
= set
->max_th
;
407 fs_bp
->min_th
= set
->min_th
;
408 fs_bp
->max_p
= set
->max_p
;
409 fs_bp
->c_1
= set
->c_1
;
410 fs_bp
->c_2
= set
->c_2
;
411 fs_bp
->c_3
= set
->c_3
;
412 fs_bp
->c_4
= set
->c_4
;
413 fs_bp
->w_q_lookup
= CAST_DOWN(user64_addr_t
, set
->w_q_lookup
) ;
414 fs_bp
->lookup_depth
= set
->lookup_depth
;
415 fs_bp
->lookup_step
= set
->lookup_step
;
416 fs_bp
->lookup_weight
= set
->lookup_weight
;
417 fs_bp
->avg_pkt_size
= set
->avg_pkt_size
;
418 fs_bp
->max_pkt_size
= set
->max_pkt_size
;
422 void cp_queue_to_32_user( struct dn_flow_queue
*q
, struct dn_flow_queue_32
*qp
)
426 qp
->len_bytes
= q
->len_bytes
;
427 qp
->numbytes
= q
->numbytes
;
428 qp
->tot_pkts
= q
->tot_pkts
;
429 qp
->tot_bytes
= q
->tot_bytes
;
430 qp
->drops
= q
->drops
;
431 qp
->hash_slot
= q
->hash_slot
;
433 qp
->count
= q
->count
;
434 qp
->random
= q
->random
;
435 qp
->q_time
= q
->q_time
;
436 qp
->heap_pos
= q
->heap_pos
;
437 qp
->sched_time
= q
->sched_time
;
443 void cp_queue_to_64_user( struct dn_flow_queue
*q
, struct dn_flow_queue_64
*qp
)
447 qp
->len_bytes
= q
->len_bytes
;
448 qp
->numbytes
= q
->numbytes
;
449 qp
->tot_pkts
= q
->tot_pkts
;
450 qp
->tot_bytes
= q
->tot_bytes
;
451 qp
->drops
= q
->drops
;
452 qp
->hash_slot
= q
->hash_slot
;
454 qp
->count
= q
->count
;
455 qp
->random
= q
->random
;
456 qp
->q_time
= q
->q_time
;
457 qp
->heap_pos
= q
->heap_pos
;
458 qp
->sched_time
= q
->sched_time
;
464 char *cp_pipe_to_32_user(struct dn_pipe
*p
, struct dn_pipe_32
*pipe_bp
)
468 pipe_bp
->pipe_nr
= p
->pipe_nr
;
469 pipe_bp
->bandwidth
= p
->bandwidth
;
470 pipe_bp
->delay
= p
->delay
;
471 bcopy( &(p
->scheduler_heap
), &(pipe_bp
->scheduler_heap
), sizeof(struct dn_heap_32
));
472 pipe_bp
->scheduler_heap
.p
= CAST_DOWN_EXPLICIT(user32_addr_t
, pipe_bp
->scheduler_heap
.p
);
473 bcopy( &(p
->not_eligible_heap
), &(pipe_bp
->not_eligible_heap
), sizeof(struct dn_heap_32
));
474 pipe_bp
->not_eligible_heap
.p
= CAST_DOWN_EXPLICIT(user32_addr_t
, pipe_bp
->not_eligible_heap
.p
);
475 bcopy( &(p
->idle_heap
), &(pipe_bp
->idle_heap
), sizeof(struct dn_heap_32
));
476 pipe_bp
->idle_heap
.p
= CAST_DOWN_EXPLICIT(user32_addr_t
, pipe_bp
->idle_heap
.p
);
478 pipe_bp
->sum
= p
->sum
;
479 pipe_bp
->numbytes
= p
->numbytes
;
480 pipe_bp
->sched_time
= p
->sched_time
;
481 bcopy( p
->if_name
, pipe_bp
->if_name
, IFNAMSIZ
);
482 pipe_bp
->ifp
= CAST_DOWN_EXPLICIT(user32_addr_t
, p
->ifp
);
483 pipe_bp
->ready
= p
->ready
;
485 cp_flow_set_to_32_user( &(p
->fs
), &(pipe_bp
->fs
));
487 pipe_bp
->delay
= (pipe_bp
->delay
* 1000) / (hz
*10) ;
489 * XXX the following is a hack based on ->next being the
490 * first field in dn_pipe and dn_flow_set. The correct
491 * solution would be to move the dn_flow_set to the beginning
494 pipe_bp
->next
= CAST_DOWN_EXPLICIT( user32_addr_t
, DN_IS_PIPE
);
496 pipe_bp
->head
= pipe_bp
->tail
= (user32_addr_t
) 0 ;
497 pipe_bp
->fs
.next
= (user32_addr_t
)0 ;
498 pipe_bp
->fs
.pipe
= (user32_addr_t
)0 ;
499 pipe_bp
->fs
.rq
= (user32_addr_t
)0 ;
500 bp
= ((char *)pipe_bp
) + sizeof(struct dn_pipe_32
);
501 return( dn_copy_set_32( &(p
->fs
), bp
) );
505 char *cp_pipe_to_64_user(struct dn_pipe
*p
, struct dn_pipe_64
*pipe_bp
)
509 pipe_bp
->pipe_nr
= p
->pipe_nr
;
510 pipe_bp
->bandwidth
= p
->bandwidth
;
511 pipe_bp
->delay
= p
->delay
;
512 bcopy( &(p
->scheduler_heap
), &(pipe_bp
->scheduler_heap
), sizeof(struct dn_heap_64
));
513 pipe_bp
->scheduler_heap
.p
= CAST_DOWN(user64_addr_t
, pipe_bp
->scheduler_heap
.p
);
514 bcopy( &(p
->not_eligible_heap
), &(pipe_bp
->not_eligible_heap
), sizeof(struct dn_heap_64
));
515 pipe_bp
->not_eligible_heap
.p
= CAST_DOWN(user64_addr_t
, pipe_bp
->not_eligible_heap
.p
);
516 bcopy( &(p
->idle_heap
), &(pipe_bp
->idle_heap
), sizeof(struct dn_heap_64
));
517 pipe_bp
->idle_heap
.p
= CAST_DOWN(user64_addr_t
, pipe_bp
->idle_heap
.p
);
519 pipe_bp
->sum
= p
->sum
;
520 pipe_bp
->numbytes
= p
->numbytes
;
521 pipe_bp
->sched_time
= p
->sched_time
;
522 bcopy( p
->if_name
, pipe_bp
->if_name
, IFNAMSIZ
);
523 pipe_bp
->ifp
= CAST_DOWN(user64_addr_t
, p
->ifp
);
524 pipe_bp
->ready
= p
->ready
;
526 cp_flow_set_to_64_user( &(p
->fs
), &(pipe_bp
->fs
));
528 pipe_bp
->delay
= (pipe_bp
->delay
* 1000) / (hz
*10) ;
530 * XXX the following is a hack based on ->next being the
531 * first field in dn_pipe and dn_flow_set. The correct
532 * solution would be to move the dn_flow_set to the beginning
535 pipe_bp
->next
= CAST_DOWN( user64_addr_t
, DN_IS_PIPE
);
537 pipe_bp
->head
= pipe_bp
->tail
= USER_ADDR_NULL
;
538 pipe_bp
->fs
.next
= USER_ADDR_NULL
;
539 pipe_bp
->fs
.pipe
= USER_ADDR_NULL
;
540 pipe_bp
->fs
.rq
= USER_ADDR_NULL
;
541 bp
= ((char *)pipe_bp
) + sizeof(struct dn_pipe_64
);
542 return( dn_copy_set_64( &(p
->fs
), bp
) );
546 heap_init(struct dn_heap
*h
, int new_size
)
548 struct dn_heap_entry
*p
;
550 if (h
->size
>= new_size
) {
551 printf("dummynet: heap_init, Bogus call, have %d want %d\n",
555 new_size
= (new_size
+ HEAP_INCREMENT
) & ~HEAP_INCREMENT
;
556 p
= _MALLOC(new_size
* sizeof(*p
), M_DUMMYNET
, M_DONTWAIT
);
558 printf("dummynet: heap_init, resize %d failed\n", new_size
);
559 return 1 ; /* error */
562 bcopy(h
->p
, p
, h
->size
* sizeof(*p
) );
563 FREE(h
->p
, M_DUMMYNET
);
571 * Insert element in heap. Normally, p != NULL, we insert p in
572 * a new position and bubble up. If p == NULL, then the element is
573 * already in place, and key is the position where to start the
575 * Returns 1 on failure (cannot allocate new heap entry)
577 * If offset > 0 the position (index, int) of the element in the heap is
578 * also stored in the element itself at the given offset in bytes.
/*
 * If the heap keeps back-pointers (heap->offset > 0), record the node's
 * current index inside the object it stores, at the configured byte
 * offset, so heap_extract() can later remove it from the middle of the
 * heap.  Wrapped in do { } while (0) so each macro expands to exactly
 * one statement: the original bare `if' was a dangling-else hazard when
 * used in an unbraced if/else body (CERT PRE10-C).
 */
#define SET_OFFSET(heap, node) do { \
	if ((heap)->offset > 0) \
		*((int *)((char *)((heap)->p[node].object) + (heap)->offset)) = (node); \
} while (0)
/*
 * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value.
 */
#define RESET_OFFSET(heap, node) do { \
	if ((heap)->offset > 0) \
		*((int *)((char *)((heap)->p[node].object) + (heap)->offset)) = -1; \
} while (0)
590 heap_insert(struct dn_heap
*h
, dn_key key1
, void *p
)
592 int son
= h
->elements
;
594 if (p
== NULL
) /* data already there, set starting point */
596 else { /* insert new element at the end, possibly resize */
598 if (son
== h
->size
) /* need resize... */
599 if (heap_init(h
, h
->elements
+1) )
600 return 1 ; /* failure... */
601 h
->p
[son
].object
= p
;
602 h
->p
[son
].key
= key1
;
605 while (son
> 0) { /* bubble up */
606 int father
= HEAP_FATHER(son
) ;
607 struct dn_heap_entry tmp
;
609 if (DN_KEY_LT( h
->p
[father
].key
, h
->p
[son
].key
) )
610 break ; /* found right position */
611 /* son smaller than father, swap and repeat */
612 HEAP_SWAP(h
->p
[son
], h
->p
[father
], tmp
) ;
621 * remove top element from heap, or obj if obj != NULL
624 heap_extract(struct dn_heap
*h
, void *obj
)
626 int child
, father
, maxelt
= h
->elements
- 1 ;
629 printf("dummynet: warning, extract from empty heap 0x%llx\n",
630 (uint64_t)VM_KERNEL_ADDRPERM(h
));
633 father
= 0 ; /* default: move up smallest child */
634 if (obj
!= NULL
) { /* extract specific element, index is at offset */
636 panic("dummynet: heap_extract from middle not supported on this heap!!!\n");
637 father
= *((int *)((char *)obj
+ h
->offset
)) ;
638 if (father
< 0 || father
>= h
->elements
) {
639 printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
640 father
, h
->elements
);
641 panic("dummynet: heap_extract");
644 RESET_OFFSET(h
, father
);
645 child
= HEAP_LEFT(father
) ; /* left child */
646 while (child
<= maxelt
) { /* valid entry */
647 if (child
!= maxelt
&& DN_KEY_LT(h
->p
[child
+1].key
, h
->p
[child
].key
) )
648 child
= child
+1 ; /* take right child, otherwise left */
649 h
->p
[father
] = h
->p
[child
] ;
650 SET_OFFSET(h
, father
);
652 child
= HEAP_LEFT(child
) ; /* left child for next loop */
655 if (father
!= maxelt
) {
657 * Fill hole with last entry and bubble up, reusing the insert code
659 h
->p
[father
] = h
->p
[maxelt
] ;
660 heap_insert(h
, father
, NULL
); /* this one cannot fail */
665 * heapify() will reorganize data inside an array to maintain the
666 * heap property. It is needed when we delete a bunch of entries.
669 heapify(struct dn_heap
*h
)
673 for (i
= 0 ; i
< h
->elements
; i
++ )
674 heap_insert(h
, i
, NULL
) ;
678 * cleanup the heap and free data structure
681 heap_free(struct dn_heap
*h
)
684 FREE(h
->p
, M_DUMMYNET
);
685 bzero(h
, sizeof(*h
));
689 * --- end of heap management functions ---
693 * Return the mbuf tag holding the dummynet state. As an optimization
694 * this is assumed to be the first tag on the list. If this turns out
695 * wrong we'll need to search the list.
697 static struct dn_pkt_tag
*
698 dn_tag_get(struct mbuf
*m
)
700 struct m_tag
*mtag
= m_tag_first(m
);
702 if (!(mtag
!= NULL
&&
703 mtag
->m_tag_id
== KERNEL_MODULE_TAG_ID
&&
704 mtag
->m_tag_type
== KERNEL_TAG_TYPE_DUMMYNET
))
705 panic("packet on dummynet queue w/o dummynet tag: 0x%llx",
706 (uint64_t)VM_KERNEL_ADDRPERM(m
));
708 return (struct dn_pkt_tag
*)(mtag
+1);
712 * Scheduler functions:
714 * transmit_event() is called when the delay-line needs to enter
715 * the scheduler, either because of existing pkts getting ready,
716 * or new packets entering the queue. The event handled is the delivery
717 * time of the packet.
719 * ready_event() does something similar with fixed-rate queues, and the
720 * event handled is the finish time of the head pkt.
722 * wfq_ready_event() does something similar with WF2Q queues, and the
723 * event handled is the start time of the head pkt.
725 * In all cases, we make sure that the data structures are consistent
726 * before passing pkts out, because this might trigger recursive
727 * invocations of the procedures.
730 transmit_event(struct dn_pipe
*pipe
, struct mbuf
**head
, struct mbuf
**tail
)
733 struct dn_pkt_tag
*pkt
= NULL
;
734 u_int64_t schedule_time
;
736 LCK_MTX_ASSERT(dn_mutex
, LCK_MTX_ASSERT_OWNED
);
737 ASSERT(serialize
>= 0);
738 if (serialize
== 0) {
739 while ((m
= pipe
->head
) != NULL
) {
741 if (!DN_KEY_LEQ(pkt
->dn_output_time
, curr_time
))
744 pipe
->head
= m
->m_nextpkt
;
746 (*tail
)->m_nextpkt
= m
;
753 (*tail
)->m_nextpkt
= NULL
;
756 schedule_time
= pkt
== NULL
|| DN_KEY_LEQ(pkt
->dn_output_time
, curr_time
) ?
757 curr_time
+ 1 : pkt
->dn_output_time
;
759 /* if there are leftover packets, put the pipe into the heap for next ready event */
760 if ((m
= pipe
->head
) != NULL
) {
762 /* XXX should check errors on heap_insert, by draining the
763 * whole pipe p and hoping in the future we are more successful
765 heap_insert(&extract_heap
, schedule_time
, pipe
);
770 * the following macro computes how many ticks we have to wait
771 * before being able to transmit a packet. The credit is taken from
772 * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
775 /* hz is 100, which gives a granularity of 10ms in the old timer.
776 * The timer has been changed to fire every 1ms, so the use of
777 * hz has been modified here. All instances of hz have been left
778 * in place but adjusted by a factor of 10 so that hz is functionally
781 #define SET_TICKS(_m, q, p) \
782 ((_m)->m_pkthdr.len*8*(hz*10) - (q)->numbytes + p->bandwidth - 1 ) / \
786 * extract pkt from queue, compute output time (could be now)
787 * and put into delay line (p_queue)
790 move_pkt(struct mbuf
*pkt
, struct dn_flow_queue
*q
,
791 struct dn_pipe
*p
, int len
)
793 struct dn_pkt_tag
*dt
= dn_tag_get(pkt
);
795 q
->head
= pkt
->m_nextpkt
;
797 q
->len_bytes
-= len
;
799 dt
->dn_output_time
= curr_time
+ p
->delay
;
804 p
->tail
->m_nextpkt
= pkt
;
806 p
->tail
->m_nextpkt
= NULL
;
810 * ready_event() is invoked every time the queue must enter the
811 * scheduler, either because the first packet arrives, or because
812 * a previously scheduled event fired.
813 * On invokation, drain as many pkts as possible (could be 0) and then
814 * if there are leftover packets reinsert the pkt in the scheduler.
817 ready_event(struct dn_flow_queue
*q
, struct mbuf
**head
, struct mbuf
**tail
)
820 struct dn_pipe
*p
= q
->fs
->pipe
;
823 LCK_MTX_ASSERT(dn_mutex
, LCK_MTX_ASSERT_OWNED
);
826 printf("dummynet: ready_event pipe is gone\n");
829 p_was_empty
= (p
->head
== NULL
) ;
832 * schedule fixed-rate queues linked to this pipe:
833 * Account for the bw accumulated since last scheduling, then
834 * drain as many pkts as allowed by q->numbytes and move to
835 * the delay line (in p) computing output time.
836 * bandwidth==0 (no limit) means we can drain the whole queue,
837 * setting len_scaled = 0 does the job.
839 q
->numbytes
+= ( curr_time
- q
->sched_time
) * p
->bandwidth
;
840 while ( (pkt
= q
->head
) != NULL
) {
841 int len
= pkt
->m_pkthdr
.len
;
842 int len_scaled
= p
->bandwidth
? len
*8*(hz
*10) : 0 ;
843 if (len_scaled
> q
->numbytes
)
845 q
->numbytes
-= len_scaled
;
846 move_pkt(pkt
, q
, p
, len
);
849 * If we have more packets queued, schedule next ready event
850 * (can only occur when bandwidth != 0, otherwise we would have
851 * flushed the whole queue in the previous loop).
852 * To this purpose we record the current time and compute how many
853 * ticks to go for the finish time of the packet.
855 if ( (pkt
= q
->head
) != NULL
) { /* this implies bandwidth != 0 */
856 dn_key t
= SET_TICKS(pkt
, q
, p
); /* ticks i have to wait */
857 q
->sched_time
= curr_time
;
858 heap_insert(&ready_heap
, curr_time
+ t
, (void *)q
);
859 /* XXX should check errors on heap_insert, and drain the whole
860 * queue on error hoping next time we are luckier.
862 } else { /* RED needs to know when the queue becomes empty */
863 q
->q_time
= curr_time
;
867 * If the delay line was empty call transmit_event(p) now.
868 * Otherwise, the scheduler will take care of it.
871 transmit_event(p
, head
, tail
);
875 * Called when we can transmit packets on WF2Q queues. Take pkts out of
876 * the queues at their start time, and enqueue into the delay line.
877 * Packets are drained until p->numbytes < 0. As long as
878 * len_scaled >= p->numbytes, the packet goes into the delay line
879 * with a deadline p->delay. For the last packet, if p->numbytes<0,
880 * there is an additional delay.
883 ready_event_wfq(struct dn_pipe
*p
, struct mbuf
**head
, struct mbuf
**tail
)
885 int p_was_empty
= (p
->head
== NULL
) ;
886 struct dn_heap
*sch
= &(p
->scheduler_heap
);
887 struct dn_heap
*neh
= &(p
->not_eligible_heap
) ;
888 int64_t p_numbytes
= p
->numbytes
;
890 LCK_MTX_ASSERT(dn_mutex
, LCK_MTX_ASSERT_OWNED
);
892 if (p
->if_name
[0] == 0) /* tx clock is simulated */
893 p_numbytes
+= ( curr_time
- p
->sched_time
) * p
->bandwidth
;
894 else { /* tx clock is for real, the ifq must be empty or this is a NOP */
895 if (p
->ifp
&& !IFCQ_IS_EMPTY(&p
->ifp
->if_snd
))
898 DPRINTF(("dummynet: pipe %d ready from %s --\n",
899 p
->pipe_nr
, p
->if_name
));
904 * While we have backlogged traffic AND credit, we need to do
905 * something on the queue.
907 while ( p_numbytes
>=0 && (sch
->elements
>0 || neh
->elements
>0) ) {
908 if (sch
->elements
> 0) { /* have some eligible pkts to send out */
909 struct dn_flow_queue
*q
= sch
->p
[0].object
;
910 struct mbuf
*pkt
= q
->head
;
911 struct dn_flow_set
*fs
= q
->fs
;
912 u_int64_t len
= pkt
->m_pkthdr
.len
;
913 int len_scaled
= p
->bandwidth
? len
*8*(hz
*10) : 0 ;
915 heap_extract(sch
, NULL
); /* remove queue from heap */
916 p_numbytes
-= len_scaled
;
917 move_pkt(pkt
, q
, p
, len
);
919 p
->V
+= (len
<<MY_M
) / p
->sum
; /* update V */
920 q
->S
= q
->F
; /* update start time */
921 if (q
->len
== 0) { /* Flow not backlogged any more */
923 heap_insert(&(p
->idle_heap
), q
->F
, q
);
924 } else { /* still backlogged */
926 * update F and position in backlogged queue, then
927 * put flow in not_eligible_heap (we will fix this later).
929 len
= (q
->head
)->m_pkthdr
.len
;
930 q
->F
+= (len
<<MY_M
)/(u_int64_t
) fs
->weight
;
931 if (DN_KEY_LEQ(q
->S
, p
->V
))
932 heap_insert(neh
, q
->S
, q
);
934 heap_insert(sch
, q
->F
, q
);
938 * now compute V = max(V, min(S_i)). Remember that all elements in sch
939 * have by definition S_i <= V so if sch is not empty, V is surely
940 * the max and we must not update it. Conversely, if sch is empty
941 * we only need to look at neh.
943 if (sch
->elements
== 0 && neh
->elements
> 0)
944 p
->V
= MAX64 ( p
->V
, neh
->p
[0].key
);
945 /* move from neh to sch any packets that have become eligible */
946 while (neh
->elements
> 0 && DN_KEY_LEQ(neh
->p
[0].key
, p
->V
) ) {
947 struct dn_flow_queue
*q
= neh
->p
[0].object
;
948 heap_extract(neh
, NULL
);
949 heap_insert(sch
, q
->F
, q
);
952 if (p
->if_name
[0] != '\0') {/* tx clock is from a real thing */
953 p_numbytes
= -1 ; /* mark not ready for I/O */
957 if (sch
->elements
== 0 && neh
->elements
== 0 && p_numbytes
>= 0
958 && p
->idle_heap
.elements
> 0) {
960 * no traffic and no events scheduled. We can get rid of idle-heap.
964 for (i
= 0 ; i
< p
->idle_heap
.elements
; i
++) {
965 struct dn_flow_queue
*q
= p
->idle_heap
.p
[i
].object
;
972 p
->idle_heap
.elements
= 0 ;
975 * If we are getting clocks from dummynet (not a real interface) and
976 * If we are under credit, schedule the next ready event.
977 * Also fix the delivery time of the last packet.
979 if (p
->if_name
[0]==0 && p_numbytes
< 0) { /* this implies bandwidth >0 */
980 dn_key t
=0 ; /* number of ticks i have to wait */
982 if (p
->bandwidth
> 0)
983 t
= ( p
->bandwidth
-1 - p_numbytes
) / p
->bandwidth
;
984 dn_tag_get(p
->tail
)->dn_output_time
+= t
;
985 p
->sched_time
= curr_time
;
986 heap_insert(&wfq_ready_heap
, curr_time
+ t
, (void *)p
);
987 /* XXX should check errors on heap_insert, and drain the whole
988 * queue on error hoping next time we are luckier.
992 /* Fit (adjust if necessary) 64bit result into 32bit variable. */
993 if (p_numbytes
> INT_MAX
)
994 p
->numbytes
= INT_MAX
;
995 else if (p_numbytes
< INT_MIN
)
996 p
->numbytes
= INT_MIN
;
998 p
->numbytes
= p_numbytes
;
1001 * If the delay line was empty call transmit_event(p) now.
1002 * Otherwise, the scheduler will take care of it.
1005 transmit_event(p
, head
, tail
);
1010 * This is called every 1ms. It is used to
1011 * increment the current tick counter and schedule expired events.
1014 dummynet(__unused
void * unused
)
1016 void *p
; /* generic parameter to handler */
1018 struct dn_heap
*heaps
[3];
1019 struct mbuf
*head
= NULL
, *tail
= NULL
;
1021 struct dn_pipe
*pe
;
1025 heaps
[0] = &ready_heap
; /* fixed-rate queues */
1026 heaps
[1] = &wfq_ready_heap
; /* wfq queues */
1027 heaps
[2] = &extract_heap
; /* delay line */
1029 lck_mtx_lock(dn_mutex
);
1031 /* make all time measurements in milliseconds (ms) -
1032 * here we convert secs and usecs to msecs (just divide the
1033 * usecs and take the closest whole number).
1036 curr_time
= (tv
.tv_sec
* 1000) + (tv
.tv_usec
/ 1000);
1038 for (i
=0; i
< 3 ; i
++) {
1040 while (h
->elements
> 0 && DN_KEY_LEQ(h
->p
[0].key
, curr_time
) ) {
1041 if (h
->p
[0].key
> curr_time
)
1042 printf("dummynet: warning, heap %d is %d ticks late\n",
1043 i
, (int)(curr_time
- h
->p
[0].key
));
1044 p
= h
->p
[0].object
; /* store a copy before heap_extract */
1045 heap_extract(h
, NULL
); /* need to extract before processing */
1047 ready_event(p
, &head
, &tail
) ;
1049 struct dn_pipe
*pipe
= p
;
1050 if (pipe
->if_name
[0] != '\0')
1051 printf("dummynet: bad ready_event_wfq for pipe %s\n",
1054 ready_event_wfq(p
, &head
, &tail
) ;
1056 transmit_event(p
, &head
, &tail
);
1060 /* sweep pipes trying to expire idle flow_queues */
1061 for (i
= 0; i
< HASHSIZE
; i
++)
1062 SLIST_FOREACH(pe
, &pipehash
[i
], next
)
1063 if (pe
->idle_heap
.elements
> 0 &&
1064 DN_KEY_LT(pe
->idle_heap
.p
[0].key
, pe
->V
) ) {
1065 struct dn_flow_queue
*q
= pe
->idle_heap
.p
[0].object
;
1067 heap_extract(&(pe
->idle_heap
), NULL
);
1068 q
->S
= q
->F
+ 1 ; /* mark timestamp as invalid */
1069 pe
->sum
-= q
->fs
->weight
;
1072 /* check the heaps to see if there's still stuff in there, and
1073 * only set the timer if there are packets to process
1076 for (i
=0; i
< 3 ; i
++) {
1078 if (h
->elements
> 0) { // set the timer
1080 ts
.tv_nsec
= 1 * 1000000; // 1ms
1082 bsd_timeout(dummynet
, NULL
, &ts
);
1090 lck_mtx_unlock(dn_mutex
);
1092 /* Send out the de-queued list of ready-to-send packets */
1094 dummynet_send(head
);
1095 lck_mtx_lock(dn_mutex
);
1097 lck_mtx_unlock(dn_mutex
);
1103 dummynet_send(struct mbuf
*m
)
1105 struct dn_pkt_tag
*pkt
;
1108 for (; m
!= NULL
; m
= n
) {
1110 m
->m_nextpkt
= NULL
;
1111 pkt
= dn_tag_get(m
);
1113 DPRINTF(("dummynet_send m: 0x%llx dn_dir: %d dn_flags: 0x%x\n",
1114 (uint64_t)VM_KERNEL_ADDRPERM(m
), pkt
->dn_dir
,
1117 switch (pkt
->dn_dir
) {
1118 case DN_TO_IP_OUT
: {
1119 struct route tmp_rt
;
1121 /* route is already in the packet's dn_ro */
1122 bzero(&tmp_rt
, sizeof (tmp_rt
));
1124 /* Force IP_RAWOUTPUT as the IP header is fully formed */
1125 pkt
->dn_flags
|= IP_RAWOUTPUT
| IP_FORWARDING
;
1126 (void)ip_output(m
, NULL
, &tmp_rt
, pkt
->dn_flags
, NULL
, NULL
);
1127 ROUTE_RELEASE(&tmp_rt
);
1131 proto_inject(PF_INET
, m
);
1134 case DN_TO_IP6_OUT
: {
1135 /* routes already in the packet's dn_{ro6,pmtu} */
1136 ip6_output(m
, NULL
, NULL
, IPV6_FORWARDING
, NULL
, NULL
, NULL
);
1140 proto_inject(PF_INET6
, m
);
1144 printf("dummynet: bad switch %d!\n", pkt
->dn_dir
);
1152 * Unconditionally expire empty queues in case of shortage.
1153 * Returns the number of queues freed.
1156 expire_queues(struct dn_flow_set
*fs
)
1158 struct dn_flow_queue
*q
, *prev
;
1159 int i
, initial_elements
= fs
->rq_elements
;
1160 struct timeval timenow
;
1162 /* reviewed for getmicrotime usage */
1163 getmicrotime(&timenow
);
1165 if (fs
->last_expired
== timenow
.tv_sec
)
1167 fs
->last_expired
= timenow
.tv_sec
;
1168 for (i
= 0 ; i
<= fs
->rq_size
; i
++) /* last one is overflow */
1169 for (prev
=NULL
, q
= fs
->rq
[i
] ; q
!= NULL
; )
1170 if (q
->head
!= NULL
|| q
->S
!= q
->F
+1) {
1173 } else { /* entry is idle, expire it */
1174 struct dn_flow_queue
*old_q
= q
;
1177 prev
->next
= q
= q
->next
;
1179 fs
->rq
[i
] = q
= q
->next
;
1181 FREE(old_q
, M_DUMMYNET
);
1183 return initial_elements
- fs
->rq_elements
;
1187 * If room, create a new queue and put at head of slot i;
1188 * otherwise, create or use the default queue.
1190 static struct dn_flow_queue
*
1191 create_queue(struct dn_flow_set
*fs
, int i
)
1193 struct dn_flow_queue
*q
;
1195 if (fs
->rq_elements
> fs
->rq_size
* dn_max_ratio
&&
1196 expire_queues(fs
) == 0) {
1198 * No way to get room, use or create overflow queue.
1201 if ( fs
->rq
[i
] != NULL
)
1204 q
= _MALLOC(sizeof(*q
), M_DUMMYNET
, M_DONTWAIT
| M_ZERO
);
1206 printf("dummynet: sorry, cannot allocate queue for new flow\n");
1211 q
->next
= fs
->rq
[i
] ;
1212 q
->S
= q
->F
+ 1; /* hack - mark timestamp as invalid */
1219 * Given a flow_set and a pkt in last_pkt, find a matching queue
1220 * after appropriate masking. The queue is moved to front
1221 * so that further searches take less time.
1223 static struct dn_flow_queue
*
1224 find_queue(struct dn_flow_set
*fs
, struct ip_flow_id
*id
)
1226 int i
= 0 ; /* we need i and q for new allocations */
1227 struct dn_flow_queue
*q
, *prev
;
1228 int is_v6
= IS_IP6_FLOW_ID(id
);
1230 if ( !(fs
->flags_fs
& DN_HAVE_FLOW_MASK
) )
1233 /* first, do the masking, then hash */
1234 id
->dst_port
&= fs
->flow_mask
.dst_port
;
1235 id
->src_port
&= fs
->flow_mask
.src_port
;
1236 id
->proto
&= fs
->flow_mask
.proto
;
1237 id
->flags
= 0 ; /* we don't care about this one */
1239 APPLY_MASK(&id
->dst_ip6
, &fs
->flow_mask
.dst_ip6
);
1240 APPLY_MASK(&id
->src_ip6
, &fs
->flow_mask
.src_ip6
);
1241 id
->flow_id6
&= fs
->flow_mask
.flow_id6
;
1243 i
= ((id
->dst_ip6
.__u6_addr
.__u6_addr32
[0]) & 0xffff)^
1244 ((id
->dst_ip6
.__u6_addr
.__u6_addr32
[1]) & 0xffff)^
1245 ((id
->dst_ip6
.__u6_addr
.__u6_addr32
[2]) & 0xffff)^
1246 ((id
->dst_ip6
.__u6_addr
.__u6_addr32
[3]) & 0xffff)^
1248 ((id
->dst_ip6
.__u6_addr
.__u6_addr32
[0] >> 15) & 0xffff)^
1249 ((id
->dst_ip6
.__u6_addr
.__u6_addr32
[1] >> 15) & 0xffff)^
1250 ((id
->dst_ip6
.__u6_addr
.__u6_addr32
[2] >> 15) & 0xffff)^
1251 ((id
->dst_ip6
.__u6_addr
.__u6_addr32
[3] >> 15) & 0xffff)^
1253 ((id
->src_ip6
.__u6_addr
.__u6_addr32
[0] << 1) & 0xfffff)^
1254 ((id
->src_ip6
.__u6_addr
.__u6_addr32
[1] << 1) & 0xfffff)^
1255 ((id
->src_ip6
.__u6_addr
.__u6_addr32
[2] << 1) & 0xfffff)^
1256 ((id
->src_ip6
.__u6_addr
.__u6_addr32
[3] << 1) & 0xfffff)^
1258 ((id
->src_ip6
.__u6_addr
.__u6_addr32
[0] >> 16) & 0xffff)^
1259 ((id
->src_ip6
.__u6_addr
.__u6_addr32
[1] >> 16) & 0xffff)^
1260 ((id
->src_ip6
.__u6_addr
.__u6_addr32
[2] >> 16) & 0xffff)^
1261 ((id
->src_ip6
.__u6_addr
.__u6_addr32
[3] >> 16) & 0xffff)^
1263 (id
->dst_port
<< 1) ^ (id
->src_port
) ^
1267 id
->dst_ip
&= fs
->flow_mask
.dst_ip
;
1268 id
->src_ip
&= fs
->flow_mask
.src_ip
;
1270 i
= ( (id
->dst_ip
) & 0xffff ) ^
1271 ( (id
->dst_ip
>> 15) & 0xffff ) ^
1272 ( (id
->src_ip
<< 1) & 0xffff ) ^
1273 ( (id
->src_ip
>> 16 ) & 0xffff ) ^
1274 (id
->dst_port
<< 1) ^ (id
->src_port
) ^
1277 i
= i
% fs
->rq_size
;
1278 /* finally, scan the current list for a match */
1280 for (prev
=NULL
, q
= fs
->rq
[i
] ; q
; ) {
1283 IN6_ARE_ADDR_EQUAL(&id
->dst_ip6
,&q
->id
.dst_ip6
) &&
1284 IN6_ARE_ADDR_EQUAL(&id
->src_ip6
,&q
->id
.src_ip6
) &&
1285 id
->dst_port
== q
->id
.dst_port
&&
1286 id
->src_port
== q
->id
.src_port
&&
1287 id
->proto
== q
->id
.proto
&&
1288 id
->flags
== q
->id
.flags
&&
1289 id
->flow_id6
== q
->id
.flow_id6
)
1292 if (!is_v6
&& id
->dst_ip
== q
->id
.dst_ip
&&
1293 id
->src_ip
== q
->id
.src_ip
&&
1294 id
->dst_port
== q
->id
.dst_port
&&
1295 id
->src_port
== q
->id
.src_port
&&
1296 id
->proto
== q
->id
.proto
&&
1297 id
->flags
== q
->id
.flags
)
1300 /* No match. Check if we can expire the entry */
1301 if (pipe_expire
&& q
->head
== NULL
&& q
->S
== q
->F
+1 ) {
1302 /* entry is idle and not in any heap, expire it */
1303 struct dn_flow_queue
*old_q
= q
;
1306 prev
->next
= q
= q
->next
;
1308 fs
->rq
[i
] = q
= q
->next
;
1310 FREE(old_q
, M_DUMMYNET
);
1316 if (q
&& prev
!= NULL
) { /* found and not in front */
1317 prev
->next
= q
->next
;
1318 q
->next
= fs
->rq
[i
] ;
1322 if (q
== NULL
) { /* no match, need to allocate a new entry */
1323 q
= create_queue(fs
, i
);
1331 red_drops(struct dn_flow_set
*fs
, struct dn_flow_queue
*q
, int len
)
1336 * RED calculates the average queue size (avg) using a low-pass filter
1337 * with an exponential weighted (w_q) moving average:
1338 * avg <- (1-w_q) * avg + w_q * q_size
1339 * where q_size is the queue length (measured in bytes or * packets).
1341 * If q_size == 0, we compute the idle time for the link, and set
1342 * avg = (1 - w_q)^(idle/s)
1343 * where s is the time needed for transmitting a medium-sized packet.
1345 * Now, if avg < min_th the packet is enqueued.
1346 * If avg > max_th the packet is dropped. Otherwise, the packet is
1347 * dropped with probability P function of avg.
1352 /* queue in bytes or packets ? */
1353 u_int q_size
= (fs
->flags_fs
& DN_QSIZE_IS_BYTES
) ? q
->len_bytes
: q
->len
;
1355 DPRINTF(("\ndummynet: %d q: %2u ", (int) curr_time
, q_size
));
1357 /* average queue size estimation */
1360 * queue is not empty, avg <- avg + (q_size - avg) * w_q
1362 int diff
= SCALE(q_size
) - q
->avg
;
1363 int64_t v
= SCALE_MUL((int64_t) diff
, (int64_t) fs
->w_q
);
1368 * queue is empty, find for how long the queue has been
1369 * empty and use a lookup table for computing
1370 * (1 - * w_q)^(idle_time/s) where s is the time to send a
1372 * XXX check wraps...
1375 u_int t
= (curr_time
- q
->q_time
) / fs
->lookup_step
;
1377 q
->avg
= (t
< fs
->lookup_depth
) ?
1378 SCALE_MUL(q
->avg
, fs
->w_q_lookup
[t
]) : 0;
1381 DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q
->avg
)));
1383 /* should i drop ? */
1385 if (q
->avg
< fs
->min_th
) {
1387 return 0; /* accept packet ; */
1389 if (q
->avg
>= fs
->max_th
) { /* average queue >= max threshold */
1390 if (fs
->flags_fs
& DN_IS_GENTLE_RED
) {
1392 * According to Gentle-RED, if avg is greater than max_th the
1393 * packet is dropped with a probability
1394 * p_b = c_3 * avg - c_4
1395 * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p
1397 p_b
= SCALE_MUL((int64_t) fs
->c_3
, (int64_t) q
->avg
) - fs
->c_4
;
1400 DPRINTF(("dummynet: - drop"));
1403 } else if (q
->avg
> fs
->min_th
) {
1405 * we compute p_b using the linear dropping function p_b = c_1 *
1406 * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 =
1407 * max_p * min_th / (max_th - min_th)
1409 p_b
= SCALE_MUL((int64_t) fs
->c_1
, (int64_t) q
->avg
) - fs
->c_2
;
1411 if (fs
->flags_fs
& DN_QSIZE_IS_BYTES
)
1412 p_b
= (p_b
* len
) / fs
->max_pkt_size
;
1413 if (++q
->count
== 0)
1414 q
->random
= (my_random() & 0xffff);
1417 * q->count counts packets arrived since last drop, so a greater
1418 * value of q->count means a greater packet drop probability.
1420 if (SCALE_MUL(p_b
, SCALE((int64_t) q
->count
)) > q
->random
) {
1422 DPRINTF(("dummynet: - red drop"));
1423 /* after a drop we calculate a new random value */
1424 q
->random
= (my_random() & 0xffff);
1425 return 1; /* drop */
1428 /* end of RED algorithm */
1429 return 0 ; /* accept */
1433 struct dn_flow_set
*
1434 locate_flowset(int fs_nr
)
1436 struct dn_flow_set
*fs
;
1437 SLIST_FOREACH(fs
, &flowsethash
[HASH(fs_nr
)], next
)
1438 if (fs
->fs_nr
== fs_nr
)
1444 static __inline
struct dn_pipe
*
1445 locate_pipe(int pipe_nr
)
1447 struct dn_pipe
*pipe
;
1449 SLIST_FOREACH(pipe
, &pipehash
[HASH(pipe_nr
)], next
)
1450 if (pipe
->pipe_nr
== pipe_nr
)
1459 * dummynet hook for packets. Below 'pipe' is a pipe or a queue
1460 * depending on whether WF2Q or fixed bw is used.
1462 * pipe_nr pipe or queue the packet is destined for.
1463 * dir where shall we send the packet after dummynet.
1464 * m the mbuf with the packet
1465 * ifp the 'ifp' parameter from the caller.
1466 * NULL in ip_input, destination interface in ip_output,
1467 * real_dst in bdg_forward
1468 * ro route parameter (only used in ip_output, NULL otherwise)
1469 * dst destination address, only used by ip_output
1470 * rule matching rule, in case of multiple passes
1471 * flags flags from the caller, only used in ip_output
1475 dummynet_io(struct mbuf
*m
, int pipe_nr
, int dir
, struct ip_fw_args
*fwa
, int client
)
1477 struct mbuf
*head
= NULL
, *tail
= NULL
;
1478 struct dn_pkt_tag
*pkt
;
1480 struct dn_flow_set
*fs
= NULL
;
1481 struct dn_pipe
*pipe
;
1482 u_int64_t len
= m
->m_pkthdr
.len
;
1483 struct dn_flow_queue
*q
= NULL
;
1488 DPRINTF(("dummynet_io m: 0x%llx pipe: %d dir: %d client: %d\n",
1489 (uint64_t)VM_KERNEL_ADDRPERM(m
), pipe_nr
, dir
, client
));
1493 if (client
== DN_CLIENT_IPFW
) {
1494 ipfw_insn
*cmd
= fwa
->fwa_ipfw_rule
->cmd
+ fwa
->fwa_ipfw_rule
->act_ofs
;
1496 if (cmd
->opcode
== O_LOG
)
1498 is_pipe
= (cmd
->opcode
== O_PIPE
);
1501 if (client
== DN_CLIENT_IPFW
)
1502 is_pipe
= (fwa
->fwa_ipfw_rule
->fw_flg
& IP_FW_F_COMMAND
) == IP_FW_F_PIPE
;
1504 #endif /* IPFIREWALL */
1507 if (client
== DN_CLIENT_PF
)
1508 is_pipe
= fwa
->fwa_flags
== DN_IS_PIPE
? 1 : 0;
1509 #endif /* DUMMYNET */
1513 lck_mtx_lock(dn_mutex
);
1515 /* make all time measurements in milliseconds (ms) -
1516 * here we convert secs and usecs to msecs (just divide the
1517 * usecs and take the closest whole number).
1520 curr_time
= (tv
.tv_sec
* 1000) + (tv
.tv_usec
/ 1000);
1523 * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
1526 pipe
= locate_pipe(pipe_nr
);
1530 fs
= locate_flowset(pipe_nr
);
1534 goto dropit
; /* this queue/pipe does not exist! */
1537 if (pipe
== NULL
) { /* must be a queue, try find a matching pipe */
1538 pipe
= locate_pipe(fs
->parent_nr
);
1543 printf("dummynet: no pipe %d for queue %d, drop pkt\n",
1544 fs
->parent_nr
, fs
->fs_nr
);
1548 q
= find_queue(fs
, &(fwa
->fwa_id
));
1550 goto dropit
; /* cannot allocate queue */
1552 * update statistics, then check reasons to drop pkt
1554 q
->tot_bytes
+= len
;
1556 if ( fs
->plr
&& (my_random() < fs
->plr
))
1557 goto dropit
; /* random pkt drop */
1558 if ( fs
->flags_fs
& DN_QSIZE_IS_BYTES
) {
1559 if (q
->len_bytes
> fs
->qsize
)
1560 goto dropit
; /* queue size overflow */
1562 if (q
->len
>= fs
->qsize
)
1563 goto dropit
; /* queue count overflow */
1565 if ( fs
->flags_fs
& DN_IS_RED
&& red_drops(fs
, q
, len
) )
1568 /* XXX expensive to zero, see if we can remove it*/
1569 mtag
= m_tag_create(KERNEL_MODULE_TAG_ID
, KERNEL_TAG_TYPE_DUMMYNET
,
1570 sizeof(struct dn_pkt_tag
), M_NOWAIT
, m
);
1572 goto dropit
; /* cannot allocate packet header */
1573 m_tag_prepend(m
, mtag
); /* attach to mbuf chain */
1575 pkt
= (struct dn_pkt_tag
*)(mtag
+1);
1576 bzero(pkt
, sizeof(struct dn_pkt_tag
));
1577 /* ok, i can handle the pkt now... */
1578 /* build and enqueue packet + parameters */
1580 * PF is checked before ipfw so remember ipfw rule only when
1581 * the caller is ipfw. When the caller is PF, fwa_ipfw_rule
1582 * is a fake rule just used for convenience
1584 if (client
== DN_CLIENT_IPFW
)
1585 pkt
->dn_ipfw_rule
= fwa
->fwa_ipfw_rule
;
1586 pkt
->dn_pf_rule
= fwa
->fwa_pf_rule
;
1588 pkt
->dn_client
= client
;
1590 pkt
->dn_ifp
= fwa
->fwa_oif
;
1591 if (dir
== DN_TO_IP_OUT
) {
1593 * We need to copy *ro because for ICMP pkts (and maybe others)
1594 * the caller passed a pointer into the stack; dst might also be
1595 * a pointer into *ro so it needs to be updated.
1598 route_copyout(&pkt
->dn_ro
, fwa
->fwa_ro
, sizeof (pkt
->dn_ro
));
1601 if (fwa
->fwa_dst
== (struct sockaddr_in
*)&fwa
->fwa_ro
->ro_dst
) /* dst points into ro */
1602 fwa
->fwa_dst
= (struct sockaddr_in
*)&(pkt
->dn_ro
.ro_dst
) ;
1604 bcopy (fwa
->fwa_dst
, &pkt
->dn_dst
, sizeof(pkt
->dn_dst
));
1606 } else if (dir
== DN_TO_IP6_OUT
) {
1608 route_copyout((struct route
*)&pkt
->dn_ro6
,
1609 (struct route
*)fwa
->fwa_ro6
, sizeof (pkt
->dn_ro6
));
1611 if (fwa
->fwa_ro6_pmtu
) {
1612 route_copyout((struct route
*)&pkt
->dn_ro6_pmtu
,
1613 (struct route
*)fwa
->fwa_ro6_pmtu
, sizeof (pkt
->dn_ro6_pmtu
));
1615 if (fwa
->fwa_dst6
) {
1616 if (fwa
->fwa_dst6
== (struct sockaddr_in6
*)&fwa
->fwa_ro6
->ro_dst
) /* dst points into ro */
1617 fwa
->fwa_dst6
= (struct sockaddr_in6
*)&(pkt
->dn_ro6
.ro_dst
) ;
1619 bcopy (fwa
->fwa_dst6
, &pkt
->dn_dst6
, sizeof(pkt
->dn_dst6
));
1621 pkt
->dn_origifp
= fwa
->fwa_origifp
;
1622 pkt
->dn_mtu
= fwa
->fwa_mtu
;
1623 pkt
->dn_unfragpartlen
= fwa
->fwa_unfragpartlen
;
1624 if (fwa
->fwa_exthdrs
) {
1625 bcopy (fwa
->fwa_exthdrs
, &pkt
->dn_exthdrs
, sizeof(pkt
->dn_exthdrs
));
1627 * Need to zero out the source structure so the mbufs
1628 * won't be freed by ip6_output()
1630 bzero(fwa
->fwa_exthdrs
, sizeof(struct ip6_exthdrs
));
1633 if (dir
== DN_TO_IP_OUT
|| dir
== DN_TO_IP6_OUT
) {
1634 pkt
->dn_flags
= fwa
->fwa_oflags
;
1635 if (fwa
->fwa_ipoa
!= NULL
)
1636 pkt
->dn_ipoa
= *(fwa
->fwa_ipoa
);
1638 if (q
->head
== NULL
)
1641 q
->tail
->m_nextpkt
= m
;
1644 q
->len_bytes
+= len
;
1646 if ( q
->head
!= m
) /* flow was not idle, we are done */
1649 * If we reach this point the flow was previously idle, so we need
1650 * to schedule it. This involves different actions for fixed-rate or
1655 * Fixed-rate queue: just insert into the ready_heap.
1658 if (pipe
->bandwidth
)
1659 t
= SET_TICKS(m
, q
, pipe
);
1660 q
->sched_time
= curr_time
;
1661 if (t
== 0) /* must process it now */
1662 ready_event( q
, &head
, &tail
);
1664 heap_insert(&ready_heap
, curr_time
+ t
, q
);
1667 * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
1668 * set S to the virtual time V for the controlling pipe, and update
1669 * the sum of weights for the pipe; otherwise, remove flow from
1670 * idle_heap and set S to max(F,V).
1671 * Second, compute finish time F = S + len/weight.
1672 * Third, if pipe was idle, update V=max(S, V).
1673 * Fourth, count one more backlogged flow.
1675 if (DN_KEY_GT(q
->S
, q
->F
)) { /* means timestamps are invalid */
1677 pipe
->sum
+= fs
->weight
; /* add weight of new queue */
1679 heap_extract(&(pipe
->idle_heap
), q
);
1680 q
->S
= MAX64(q
->F
, pipe
->V
) ;
1682 q
->F
= q
->S
+ ( len
<<MY_M
)/(u_int64_t
) fs
->weight
;
1684 if (pipe
->not_eligible_heap
.elements
== 0 &&
1685 pipe
->scheduler_heap
.elements
== 0)
1686 pipe
->V
= MAX64 ( q
->S
, pipe
->V
);
1689 * Look at eligibility. A flow is not eligibile if S>V (when
1690 * this happens, it means that there is some other flow already
1691 * scheduled for the same pipe, so the scheduler_heap cannot be
1692 * empty). If the flow is not eligible we just store it in the
1693 * not_eligible_heap. Otherwise, we store in the scheduler_heap
1694 * and possibly invoke ready_event_wfq() right now if there is
1696 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
1697 * and for all flows in not_eligible_heap (NEH), S_i > V .
1698 * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
1699 * we only need to look into NEH.
1701 if (DN_KEY_GT(q
->S
, pipe
->V
) ) { /* not eligible */
1702 if (pipe
->scheduler_heap
.elements
== 0)
1703 printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
1704 heap_insert(&(pipe
->not_eligible_heap
), q
->S
, q
);
1706 heap_insert(&(pipe
->scheduler_heap
), q
->F
, q
);
1707 if (pipe
->numbytes
>= 0) { /* pipe is idle */
1708 if (pipe
->scheduler_heap
.elements
!= 1)
1709 printf("dummynet: OUCH! pipe should have been idle!\n");
1710 DPRINTF(("dummynet: waking up pipe %d at %d\n",
1711 pipe
->pipe_nr
, (int)(q
->F
>> MY_M
)));
1712 pipe
->sched_time
= curr_time
;
1713 ready_event_wfq(pipe
, &head
, &tail
);
1718 /* start the timer and set global if not already set */
1719 if (!timer_enabled
) {
1721 ts
.tv_nsec
= 1 * 1000000; // 1ms
1723 bsd_timeout(dummynet
, NULL
, &ts
);
1726 lck_mtx_unlock(dn_mutex
);
1729 dummynet_send(head
);
1737 lck_mtx_unlock(dn_mutex
);
1739 return ( (fs
&& (fs
->flags_fs
& DN_NOERROR
)) ? 0 : ENOBUFS
);
/*
 * Release a dummynet packet: drop the cached route stored in the mbuf's
 * dummynet tag, delete the tag, and free the mbuf.
 * Below, the ROUTE_RELEASE is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
 * Doing this would probably save us the initial bzero of dn_pkt
 *
 * Fix: the original body called m_tag_locate(m, ...) using the caller's
 * variable `m` instead of the macro parameter `_m`; it only worked because
 * every call site happened to name its mbuf `m`. Use (_m) throughout.
 */
#define DN_FREE_PKT(_m) do { \
	struct m_tag *tag = m_tag_locate((_m), KERNEL_MODULE_TAG_ID, \
	    KERNEL_TAG_TYPE_DUMMYNET, NULL); \
	if (tag != NULL) { \
		struct dn_pkt_tag *n = (struct dn_pkt_tag *)(tag+1); \
		ROUTE_RELEASE(&n->dn_ro); \
	} \
	m_tag_delete(_m, tag); \
	m_freem(_m); \
} while (0)
1757 * Dispose all packets and flow_queues on a flow_set.
1758 * If all=1, also remove red lookup table and other storage,
1759 * including the descriptor itself.
1760 * For the one in dn_pipe MUST also cleanup ready_heap...
1763 purge_flow_set(struct dn_flow_set
*fs
, int all
)
1765 struct dn_flow_queue
*q
, *qn
;
1768 LCK_MTX_ASSERT(dn_mutex
, LCK_MTX_ASSERT_OWNED
);
1770 for (i
= 0 ; i
<= fs
->rq_size
; i
++ ) {
1771 for (q
= fs
->rq
[i
] ; q
; q
= qn
) {
1772 struct mbuf
*m
, *mnext
;
1775 while ((m
= mnext
) != NULL
) {
1776 mnext
= m
->m_nextpkt
;
1780 FREE(q
, M_DUMMYNET
);
1784 fs
->rq_elements
= 0 ;
1786 /* RED - free lookup table */
1788 FREE(fs
->w_q_lookup
, M_DUMMYNET
);
1790 FREE(fs
->rq
, M_DUMMYNET
);
1791 /* if this fs is not part of a pipe, free it */
1792 if (fs
->pipe
&& fs
!= &(fs
->pipe
->fs
) )
1793 FREE(fs
, M_DUMMYNET
);
1798 * Dispose all packets queued on a pipe (not a flow_set).
1799 * Also free all resources associated to a pipe, which is about
1803 purge_pipe(struct dn_pipe
*pipe
)
1805 struct mbuf
*m
, *mnext
;
1807 purge_flow_set( &(pipe
->fs
), 1 );
1810 while ((m
= mnext
) != NULL
) {
1811 mnext
= m
->m_nextpkt
;
1815 heap_free( &(pipe
->scheduler_heap
) );
1816 heap_free( &(pipe
->not_eligible_heap
) );
1817 heap_free( &(pipe
->idle_heap
) );
1821 * Delete all pipes and heaps returning memory. Must also
1822 * remove references from all ipfw rules to all pipes.
1825 dummynet_flush(void)
1827 struct dn_pipe
*pipe
, *pipe1
;
1828 struct dn_flow_set
*fs
, *fs1
;
1831 lck_mtx_lock(dn_mutex
);
1834 /* remove all references to pipes ...*/
1835 flush_pipe_ptrs(NULL
);
1838 /* Free heaps so we don't have unwanted events. */
1839 heap_free(&ready_heap
);
1840 heap_free(&wfq_ready_heap
);
1841 heap_free(&extract_heap
);
1844 * Now purge all queued pkts and delete all pipes.
1846 * XXXGL: can we merge the for(;;) cycles into one or not?
1848 for (i
= 0; i
< HASHSIZE
; i
++)
1849 SLIST_FOREACH_SAFE(fs
, &flowsethash
[i
], next
, fs1
) {
1850 SLIST_REMOVE(&flowsethash
[i
], fs
, dn_flow_set
, next
);
1851 purge_flow_set(fs
, 1);
1853 for (i
= 0; i
< HASHSIZE
; i
++)
1854 SLIST_FOREACH_SAFE(pipe
, &pipehash
[i
], next
, pipe1
) {
1855 SLIST_REMOVE(&pipehash
[i
], pipe
, dn_pipe
, next
);
1857 FREE(pipe
, M_DUMMYNET
);
1859 lck_mtx_unlock(dn_mutex
);
1864 dn_ipfw_rule_delete_fs(struct dn_flow_set
*fs
, void *r
)
1867 struct dn_flow_queue
*q
;
1870 for (i
= 0 ; i
<= fs
->rq_size
; i
++) /* last one is ovflow */
1871 for (q
= fs
->rq
[i
] ; q
; q
= q
->next
)
1872 for (m
= q
->head
; m
; m
= m
->m_nextpkt
) {
1873 struct dn_pkt_tag
*pkt
= dn_tag_get(m
) ;
1874 if (pkt
->dn_ipfw_rule
== r
)
1875 pkt
->dn_ipfw_rule
= &default_rule
;
1879 * when a firewall rule is deleted, scan all queues and remove the flow-id
1880 * from packets matching this rule.
1883 dn_ipfw_rule_delete(void *r
)
1886 struct dn_flow_set
*fs
;
1887 struct dn_pkt_tag
*pkt
;
1891 lck_mtx_lock(dn_mutex
);
1894 * If the rule references a queue (dn_flow_set), then scan
1895 * the flow set, otherwise scan pipes. Should do either, but doing
1896 * both does not harm.
1898 for (i
= 0; i
< HASHSIZE
; i
++)
1899 SLIST_FOREACH(fs
, &flowsethash
[i
], next
)
1900 dn_ipfw_rule_delete_fs(fs
, r
);
1902 for (i
= 0; i
< HASHSIZE
; i
++)
1903 SLIST_FOREACH(p
, &pipehash
[i
], next
) {
1905 dn_ipfw_rule_delete_fs(fs
, r
);
1906 for (m
= p
->head
; m
; m
= m
->m_nextpkt
) {
1907 pkt
= dn_tag_get(m
);
1908 if (pkt
->dn_ipfw_rule
== r
)
1909 pkt
->dn_ipfw_rule
= &default_rule
;
1912 lck_mtx_unlock(dn_mutex
);
1916 * setup RED parameters
1919 config_red(struct dn_flow_set
*p
, struct dn_flow_set
* x
)
1924 x
->min_th
= SCALE(p
->min_th
);
1925 x
->max_th
= SCALE(p
->max_th
);
1926 x
->max_p
= p
->max_p
;
1928 x
->c_1
= p
->max_p
/ (p
->max_th
- p
->min_th
);
1929 x
->c_2
= SCALE_MUL(x
->c_1
, SCALE(p
->min_th
));
1930 if (x
->flags_fs
& DN_IS_GENTLE_RED
) {
1931 x
->c_3
= (SCALE(1) - p
->max_p
) / p
->max_th
;
1932 x
->c_4
= (SCALE(1) - 2 * p
->max_p
);
1935 /* if the lookup table already exist, free and create it again */
1936 if (x
->w_q_lookup
) {
1937 FREE(x
->w_q_lookup
, M_DUMMYNET
);
1938 x
->w_q_lookup
= NULL
;
1940 if (red_lookup_depth
== 0) {
1941 printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth must be > 0\n");
1942 FREE(x
, M_DUMMYNET
);
1945 x
->lookup_depth
= red_lookup_depth
;
1946 x
->w_q_lookup
= (u_int
*) _MALLOC(x
->lookup_depth
* sizeof(int),
1947 M_DUMMYNET
, M_DONTWAIT
);
1948 if (x
->w_q_lookup
== NULL
) {
1949 printf("dummynet: sorry, cannot allocate red lookup table\n");
1950 FREE(x
, M_DUMMYNET
);
1954 /* fill the lookup table with (1 - w_q)^x */
1955 x
->lookup_step
= p
->lookup_step
;
1956 x
->lookup_weight
= p
->lookup_weight
;
1957 x
->w_q_lookup
[0] = SCALE(1) - x
->w_q
;
1958 for (i
= 1; i
< x
->lookup_depth
; i
++)
1959 x
->w_q_lookup
[i
] = SCALE_MUL(x
->w_q_lookup
[i
- 1], x
->lookup_weight
);
1960 if (red_avg_pkt_size
< 1)
1961 red_avg_pkt_size
= 512 ;
1962 x
->avg_pkt_size
= red_avg_pkt_size
;
1963 if (red_max_pkt_size
< 1)
1964 red_max_pkt_size
= 1500 ;
1965 x
->max_pkt_size
= red_max_pkt_size
;
1970 alloc_hash(struct dn_flow_set
*x
, struct dn_flow_set
*pfs
)
1972 if (x
->flags_fs
& DN_HAVE_FLOW_MASK
) { /* allocate some slots */
1973 int l
= pfs
->rq_size
;
1979 else if (l
> DN_MAX_HASH_SIZE
)
1980 l
= DN_MAX_HASH_SIZE
;
1982 } else /* one is enough for null mask */
1984 x
->rq
= _MALLOC((1 + x
->rq_size
) * sizeof(struct dn_flow_queue
*),
1985 M_DUMMYNET
, M_DONTWAIT
| M_ZERO
);
1986 if (x
->rq
== NULL
) {
1987 printf("dummynet: sorry, cannot allocate queue\n");
1995 set_fs_parms(struct dn_flow_set
*x
, struct dn_flow_set
*src
)
1997 x
->flags_fs
= src
->flags_fs
;
1998 x
->qsize
= src
->qsize
;
2000 x
->flow_mask
= src
->flow_mask
;
2001 if (x
->flags_fs
& DN_QSIZE_IS_BYTES
) {
2002 if (x
->qsize
> 1024*1024)
2003 x
->qsize
= 1024*1024 ;
2010 /* configuring RED */
2011 if ( x
->flags_fs
& DN_IS_RED
)
2012 config_red(src
, x
) ; /* XXX should check errors */
2016 * setup pipe or queue parameters.
2019 config_pipe(struct dn_pipe
*p
)
2022 struct dn_flow_set
*pfs
= &(p
->fs
);
2023 struct dn_flow_queue
*q
;
2026 * The config program passes parameters as follows:
2027 * bw = bits/second (0 means no limits),
2028 * delay = ms, must be translated into ticks.
2029 * qsize = slots/bytes
2031 p
->delay
= ( p
->delay
* (hz
*10) ) / 1000 ;
2032 /* We need either a pipe number or a flow_set number */
2033 if (p
->pipe_nr
== 0 && pfs
->fs_nr
== 0)
2035 if (p
->pipe_nr
!= 0 && pfs
->fs_nr
!= 0)
2037 if (p
->pipe_nr
!= 0) { /* this is a pipe */
2038 struct dn_pipe
*x
, *b
;
2039 struct dummynet_event dn_event
;
2040 lck_mtx_lock(dn_mutex
);
2043 b
= locate_pipe(p
->pipe_nr
);
2045 if (b
== NULL
|| b
->pipe_nr
!= p
->pipe_nr
) { /* new pipe */
2046 x
= _MALLOC(sizeof(struct dn_pipe
), M_DUMMYNET
, M_DONTWAIT
| M_ZERO
) ;
2048 lck_mtx_unlock(dn_mutex
);
2049 printf("dummynet: no memory for new pipe\n");
2052 x
->pipe_nr
= p
->pipe_nr
;
2054 /* idle_heap is the only one from which we extract from the middle.
2056 x
->idle_heap
.size
= x
->idle_heap
.elements
= 0 ;
2057 x
->idle_heap
.offset
=offsetof(struct dn_flow_queue
, heap_pos
);
2060 /* Flush accumulated credit for all queues */
2061 for (i
= 0; i
<= x
->fs
.rq_size
; i
++)
2062 for (q
= x
->fs
.rq
[i
]; q
; q
= q
->next
)
2066 x
->bandwidth
= p
->bandwidth
;
2067 x
->numbytes
= 0; /* just in case... */
2068 bcopy(p
->if_name
, x
->if_name
, sizeof(p
->if_name
) );
2069 x
->ifp
= NULL
; /* reset interface ptr */
2070 x
->delay
= p
->delay
;
2071 set_fs_parms(&(x
->fs
), pfs
);
2074 if ( x
->fs
.rq
== NULL
) { /* a new pipe */
2075 r
= alloc_hash(&(x
->fs
), pfs
) ;
2077 lck_mtx_unlock(dn_mutex
);
2078 FREE(x
, M_DUMMYNET
);
2081 SLIST_INSERT_HEAD(&pipehash
[HASH(x
->pipe_nr
)],
2084 lck_mtx_unlock(dn_mutex
);
2086 bzero(&dn_event
, sizeof(dn_event
));
2087 dn_event
.dn_event_code
= DUMMYNET_PIPE_CONFIG
;
2088 dn_event
.dn_event_pipe_config
.bandwidth
= p
->bandwidth
;
2089 dn_event
.dn_event_pipe_config
.delay
= p
->delay
;
2090 dn_event
.dn_event_pipe_config
.plr
= pfs
->plr
;
2092 dummynet_event_enqueue_nwk_wq_entry(&dn_event
);
2093 } else { /* config queue */
2094 struct dn_flow_set
*x
, *b
;
2096 lck_mtx_lock(dn_mutex
);
2097 /* locate flow_set */
2098 b
= locate_flowset(pfs
->fs_nr
);
2100 if (b
== NULL
|| b
->fs_nr
!= pfs
->fs_nr
) { /* new */
2101 if (pfs
->parent_nr
== 0) { /* need link to a pipe */
2102 lck_mtx_unlock(dn_mutex
);
2105 x
= _MALLOC(sizeof(struct dn_flow_set
), M_DUMMYNET
, M_DONTWAIT
| M_ZERO
);
2107 lck_mtx_unlock(dn_mutex
);
2108 printf("dummynet: no memory for new flow_set\n");
2111 x
->fs_nr
= pfs
->fs_nr
;
2112 x
->parent_nr
= pfs
->parent_nr
;
2113 x
->weight
= pfs
->weight
;
2116 else if (x
->weight
> 100)
2119 /* Change parent pipe not allowed; must delete and recreate */
2120 if (pfs
->parent_nr
!= 0 && b
->parent_nr
!= pfs
->parent_nr
) {
2121 lck_mtx_unlock(dn_mutex
);
2126 set_fs_parms(x
, pfs
);
2128 if ( x
->rq
== NULL
) { /* a new flow_set */
2129 r
= alloc_hash(x
, pfs
) ;
2131 lck_mtx_unlock(dn_mutex
);
2132 FREE(x
, M_DUMMYNET
);
2135 SLIST_INSERT_HEAD(&flowsethash
[HASH(x
->fs_nr
)],
2138 lck_mtx_unlock(dn_mutex
);
2144 * Helper function to remove from a heap queues which are linked to
2145 * a flow_set about to be deleted.
2148 fs_remove_from_heap(struct dn_heap
*h
, struct dn_flow_set
*fs
)
2150 int i
= 0, found
= 0 ;
2151 for (; i
< h
->elements
;)
2152 if ( ((struct dn_flow_queue
*)h
->p
[i
].object
)->fs
== fs
) {
2154 h
->p
[i
] = h
->p
[h
->elements
] ;
2163 * helper function to remove a pipe from a heap (can be there at most once)
2166 pipe_remove_from_heap(struct dn_heap
*h
, struct dn_pipe
*p
)
2168 if (h
->elements
> 0) {
2170 for (i
=0; i
< h
->elements
; i
++ ) {
2171 if (h
->p
[i
].object
== p
) { /* found it */
2173 h
->p
[i
] = h
->p
[h
->elements
] ;
2182 * drain all queues. Called in case of severe mbuf shortage.
2185 dummynet_drain(void)
2187 struct dn_flow_set
*fs
;
2189 struct mbuf
*m
, *mnext
;
2192 LCK_MTX_ASSERT(dn_mutex
, LCK_MTX_ASSERT_OWNED
);
2194 heap_free(&ready_heap
);
2195 heap_free(&wfq_ready_heap
);
2196 heap_free(&extract_heap
);
2197 /* remove all references to this pipe from flow_sets */
2198 for (i
= 0; i
< HASHSIZE
; i
++)
2199 SLIST_FOREACH(fs
, &flowsethash
[i
], next
)
2200 purge_flow_set(fs
, 0);
2202 for (i
= 0; i
< HASHSIZE
; i
++)
2203 SLIST_FOREACH(p
, &pipehash
[i
], next
) {
2204 purge_flow_set(&(p
->fs
), 0);
2207 while ((m
= mnext
) != NULL
) {
2208 mnext
= m
->m_nextpkt
;
2211 p
->head
= p
->tail
= NULL
;
2216 * Fully delete a pipe or a queue, cleaning up associated info.
/*
 * delete_pipe() -- remove either an old-style pipe (pipe_nr != 0) or a
 * WF2Q flow_set/queue (fs_nr != 0), identified by the template `*p`.
 * Returns EINVAL when the target cannot be located; other return paths
 * are on lines dropped from this extract.
 * NOTE(review): the return type, the two early-exit bodies (lines 2221 /
 * 2223), the NULL check after locate_pipe/locate_flowset, and the final
 * return are missing from this extract.
 */
2219 delete_pipe(struct dn_pipe
*p
)
/* Neither a pipe nor a flow_set number given -- early-exit body dropped. */
2221 if (p
->pipe_nr
== 0 && p
->fs
.fs_nr
== 0)
/* Both given at once is ambiguous -- early-exit body dropped. */
2223 if (p
->pipe_nr
!= 0 && p
->fs
.fs_nr
!= 0)
2225 if (p
->pipe_nr
!= 0) { /* this is an old-style pipe */
2227 struct dn_flow_set
*fs
;
/* All hash/heap manipulation below is under the dummynet mutex. */
2230 lck_mtx_lock(dn_mutex
);
/* Look up the live pipe `b` by number; `p` is only the user template. */
2232 b
= locate_pipe(p
->pipe_nr
);
/* Presumably guarded by `if (b == NULL)` on a dropped line. */
2234 lck_mtx_unlock(dn_mutex
);
2235 return EINVAL
; /* not found */
2238 /* Unlink from list of pipes. */
2239 SLIST_REMOVE(&pipehash
[HASH(b
->pipe_nr
)], b
, dn_pipe
, next
);
2242 /* remove references to this pipe from the ip_fw rules. */
2243 flush_pipe_ptrs(&(b
->fs
));
2246 /* Remove all references to this pipe from flow_sets. */
2247 for (i
= 0; i
< HASHSIZE
; i
++)
2248 SLIST_FOREACH(fs
, &flowsethash
[i
], next
)
2249 if (fs
->pipe
== b
) {
2250 printf("dummynet: ++ ref to pipe %d from fs %d\n",
2251 p
->pipe_nr
, fs
->fs_nr
);
/* Drop any flow_set state still pointing at the dying pipe. */
2253 purge_flow_set(fs
, 0);
2255 fs_remove_from_heap(&ready_heap
, &(b
->fs
));
2257 purge_pipe(b
); /* remove all data associated to this pipe */
2258 /* remove reference to here from extract_heap and wfq_ready_heap */
2259 pipe_remove_from_heap(&extract_heap
, b
);
2260 pipe_remove_from_heap(&wfq_ready_heap
, b
);
2261 lck_mtx_unlock(dn_mutex
);
/* Pipe is fully unlinked; release its memory. */
2263 FREE(b
, M_DUMMYNET
);
2264 } else { /* this is a WF2Q queue (dn_flow_set) */
2265 struct dn_flow_set
*b
;
2267 lck_mtx_lock(dn_mutex
);
2269 b
= locate_flowset(p
->fs
.fs_nr
);
/* Presumably guarded by `if (b == NULL)` on a dropped line. */
2271 lck_mtx_unlock(dn_mutex
);
2272 return EINVAL
; /* not found */
2276 /* remove references to this flow_set from the ip_fw rules. */
2280 /* Unlink from list of flowsets. */
2281 SLIST_REMOVE( &flowsethash
[HASH(b
->fs_nr
)], b
, dn_flow_set
, next
);
2283 if (b
->pipe
!= NULL
) {
2284 /* Update total weight on parent pipe and cleanup parent heaps */
2285 b
->pipe
->sum
-= b
->weight
* b
->backlogged
;
2286 fs_remove_from_heap(&(b
->pipe
->not_eligible_heap
), b
);
2287 fs_remove_from_heap(&(b
->pipe
->scheduler_heap
), b
);
2288 #if 1 /* XXX should i remove from idle_heap as well ? */
2289 fs_remove_from_heap(&(b
->pipe
->idle_heap
), b
);
/* Second arg 1: purge_flow_set also frees the flow_set itself. */
2292 purge_flow_set(b
, 1);
2293 lck_mtx_unlock(dn_mutex
);
2299 * helper function used to copy data from kernel in DUMMYNET_GET
/*
 * dn_copy_set_32() -- serialize every dn_flow_queue of `set` into the
 * 32-bit userland layout at `bp`, sanitizing kernel pointers to 0.
 * Presumably returns the advanced copy pointer (the `return` line is
 * missing from this extract, as are the declarations of `i`/`copied`
 * and the q->fs consistency check guarding the second printf).
 */
2302 char* dn_copy_set_32(struct dn_flow_set
*set
, char *bp
)
2305 struct dn_flow_queue
*q
;
/* Reinterpret the output buffer as an array of 32-bit queue records. */
2306 struct dn_flow_queue_32
*qp
= (struct dn_flow_queue_32
*)bp
;
2308 LCK_MTX_ASSERT(dn_mutex
, LCK_MTX_ASSERT_OWNED
);
/* Visit every hash slot (rq_size inclusive) and every chained queue. */
2310 for (i
= 0 ; i
<= set
->rq_size
; i
++)
2311 for (q
= set
->rq
[i
] ; q
; q
= q
->next
, qp
++ ) {
/* Diagnostic only: queue recorded under the wrong hash slot. */
2312 if (q
->hash_slot
!= i
)
2313 printf("dummynet: ++ at %d: wrong slot (have %d, "
2314 "should be %d)\n", copied
, q
->hash_slot
, i
);
/* Diagnostic: queue's back-pointer disagrees with `set`
 * (the guarding `if` is on a line dropped from this extract). */
2316 printf("dummynet: ++ at %d: wrong fs ptr "
2317 "(have 0x%llx, should be 0x%llx)\n", i
,
2318 (uint64_t)VM_KERNEL_ADDRPERM(q
->fs
),
2319 (uint64_t)VM_KERNEL_ADDRPERM(set
));
2321 cp_queue_to_32_user( q
, qp
);
2322 /* cleanup pointers */
2323 qp
->next
= (user32_addr_t
)0 ;
2324 qp
->head
= qp
->tail
= (user32_addr_t
)0 ;
2325 qp
->fs
= (user32_addr_t
)0 ;
/* Sanity check: copied count should match the set's element count. */
2327 if (copied
!= set
->rq_elements
)
2328 printf("dummynet: ++ wrong count, have %d should be %d\n",
2329 copied
, set
->rq_elements
);
/*
 * dn_copy_set_64() -- 64-bit counterpart of dn_copy_set_32(): serialize
 * every dn_flow_queue of `set` into dn_flow_queue_64 records at `bp`,
 * nulling kernel pointers with USER_ADDR_NULL. Presumably returns the
 * advanced copy pointer (the `return` line, the `i`/`copied` declarations,
 * and the q->fs consistency `if` are missing from this extract).
 */
2334 char* dn_copy_set_64(struct dn_flow_set
*set
, char *bp
)
2337 struct dn_flow_queue
*q
;
2338 struct dn_flow_queue_64
*qp
= (struct dn_flow_queue_64
*)bp
;
2340 LCK_MTX_ASSERT(dn_mutex
, LCK_MTX_ASSERT_OWNED
);
/* Visit every hash slot (rq_size inclusive) and every chained queue. */
2342 for (i
= 0 ; i
<= set
->rq_size
; i
++)
2343 for (q
= set
->rq
[i
] ; q
; q
= q
->next
, qp
++ ) {
/* Diagnostic only: queue recorded under the wrong hash slot. */
2344 if (q
->hash_slot
!= i
)
2345 printf("dummynet: ++ at %d: wrong slot (have %d, "
2346 "should be %d)\n", copied
, q
->hash_slot
, i
);
/* Diagnostic: queue's back-pointer disagrees with `set`
 * (the guarding `if` is on a line dropped from this extract). */
2348 printf("dummynet: ++ at %d: wrong fs ptr "
2349 "(have 0x%llx, should be 0x%llx)\n", i
,
2350 (uint64_t)VM_KERNEL_ADDRPERM(q
->fs
),
2351 (uint64_t)VM_KERNEL_ADDRPERM(set
));
2353 //bcopy(q, qp, sizeof(*q));
2354 cp_queue_to_64_user( q
, qp
);
2355 /* cleanup pointers */
2356 qp
->next
= USER_ADDR_NULL
;
2357 qp
->head
= qp
->tail
= USER_ADDR_NULL
;
2358 qp
->fs
= USER_ADDR_NULL
;
/* Sanity check: copied count should match the set's element count. */
2360 if (copied
!= set
->rq_elements
)
2361 printf("dummynet: ++ wrong count, have %d should be %d\n",
2362 copied
, set
->rq_elements
);
/*
 * dn_calc_size() -- compute the buffer size needed to export all pipes
 * and flow_sets to userland, choosing 64- or 32-bit record layouts based
 * on `is64user`. NOTE(review): the return type, the local declarations
 * (size/pipesize/queuesize/setsize/i/p), the if/else braces around the
 * two sizeof groups, and the return statement are missing from this
 * extract. The visible accumulation uses sizeof(*p)/sizeof(*set)
 * (kernel sizes) rather than pipesize/setsize -- TODO confirm against
 * upstream whether that is intentional.
 */
2367 dn_calc_size(int is64user
)
2369 struct dn_flow_set
*set
;
2377 LCK_MTX_ASSERT(dn_mutex
, LCK_MTX_ASSERT_OWNED
);
/* 64-bit userland record sizes (the `if (is64user)` line is dropped). */
2379 pipesize
= sizeof(struct dn_pipe_64
);
2380 queuesize
= sizeof(struct dn_flow_queue_64
);
2381 setsize
= sizeof(struct dn_flow_set_64
);
/* 32-bit userland record sizes (the `else` line is dropped). */
2384 pipesize
= sizeof(struct dn_pipe_32
);
2385 queuesize
= sizeof( struct dn_flow_queue_32
);
2386 setsize
= sizeof(struct dn_flow_set_32
);
2389 * compute size of data structures: list of pipes and flow_sets.
2391 for (i
= 0; i
< HASHSIZE
; i
++) {
2392 SLIST_FOREACH(p
, &pipehash
[i
], next
)
2393 size
+= sizeof(*p
) +
2394 p
->fs
.rq_elements
* sizeof(struct dn_flow_queue
);
2395 SLIST_FOREACH(set
, &flowsethash
[i
], next
)
2396 size
+= sizeof (*set
) +
2397 set
->rq_elements
* sizeof(struct dn_flow_queue
);
/*
 * dummynet_get() -- IP_DUMMYNET_GET handler: snapshot all pipes and
 * flow_sets into a flat buffer and copy it out to userland via
 * sooptcopyout(). NOTE(review): this extract is missing the return
 * type, locals (buf size/error/i/is64user), the allocation-failure
 * check, the `break` out of the sizing retry loop, the 32-bit branch
 * of the flowset export loop, the FREE(buf) cleanup, and the final
 * return. Comments annotate only the visible lines.
 */
2403 dummynet_get(struct sockopt
*sopt
)
2405 char *buf
= NULL
, *bp
= NULL
; /* bp is the "copy-pointer" */
2407 struct dn_flow_set
*set
;
2412 /* XXX lock held too long */
2413 lck_mtx_lock(dn_mutex
);
2415 * XXX: Ugly, but we need to allocate memory with M_WAITOK flag
2416 * and we cannot use this flag while holding a mutex.
/* Choose 32- vs 64-bit export layout from the requesting process
 * (the assignment to is64user is on a dropped line). */
2418 if (proc_is64bit(sopt
->sopt_p
))
/* Retry loop: size, drop the lock to M_WAITOK-allocate, re-lock, and
 * accept the buffer only if the needed size did not change meanwhile. */
2420 for (i
= 0; i
< 10; i
++) {
2421 size
= dn_calc_size(is64user
);
2422 lck_mtx_unlock(dn_mutex
);
2423 buf
= _MALLOC(size
, M_TEMP
, M_WAITOK
| M_ZERO
);
2426 lck_mtx_lock(dn_mutex
);
2427 if (size
== dn_calc_size(is64user
))
/* Presumably the give-up path after 10 failed sizing attempts. */
2433 lck_mtx_unlock(dn_mutex
);
/* Export every pipe (with its flow_set and queues) into the buffer. */
2438 for (i
= 0; i
< HASHSIZE
; i
++) {
2439 SLIST_FOREACH(p
, &pipehash
[i
], next
) {
2441 * copy pipe descriptor into *bp, convert delay
2442 * back to ms, then copy the flow_set descriptor(s)
2443 * one at a time. After each flow_set, copy the
2444 * queue descriptor it owns.
2447 bp
= cp_pipe_to_64_user(p
,
2448 (struct dn_pipe_64
*)bp
);
2450 bp
= cp_pipe_to_32_user(p
,
2451 (struct dn_pipe_32
*)bp
);
/* Export every standalone flow_set; only the 64-bit branch is
 * visible here (the 32-bit branch is on dropped lines). */
2455 for (i
= 0; i
< HASHSIZE
; i
++) {
2456 SLIST_FOREACH(set
, &flowsethash
[i
], next
) {
2457 struct dn_flow_set_64
*fs_bp
=
2458 (struct dn_flow_set_64
*)bp
;
2459 cp_flow_set_to_64_user(set
, fs_bp
);
2460 /* XXX same hack as above */
2461 fs_bp
->next
= CAST_DOWN(user64_addr_t
,
/* Sanitize kernel pointers before the record reaches userland. */
2463 fs_bp
->pipe
= USER_ADDR_NULL
;
2464 fs_bp
->rq
= USER_ADDR_NULL
;
2465 bp
+= sizeof(struct dn_flow_set_64
);
2466 bp
= dn_copy_set_64( set
, bp
);
2469 lck_mtx_unlock(dn_mutex
);
/* Copy the assembled snapshot out to the caller. */
2470 error
= sooptcopyout(sopt
, buf
, size
);
2476 * Handler for the various dummynet socket options (get, flush, config, del)
/*
 * ip_dn_ctl() -- sockopt dispatch for dummynet. NOTE(review): the return
 * type, the `error` declaration, the p = &tmp_pipe setup, the securelevel
 * early return, the FLUSH case body, the per-case `break`s, and the final
 * return are missing from this extract.
 */
2479 ip_dn_ctl(struct sockopt
*sopt
)
/* tmp_pipe is stack scratch for decoding the user-supplied pipe config. */
2482 struct dn_pipe
*p
, tmp_pipe
;
2484 /* Disallow sets in really-really secure mode. */
2485 if (sopt
->sopt_dir
== SOPT_SET
&& securelevel
>= 3)
2488 switch (sopt
->sopt_name
) {
/* default case: log and fall through to an error on dropped lines. */
2490 printf("dummynet: -- unknown option %d", sopt
->sopt_name
);
2493 case IP_DUMMYNET_GET
:
2494 error
= dummynet_get(sopt
);
/* FLUSH body is on lines dropped from this extract. */
2497 case IP_DUMMYNET_FLUSH
:
2501 case IP_DUMMYNET_CONFIGURE
:
/* Decode the user's pipe template in the caller's pointer width. */
2503 if (proc_is64bit(sopt
->sopt_p
))
2504 error
= cp_pipe_from_user_64( sopt
, p
);
2506 error
= cp_pipe_from_user_32( sopt
, p
);
/* Presumably guarded by `if (error == 0)` on a dropped line. */
2510 error
= config_pipe(p
);
2513 case IP_DUMMYNET_DEL
: /* remove a pipe or queue */
2515 if (proc_is64bit(sopt
->sopt_p
))
2516 error
= cp_pipe_from_user_64( sopt
, p
);
2518 error
= cp_pipe_from_user_32( sopt
, p
);
2522 error
= delete_pipe(p
);
/* Fragment of an init function (signature on dropped lines): register
 * the dummynet eventhandler list context. */
2531 eventhandler_lists_ctxt_init(&dummynet_evhdlr_ctxt
);
/*
 * Fragment of the dummynet initialization function (its signature and
 * surrounding lines are dropped from this extract): set up the global
 * mutex, zero the scheduler heaps, install the protocol hooks, and
 * build the default ipfw rule.
 */
/* Allocate and initialize the global dummynet mutex. */
2538 dn_mutex_grp_attr
= lck_grp_attr_alloc_init();
2539 dn_mutex_grp
= lck_grp_alloc_init("dn", dn_mutex_grp_attr
);
2540 dn_mutex_attr
= lck_attr_alloc_init();
2541 lck_mtx_init(dn_mutex
, dn_mutex_grp
, dn_mutex_attr
);
/* Start with all three scheduler heaps empty; offset 0 means the heap
 * does not maintain back-pointers into its elements. */
2543 ready_heap
.size
= ready_heap
.elements
= 0 ;
2544 ready_heap
.offset
= 0 ;
2546 wfq_ready_heap
.size
= wfq_ready_heap
.elements
= 0 ;
2547 wfq_ready_heap
.offset
= 0 ;
2549 extract_heap
.size
= extract_heap
.elements
= 0 ;
2550 extract_heap
.offset
= 0 ;
/* Publish the control and I/O entry points used by the IP stack. */
2551 ip_dn_ctl_ptr
= ip_dn_ctl
;
2552 ip_dn_io_ptr
= dummynet_io
;
/* Build the catch-all default rule referenced by dummynet matches. */
2554 bzero(&default_rule
, sizeof default_rule
);
2556 default_rule
.act_ofs
= 0;
2557 default_rule
.rulenum
= IPFW_DEFAULT_RULE
;
2558 default_rule
.cmd_len
= 1;
2559 default_rule
.set
= RESVD_SET
;
2561 default_rule
.cmd
[0].len
= 1;
/* Default verdict is build-time configurable (accept vs. deny); the
 * opcode value lines following the #ifdef are dropped from this extract. */
2562 default_rule
.cmd
[0].opcode
=
2563 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
/*
 * Work-queue carrier pairing a generic nwk_wq_entry header with the
 * dummynet event payload it delivers (braces are on dropped lines).
 */
2570 struct dn_event_nwk_wq_entry
/* Must stay first: the entry is cast to struct nwk_wq_entry * when
 * enqueued (see dummynet_event_enqueue_nwk_wq_entry). */
2572 struct nwk_wq_entry nwk_wqe
;
/* Copied-in event payload; nwk_wqe.arg points at this field. */
2573 struct dummynet_event dn_ev_arg
;
/*
 * Work-queue callback: recover the dummynet event from the opaque arg
 * and fan it out to registered event handlers (return type and braces
 * are on dropped lines).
 */
2577 dummynet_event_callback(void *arg
)
2579 struct dummynet_event
*p_dn_ev
= (struct dummynet_event
*)arg
;
2581 EVENTHANDLER_INVOKE(&dummynet_evhdlr_ctxt
, dummynet_event
, p_dn_ev
);
/*
 * Package a dummynet event into a freshly allocated work-queue entry
 * and enqueue it for asynchronous delivery via dummynet_event_callback.
 * The M_WAITOK | M_ZERO allocation may block but cannot fail with NULL.
 * (Return type and closing brace are on lines dropped from this extract.)
 */
2586 dummynet_event_enqueue_nwk_wq_entry(struct dummynet_event
*p_dn_event
)
2588 struct dn_event_nwk_wq_entry
*p_dn_ev
= NULL
;
2590 MALLOC(p_dn_ev
, struct dn_event_nwk_wq_entry
*,
2591 sizeof(struct dn_event_nwk_wq_entry
),
2592 M_NWKWQ
, M_WAITOK
| M_ZERO
);
/* Wire the generic work-queue header to the dummynet callback; the
 * arg points into this same allocation, which the work queue manages
 * (is_arg_managed = TRUE). */
2594 p_dn_ev
->nwk_wqe
.func
= dummynet_event_callback
;
2595 p_dn_ev
->nwk_wqe
.is_arg_managed
= TRUE
;
2596 p_dn_ev
->nwk_wqe
.arg
= &p_dn_ev
->dn_ev_arg
;
/* Copy the caller's event by value so the caller's storage may die. */
2598 bcopy(p_dn_event
, &(p_dn_ev
->dn_ev_arg
),
2599 sizeof(struct dummynet_event
));
2600 nwk_wq_enqueue((struct nwk_wq_entry
*)p_dn_ev
);