]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/ip_dummynet.c
xnu-344.tar.gz
[apple/xnu.git] / bsd / netinet / ip_dummynet.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
9bccf70c
A
22 * Copyright (c) 1998-2001 Luigi Rizzo, Universita` di Pisa
23 * Portions Copyright (c) 2000 Akamba Corp.
24 * All rights reserved
1c79356b 25 *
9bccf70c
A
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
1c79356b 34 *
9bccf70c
A
35 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
1c79356b 46 *
9bccf70c 47 * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.24.2.11 2001/02/09 23:18:08 luigi Exp $
1c79356b
A
48 */
49
9bccf70c
A
50#define DEB(x)
51#define DDB(x) x
52
1c79356b
A
53/*
54 * This module implements IP dummynet, a bandwidth limiter/delay emulator
55 * used in conjunction with the ipfw package.
9bccf70c
A
56 * Description of the data structures used is in ip_dummynet.h
57 * Here you mainly find the following blocks of code:
58 * + variable declarations;
59 * + heap management functions;
60 * + scheduler and dummynet functions;
61 * + configuration and initialization.
62 *
63 * NOTA BENE: critical sections are protected by splimp()/splx()
64 * pairs. One would think that splnet() is enough as for most of
65 * the netinet code, but it is not so because when used with
66 * bridging, dummynet is invoked at splimp().
1c79356b 67 *
9bccf70c 68 * Most important Changes:
1c79356b 69 *
9bccf70c
A
70 * 010124: Fixed WF2Q behaviour
71 * 010122: Fixed spl protection.
72 * 000601: WF2Q support
73 * 000106: large rewrite, use heaps to handle very many pipes.
1c79356b 74 * 980513: initial release
9bccf70c
A
75 *
76 * include files marked with XXX are probably not needed
1c79356b
A
77 */
78
1c79356b
A
79#include <sys/param.h>
80#include <sys/systm.h>
81#include <sys/malloc.h>
82#include <sys/mbuf.h>
83#include <sys/queue.h> /* XXX */
84#include <sys/kernel.h>
85#include <sys/socket.h>
86#include <sys/socketvar.h>
87#include <sys/time.h>
88#include <sys/sysctl.h>
89#include <net/if.h>
90#include <net/route.h>
91#include <netinet/in.h>
92#include <netinet/in_systm.h>
93#include <netinet/in_var.h>
94#include <netinet/ip.h>
95#include <netinet/ip_fw.h>
96#include <netinet/ip_dummynet.h>
97#include <netinet/ip_var.h>
98
99#if BRIDGE
100#include <netinet/if_ether.h> /* for struct arpcom */
101#include <net/bridge.h>
102#endif
103
9bccf70c
A
104/*
105 * We keep a private variable for the simulation time, but we could
106 * probably use an existing one ("softticks" in sys/kern/kern_timer.c)
107 */
108static dn_key curr_time = 0 ; /* current simulation time */
109
110static int dn_hash_size = 64 ; /* default hash size */
111
112/* statistics on number of queue searches and search steps */
113static int searches, search_steps ;
114static int pipe_expire = 1 ; /* expire queue if empty */
115static int dn_max_ratio = 16 ; /* max queues/buckets ratio */
116
117static int red_lookup_depth = 256; /* RED - default lookup table depth */
118static int red_avg_pkt_size = 512; /* RED - default medium packet size */
119static int red_max_pkt_size = 1500; /* RED - default max packet size */
120
121/*
122 * Three heaps contain queues and pipes that the scheduler handles:
123 *
124 * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
125 *
126 * wfq_ready_heap contains the pipes associated with WF2Q flows
127 *
128 * extract_heap contains pipes associated with delay lines.
129 *
130 */
131static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ;
132
133static int heap_init(struct dn_heap *h, int size) ;
134static int heap_insert (struct dn_heap *h, dn_key key1, void *p);
135static void heap_extract(struct dn_heap *h, void *obj);
136
137static void transmit_event(struct dn_pipe *pipe);
138static void ready_event(struct dn_flow_queue *q);
139
1c79356b 140static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */
9bccf70c 141static struct dn_flow_set *all_flow_sets = NULL ;/* list of all flow_sets */
1c79356b 142
9bccf70c
A
143#if SYSCTL_NODE
144SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
145 CTLFLAG_RW, 0, "Dummynet");
146SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
147 CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
148SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time,
149 CTLFLAG_RD, &curr_time, 0, "Current tick");
150SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
151 CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
152SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
153 CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
154SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
155 CTLFLAG_RD, &searches, 0, "Number of queue searches");
156SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
157 CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
158SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
159 CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
160SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
161 CTLFLAG_RW, &dn_max_ratio, 0,
162 "Max ratio between dynamic queues and buckets");
163SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
164 CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
165SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
166 CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
167SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
168 CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
1c79356b
A
169#endif
170
9bccf70c 171static int config_pipe(struct dn_pipe *p);
1c79356b
A
172static int ip_dn_ctl(struct sockopt *sopt);
173
174static void rt_unref(struct rtentry *);
175static void dummynet(void *);
1c79356b 176static void dummynet_flush(void);
9bccf70c
A
177void dummynet_drain(void);
178int if_tx_rdy(struct ifnet *ifp);
1c79356b
A
179
180/*
9bccf70c 181 * ip_fw_chain is used when deleting a pipe, because ipfw rules can
1c79356b
A
182 * hold references to the pipe.
183 */
9bccf70c 184extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain_head;
1c79356b
A
185
186static void
187rt_unref(struct rtentry *rt)
188{
189 if (rt == NULL)
190 return ;
191 if (rt->rt_refcnt <= 0)
9bccf70c
A
192 printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt);
193 rtfree(rt);
194}
195
196/*
197 * Heap management functions.
198 *
199 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
200 * Some macros help finding parent/children so we can optimize them.
201 *
202 * heap_init() is called to expand the heap when needed.
203 * Increment size in blocks of 16 entries.
204 * XXX failure to allocate a new element is a pretty bad failure
205 * as we basically stall a whole queue forever!!
206 * Returns 1 on error, 0 on success
207 */
208#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
209#define HEAP_LEFT(x) ( 2*(x) + 1 )
210#define HEAP_IS_LEFT(x) ( (x) & 1 )
211#define HEAP_RIGHT(x) ( 2*(x) + 2 )
212#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
213#define HEAP_INCREMENT 15
214
215static int
216heap_init(struct dn_heap *h, int new_size)
217{
218 struct dn_heap_entry *p;
219
220 if (h->size >= new_size ) {
221 printf("heap_init, Bogus call, have %d want %d\n",
222 h->size, new_size);
223 return 0 ;
224 }
225 new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
226 p = _MALLOC(new_size * sizeof(*p), M_IPFW, M_DONTWAIT );
227 if (p == NULL) {
228 printf(" heap_init, resize %d failed\n", new_size );
229 return 1 ; /* error */
230 }
231 if (h->size > 0) {
232 bcopy(h->p, p, h->size * sizeof(*p) );
233 FREE(h->p, M_IPFW);
234 }
235 h->p = p ;
236 h->size = new_size ;
237 return 0 ;
238}
239
240/*
241 * Insert element in heap. Normally, p != NULL, we insert p in
242 * a new position and bubble up. If p == NULL, then the element is
243 * already in place, and key is the position where to start the
244 * bubble-up.
245 * Returns 1 on failure (cannot allocate new heap entry)
246 *
247 * If offset > 0 the position (index, int) of the element in the heap is
248 * also stored in the element itself at the given offset in bytes.
249 */
250#define SET_OFFSET(heap, node) \
251 if (heap->offset > 0) \
252 *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
253/*
254 * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value.
255 */
256#define RESET_OFFSET(heap, node) \
257 if (heap->offset > 0) \
258 *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
259static int
260heap_insert(struct dn_heap *h, dn_key key1, void *p)
261{
262 int son = h->elements ;
263
264 if (p == NULL) /* data already there, set starting point */
265 son = key1 ;
266 else { /* insert new element at the end, possibly resize */
267 son = h->elements ;
268 if (son == h->size) /* need resize... */
269 if (heap_init(h, h->elements+1) )
270 return 1 ; /* failure... */
271 h->p[son].object = p ;
272 h->p[son].key = key1 ;
273 h->elements++ ;
274 }
275 while (son > 0) { /* bubble up */
276 int father = HEAP_FATHER(son) ;
277 struct dn_heap_entry tmp ;
278
279 if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
280 break ; /* found right position */
281 /* son smaller than father, swap and repeat */
282 HEAP_SWAP(h->p[son], h->p[father], tmp) ;
283 SET_OFFSET(h, son);
284 son = father ;
285 }
286 SET_OFFSET(h, son);
287 return 0 ;
1c79356b
A
288}
289
290/*
9bccf70c 291 * remove top element from heap, or obj if obj != NULL
1c79356b
A
292 */
293static void
9bccf70c
A
294heap_extract(struct dn_heap *h, void *obj)
295{
296 int child, father, max = h->elements - 1 ;
297
298 if (max < 0) {
299 printf("warning, extract from empty heap 0x%p\n", h);
300 return ;
301 }
302 father = 0 ; /* default: move up smallest child */
303 if (obj != NULL) { /* extract specific element, index is at offset */
304 if (h->offset <= 0)
305 panic("*** heap_extract from middle not supported on this heap!!!\n");
306 father = *((int *)((char *)obj + h->offset)) ;
307 if (father < 0 || father >= h->elements) {
308 printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
309 father, h->elements);
310 panic("heap_extract");
311 }
312 }
313 RESET_OFFSET(h, father);
314 child = HEAP_LEFT(father) ; /* left child */
315 while (child <= max) { /* valid entry */
316 if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
317 child = child+1 ; /* take right child, otherwise left */
318 h->p[father] = h->p[child] ;
319 SET_OFFSET(h, father);
320 father = child ;
321 child = HEAP_LEFT(child) ; /* left child for next loop */
322 }
323 h->elements-- ;
324 if (father != max) {
1c79356b 325 /*
9bccf70c 326 * Fill hole with last entry and bubble up, reusing the insert code
1c79356b 327 */
9bccf70c
A
328 h->p[father] = h->p[max] ;
329 heap_insert(h, father, NULL); /* this one cannot fail */
330 }
331}
1c79356b 332
9bccf70c
A
333#if 0
334/*
335 * change object position and update references
336 * XXX this one is never used!
337 */
338static void
339heap_move(struct dn_heap *h, dn_key new_key, void *object)
340{
341 int temp;
342 int i ;
343 int max = h->elements-1 ;
344 struct dn_heap_entry buf ;
1c79356b 345
9bccf70c
A
346 if (h->offset <= 0)
347 panic("cannot move items on this heap");
348
349 i = *((int *)((char *)object + h->offset));
350 if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
351 h->p[i].key = new_key ;
352 for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
353 i = temp ) { /* bubble up */
354 HEAP_SWAP(h->p[i], h->p[temp], buf) ;
355 SET_OFFSET(h, i);
356 }
357 } else { /* must move down */
358 h->p[i].key = new_key ;
359 while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
360 if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
361 temp++ ; /* select child with min key */
362 if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
363 HEAP_SWAP(h->p[i], h->p[temp], buf) ;
364 SET_OFFSET(h, i);
365 } else
366 break ;
367 i = temp ;
1c79356b 368 }
1c79356b 369 }
9bccf70c
A
370 SET_OFFSET(h, i);
371}
372#endif /* heap_move, unused */
373
374/*
375 * heapify() will reorganize data inside an array to maintain the
376 * heap property. It is needed when we delete a bunch of entries.
377 */
378static void
379heapify(struct dn_heap *h)
380{
381 int i ;
382
383 for (i = 0 ; i < h->elements ; i++ )
384 heap_insert(h, i , NULL) ;
385}
386
387/*
388 * cleanup the heap and free data structure
389 */
390static void
391heap_free(struct dn_heap *h)
392{
393 if (h->size >0 )
394 FREE(h->p, M_IPFW);
395 bzero(h, sizeof(*h) );
396}
397
398/*
399 * --- end of heap management functions ---
400 */
401
402/*
403 * Scheduler functions:
404 *
405 * transmit_event() is called when the delay-line needs to enter
406 * the scheduler, either because of existing pkts getting ready,
407 * or new packets entering the queue. The event handled is the delivery
408 * time of the packet.
409 *
410 * ready_event() does something similar with fixed-rate queues, and the
411 * event handled is the finish time of the head pkt.
412 *
413 * wfq_ready_event() does something similar with WF2Q queues, and the
414 * event handled is the start time of the head pkt.
415 *
416 * In all cases, we make sure that the data structures are consistent
417 * before passing pkts out, because this might trigger recursive
418 * invocations of the procedures.
419 */
420static void
421transmit_event(struct dn_pipe *pipe)
422{
423 struct dn_pkt *pkt ;
1c79356b 424
9bccf70c 425 while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) {
1c79356b 426 /*
9bccf70c
A
427 * first unlink, then call procedures, since ip_input() can invoke
428 * ip_output() and viceversa, thus causing nested calls
1c79356b 429 */
9bccf70c 430 pipe->head = DN_NEXT(pkt) ;
1c79356b
A
431
432 /*
9bccf70c
A
433 * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf
434 * (NOT A REAL one, just a small block of malloc'ed memory) with
435 * m_type = MT_DUMMYNET
436 * m_next = actual mbuf to be processed by ip_input/output
437 * m_data = the matching rule
438 * and some other fields.
439 * The block IS FREED HERE because it contains parameters passed
440 * to the called routine.
1c79356b
A
441 */
442 switch (pkt->dn_dir) {
9bccf70c
A
443 case DN_TO_IP_OUT:
444 (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL);
445 rt_unref (pkt->ro.ro_rt) ;
1c79356b 446 break ;
9bccf70c 447
1c79356b
A
448 case DN_TO_IP_IN :
449 ip_input((struct mbuf *)pkt) ;
450 break ;
9bccf70c 451
1c79356b 452#if BRIDGE
9bccf70c
A
453 case DN_TO_BDG_FWD : {
454 struct mbuf *m = (struct mbuf *)pkt ;
455 struct ether_header *eh;
456
457 if (pkt->dn_m->m_len < ETHER_HDR_LEN
458 && (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) {
459 printf("dummynet/bridge: pullup fail, dropping pkt\n");
460 break;
461 }
462 /*
463 * same as ether_input, make eh be a pointer into the mbuf
464 */
465 eh = mtod(pkt->dn_m, struct ether_header *);
466 m_adj(pkt->dn_m, ETHER_HDR_LEN);
467 /*
468 * bdg_forward() wants a pointer to the pseudo-mbuf-header, but
469 * on return it will supply the pointer to the actual packet
470 * (originally pkt->dn_m, but could be something else now) if
471 * it has not consumed it.
472 */
473 m = bdg_forward(m, eh, pkt->ifp);
474 if (m)
475 m_freem(m);
476 }
1c79356b
A
477 break ;
478#endif
9bccf70c 479
1c79356b
A
480 default:
481 printf("dummynet: bad switch %d!\n", pkt->dn_dir);
482 m_freem(pkt->dn_m);
1c79356b
A
483 break ;
484 }
9bccf70c 485 FREE(pkt, M_IPFW);
1c79356b 486 }
9bccf70c
A
487 /* if there are leftover packets, put into the heap for next event */
488 if ( (pkt = pipe->head) )
489 heap_insert(&extract_heap, pkt->output_time, pipe ) ;
490 /* XXX should check errors on heap_insert, by draining the
491 * whole pipe p and hoping in the future we are more successful
492 */
1c79356b 493}
9bccf70c 494
1c79356b 495/*
9bccf70c
A
496 * the following macro computes how many ticks we have to wait
497 * before being able to transmit a packet. The credit is taken from
498 * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
1c79356b 499 */
9bccf70c
A
500#define SET_TICKS(pkt, q, p) \
501 (pkt->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \
502 p->bandwidth ;
503
504/*
505 * extract pkt from queue, compute output time (could be now)
506 * and put into delay line (p_queue)
507 */
508static void
509move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q,
510 struct dn_pipe *p, int len)
1c79356b 511{
9bccf70c
A
512 q->head = DN_NEXT(pkt) ;
513 q->len-- ;
514 q->len_bytes -= len ;
1c79356b 515
9bccf70c 516 pkt->output_time = curr_time + p->delay ;
1c79356b 517
9bccf70c
A
518 if (p->head == NULL)
519 p->head = pkt;
520 else
521 DN_NEXT(p->tail) = pkt;
522 p->tail = pkt;
523 DN_NEXT(p->tail) = NULL;
1c79356b
A
524}
525
526/*
9bccf70c
A
527 * ready_event() is invoked every time the queue must enter the
528 * scheduler, either because the first packet arrives, or because
529 * a previously scheduled event fired.
530 * On invokation, drain as many pkts as possible (could be 0) and then
531 * if there are leftover packets reinsert the pkt in the scheduler.
1c79356b 532 */
9bccf70c
A
533static void
534ready_event(struct dn_flow_queue *q)
1c79356b
A
535{
536 struct dn_pkt *pkt;
9bccf70c
A
537 struct dn_pipe *p = q->fs->pipe ;
538 int p_was_empty ;
1c79356b 539
9bccf70c
A
540 if (p == NULL) {
541 printf("ready_event- pipe is gone\n");
542 return ;
543 }
544 p_was_empty = (p->head == NULL) ;
1c79356b 545
1c79356b 546 /*
9bccf70c
A
547 * schedule fixed-rate queues linked to this pipe:
548 * Account for the bw accumulated since last scheduling, then
549 * drain as many pkts as allowed by q->numbytes and move to
550 * the delay line (in p) computing output time.
551 * bandwidth==0 (no limit) means we can drain the whole queue,
552 * setting len_scaled = 0 does the job.
553 */
554 q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
555 while ( (pkt = q->head) != NULL ) {
556 int len = pkt->dn_m->m_pkthdr.len;
557 int len_scaled = p->bandwidth ? len*8*hz : 0 ;
558 if (len_scaled > q->numbytes )
559 break ;
560 q->numbytes -= len_scaled ;
561 move_pkt(pkt, q, p, len);
562 }
563 /*
564 * If we have more packets queued, schedule next ready event
565 * (can only occur when bandwidth != 0, otherwise we would have
566 * flushed the whole queue in the previous loop).
567 * To this purpose we record the current time and compute how many
568 * ticks to go for the finish time of the packet.
569 */
570 if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */
571 dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
572 q->sched_time = curr_time ;
573 heap_insert(&ready_heap, curr_time + t, (void *)q );
574 /* XXX should check errors on heap_insert, and drain the whole
575 * queue on error hoping next time we are luckier.
576 */
577 } else /* RED needs to know when the queue becomes empty */
578 q->q_time = curr_time;
579 /*
580 * If the delay line was empty call transmit_event(p) now.
581 * Otherwise, the scheduler will take care of it.
1c79356b 582 */
9bccf70c
A
583 if (p_was_empty)
584 transmit_event(p);
585}
1c79356b 586
9bccf70c
A
587/*
588 * Called when we can transmit packets on WF2Q queues. Take pkts out of
589 * the queues at their start time, and enqueue into the delay line.
590 * Packets are drained until p->numbytes < 0. As long as
591 * len_scaled >= p->numbytes, the packet goes into the delay line
592 * with a deadline p->delay. For the last packet, if p->numbytes<0,
593 * there is an additional delay.
594 */
595static void
596ready_event_wfq(struct dn_pipe *p)
597{
598 int p_was_empty = (p->head == NULL) ;
599 struct dn_heap *sch = &(p->scheduler_heap);
600 struct dn_heap *neh = &(p->not_eligible_heap) ;
601
602 if (p->if_name[0] == 0) /* tx clock is simulated */
603 p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
604 else { /* tx clock is for real, the ifq must be empty or this is a NOP */
605 if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
606 return ;
607 else {
608 DEB(printf("pipe %d ready from %s --\n",
609 p->pipe_nr, p->if_name);)
610 }
1c79356b 611 }
9bccf70c 612
1c79356b 613 /*
9bccf70c
A
614 * While we have backlogged traffic AND credit, we need to do
615 * something on the queue.
1c79356b 616 */
9bccf70c
A
617 while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) {
618 if (sch->elements > 0) { /* have some eligible pkts to send out */
619 struct dn_flow_queue *q = sch->p[0].object ;
620 struct dn_pkt *pkt = q->head;
621 struct dn_flow_set *fs = q->fs;
622 u_int64_t len = pkt->dn_m->m_pkthdr.len;
623 int len_scaled = p->bandwidth ? len*8*hz : 0 ;
1c79356b 624
9bccf70c
A
625 heap_extract(sch, NULL); /* remove queue from heap */
626 p->numbytes -= len_scaled ;
627 move_pkt(pkt, q, p, len);
628
629 p->V += (len<<MY_M) / p->sum ; /* update V */
630 q->S = q->F ; /* update start time */
631 if (q->len == 0) { /* Flow not backlogged any more */
632 fs->backlogged-- ;
633 heap_insert(&(p->idle_heap), q->F, q);
634 } else { /* still backlogged */
635 /*
636 * update F and position in backlogged queue, then
637 * put flow in not_eligible_heap (we will fix this later).
638 */
639 len = (q->head)->dn_m->m_pkthdr.len;
640 q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
641 if (DN_KEY_LEQ(q->S, p->V))
642 heap_insert(neh, q->S, q);
643 else
644 heap_insert(sch, q->F, q);
645 }
646 }
647 /*
648 * now compute V = max(V, min(S_i)). Remember that all elements in sch
649 * have by definition S_i <= V so if sch is not empty, V is surely
650 * the max and we must not update it. Conversely, if sch is empty
651 * we only need to look at neh.
652 */
653 if (sch->elements == 0 && neh->elements > 0)
654 p->V = MAX64 ( p->V, neh->p[0].key );
655 /* move from neh to sch any packets that have become eligible */
656 while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) {
657 struct dn_flow_queue *q = neh->p[0].object ;
658 heap_extract(neh, NULL);
659 heap_insert(sch, q->F, q);
660 }
661
662 if (p->if_name[0] != '\0') {/* tx clock is from a real thing */
663 p->numbytes = -1 ; /* mark not ready for I/O */
664 break ;
665 }
1c79356b 666 }
9bccf70c
A
667 if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0
668 && p->idle_heap.elements > 0) {
669 /*
670 * no traffic and no events scheduled. We can get rid of idle-heap.
671 */
672 int i ;
673
674 for (i = 0 ; i < p->idle_heap.elements ; i++) {
675 struct dn_flow_queue *q = p->idle_heap.p[i].object ;
1c79356b 676
9bccf70c
A
677 q->F = 0 ;
678 q->S = q->F + 1 ;
679 }
680 p->sum = 0 ;
681 p->V = 0 ;
682 p->idle_heap.elements = 0 ;
683 }
684 /*
685 * If we are getting clocks from dummynet (not a real interface) and
686 * If we are under credit, schedule the next ready event.
687 * Also fix the delivery time of the last packet.
1c79356b 688 */
9bccf70c
A
689 if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */
690 dn_key t=0 ; /* number of ticks i have to wait */
1c79356b 691
9bccf70c
A
692 if (p->bandwidth > 0)
693 t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ;
694 p->tail->output_time += t ;
695 p->sched_time = curr_time ;
696 heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
697 /* XXX should check errors on heap_insert, and drain the whole
698 * queue on error hoping next time we are luckier.
699 */
1c79356b 700 }
9bccf70c
A
701 /*
702 * If the delay line was empty call transmit_event(p) now.
703 * Otherwise, the scheduler will take care of it.
704 */
705 if (p_was_empty)
706 transmit_event(p);
1c79356b
A
707}
708
709/*
9bccf70c
A
710 * This is called once per tick, or HZ times per second. It is used to
711 * increment the current tick counter and schedule expired events.
1c79356b
A
712 */
713static void
9bccf70c 714dummynet(void * __unused unused)
1c79356b 715{
9bccf70c
A
716 void *p ; /* generic parameter to handler */
717 struct dn_heap *h ;
718 int s ;
719 struct dn_heap *heaps[3];
720 int i;
721 struct dn_pipe *pe ;
1c79356b 722
9bccf70c
A
723 heaps[0] = &ready_heap ; /* fixed-rate queues */
724 heaps[1] = &wfq_ready_heap ; /* wfq queues */
725 heaps[2] = &extract_heap ; /* delay line */
726 s = splimp(); /* see note on top, splnet() is not enough */
727 curr_time++ ;
728 for (i=0; i < 3 ; i++) {
729 h = heaps[i];
730 while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
731 DDB(if (h->p[0].key > curr_time)
732 printf("-- dummynet: warning, heap %d is %d ticks late\n",
733 i, (int)(curr_time - h->p[0].key));)
734 p = h->p[0].object ; /* store a copy before heap_extract */
735 heap_extract(h, NULL); /* need to extract before processing */
736 if (i == 0)
737 ready_event(p) ;
738 else if (i == 1) {
739 struct dn_pipe *pipe = p;
740 if (pipe->if_name[0] != '\0')
741 printf("*** bad ready_event_wfq for pipe %s\n",
742 pipe->if_name);
743 else
744 ready_event_wfq(p) ;
745 } else
746 transmit_event(p);
747 }
1c79356b 748 }
9bccf70c
A
749 /* sweep pipes trying to expire idle flow_queues */
750 for (pe = all_pipes; pe ; pe = pe->next )
751 if (pe->idle_heap.elements > 0 &&
752 DN_KEY_LT(pe->idle_heap.p[0].key, pe->V) ) {
753 struct dn_flow_queue *q = pe->idle_heap.p[0].object ;
1c79356b 754
9bccf70c
A
755 heap_extract(&(pe->idle_heap), NULL);
756 q->S = q->F + 1 ; /* mark timestamp as invalid */
757 pe->sum -= q->fs->weight ;
758 }
759 splx(s);
760 timeout(dummynet, NULL, 1);
761}
762
1c79356b 763/*
9bccf70c 764 * called by an interface when tx_rdy occurs.
1c79356b 765 */
9bccf70c
A
766int
767if_tx_rdy(struct ifnet *ifp)
1c79356b 768{
9bccf70c 769 struct dn_pipe *p;
1c79356b 770
9bccf70c
A
771 for (p = all_pipes; p ; p = p->next )
772 if (p->ifp == ifp)
773 break ;
774 if (p == NULL) {
775 char buf[32];
776 sprintf(buf, "%s%d",ifp->if_name, ifp->if_unit);
777 for (p = all_pipes; p ; p = p->next )
778 if (!strcmp(p->if_name, buf) ) {
779 p->ifp = ifp ;
780 DEB(printf("++ tx rdy from %s (now found)\n", buf);)
781 break ;
782 }
783 }
784 if (p != NULL) {
785 DEB(printf("++ tx rdy from %s%d - qlen %d\n", ifp->if_name,
786 ifp->if_unit, ifp->if_snd.ifq_len);)
787 p->numbytes = 0 ; /* mark ready for I/O */
788 ready_event_wfq(p);
1c79356b 789 }
9bccf70c 790 return 0;
1c79356b
A
791}
792
1c79356b 793/*
9bccf70c
A
794 * Unconditionally expire empty queues in case of shortage.
795 * Returns the number of queues freed.
1c79356b 796 */
9bccf70c
A
797static int
798expire_queues(struct dn_flow_set *fs)
1c79356b 799{
9bccf70c
A
800 struct dn_flow_queue *q, *prev ;
801 int i, initial_elements = fs->rq_elements ;
1c79356b 802
9bccf70c
A
803 if (fs->last_expired == time_second)
804 return 0 ;
805 fs->last_expired = time_second ;
806 for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */
807 for (prev=NULL, q = fs->rq[i] ; q != NULL ; )
808 if (q->head != NULL || q->S != q->F+1) {
809 prev = q ;
810 q = q->next ;
811 } else { /* entry is idle, expire it */
812 struct dn_flow_queue *old_q = q ;
813
814 if (prev != NULL)
815 prev->next = q = q->next ;
816 else
817 fs->rq[i] = q = q->next ;
818 fs->rq_elements-- ;
819 FREE(old_q, M_IPFW);
1c79356b 820 }
9bccf70c 821 return initial_elements - fs->rq_elements ;
1c79356b
A
822}
823
824/*
9bccf70c
A
825 * If room, create a new queue and put at head of slot i;
826 * otherwise, create or use the default queue.
1c79356b 827 */
9bccf70c
A
828static struct dn_flow_queue *
829create_queue(struct dn_flow_set *fs, int i)
1c79356b 830{
9bccf70c 831 struct dn_flow_queue *q ;
1c79356b 832
9bccf70c
A
833 if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
834 expire_queues(fs) == 0) {
835 /*
836 * No way to get room, use or create overflow queue.
837 */
838 i = fs->rq_size ;
839 if ( fs->rq[i] != NULL )
840 return fs->rq[i] ;
841 }
842 q = _MALLOC(sizeof(*q), M_IPFW, M_DONTWAIT) ;
843 if (q == NULL) {
844 printf("sorry, cannot allocate queue for new flow\n");
845 return NULL ;
846 }
847 bzero(q, sizeof(*q) ); /* needed */
848 q->fs = fs ;
849 q->hash_slot = i ;
850 q->next = fs->rq[i] ;
851 q->S = q->F + 1; /* hack - mark timestamp as invalid */
852 fs->rq[i] = q ;
853 fs->rq_elements++ ;
854 return q ;
855}
1c79356b 856
9bccf70c
A
857/*
858 * Given a flow_set and a pkt in last_pkt, find a matching queue
859 * after appropriate masking. The queue is moved to front
860 * so that further searches take less time.
861 */
862static struct dn_flow_queue *
863find_queue(struct dn_flow_set *fs)
864{
865 int i = 0 ; /* we need i and q for new allocations */
866 struct dn_flow_queue *q, *prev;
1c79356b 867
9bccf70c
A
868 if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) )
869 q = fs->rq[0] ;
870 else {
871 /* first, do the masking */
872 last_pkt.dst_ip &= fs->flow_mask.dst_ip ;
873 last_pkt.src_ip &= fs->flow_mask.src_ip ;
874 last_pkt.dst_port &= fs->flow_mask.dst_port ;
875 last_pkt.src_port &= fs->flow_mask.src_port ;
876 last_pkt.proto &= fs->flow_mask.proto ;
877 last_pkt.flags = 0 ; /* we don't care about this one */
878 /* then, hash function */
879 i = ( (last_pkt.dst_ip) & 0xffff ) ^
880 ( (last_pkt.dst_ip >> 15) & 0xffff ) ^
881 ( (last_pkt.src_ip << 1) & 0xffff ) ^
882 ( (last_pkt.src_ip >> 16 ) & 0xffff ) ^
883 (last_pkt.dst_port << 1) ^ (last_pkt.src_port) ^
884 (last_pkt.proto );
885 i = i % fs->rq_size ;
886 /* finally, scan the current list for a match */
887 searches++ ;
888 for (prev=NULL, q = fs->rq[i] ; q ; ) {
889 search_steps++;
890 if (bcmp(&last_pkt, &(q->id), sizeof(q->id) ) == 0)
891 break ; /* found */
892 else if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
893 /* entry is idle and not in any heap, expire it */
894 struct dn_flow_queue *old_q = q ;
1c79356b 895
9bccf70c
A
896 if (prev != NULL)
897 prev->next = q = q->next ;
898 else
899 fs->rq[i] = q = q->next ;
900 fs->rq_elements-- ;
901 FREE(old_q, M_IPFW);
902 continue ;
903 }
904 prev = q ;
905 q = q->next ;
1c79356b 906 }
9bccf70c
A
907 if (q && prev != NULL) { /* found and not in front */
908 prev->next = q->next ;
909 q->next = fs->rq[i] ;
910 fs->rq[i] = q ;
1c79356b 911 }
9bccf70c
A
912 }
913 if (q == NULL) { /* no match, need to allocate a new entry */
914 q = create_queue(fs, i);
915 if (q != NULL)
916 q->id = last_pkt ;
917 }
918 return q ;
919}
920
921static int
922red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
923{
924 /*
925 * RED algorithm
926 *
927 * RED calculates the average queue size (avg) using a low-pass filter
928 * with an exponential weighted (w_q) moving average:
929 * avg <- (1-w_q) * avg + w_q * q_size
930 * where q_size is the queue length (measured in bytes or * packets).
931 *
932 * If q_size == 0, we compute the idle time for the link, and set
933 * avg = (1 - w_q)^(idle/s)
934 * where s is the time needed for transmitting a medium-sized packet.
935 *
936 * Now, if avg < min_th the packet is enqueued.
937 * If avg > max_th the packet is dropped. Otherwise, the packet is
938 * dropped with probability P function of avg.
939 *
940 */
941
942 int64_t p_b = 0;
943 /* queue in bytes or packets ? */
944 u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len;
945
946 DEB(printf("\n%d q: %2u ", (int) curr_time, q_size);)
947
948 /* average queue size estimation */
949 if (q_size != 0) {
950 /*
951 * queue is not empty, avg <- avg + (q_size - avg) * w_q
952 */
953 int diff = SCALE(q_size) - q->avg;
954 int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q);
955
956 q->avg += (int) v;
957 } else {
958 /*
959 * queue is empty, find for how long the queue has been
960 * empty and use a lookup table for computing
961 * (1 - * w_q)^(idle_time/s) where s is the time to send a
962 * (small) packet.
963 * XXX check wraps...
964 */
965 if (q->avg) {
966 u_int t = (curr_time - q->q_time) / fs->lookup_step;
967
968 q->avg = (t < fs->lookup_depth) ?
969 SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
970 }
971 }
972 DEB(printf("avg: %u ", SCALE_VAL(q->avg));)
973
974 /* should i drop ? */
975
976 if (q->avg < fs->min_th) {
977 q->count = -1;
978 return 0; /* accept packet ; */
979 }
980 if (q->avg >= fs->max_th) { /* average queue >= max threshold */
981 if (fs->flags_fs & DN_IS_GENTLE_RED) {
1c79356b 982 /*
9bccf70c
A
983 * According to Gentle-RED, if avg is greater than max_th the
984 * packet is dropped with a probability
985 * p_b = c_3 * avg - c_4
986 * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p
1c79356b 987 */
9bccf70c
A
988 p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4;
989 } else {
990 q->count = -1;
991 printf("- drop");
992 return 1 ;
993 }
994 } else if (q->avg > fs->min_th) {
995 /*
996 * we compute p_b using the linear dropping function p_b = c_1 *
997 * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 =
998 * max_p * min_th / (max_th - min_th)
999 */
1000 p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2;
1001 }
1002 if (fs->flags_fs & DN_QSIZE_IS_BYTES)
1003 p_b = (p_b * len) / fs->max_pkt_size;
1004 if (++q->count == 0)
1005 q->random = random() & 0xffff;
1006 else {
1007 /*
1008 * q->count counts packets arrived since last drop, so a greater
1009 * value of q->count means a greater packet drop probability.
1010 */
1011 if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) {
1012 q->count = 0;
1013 DEB(printf("- red drop");)
1014 /* after a drop we calculate a new random value */
1015 q->random = random() & 0xffff;
1016 return 1; /* drop */
1017 }
1018 }
1019 /* end of RED algorithm */
1020 return 0 ; /* accept */
1021}
1022
1023static __inline
1024struct dn_flow_set *
1025locate_flowset(int pipe_nr, struct ip_fw_chain *rule)
1026{
1027 struct dn_flow_set *fs = NULL ;
1028
1029 if ( (rule->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_QUEUE )
1030 for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next)
1031 ;
1032 else {
1033 struct dn_pipe *p1;
1034 for (p1 = all_pipes; p1 && p1->pipe_nr != pipe_nr; p1 = p1->next)
1035 ;
1036 if (p1 != NULL)
1037 fs = &(p1->fs) ;
1038 }
1039 if (fs != NULL)
1040 rule->rule->pipe_ptr = fs ; /* record for the future */
1041 return fs ;
1042}
1043
1044/*
1045 * dummynet hook for packets. Below 'pipe' is a pipe or a queue
1046 * depending on whether WF2Q or fixed bw is used.
1047 */
1048int
1049dummynet_io(int pipe_nr, int dir, /* pipe_nr can also be a fs_nr */
1050 struct mbuf *m, struct ifnet *ifp, struct route *ro,
1051 struct sockaddr_in *dst,
1052 struct ip_fw_chain *rule, int flags)
1053{
1054 struct dn_pkt *pkt;
1055 struct dn_flow_set *fs;
1056 struct dn_pipe *pipe ;
1057 u_int64_t len = m->m_pkthdr.len ;
1058 struct dn_flow_queue *q = NULL ;
1059 int s ;
1060
1061 s = splimp();
1062
1063 pipe_nr &= 0xffff ;
1064
1065 if ( (fs = rule->rule->pipe_ptr) == NULL ) {
1066 fs = locate_flowset(pipe_nr, rule);
1067 if (fs == NULL)
1068 goto dropit ; /* this queue/pipe does not exist! */
1069 }
1070 pipe = fs->pipe ;
1071 if (pipe == NULL) { /* must be a queue, try find a matching pipe */
1072 for (pipe = all_pipes; pipe && pipe->pipe_nr != fs->parent_nr;
1073 pipe = pipe->next)
1074 ;
1075 if (pipe != NULL)
1076 fs->pipe = pipe ;
1077 else {
1078 printf("No pipe %d for queue %d, drop pkt\n",
1079 fs->parent_nr, fs->fs_nr);
1080 goto dropit ;
1081 }
1082 }
1083 q = find_queue(fs);
1084 if ( q == NULL )
1085 goto dropit ; /* cannot allocate queue */
1086 /*
1087 * update statistics, then check reasons to drop pkt
1088 */
1089 q->tot_bytes += len ;
1090 q->tot_pkts++ ;
1091 if ( fs->plr && random() < fs->plr )
1092 goto dropit ; /* random pkt drop */
1093 if ( fs->flags_fs & DN_QSIZE_IS_BYTES) {
1094 if (q->len_bytes > fs->qsize)
1095 goto dropit ; /* queue size overflow */
1096 } else {
1097 if (q->len >= fs->qsize)
1098 goto dropit ; /* queue count overflow */
1099 }
1100 if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) )
1101 goto dropit ;
1102
1103 pkt = (struct dn_pkt *)_MALLOC(sizeof (*pkt), M_IPFW, M_NOWAIT) ;
1104 if ( pkt == NULL )
1105 goto dropit ; /* cannot allocate packet header */
1106 /* ok, i can handle the pkt now... */
1107 bzero(pkt, sizeof(*pkt) ); /* XXX expensive, see if we can remove it*/
1108 /* build and enqueue packet + parameters */
1109 pkt->hdr.mh_type = MT_DUMMYNET ;
1110 (struct ip_fw_chain *)pkt->hdr.mh_data = rule ;
1111 DN_NEXT(pkt) = NULL;
1112 pkt->dn_m = m;
1113 pkt->dn_dir = dir ;
1114
1115 pkt->ifp = ifp;
1116 if (dir == DN_TO_IP_OUT) {
1117 /*
1118 * We need to copy *ro because for ICMP pkts (and maybe others)
1119 * the caller passed a pointer into the stack; dst might also be
1120 * a pointer into *ro so it needs to be updated.
1121 */
1122 pkt->ro = *ro;
1123 if (ro->ro_rt)
1124 rtref(ro->ro_rt);
1125 if (dst == (struct sockaddr_in *)&ro->ro_dst) /* dst points into ro */
1126 dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;
1127
1128 pkt->dn_dst = dst;
1129 pkt->flags = flags ;
1130 }
1131 if (q->head == NULL)
1132 q->head = pkt;
1133 else
1134 DN_NEXT(q->tail) = pkt;
1135 q->tail = pkt;
1136 q->len++;
1137 q->len_bytes += len ;
1138
1139 if ( q->head != pkt ) /* flow was not idle, we are done */
1140 goto done;
1141 /*
1142 * If we reach this point the flow was previously idle, so we need
1143 * to schedule it. This involves different actions for fixed-rate or
1144 * WF2Q queues.
1145 */
1146 if ( (rule->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE ) {
1147 /*
1148 * Fixed-rate queue: just insert into the ready_heap.
1149 */
1150 dn_key t = 0 ;
1151 if (pipe->bandwidth)
1152 t = SET_TICKS(pkt, q, pipe);
1153 q->sched_time = curr_time ;
1154 if (t == 0) /* must process it now */
1155 ready_event( q );
1156 else
1157 heap_insert(&ready_heap, curr_time + t , q );
1158 } else {
1159 /*
1160 * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
1161 * set S to the virtual time V for the controlling pipe, and update
1162 * the sum of weights for the pipe; otherwise, remove flow from
1163 * idle_heap and set S to max(F,V).
1164 * Second, compute finish time F = S + len/weight.
1165 * Third, if pipe was idle, update V=max(S, V).
1166 * Fourth, count one more backlogged flow.
1167 */
1168 if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */
1169 q->S = pipe->V ;
1170 pipe->sum += fs->weight ; /* add weight of new queue */
1171 } else {
1172 heap_extract(&(pipe->idle_heap), q);
1173 q->S = MAX64(q->F, pipe->V ) ;
1174 }
1175 q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight;
1176
1177 if (pipe->not_eligible_heap.elements == 0 &&
1178 pipe->scheduler_heap.elements == 0)
1179 pipe->V = MAX64 ( q->S, pipe->V );
1180 fs->backlogged++ ;
1181 /*
1182 * Look at eligibility. A flow is not eligibile if S>V (when
1183 * this happens, it means that there is some other flow already
1184 * scheduled for the same pipe, so the scheduler_heap cannot be
1185 * empty). If the flow is not eligible we just store it in the
1186 * not_eligible_heap. Otherwise, we store in the scheduler_heap
1187 * and possibly invoke ready_event_wfq() right now if there is
1188 * leftover credit.
1189 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
1190 * and for all flows in not_eligible_heap (NEH), S_i > V .
1191 * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
1192 * we only need to look into NEH.
1193 */
1194 if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */
1195 if (pipe->scheduler_heap.elements == 0)
1196 printf("++ ouch! not eligible but empty scheduler!\n");
1197 heap_insert(&(pipe->not_eligible_heap), q->S, q);
1198 } else {
1199 heap_insert(&(pipe->scheduler_heap), q->F, q);
1200 if (pipe->numbytes >= 0) { /* pipe is idle */
1201 if (pipe->scheduler_heap.elements != 1)
1202 printf("*** OUCH! pipe should have been idle!\n");
1203 DEB(printf("Waking up pipe %d at %d\n",
1204 pipe->pipe_nr, (int)(q->F >> MY_M)); )
1205 pipe->sched_time = curr_time ;
1206 ready_event_wfq(pipe);
1c79356b 1207 }
9bccf70c
A
1208 }
1209 }
1210done:
1211 splx(s);
1212 return 0;
1213
1214dropit:
1215 splx(s);
1216 if (q)
1217 q->drops++ ;
1218 m_freem(m);
1219 return ENOBUFS ;
1220}
1221
1222/*
1223 * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
1224 * Doing this would probably save us the initial bzero of dn_pkt
1225 */
1226#define DN_FREE_PKT(pkt) { \
1227 struct dn_pkt *n = pkt ; \
1228 rt_unref ( n->ro.ro_rt ) ; \
1229 m_freem(n->dn_m); \
1230 pkt = DN_NEXT(n) ; \
1231 FREE(n, M_IPFW) ; }
1232
1233/*
1234 * Dispose all packets and flow_queues on a flow_set.
1235 * If all=1, also remove red lookup table and other storage,
1236 * including the descriptor itself.
1237 * For the one in dn_pipe MUST also cleanup ready_heap...
1238 */
1239static void
1240purge_flow_set(struct dn_flow_set *fs, int all)
1241{
1242 struct dn_pkt *pkt ;
1243 struct dn_flow_queue *q, *qn ;
1244 int i ;
1245
1246 for (i = 0 ; i <= fs->rq_size ; i++ ) {
1247 for (q = fs->rq[i] ; q ; q = qn ) {
1248 for (pkt = q->head ; pkt ; )
1249 DN_FREE_PKT(pkt) ;
1250 qn = q->next ;
1251 FREE(q, M_IPFW);
1252 }
1253 fs->rq[i] = NULL ;
1254 }
1255 fs->rq_elements = 0 ;
1256 if (all) {
1257 /* RED - free lookup table */
1258 if (fs->w_q_lookup)
1259 FREE(fs->w_q_lookup, M_IPFW);
1260 if (fs->rq)
1261 FREE(fs->rq, M_IPFW);
1262 /* if this fs is not part of a pipe, free it */
1263 if (fs->pipe && fs != &(fs->pipe->fs) )
1264 FREE(fs, M_IPFW);
1265 }
1266}
1267
1268/*
1269 * Dispose all packets queued on a pipe (not a flow_set).
1270 * Also free all resources associated to a pipe, which is about
1271 * to be deleted.
1272 */
1273static void
1274purge_pipe(struct dn_pipe *pipe)
1275{
1276 struct dn_pkt *pkt ;
1277
1278 purge_flow_set( &(pipe->fs), 1 );
1279
1280 for (pkt = pipe->head ; pkt ; )
1281 DN_FREE_PKT(pkt) ;
1282
1283 heap_free( &(pipe->scheduler_heap) );
1284 heap_free( &(pipe->not_eligible_heap) );
1285 heap_free( &(pipe->idle_heap) );
1286}
1287
1288/*
1289 * Delete all pipes and heaps returning memory. Must also
1290 * remove references from all ipfw rules to all pipes.
1291 */
1292static void
1293dummynet_flush()
1294{
1295 struct dn_pipe *curr_p, *p ;
1296 struct ip_fw_chain *chain ;
1297 struct dn_flow_set *fs, *curr_fs;
1298 int s ;
1299
1300 s = splimp() ;
1301
1302 /* remove all references to pipes ...*/
1303 LIST_FOREACH(chain, &ip_fw_chain_head, next)
1304 chain->rule->pipe_ptr = NULL ;
1305 /* prevent future matches... */
1306 p = all_pipes ;
1307 all_pipes = NULL ;
1308 fs = all_flow_sets ;
1309 all_flow_sets = NULL ;
1310 /* and free heaps so we don't have unwanted events */
1311 heap_free(&ready_heap);
1312 heap_free(&wfq_ready_heap);
1313 heap_free(&extract_heap);
1314 splx(s) ;
1315 /*
1316 * Now purge all queued pkts and delete all pipes
1317 */
1318 /* scan and purge all flow_sets. */
1319 for ( ; fs ; ) {
1320 curr_fs = fs ;
1321 fs = fs->next ;
1322 purge_flow_set(curr_fs, 1);
1323 }
1324 for ( ; p ; ) {
1325 purge_pipe(p);
1326 curr_p = p ;
1327 p = p->next ;
1328 FREE(q, M_IPFW);
1329 }
1330}
1331
1332
1333extern struct ip_fw_chain *ip_fw_default_rule ;
1334static void
1335dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
1336{
1337 int i ;
1338 struct dn_flow_queue *q ;
1339 struct dn_pkt *pkt ;
1340
1341 for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */
1342 for (q = fs->rq[i] ; q ; q = q->next )
1343 for (pkt = q->head ; pkt ; pkt = DN_NEXT(pkt) )
1344 if (pkt->hdr.mh_data == r)
1345 pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
1346}
1347/*
1348 * when a firewall rule is deleted, scan all queues and remove the flow-id
1349 * from packets matching this rule.
1350 */
1351void
1352dn_rule_delete(void *r)
1353{
1354 struct dn_pipe *p ;
1355 struct dn_pkt *pkt ;
1356 struct dn_flow_set *fs ;
1357
1358 /*
1359 * If the rule references a queue (dn_flow_set), then scan
1360 * the flow set, otherwise scan pipes. Should do either, but doing
1361 * both does not harm.
1362 */
1363 for ( fs = all_flow_sets ; fs ; fs = fs->next )
1364 dn_rule_delete_fs(fs, r);
1365 for ( p = all_pipes ; p ; p = p->next ) {
1366 fs = &(p->fs) ;
1367 dn_rule_delete_fs(fs, r);
1368 for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) )
1369 if (pkt->hdr.mh_data == r)
1370 pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
1371 }
1372}
1373
1374/*
1375 * setup RED parameters
1376 */
1377static int
1378config_red(struct dn_flow_set *p, struct dn_flow_set * x)
1379{
1380 int i;
1381
1382 x->w_q = p->w_q;
1383 x->min_th = SCALE(p->min_th);
1384 x->max_th = SCALE(p->max_th);
1385 x->max_p = p->max_p;
1386
1387 x->c_1 = p->max_p / (p->max_th - p->min_th);
1388 x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
1389 if (x->flags_fs & DN_IS_GENTLE_RED) {
1390 x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
1391 x->c_4 = (SCALE(1) - 2 * p->max_p);
1392 }
1393
1394 /* if the lookup table already exist, free and create it again */
1395 if (x->w_q_lookup)
1396 FREE(x->w_q_lookup, M_IPFW);
1397 if (red_lookup_depth == 0) {
1398 printf("\nnet.inet.ip.dummynet.red_lookup_depth must be > 0");
1399 FREE(x, M_IPFW);
1400 return EINVAL;
1401 }
1402 x->lookup_depth = red_lookup_depth;
1403 x->w_q_lookup = (u_int *) _MALLOC(x->lookup_depth * sizeof(int),
1404 M_IPFW, M_DONTWAIT);
1405 if (x->w_q_lookup == NULL) {
1406 printf("sorry, cannot allocate red lookup table\n");
1407 FREE(x, M_IPFW);
1408 return ENOSPC;
1409 }
1410
1411 /* fill the lookup table with (1 - w_q)^x */
1412 x->lookup_step = p->lookup_step ;
1413 x->lookup_weight = p->lookup_weight ;
1414 x->w_q_lookup[0] = SCALE(1) - x->w_q;
1415 for (i = 1; i < x->lookup_depth; i++)
1416 x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
1417 if (red_avg_pkt_size < 1)
1418 red_avg_pkt_size = 512 ;
1419 x->avg_pkt_size = red_avg_pkt_size ;
1420 if (red_max_pkt_size < 1)
1421 red_max_pkt_size = 1500 ;
1422 x->max_pkt_size = red_max_pkt_size ;
1423 return 0 ;
1424}
1425
1426static int
1427alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs)
1428{
1429 if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */
1430 int l = pfs->rq_size;
1431
1432 if (l == 0)
1433 l = dn_hash_size;
1434 if (l < 4)
1435 l = 4;
1436 else if (l > 1024)
1437 l = 1024;
1438 x->rq_size = l;
1439 } else /* one is enough for null mask */
1440 x->rq_size = 1;
1441 x->rq = _MALLOC((1 + x->rq_size) * sizeof(struct dn_flow_queue *),
1442 M_IPFW, M_DONTWAIT);
1443 if (x->rq == NULL) {
1444 printf("sorry, cannot allocate queue\n");
1445 return ENOSPC;
1446 }
1447 bzero(x->rq, (1+x->rq_size) * sizeof(struct dn_flow_queue *));
1448 x->rq_elements = 0;
1449 return 0 ;
1450}
1451
1452static void
1453set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
1454{
1455 x->flags_fs = src->flags_fs;
1456 x->qsize = src->qsize;
1457 x->plr = src->plr;
1458 x->flow_mask = src->flow_mask;
1459 if (x->flags_fs & DN_QSIZE_IS_BYTES) {
1460 if (x->qsize > 1024*1024)
1461 x->qsize = 1024*1024 ;
1462 } else {
1463 if (x->qsize == 0)
1464 x->qsize = 50 ;
1465 if (x->qsize > 100)
1466 x->qsize = 50 ;
1467 }
1468 /* configuring RED */
1469 if ( x->flags_fs & DN_IS_RED )
1470 config_red(src, x) ; /* XXX should check errors */
1471}
1472
1473/*
1474 * setup pipe or queue parameters.
1475 */
1476
1477static int
1478config_pipe(struct dn_pipe *p)
1479{
1480 int s ;
1481 struct dn_flow_set *pfs = &(p->fs);
1482
1483 /*
1484 * The config program passes parameters as follows:
1485 * bw = bits/second (0 means no limits),
1486 * delay = ms, must be translated into ticks.
1487 * qsize = slots/bytes
1488 */
1489 p->delay = ( p->delay * hz ) / 1000 ;
1490 /* We need either a pipe number or a flow_set number */
1491 if (p->pipe_nr == 0 && pfs->fs_nr == 0)
1492 return EINVAL ;
1493 if (p->pipe_nr != 0 && pfs->fs_nr != 0)
1494 return EINVAL ;
1495 if (p->pipe_nr != 0) { /* this is a pipe */
1496 struct dn_pipe *x, *a, *b;
1497 /* locate pipe */
1498 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
1c79356b 1499 a = b , b = b->next) ;
9bccf70c
A
1500
1501 if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */
1502 x = _MALLOC(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ;
1503 if (x == NULL) {
1504 printf("ip_dummynet.c: no memory for new pipe\n");
1505 return ENOSPC;
1c79356b 1506 }
9bccf70c
A
1507 bzero(x, sizeof(struct dn_pipe));
1508 x->pipe_nr = p->pipe_nr;
1509 x->fs.pipe = x ;
1510 /* idle_heap is the only one from which we extract from the middle.
1511 */
1512 x->idle_heap.size = x->idle_heap.elements = 0 ;
1513 x->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos);
1514 } else
1515 x = b;
1516
1517 x->bandwidth = p->bandwidth ;
1518 x->numbytes = 0; /* just in case... */
1519 bcopy(p->if_name, x->if_name, sizeof(p->if_name) );
1520 x->ifp = NULL ; /* reset interface ptr */
1521 x->delay = p->delay ;
1522 set_fs_parms(&(x->fs), pfs);
1c79356b 1523
1c79356b 1524
9bccf70c
A
1525 if ( x->fs.rq == NULL ) { /* a new pipe */
1526 s = alloc_hash(&(x->fs), pfs) ;
1527 if (s) {
1528 FREE(x, M_IPFW);
1529 return s ;
1530 }
1531 s = splimp() ;
1532 x->next = b ;
1533 if (a == NULL)
1534 all_pipes = x ;
1535 else
1536 a->next = x ;
1537 splx(s);
1538 }
1539 } else { /* config queue */
1540 struct dn_flow_set *x, *a, *b ;
1541
1542 /* locate flow_set */
1543 for (a=NULL, b=all_flow_sets ; b && b->fs_nr < pfs->fs_nr ;
1c79356b 1544 a = b , b = b->next) ;
1c79356b 1545
9bccf70c
A
1546 if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new */
1547 if (pfs->parent_nr == 0) /* need link to a pipe */
1548 return EINVAL ;
1549 x = _MALLOC(sizeof(struct dn_flow_set), M_IPFW, M_DONTWAIT);
1550 if (x == NULL) {
1551 printf("ip_dummynet.c: no memory for new flow_set\n");
1552 return ENOSPC;
1c79356b 1553 }
9bccf70c
A
1554 bzero(x, sizeof(struct dn_flow_set));
1555 x->fs_nr = pfs->fs_nr;
1556 x->parent_nr = pfs->parent_nr;
1557 x->weight = pfs->weight ;
1558 if (x->weight == 0)
1559 x->weight = 1 ;
1560 else if (x->weight > 100)
1561 x->weight = 100 ;
1562 } else {
1563 /* Change parent pipe not allowed; must delete and recreate */
1564 if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr)
1565 return EINVAL ;
1566 x = b;
1c79356b 1567 }
9bccf70c
A
1568 set_fs_parms(x, pfs);
1569
1570 if ( x->rq == NULL ) { /* a new flow_set */
1571 s = alloc_hash(x, pfs) ;
1572 if (s) {
1573 FREE(x, M_IPFW);
1574 return s ;
1575 }
1576 s = splimp() ;
1577 x->next = b;
1578 if (a == NULL)
1579 all_flow_sets = x;
1580 else
1581 a->next = x;
1582 splx(s);
1583 }
1584 }
1585 return 0 ;
1586}
1587
1588/*
1589 * Helper function to remove from a heap queues which are linked to
1590 * a flow_set about to be deleted.
1591 */
1592static void
1593fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs)
1594{
1595 int i = 0, found = 0 ;
1596 for (; i < h->elements ;)
1597 if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) {
1598 h->elements-- ;
1599 h->p[i] = h->p[h->elements] ;
1600 found++ ;
1601 } else
1602 i++ ;
1603 if (found)
1604 heapify(h);
1c79356b
A
1605}
1606
9bccf70c
A
1607/*
1608 * helper function to remove a pipe from a heap (can be there at most once)
1609 */
1610static void
1611pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p)
1612{
1613 if (h->elements > 0) {
1614 int i = 0 ;
1615 for (i=0; i < h->elements ; i++ ) {
1616 if (h->p[i].object == p) { /* found it */
1617 h->elements-- ;
1618 h->p[i] = h->p[h->elements] ;
1619 heapify(h);
1620 break ;
1621 }
1622 }
1623 }
1624}
1625
1626/*
1627 * drain all queues. Called in case of severe mbuf shortage.
1628 */
1c79356b 1629void
9bccf70c 1630dummynet_drain()
1c79356b 1631{
9bccf70c
A
1632 struct dn_flow_set *fs;
1633 struct dn_pipe *p;
1634 struct dn_pkt *pkt;
1635
1636 heap_free(&ready_heap);
1637 heap_free(&wfq_ready_heap);
1638 heap_free(&extract_heap);
1639 /* remove all references to this pipe from flow_sets */
1640 for (fs = all_flow_sets; fs; fs= fs->next )
1641 purge_flow_set(fs, 0);
1642
1643 for (p = all_pipes; p; p= p->next ) {
1644 purge_flow_set(&(p->fs), 0);
1645 for (pkt = p->head ; pkt ; )
1646 DN_FREE_PKT(pkt) ;
1647 p->head = p->tail = NULL ;
1648 }
1c79356b
A
1649}
1650
9bccf70c
A
1651/*
1652 * Fully delete a pipe or a queue, cleaning up associated info.
1653 */
1654static int
1655delete_pipe(struct dn_pipe *p)
1656{
1657 int s ;
1658 struct ip_fw_chain *chain ;
1659
1660 if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
1661 return EINVAL ;
1662 if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
1663 return EINVAL ;
1664 if (p->pipe_nr != 0) { /* this is an old-style pipe */
1665 struct dn_pipe *a, *b;
1666 struct dn_flow_set *fs;
1c79356b 1667
9bccf70c
A
1668 /* locate pipe */
1669 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
1670 a = b , b = b->next) ;
1671 if (b == NULL || (b->pipe_nr != p->pipe_nr) )
1672 return EINVAL ; /* not found */
1c79356b 1673
9bccf70c 1674 s = splimp() ;
1c79356b 1675
9bccf70c
A
1676 /* unlink from list of pipes */
1677 if (a == NULL)
1678 all_pipes = b->next ;
1679 else
1680 a->next = b->next ;
1681 /* remove references to this pipe from the ip_fw rules. */
1682 LIST_FOREACH(chain, &ip_fw_chain_head, next)
1683 if (chain->rule->pipe_ptr == &(b->fs))
1684 chain->rule->pipe_ptr = NULL ;
1685
1686 /* remove all references to this pipe from flow_sets */
1687 for (fs = all_flow_sets; fs; fs= fs->next )
1688 if (fs->pipe == b) {
1689 printf("++ ref to pipe %d from fs %d\n",
1690 p->pipe_nr, fs->fs_nr);
1691 fs->pipe = NULL ;
1692 purge_flow_set(fs, 0);
1693 }
1694 fs_remove_from_heap(&ready_heap, &(b->fs));
1695 purge_pipe(b); /* remove all data associated to this pipe */
1696 /* remove reference to here from extract_heap and wfq_ready_heap */
1697 pipe_remove_from_heap(&extract_heap, b);
1698 pipe_remove_from_heap(&wfq_ready_heap, b);
1699 splx(s);
1700 FREE(b, M_IPFW);
1701 } else { /* this is a WF2Q queue (dn_flow_set) */
1702 struct dn_flow_set *a, *b;
1703
1704 /* locate set */
1705 for (a = NULL, b = all_flow_sets ; b && b->fs_nr < p->fs.fs_nr ;
1706 a = b , b = b->next) ;
1707 if (b == NULL || (b->fs_nr != p->fs.fs_nr) )
1708 return EINVAL ; /* not found */
1709
1710 s = splimp() ;
1711 if (a == NULL)
1712 all_flow_sets = b->next ;
1713 else
1714 a->next = b->next ;
1715 /* remove references to this flow_set from the ip_fw rules. */
1716 LIST_FOREACH(chain, &ip_fw_chain_head, next)
1717 if (chain->rule->pipe_ptr == b)
1718 chain->rule->pipe_ptr = NULL ;
1719
1720 if (b->pipe != NULL) {
1721 /* Update total weight on parent pipe and cleanup parent heaps */
1722 b->pipe->sum -= b->weight * b->backlogged ;
1723 fs_remove_from_heap(&(b->pipe->not_eligible_heap), b);
1724 fs_remove_from_heap(&(b->pipe->scheduler_heap), b);
1725#if 1 /* XXX should i remove from idle_heap as well ? */
1726 fs_remove_from_heap(&(b->pipe->idle_heap), b);
1727#endif
1728 }
1729 purge_flow_set(b, 1);
1730 splx(s);
1731 }
1732 return 0 ;
1733}
1734
1735/*
1736 * helper function used to copy data from kernel in DUMMYNET_GET
1737 */
1738static char *
1739dn_copy_set(struct dn_flow_set *set, char *bp)
1740{
1741 int i, copied = 0 ;
1742 struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp;
1743
1744 for (i = 0 ; i <= set->rq_size ; i++)
1745 for (q = set->rq[i] ; q ; q = q->next, qp++ ) {
1746 if (q->hash_slot != i)
1747 printf("++ at %d: wrong slot (have %d, "
1748 "should be %d)\n", copied, q->hash_slot, i);
1749 if (q->fs != set)
1750 printf("++ at %d: wrong fs ptr (have %p, should be %p)\n",
1751 i, q->fs, set);
1752 copied++ ;
1753 bcopy(q, qp, sizeof( *q ) );
1754 /* cleanup pointers */
1755 qp->next = NULL ;
1756 qp->head = qp->tail = NULL ;
1757 qp->fs = NULL ;
1758 }
1759 if (copied != set->rq_elements)
1760 printf("++ wrong count, have %d should be %d\n",
1761 copied, set->rq_elements);
1762 return (char *)qp ;
1763}
1c79356b
A
1764
1765static int
9bccf70c 1766dummynet_get(struct sockopt *sopt)
1c79356b 1767{
9bccf70c
A
1768 char *buf, *bp ; /* bp is the "copy-pointer" */
1769 size_t size ;
1770 struct dn_flow_set *set ;
1771 struct dn_pipe *p ;
1772 int s, error=0 ;
1773
1774 s = splimp();
1775 /*
1776 * compute size of data structures: list of pipes and flow_sets.
1777 */
1778 for (p = all_pipes, size = 0 ; p ; p = p->next )
1779 size += sizeof( *p ) +
1780 p->fs.rq_elements * sizeof(struct dn_flow_queue);
1781 for (set = all_flow_sets ; set ; set = set->next )
1782 size += sizeof ( *set ) +
1783 set->rq_elements * sizeof(struct dn_flow_queue);
1784 buf = _MALLOC(size, M_TEMP, M_DONTWAIT);
1785 if (buf == 0) {
1c79356b 1786 splx(s);
9bccf70c
A
1787 return ENOBUFS ;
1788 }
1789 for (p = all_pipes, bp = buf ; p ; p = p->next ) {
1790 struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ;
1791
1792 /*
1793 * copy pipe descriptor into *bp, convert delay back to ms,
1794 * then copy the flow_set descriptor(s) one at a time.
1795 * After each flow_set, copy the queue descriptor it owns.
1796 */
1797 bcopy(p, bp, sizeof( *p ) );
1798 pipe_bp->delay = (pipe_bp->delay * 1000) / hz ;
1799 /*
1800 * XXX the following is a hack based on ->next being the
1801 * first field in dn_pipe and dn_flow_set. The correct
1802 * solution would be to move the dn_flow_set to the beginning
1803 * of struct dn_pipe.
1804 */
1805 pipe_bp->next = (struct dn_pipe *)DN_IS_PIPE ;
1806 /* clean pointers */
1807 pipe_bp->head = pipe_bp->tail = NULL ;
1808 pipe_bp->fs.next = NULL ;
1809 pipe_bp->fs.pipe = NULL ;
1810 pipe_bp->fs.rq = NULL ;
1811
1812 bp += sizeof( *p ) ;
1813 bp = dn_copy_set( &(p->fs), bp );
1814 }
1815 for (set = all_flow_sets ; set ; set = set->next ) {
1816 struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp ;
1817 bcopy(set, bp, sizeof( *set ) );
1818 /* XXX same hack as above */
1819 fs_bp->next = (struct dn_flow_set *)DN_IS_QUEUE ;
1820 fs_bp->pipe = NULL ;
1821 fs_bp->rq = NULL ;
1822 bp += sizeof( *set ) ;
1823 bp = dn_copy_set( set, bp );
1824 }
1825 splx(s);
1826 error = sooptcopyout(sopt, buf, size);
1827 FREE(buf, M_TEMP);
1828 return error ;
1c79356b
A
1829}
1830
9bccf70c
A
1831/*
1832 * Handler for the various dummynet socket options (get, flush, config, del)
1833 */
1c79356b 1834static int
9bccf70c 1835ip_dn_ctl(struct sockopt *sopt)
1c79356b 1836{
9bccf70c
A
1837 int error = 0 ;
1838 struct dn_pipe *p, tmp_pipe;
1839
1840 /* Disallow sets in really-really secure mode. */
1841 if (sopt->sopt_dir == SOPT_SET && securelevel >= 3)
1842 return (EPERM);
1843
1844 switch (sopt->sopt_name) {
1845 default :
1846 printf("ip_dn_ctl -- unknown option %d", sopt->sopt_name);
1847 return EINVAL ;
1848
1849 case IP_DUMMYNET_GET :
1850 error = dummynet_get(sopt);
1851 break ;
1852
1853 case IP_DUMMYNET_FLUSH :
1854 dummynet_flush() ;
1855 break ;
1856 case IP_DUMMYNET_CONFIGURE :
1857 p = &tmp_pipe ;
1858 error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
1859 if (error)
1860 break ;
1861 error = config_pipe(p);
1862 break ;
1863
1864 case IP_DUMMYNET_DEL : /* remove a pipe or queue */
1865 p = &tmp_pipe ;
1866 error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
1867 if (error)
1868 break ;
1869
1870 error = delete_pipe(p);
1871 break ;
1872 }
1873 return error ;
1c79356b
A
1874}
1875
9bccf70c
A
1876static void
1877ip_dn_init(void)
1c79356b 1878{
9bccf70c
A
1879 printf("DUMMYNET initialized (010124)\n");
1880 all_pipes = NULL ;
1881 all_flow_sets = NULL ;
1882 ready_heap.size = ready_heap.elements = 0 ;
1883 ready_heap.offset = 0 ;
1884
1885 wfq_ready_heap.size = wfq_ready_heap.elements = 0 ;
1886 wfq_ready_heap.offset = 0 ;
1887
1888 extract_heap.size = extract_heap.elements = 0 ;
1889 extract_heap.offset = 0 ;
1890 ip_dn_ctl_ptr = ip_dn_ctl;
1891 timeout(dummynet, NULL, 1);
1c79356b 1892}
9bccf70c
A
1893
1894static ip_dn_ctl_t *old_dn_ctl_ptr ;
1895
1896static int
1897dummynet_modevent(module_t mod, int type, void *data)
1898{
1899 int s ;
1900 switch (type) {
1901 case MOD_LOAD:
1902 s = splimp();
1903 old_dn_ctl_ptr = ip_dn_ctl_ptr;
1904 ip_dn_init();
1905 splx(s);
1906 break;
1907 case MOD_UNLOAD:
1908 s = splimp();
1909 ip_dn_ctl_ptr = old_dn_ctl_ptr;
1910 splx(s);
1911 dummynet_flush();
1912 break ;
1913 default:
1914 break ;
1915 }
1916 return 0 ;
1917}
1918
1919static moduledata_t dummynet_mod = {
1920 "dummynet",
1921 dummynet_modevent,
1922 NULL
1923} ;
1924DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);