]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_dummynet.c
xnu-201.tar.gz
[apple/xnu.git] / bsd / netinet / ip_dummynet.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Copyright (c) 1998 Luigi Rizzo
24 *
25 * Redistribution and use in source forms, with and without modification,
26 * are permitted provided that this entire comment appears intact.
27 *
28 * Redistribution in binary form may occur without any restrictions.
29 * Obviously, it would be nice if you gave credit where credit is due
30 * but requiring it would be too onerous.
31 *
32 * This software is provided ``AS IS'' without any warranties of any kind.
33 *
34 */
35
36 /*
37 * This module implements IP dummynet, a bandwidth limiter/delay emulator
38 * used in conjunction with the ipfw package.
39 *
40 * Changes:
41 *
42 * 980821: changed conventions in the queueing logic
43 * packets passed from dummynet to ip_in/out are prepended with
44 * a vestigial mbuf type MT_DUMMYNET which contains a pointer
45 * to the matching rule.
46 * ip_input/output will extract the parameters, free the vestigial mbuf,
47 * and do the processing.
48 *
49 * 980519: fixed behaviour when deleting rules.
50 * 980518: added splimp()/splx() to protect against races
51 * 980513: initial release
52 */
53
54 /* include files marked with XXX are probably not needed */
55
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/malloc.h>
59 #include <sys/mbuf.h>
60 #include <sys/queue.h> /* XXX */
61 #include <sys/kernel.h>
62 #include <sys/socket.h>
63 #include <sys/socketvar.h>
64 #include <sys/time.h>
65 #include <sys/sysctl.h>
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <netinet/in.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/in_var.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_fw.h>
73 #include <netinet/ip_dummynet.h>
74 #include <netinet/ip_var.h>
75
76 #if BRIDGE
77 #include <netinet/if_ether.h> /* for struct arpcom */
78 #include <net/bridge.h>
79 #endif
80
/*
 * Head of the singly linked list (via ->next) of configured pipes.
 * ip_dn_ctl() inserts new pipes in ascending pipe_nr order.
 */
static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */

/* non-zero enables the diagnostic printf()s in dummynet_io() */
static int dn_debug = 0 ; /* verbose */
/* incremented once per invocation of the periodic dummynet() task */
static int dn_calls = 0 ; /* number of calls */
/*
 * 1 while the periodic task is not scheduled; cleared by dn_restart()
 * when it arms the timeout, set again at the end of dummynet().
 */
static int dn_idle = 1;
#ifdef SYSCTL_NODE
/* export the three counters above under net.inet.ip.dummynet */
SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dn_debug, 0, "");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, calls, CTLFLAG_RD, &dn_calls, 0, "");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, idle, CTLFLAG_RD, &dn_idle, 0, "");
#endif
92
93 static int ip_dn_ctl(struct sockopt *sopt);
94
95 static void rt_unref(struct rtentry *);
96 static void dummynet(void *);
97 static void dn_restart(void);
98 static void dn_move(struct dn_pipe *pipe, int immediate);
99 static void dummynet_flush(void);
100
101 /*
102 * the following is needed when deleting a pipe, because rules can
103 * hold references to the pipe.
104 */
105 extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain;
106
107 /*
108 * invoked to reschedule the periodic task if necessary.
109 * Should only be called when dn_idle = 1 ;
110 */
111 static void
112 dn_restart()
113 {
114 struct dn_pipe *pipe;
115
116 if (!dn_idle)
117 return;
118
119 for (pipe = all_pipes ; pipe ; pipe = pipe->next ) {
120 /* if there any pipe that needs work, restart */
121 if (pipe->r.head || pipe->p.head || pipe->numbytes < 0 ) {
122 dn_idle = 0;
123 timeout(dummynet, NULL, 1);
124 return ;
125 }
126 }
127 }
128
129 static void
130 rt_unref(struct rtentry *rt)
131 {
132 if (rt == NULL)
133 return ;
134 if (rt->rt_refcnt <= 0)
135 printf("-- warning, refcnt now %d, decreasing\n", rt->rt_refcnt);
136 RTFREE(rt);
137 }
138
139 /*
140 * move packets from R-queue to P-queue
141 */
142 static void
143 dn_move(struct dn_pipe *pipe, int immediate)
144 {
145 struct dn_pkt *pkt;
146
147 /*
148 * consistency check, should catch new pipes which are
149 * not initialized properly.
150 */
151 if ( pipe->p.head == NULL &&
152 pipe->ticks_from_last_insert != pipe->delay) {
153 printf("Warning, empty pipe and delay %d (should be %d)\n",
154 pipe->ticks_from_last_insert, pipe->delay);
155 pipe->ticks_from_last_insert = pipe->delay;
156 }
157 /* this ought to go in dn_dequeue() */
158 if (!immediate && pipe->ticks_from_last_insert < pipe->delay)
159 pipe->ticks_from_last_insert++;
160 if ( pkt = pipe->r.head ) {
161 /*
162 * Move at most numbytes bytes from src and move to dst.
163 * delay is set to ticks_from_last_insert, which
164 * is reset after the first insertion;
165 */
166 while ( pkt ) {
167 struct ip *ip=mtod(pkt->dn_m, struct ip *);
168
169 /*
170 * queue limitation: pass packets down if the len is
171 * such that the pkt would go out before the next tick.
172 */
173 if (pipe->bandwidth) {
174 if (pipe->numbytes < ip->ip_len)
175 break;
176 pipe->numbytes -= ip->ip_len;
177 }
178 pipe->r_len--; /* elements in queue */
179 pipe->r_len_bytes -= ip->ip_len ;
180
181 /*
182 * to add delay jitter, must act here. A lower value
183 * (bounded to 0) means lower delay.
184 */
185 pkt->delay = pipe->ticks_from_last_insert;
186 pipe->ticks_from_last_insert = 0;
187 /* compensate the decrement done next in dn_dequeue */
188 if (!immediate && pkt->delay >0 && pipe->p.head==NULL)
189 pkt->delay++;
190 if (pipe->p.head == NULL)
191 pipe->p.head = pkt;
192 else
193 (struct dn_pkt *)pipe->p.tail->dn_next = pkt;
194 pipe->p.tail = pkt;
195 pkt = (struct dn_pkt *)pkt->dn_next;
196 pipe->p.tail->dn_next = NULL;
197 }
198 pipe->r.head = pkt;
199
200 /*** XXX just a sanity check */
201 if ( ( pkt == NULL && pipe->r_len != 0) ||
202 ( pkt != NULL && pipe->r_len == 0) )
203 printf("-- Warning, pipe head %p len %d\n",
204 (void *)pkt, pipe->r_len);
205 }
206
207 /*
208 * deliver packets downstream after the delay in the P-queue.
209 */
210
211 if (pipe->p.head == NULL)
212 return;
213 if (!immediate)
214 pipe->p.head->delay--;
215 while ( (pkt = pipe->p.head) && pkt->delay < 1) {
216 /*
217 * first unlink, then call procedures since ip_input()
218 * can result in a call to ip_output cnd viceversa,
219 * thus causing nested calls
220 */
221 pipe->p.head = (struct dn_pkt *) pkt->dn_next ;
222
223 /*
224 * the trick to avoid flow-id settings here is to prepend a
225 * vestigial mbuf to the packet, with the following values:
226 * m_type = MT_DUMMYNET
227 * m_next = the actual mbuf to be processed by ip_input/output
228 * m_data = the matching rule
229 * The vestigial element is the same memory area used by
230 * the dn_pkt, and IS FREED IN ip_input/ip_output. IT IS
231 * NOT A REAL MBUF, just a block of memory acquired with malloc().
232 */
233 switch (pkt->dn_dir) {
234 case DN_TO_IP_OUT: {
235 struct rtentry *tmp_rt = pkt->ro.ro_rt ;
236
237 (void)ip_output((struct mbuf *)pkt, (struct mbuf *)pkt->ifp,
238 &(pkt->ro), pkt->dn_hlen, NULL);
239 rt_unref (tmp_rt) ;
240 }
241 break ;
242 case DN_TO_IP_IN :
243 ip_input((struct mbuf *)pkt) ;
244 break ;
245 #if BRIDGE
246 case DN_TO_BDG_FWD :
247 bdg_forward((struct mbuf **)&pkt, pkt->ifp);
248 break ;
249 #endif
250 default:
251 printf("dummynet: bad switch %d!\n", pkt->dn_dir);
252 m_freem(pkt->dn_m);
253 FREE(pkt, M_IPFW);
254 break ;
255 }
256 }
257 }
258 /*
259 * this is the periodic task that moves packets between the R-
260 * and the P- queue
261 */
262 /*ARGSUSED*/
263 void
264 dummynet(void * __unused unused)
265 {
266 struct dn_pipe *p ;
267 int s ;
268 boolean_t funnel_state;
269
270 funnel_state = thread_funnel_set(network_flock, TRUE);
271 dn_calls++ ;
272 for (p = all_pipes ; p ; p = p->next ) {
273 /*
274 * Increment the amount of data that can be sent. However,
275 * don't do that if the channel is idle
276 * (r.head == NULL && numbytes >= bandwidth).
277 * This bug fix is from tim shepard (shep@bbn.com)
278 */
279 s = splimp();
280 if (p->r.head != NULL || p->numbytes < p->bandwidth )
281 p->numbytes += p->bandwidth ;
282 dn_move(p, 0); /* is it really 0 (also below) ? */
283 splx(s);
284 }
285
286 /*
287 * finally, if some queue has data, restart the timer.
288 */
289 dn_idle = 1;
290 dn_restart();
291 (void) thread_funnel_set(network_flock, funnel_state);
292
293 }
294
295 /*
296 * dummynet hook for packets.
297 * input and output use the same code, so i use bit 16 in the pipe
298 * number to chose the direction: 1 for output packets, 0 for input.
299 * for input, only m is significant. For output, also the others.
300 */
301 int
302 dummynet_io(int pipe_nr, int dir,
303 struct mbuf *m, struct ifnet *ifp, struct route *ro, int hlen,
304 struct ip_fw_chain *rule)
305 {
306 struct dn_pkt *pkt;
307 struct dn_pipe *pipe;
308 struct ip *ip=mtod(m, struct ip *);
309
310 int s=splimp();
311
312 pipe_nr &= 0xffff ;
313 /*
314 * locate pipe. First time is expensive, next have direct access.
315 */
316
317 if ( (pipe = rule->rule->pipe_ptr) == NULL ) {
318 for (pipe=all_pipes; pipe && pipe->pipe_nr !=pipe_nr; pipe=pipe->next)
319 ;
320 if (pipe == NULL) {
321 splx(s);
322 if (dn_debug)
323 printf("warning, pkt for no pipe %d\n", pipe_nr);
324 m_freem(m);
325 return 0 ;
326 } else
327 rule->rule->pipe_ptr = pipe ;
328 }
329
330 /*
331 * should i drop ?
332 * This section implements random packet drop.
333 */
334 if ( (pipe->plr && random() < pipe->plr) ||
335 (pipe->queue_size && pipe->r_len >= pipe->queue_size) ||
336 (pipe->queue_size_bytes &&
337 ip->ip_len + pipe->r_len_bytes > pipe->queue_size_bytes) ||
338 (pkt = (struct dn_pkt *) _MALLOC(sizeof (*pkt),
339 M_IPFW, M_WAITOK) ) == NULL ) {
340 splx(s);
341 if (dn_debug)
342 printf("-- dummynet: drop from pipe %d, have %d pks, %d bytes\n",
343 pipe_nr, pipe->r_len, pipe->r_len_bytes);
344 pipe->r_drops++ ;
345 m_freem(m);
346 return 0 ; /* XXX error */
347 }
348 bzero(pkt, sizeof(*pkt) );
349 /* build and enqueue packet */
350 pkt->hdr.mh_type = MT_DUMMYNET ;
351 (struct ip_fw_chain *)pkt->hdr.mh_data = rule ;
352 pkt->dn_next = NULL;
353 pkt->dn_m = m;
354 pkt->dn_dir = dir ;
355 pkt->delay = 0;
356
357 pkt->ifp = ifp;
358 if (dir == DN_TO_IP_OUT) {
359 pkt->ro = *ro; /* XXX copied! */
360 if (ro->ro_rt)
361 ro->ro_rt->rt_refcnt++ ; /* XXX */
362 }
363 pkt->dn_hlen = hlen;
364 if (pipe->r.head == NULL)
365 pipe->r.head = pkt;
366 else
367 (struct dn_pkt *)pipe->r.tail->dn_next = pkt;
368 pipe->r.tail = pkt;
369 pipe->r_len++;
370 pipe->r_len_bytes += ip->ip_len ;
371
372 /*
373 * here we could implement RED if we like to
374 */
375
376 if (pipe->r.head == pkt) { /* process immediately */
377 dn_move(pipe, 1);
378 }
379 splx(s);
380 if (dn_idle)
381 dn_restart();
382 return 0;
383 }
384
385 /*
386 * dispose all packets queued on a pipe
387 */
388 static void
389 purge_pipe(struct dn_pipe *pipe)
390 {
391 struct dn_pkt *pkt, *n ;
392 struct rtentry *tmp_rt ;
393
394 for (pkt = pipe->r.head ; pkt ; ) {
395 rt_unref (tmp_rt = pkt->ro.ro_rt ) ;
396 m_freem(pkt->dn_m);
397 n = pkt ;
398 pkt = (struct dn_pkt *)pkt->dn_next ;
399 FREE(n, M_IPFW) ;
400 }
401 for (pkt = pipe->p.head ; pkt ; ) {
402 rt_unref (tmp_rt = pkt->ro.ro_rt ) ;
403 m_freem(pkt->dn_m);
404 n = pkt ;
405 pkt = (struct dn_pkt *)pkt->dn_next ;
406 FREE(n, M_IPFW) ;
407 }
408 }
409
410 /*
411 * delete all pipes returning memory
412 */
413 static void
414 dummynet_flush()
415 {
416 struct dn_pipe *q, *p = all_pipes ;
417 int s = splnet() ;
418
419 all_pipes = NULL ;
420 splx(s) ;
421 /*
422 * purge all queued pkts and delete all pipes
423 */
424 for ( ; p ; ) {
425 purge_pipe(p);
426 q = p ;
427 p = p->next ;
428 FREE(q, M_IPFW);
429 }
430 }
431
432 extern struct ip_fw_chain *ip_fw_default_rule ;
433 /*
434 * when a firewall rule is deleted, scan all pipes and remove the flow-id
435 * from packets matching this rule.
436 */
437 void
438 dn_rule_delete(void *r)
439 {
440 struct dn_pipe *p ;
441 int matches = 0 ;
442
443 for ( p = all_pipes ; p ; p = p->next ) {
444 struct dn_pkt *x ;
445 for (x = p->r.head ; x ; x = (struct dn_pkt *)x->dn_next )
446 if (x->hdr.mh_data == r) {
447 matches++ ;
448 x->hdr.mh_data = (void *)ip_fw_default_rule ;
449 }
450 for (x = p->p.head ; x ; x = (struct dn_pkt *)x->dn_next )
451 if (x->hdr.mh_data == r) {
452 matches++ ;
453 x->hdr.mh_data = (void *)ip_fw_default_rule ;
454 }
455 }
456 printf("dn_rule_delete, r %p, default %p%s, %d matches\n",
457 (void *)r, (void *)ip_fw_default_rule,
458 r == ip_fw_default_rule ? " AARGH!":"", matches);
459 }
460
461 /*
462 * handler for the various dummynet socket options
463 * (get, flush, config, del)
464 */
465 static int
466 ip_dn_ctl(struct sockopt *sopt)
467 {
468 int error = 0 ;
469 size_t size ;
470 char *buf, *bp ;
471 struct dn_pipe *p, tmp_pipe ;
472
473 struct dn_pipe *x, *a, *b ;
474
475 /* Disallow sets in really-really secure mode. */
476 if (sopt->sopt_dir == SOPT_SET && securelevel >= 3)
477 return (EPERM);
478
479 switch (sopt->sopt_name) {
480 default :
481 panic("ip_dn_ctl -- unknown option");
482
483 case IP_DUMMYNET_GET :
484 for (p = all_pipes, size = 0 ; p ; p = p->next )
485 size += sizeof( *p ) ;
486 buf = _MALLOC(size, M_TEMP, M_WAITOK);
487 if (buf == 0) {
488 error = ENOBUFS ;
489 break ;
490 }
491 for (p = all_pipes, bp = buf ; p ; p = p->next ) {
492 struct dn_pipe *q = (struct dn_pipe *)bp ;
493
494 bcopy(p, bp, sizeof( *p ) );
495 /*
496 * return bw and delay in bits/s and ms, respectively
497 */
498 q->bandwidth *= (8*hz) ;
499 q->delay = (q->delay * 1000) / hz ;
500 bp += sizeof( *p ) ;
501 }
502 error = sooptcopyout(sopt, buf, size);
503 FREE(buf, M_TEMP);
504 break ;
505 case IP_DUMMYNET_FLUSH :
506 dummynet_flush() ;
507 break ;
508 case IP_DUMMYNET_CONFIGURE :
509 p = &tmp_pipe ;
510 error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
511 if (error)
512 break ;
513 /*
514 * The config program passes parameters as follows:
515 * bandwidth = bits/second (0 = no limits);
516 * must be translated in bytes/tick.
517 * delay = ms
518 * must be translated in ticks.
519 * queue_size = slots (0 = no limit)
520 * queue_size_bytes = bytes (0 = no limit)
521 * only one can be set, must be bound-checked
522 */
523 if ( p->bandwidth > 0 ) {
524 p->bandwidth = p->bandwidth / 8 / hz ;
525 if (p->bandwidth == 0) /* too little does not make sense! */
526 p->bandwidth = 10 ;
527 }
528 p->delay = ( p->delay * hz ) / 1000 ;
529 if (p->queue_size == 0 && p->queue_size_bytes == 0)
530 p->queue_size = 100 ;
531 if (p->queue_size != 0 ) /* buffers are prevailing */
532 p->queue_size_bytes = 0 ;
533 if (p->queue_size > 100)
534 p->queue_size = 100 ;
535 if (p->queue_size_bytes > 1024*1024)
536 p->queue_size_bytes = 1024*1024 ;
537 #if 0
538 printf("ip_dn: config pipe %d %d bit/s %d ms %d bufs\n",
539 p->pipe_nr,
540 p->bandwidth * 8 * hz ,
541 p->delay * 1000 / hz , p->queue_size);
542 #endif
543 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
544 a = b , b = b->next) ;
545 if (b && b->pipe_nr == p->pipe_nr) {
546 /* XXX should spl and flush old pipe... */
547 b->bandwidth = p->bandwidth ;
548 b->delay = p->delay ;
549 b->ticks_from_last_insert = p->delay ;
550 b->queue_size = p->queue_size ;
551 b->queue_size_bytes = p->queue_size_bytes ;
552 b->plr = p->plr ;
553 } else {
554 int s ;
555 x = _MALLOC(sizeof(struct dn_pipe), M_IPFW, M_NOWAIT) ;
556 if (x == NULL) {
557 printf("ip_dummynet.c: sorry no memory\n");
558 error = ENOSPC ;
559 break ;
560 }
561 bzero(x, sizeof(*x) );
562 x->bandwidth = p->bandwidth ;
563 x->delay = p->delay ;
564 x->ticks_from_last_insert = p->delay ;
565 x->pipe_nr = p->pipe_nr ;
566 x->queue_size = p->queue_size ;
567 x->queue_size_bytes = p->queue_size_bytes ;
568 x->plr = p->plr ;
569
570 s = splnet() ;
571 x->next = b ;
572 if (a == NULL)
573 all_pipes = x ;
574 else
575 a->next = x ;
576 splx(s);
577 }
578 break ;
579
580 case IP_DUMMYNET_DEL :
581 p = &tmp_pipe ;
582 error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
583 if (error)
584 break ;
585
586 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
587 a = b , b = b->next) ;
588 if (b && b->pipe_nr == p->pipe_nr) { /* found pipe */
589 int s = splnet() ;
590 struct ip_fw_chain *chain = ip_fw_chain.lh_first;
591
592 if (a == NULL)
593 all_pipes = b->next ;
594 else
595 a->next = b->next ;
596 /*
597 * remove references to this pipe from the ip_fw rules.
598 */
599 for (; chain; chain = chain->chain.le_next) {
600 register struct ip_fw *const f = chain->rule;
601 if (f->pipe_ptr == b)
602 f->pipe_ptr = NULL ;
603 }
604 splx(s);
605 purge_pipe(b); /* remove pkts from here */
606 FREE(b, M_IPFW);
607 }
608 break ;
609 }
610 return error ;
611 }
612
613 void
614 ip_dn_init(void)
615 {
616 printf("DUMMYNET initialized (980901) -- size dn_pkt %d\n",
617 sizeof(struct dn_pkt));
618 all_pipes = NULL ;
619 ip_dn_ctl_ptr = ip_dn_ctl;
620 }
621
622 #if DUMMYNET_MODULE
623
624 #include <sys/exec.h>
625 #include <sys/sysent.h>
626 #include <sys/lkm.h>
627
628 MOD_MISC(dummynet);
629
630 static ip_dn_ctl_t *old_dn_ctl_ptr ;
631
632 static int
633 dummynet_load(struct lkm_table *lkmtp, int cmd)
634 {
635 int s=splnet();
636 old_dn_ctl_ptr = ip_dn_ctl_ptr;
637 ip_dn_init();
638 splx(s);
639 return 0;
640 }
641
642 static int
643 dummynet_unload(struct lkm_table *lkmtp, int cmd)
644 {
645 int s=splnet();
646 ip_dn_ctl_ptr = old_dn_ctl_ptr;
647 splx(s);
648 dummynet_flush();
649 printf("DUMMYNET unloaded\n");
650 return 0;
651 }
652
/*
 * Loadable-module entry point.  Hands the request to the DISPATCH macro
 * with dummynet_load() and dummynet_unload() as the load/unload handlers
 * and lkm_nullcmd for the remaining slot.
 * NOTE(review): the function has no explicit return statement, so the
 * macro presumably supplies the return value -- confirm against <sys/lkm.h>.
 */
int
dummynet_mod(struct lkm_table *lkmtp, int cmd, int ver)
{
	DISPATCH(lkmtp, cmd, ver, dummynet_load, dummynet_unload, lkm_nullcmd);
}
#endif
658 #endif