1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1982, 1986, 1988, 1991, 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94
56 */
57 /* HISTORY
58 *
59 * 10/15/97 Annette DeSchon (deschon@apple.com)
60 * Fixed bug in which all cluster mbufs were broken up
61 * into regular mbufs: Some clusters are now reserved.
62 * When a cluster is needed, regular mbufs are no longer
63 * used. (Radar 1683621)
64 * 20-May-95 Mac Gillon (mgillon) at NeXT
65 * New version based on 4.4
66 */
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/malloc.h>
71 #include <sys/mbuf.h>
72 #include <sys/kernel.h>
73 #include <sys/syslog.h>
74 #include <sys/protosw.h>
75 #include <sys/domain.h>
76 #include <net/netisr.h>
77
78 #include <kern/queue.h>
79
80 extern kernel_pmap; /* The kernel's pmap */
81
82 decl_simple_lock_data(, mbuf_slock);
83 struct mbuf *mfree; /* mbuf free list */
84 struct mbuf *mfreelater; /* mbuf deallocation list */
85 extern vm_map_t mb_map; /* special map */
86 int m_want; /* sleepers on mbufs */
87 extern int nmbclusters; /* max number of mapped clusters */
88 short *mclrefcnt; /* mapped cluster reference counts */
89 int *mcl_paddr;
90 union mcluster *mclfree; /* mapped cluster free list */
91 int max_linkhdr; /* largest link-level header */
92 int max_protohdr; /* largest protocol header */
93 int max_hdr; /* largest link+protocol header */
94 int max_datalen; /* MHLEN - max_hdr */
95 struct mbstat mbstat; /* statistics */
96 union mcluster *mbutl; /* first mapped cluster address */
97 union mcluster *embutl; /* ending virtual address of mclusters */
98
99 static int nclpp; /* # clusters per physical page */
100 static char mbfail[] = "mbuf not mapped";
101
102 static int m_howmany();
103
104 /* The number of cluster mbufs that are allocated, to start. */
105 #define MINCL max(16, 2)
106
107 extern int dlil_input_thread_wakeup;
108 extern int dlil_expand_mcl;
109 extern int dlil_initialized;
110
111
112 void
113 mbinit()
114 {
115 int s,m;
116 int initmcl = 32;
117
118 if (nclpp)
119 return;
120 nclpp = round_page(MCLBYTES) / MCLBYTES; /* see mbufgc() */
121 if (nclpp < 1) nclpp = 1;
122 MBUF_LOCKINIT();
123 // NETISR_LOCKINIT();
124 if (nmbclusters == 0)
125 nmbclusters = NMBCLUSTERS;
126 MALLOC(mclrefcnt, short *, nmbclusters * sizeof (short),
127 M_TEMP, M_WAITOK);
128 if (mclrefcnt == 0)
129 panic("mbinit");
130 for (m = 0; m < nmbclusters; m++)
131 mclrefcnt[m] = -1;
132
133 MALLOC(mcl_paddr, int *, (nmbclusters/(PAGE_SIZE/CLBYTES)) * sizeof (int),
134 M_TEMP, M_WAITOK);
135 if (mcl_paddr == 0)
136 panic("mbinit1");
137 bzero((char *)mcl_paddr, (nmbclusters/(PAGE_SIZE/CLBYTES)) * sizeof (int));
138
139 embutl = (union mcluster *)((unsigned char *)mbutl + (nmbclusters * MCLBYTES));
140
141 PE_parse_boot_arg("initmcl", &initmcl);
142
143 if (m_clalloc(max(PAGE_SIZE/CLBYTES, 1) * initmcl, M_WAIT) == 0)
144 goto bad;
145 MBUF_UNLOCK();
146 return;
147 bad:
148 panic("mbinit");
149 }
150
151 /*
152 * Allocate some number of mbuf clusters
153 * and place on cluster free list.
154 */
155 /* ARGSUSED */
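/*
 * Returns the number of clusters added on success, 1 if enough clusters
 * were already free, or 0 on failure.  In every case the mbuf lock is
 * held on return (callers such as m_getpacket() rely on this).
 */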
156 m_clalloc(ncl, nowait)
157 register int ncl;
158 int nowait;
159 {
160 register union mcluster *mcl;
161 register int i;
162 vm_size_t size;
163 static char doing_alloc;
164
165 /*
166 * Honor the caller's wish to block or not block.
167 * We have a way to grow the pool asynchronously,
168 * by kicking the dlil_input_thread.
169 */
170 if ((i = m_howmany()) <= 0)
171 goto out;
172
173 if ((nowait == M_DONTWAIT))
174 goto out;
175
176 if (ncl < i)
177 ncl = i;
178 size = round_page(ncl * MCLBYTES);
179 mcl = (union mcluster *)kmem_mb_alloc(mb_map, size);
180
181 if (mcl == 0 && ncl > 1) {
182 size = round_page(MCLBYTES); /* Try for 1 if failed */
183 mcl = (union mcluster *)kmem_mb_alloc(mb_map, size);
184 }
185
186 if (mcl) {
187 MBUF_LOCK();
188 ncl = size / MCLBYTES;
189 for (i = 0; i < ncl; i++) {
190 if (++mclrefcnt[mtocl(mcl)] != 0)
191 panic("m_clalloc already there");
192 if (((int)mcl & PAGE_MASK) == 0)
193 mcl_paddr[((char *)mcl - (char *)mbutl)/PAGE_SIZE] = pmap_extract(kernel_pmap, (char *)mcl);
194
195 mcl->mcl_next = mclfree;
196 mclfree = mcl++;
197 }
198 mbstat.m_clfree += ncl;
199 mbstat.m_clusters += ncl;
200 return (ncl);
201 } /* else ... */
202 out:
203 MBUF_LOCK();
204
205 /*
206 * When non-blocking we kick the dlil thread if we have to grow the
207 * pool or if the number of free clusters is less than requested.
208 */
209 if ((nowait == M_DONTWAIT) && (i > 0 || ncl >= mbstat.m_clfree)) {
210 dlil_expand_mcl = 1;
211 if (dlil_initialized)
212 wakeup((caddr_t)&dlil_input_thread_wakeup);
213 }
214
215 if (mbstat.m_clfree >= ncl)
216 return 1;
217
218 mbstat.m_drops++;
219
220 return 0;
221 }
222
223 /*
224 * Add more free mbufs by cutting up a cluster.
225 */
226 m_expand(canwait)
227 int canwait;
228 {
229 register caddr_t mcl;
230
231 if (mbstat.m_clfree < (mbstat.m_clusters >> 4))
232 /* 1/16th of the total number of cluster mbufs allocated is
233 reserved for large packets. The number reserved must
234 always be < 1/2 of the total, or future allocation will be prevented.
235 */
236 return 0;
237
238 MCLALLOC(mcl, canwait);
239 if (mcl) {
240 register struct mbuf *m = (struct mbuf *)mcl;
241 register int i = NMBPCL;
242 MBUF_LOCK();
243 mbstat.m_mtypes[MT_FREE] += i;
244 mbstat.m_mbufs += i;
245 while (i--) {
246 m->m_type = MT_FREE;
247 m->m_next = mfree;
248 mfree = m++;
249 }
250 i = m_want;
251 m_want = 0;
252 MBUF_UNLOCK();
253 if (i) wakeup((caddr_t)&mfree);
254 return 1;
255 }
256 return 0;
257 }
258
259 /*
260 * When MGET fails, ask protocols to free space when short of memory,
261 * then re-attempt to allocate an mbuf.
262 */
263 struct mbuf *
264 m_retry(canwait, type)
265 int canwait, type;
266 {
267 #define m_retry(h, t) 0
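/*
 * The local define above turns the m_retry() fallback that the MGET
 * macro (see MGET in <sys/mbuf.h>) expands to into a constant 0, so a
 * failed MGET here cannot recurse back into this routine; the "XXX"
 * placeholder passed to MGET below is swallowed by that define and
 * never reaches the compiler.
 */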
268 register struct mbuf *m;
269 int wait, s;
270 funnel_t * fnl;
271 int fnl_switch = 0;
272 boolean_t funnel_state;
273
274 for (;;) {
275 (void) m_expand(canwait);
276 MGET(m, XXX, type);
277 if (m || canwait == M_DONTWAIT)
278 break;
279 MBUF_LOCK();
280 wait = m_want++;
281
282 dlil_expand_mcl = 1;
283 MBUF_UNLOCK();
284
285 if (dlil_initialized)
286 wakeup((caddr_t)&dlil_input_thread_wakeup);
287
288 if (wait == 0) {
289 mbstat.m_drain++;
290 }
291 else {
292 assert_wait((caddr_t)&mfree, THREAD_UNINT);
293 mbstat.m_wait++;
294 }
295
296 /*
297 * Grab network funnel because m_reclaim calls into the
298 * socket domains and tsleep ends up calling splhigh
299 */
300 fnl = thread_funnel_get();
301 if (fnl && (fnl == kernel_flock)) {
302 fnl_switch = 1;
303 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
304 } else
305 funnel_state = thread_funnel_set(network_flock, TRUE);
306 if (wait == 0) {
307 m_reclaim();
308 } else {
309 /* Sleep with a small timeout as insurance */
310 (void) tsleep((caddr_t)0, PZERO-1, "m_retry", hz);
311 }
312 if (fnl_switch)
313 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
314 else
315 thread_funnel_set(network_flock, funnel_state);
316 }
317 return (m);
318 #undef m_retry
319 }
320
321 /*
322 * As above; retry an MGETHDR.
323 */
324 struct mbuf *
325 m_retryhdr(canwait, type)
326 int canwait, type;
327 {
328 register struct mbuf *m;
329
330 if (m = m_retry(canwait, type)) {
331 m->m_flags |= M_PKTHDR;
332 m->m_data = m->m_pktdat;
333 }
334 return (m);
335 }
336
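/*
 * Walk every protocol of every domain and call its drain routine,
 * asking it to release whatever mbufs it can spare.
 */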
337 m_reclaim()
338 {
339 register struct domain *dp;
340 register struct protosw *pr;
341
342 for (dp = domains; dp; dp = dp->dom_next)
343 for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
344 if (pr->pr_drain)
345 (*pr->pr_drain)();
346 mbstat.m_drain++;
347 }
348
349 /*
350 * Space allocation routines.
351 * These are also available as macros
352 * for critical paths.
353 */
354 struct mbuf *
355 m_get(nowait, type)
356 int nowait, type;
357 {
358 register struct mbuf *m;
359
360 MGET(m, nowait, type);
361 return (m);
362 }
363
364 struct mbuf *
365 m_gethdr(nowait, type)
366 int nowait, type;
367 {
368 register struct mbuf *m;
369
370 MGETHDR(m, nowait, type);
371 return (m);
372 }
373
374 struct mbuf *
375 m_getclr(nowait, type)
376 int nowait, type;
377 {
378 register struct mbuf *m;
379
380 MGET(m, nowait, type);
381 if (m == 0)
382 return (0);
383 bzero(mtod(m, caddr_t), MLEN);
384 return (m);
385 }
386
387 struct mbuf *
388 m_free(m)
389 struct mbuf *m;
390 {
391 struct mbuf *n = m->m_next;
392 int i, s;
393
394 if (m->m_type == MT_FREE)
395 panic("freeing free mbuf");
396
397 MBUF_LOCK();
398 if (m->m_flags & M_EXT) {
399 if (MCLHASREFERENCE(m)) {
400 remque((queue_t)&m->m_ext.ext_refs);
401 } else if (m->m_ext.ext_free == NULL) {
402 union mcluster *mcl= (union mcluster *)m->m_ext.ext_buf;
403 if (MCLUNREF(mcl)) {
404 mcl->mcl_next = mclfree;
405 mclfree = mcl;
406 ++mbstat.m_clfree;
407 }
408 #ifdef COMMENT_OUT
409 /* *** Since m_split() increments "mclrefcnt[mtocl(m->m_ext.ext_buf)]",
410 and AppleTalk ADSP uses m_split(), this incorrect sanity check
411 caused a panic.
412 *** */
413 else /* sanity check - not referenced this way */
414 panic("m_free m_ext cluster not free");
415 #endif
416 } else {
417 (*(m->m_ext.ext_free))(m->m_ext.ext_buf,
418 m->m_ext.ext_size, m->m_ext.ext_arg);
419 }
420 }
421 mbstat.m_mtypes[m->m_type]--;
422 (void) MCLUNREF(m);
423 m->m_type = MT_FREE;
424 mbstat.m_mtypes[m->m_type]++;
425 m->m_flags = 0;
426 m->m_next = mfree;
427 m->m_len = 0;
428 mfree = m;
429 i = m_want;
430 m_want = 0;
431 MBUF_UNLOCK();
432 if (i) wakeup((caddr_t)&mfree);
433 return (n);
434 }
435
436 /* Best effort to get an mbuf cluster + pkthdr under one lock.
437 * If they aren't both available, just bail out and use the regular
438 * path.
439 * Used by drivers to allocate packets on the receive ring.
440 */
441 struct mbuf *
442 m_getpacket(void)
443 {
444 struct mbuf *m;
445 m_clalloc(1, M_DONTWAIT); /* takes the MBUF_LOCK, but doesn't release it... */
446 if ((mfree != 0) && (mclfree != 0)) { /* mbuf + cluster are available */
447 m = mfree;
448 mfree = m->m_next;
449 MCHECK(m);
450 ++mclrefcnt[mtocl(m)];
451 mbstat.m_mtypes[MT_FREE]--;
452 mbstat.m_mtypes[MT_DATA]++;
453 m->m_ext.ext_buf = (caddr_t)mclfree; /* get the cluster */
454 ++mclrefcnt[mtocl(m->m_ext.ext_buf)];
455 mbstat.m_clfree--;
456 mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next;
457
458 m->m_next = m->m_nextpkt = 0;
459 m->m_ext.ext_free = 0;
460 m->m_type = MT_DATA;
461 m->m_data = m->m_ext.ext_buf;
462 m->m_flags = M_PKTHDR | M_EXT;
463 m->m_pkthdr.aux = (struct mbuf *)NULL;
464 m->m_pkthdr.csum_data = 0;
465 m->m_pkthdr.csum_flags = 0;
466 m->m_ext.ext_size = MCLBYTES;
467 m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward =
468 &m->m_ext.ext_refs;
469 MBUF_UNLOCK();
470 }
471 else { /* slow path: either the mbuf or the cluster needs to be allocated anyway */
472 MBUF_UNLOCK();
473
474 MGETHDR(m, M_WAITOK, MT_DATA );
475
476 if ( m == 0 )
477 return (NULL);
478
479 MCLGET( m, M_WAITOK );
480 if ( ( m->m_flags & M_EXT ) == 0 )
481 {
482 m_free(m); m = 0;
483 }
484 }
485 return (m);
486 }
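/*
 * Illustrative (hypothetical) use of m_getpacket() by a driver filling
 * its receive ring; a sketch only, not part of the original sources.
 * The rx_ring/RX_RING_SIZE names are placeholders:
 *
 *	for (i = 0; i < RX_RING_SIZE; i++) {
 *		struct mbuf *m = m_getpacket();
 *		if (m == NULL)
 *			break;		(allocation failed, retry later)
 *		rx_ring[i].m = m;
 *		rx_ring[i].paddr = mcl_to_paddr(mtod(m, char *));
 *	}
 */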
487
488 /* Free an mbuf list (linked through m_nextpkt) while following m_next under one lock.
489 * Returns the count of packets freed. Used by the drivers.
490 */
491 int
492 m_freem_list(m)
493 struct mbuf *m;
494 {
495 struct mbuf *nextpkt;
496 int i, s, count=0;
497
498 // s = splimp();
499 MBUF_LOCK();
500 while (m) {
501 if (m)
502 nextpkt = m->m_nextpkt; /* chain of linked mbufs from driver */
503 else
504 nextpkt = 0;
505 count++;
506 while (m) { /* free the mbuf chain (like mfreem) */
507 struct mbuf *n = m->m_next;
508 if (n && n->m_nextpkt)
509 panic("m_freem_list: m_nextpkt of m_next != NULL");
510 if (m->m_type == MT_FREE)
511 panic("freeing free mbuf");
512 if (m->m_flags & M_EXT) {
513 if (MCLHASREFERENCE(m)) {
514 remque((queue_t)&m->m_ext.ext_refs);
515 } else if (m->m_ext.ext_free == NULL) {
516 union mcluster *mcl= (union mcluster *)m->m_ext.ext_buf;
517 if (MCLUNREF(mcl)) {
518 mcl->mcl_next = mclfree;
519 mclfree = mcl;
520 ++mbstat.m_clfree;
521 }
522 } else {
523 (*(m->m_ext.ext_free))(m->m_ext.ext_buf,
524 m->m_ext.ext_size, m->m_ext.ext_arg);
525 }
526 }
527 mbstat.m_mtypes[m->m_type]--;
528 (void) MCLUNREF(m);
529 m->m_type = MT_FREE;
530 mbstat.m_mtypes[m->m_type]++;
531 m->m_flags = 0;
532 m->m_len = 0;
533 m->m_next = mfree;
534 mfree = m;
535 m = n;
536 }
537 m = nextpkt; /* bump m with saved nextpkt if any */
538 }
539 i = m_want;
540 m_want = 0;
541 MBUF_UNLOCK();
542 if (i) wakeup((caddr_t)&mfree);
543 return (count);
544 }
545
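/*
 * Free an entire mbuf chain (a single packet), following m_next.
 */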
546 void
547 m_freem(m)
548 register struct mbuf *m;
549 {
550 while (m)
551 m = m_free(m);
552 }
553
554 /*
555 * Mbuffer utility routines.
556 */
557 /*
558 * Compute the amount of space available
559 * before the current start of data in an mbuf.
560 */
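/*
 * A shared cluster (MCLHASREFERENCE) reports no space, since writing
 * into it would corrupt the data seen through the other references.
 */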
561 m_leadingspace(m)
562 register struct mbuf *m;
563 {
564 if (m->m_flags & M_EXT) {
565 if (MCLHASREFERENCE(m))
566 return(0);
567 return (m->m_data - m->m_ext.ext_buf);
568 }
569 if (m->m_flags & M_PKTHDR)
570 return (m->m_data - m->m_pktdat);
571 return (m->m_data - m->m_dat);
572 }
573
574 /*
575 * Compute the amount of space available
576 * after the end of data in an mbuf.
577 */
578 m_trailingspace(m)
579 register struct mbuf *m;
580 {
581 if (m->m_flags & M_EXT) {
582 if (MCLHASREFERENCE(m))
583 return(0);
584 return (m->m_ext.ext_buf + m->m_ext.ext_size -
585 (m->m_data + m->m_len));
586 }
587 return (&m->m_dat[MLEN] - (m->m_data + m->m_len));
588 }
589
590 /*
591 * Lesser-used path for M_PREPEND:
592 * allocate new mbuf to prepend to chain,
593 * copy junk along.
594 */
595 struct mbuf *
596 m_prepend(m, len, how)
597 register struct mbuf *m;
598 int len, how;
599 {
600 struct mbuf *mn;
601
602 MGET(mn, how, m->m_type);
603 if (mn == (struct mbuf *)NULL) {
604 m_freem(m);
605 return ((struct mbuf *)NULL);
606 }
607 if (m->m_flags & M_PKTHDR) {
608 M_COPY_PKTHDR(mn, m);
609 m->m_flags &= ~M_PKTHDR;
610 }
611 mn->m_next = m;
612 m = mn;
613 if (len < MHLEN)
614 MH_ALIGN(m, len);
615 m->m_len = len;
616 return (m);
617 }
618
619 /*
620 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
621 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
622 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
623 */
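/*
 * Note that when the source mbuf has an external cluster (M_EXT), the
 * copy shares that cluster by adding a reference on the ext_refs queue
 * rather than duplicating the data; use m_dup() below for a real copy.
 */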
624 int MCFail;
625
626 struct mbuf *
627 m_copym(m, off0, len, wait)
628 register struct mbuf *m;
629 int off0, wait;
630 register int len;
631 {
632 register struct mbuf *n, **np;
633 register int off = off0;
634 struct mbuf *top;
635 int copyhdr = 0;
636
637 if (off < 0 || len < 0)
638 panic("m_copym");
639 if (off == 0 && m->m_flags & M_PKTHDR)
640 copyhdr = 1;
641 while (off > 0) {
642 if (m == 0)
643 panic("m_copym");
644 if (off < m->m_len)
645 break;
646 off -= m->m_len;
647 m = m->m_next;
648 }
649 np = &top;
650 top = 0;
651 while (len > 0) {
652 if (m == 0) {
653 if (len != M_COPYALL)
654 panic("m_copym");
655 break;
656 }
657 MGET(n, wait, m->m_type);
658 *np = n;
659 if (n == 0)
660 goto nospace;
661 if (copyhdr) {
662 M_COPY_PKTHDR(n, m);
663 if (len == M_COPYALL)
664 n->m_pkthdr.len -= off0;
665 else
666 n->m_pkthdr.len = len;
667 copyhdr = 0;
668 }
669 if (len == M_COPYALL) {
670 if (min(len, (m->m_len - off)) == len) {
671 printf("m->m_len %d - off %d = %d, %d\n",
672 m->m_len, off, m->m_len - off,
673 min(len, (m->m_len - off)));
674 }
675 }
676 n->m_len = min(len, (m->m_len - off));
677 if (n->m_len == M_COPYALL) {
678 printf("n->m_len == M_COPYALL, fixing\n");
679 n->m_len = MHLEN;
680 }
681 if (m->m_flags & M_EXT) {
682 MBUF_LOCK();
683 n->m_ext = m->m_ext;
684 insque((queue_t)&n->m_ext.ext_refs, (queue_t)&m->m_ext.ext_refs);
685 MBUF_UNLOCK();
686 n->m_data = m->m_data + off;
687 n->m_flags |= M_EXT;
688 } else
689 bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
690 (unsigned)n->m_len);
691 if (len != M_COPYALL)
692 len -= n->m_len;
693 off = 0;
694 m = m->m_next;
695 np = &n->m_next;
696 }
697 if (top == 0)
698 MCFail++;
699 return (top);
700 nospace:
701 m_freem(top);
702 MCFail++;
703 return (0);
704 }
705
706 /*
707 * Copy data from an mbuf chain starting "off" bytes from the beginning,
708 * continuing for "len" bytes, into the indicated buffer.
709 */
710 void m_copydata(m, off, len, cp)
711 register struct mbuf *m;
712 register int off;
713 register int len;
714 caddr_t cp;
715 {
716 register unsigned count;
717
718 if (off < 0 || len < 0)
719 panic("m_copydata");
720 while (off > 0) {
721 if (m == 0)
722 panic("m_copydata");
723 if (off < m->m_len)
724 break;
725 off -= m->m_len;
726 m = m->m_next;
727 }
728 while (len > 0) {
729 if (m == 0)
730 panic("m_copydata");
731 count = min(m->m_len - off, len);
732 bcopy(mtod(m, caddr_t) + off, cp, count);
733 len -= count;
734 cp += count;
735 off = 0;
736 m = m->m_next;
737 }
738 }
739
740 /*
741 * Concatenate mbuf chain n to m.
742 * Both chains must be of the same type (e.g. MT_DATA).
743 * Any m_pkthdr is not updated.
744 */
745 void m_cat(m, n)
746 register struct mbuf *m, *n;
747 {
748 while (m->m_next)
749 m = m->m_next;
750 while (n) {
751 if (m->m_flags & M_EXT ||
752 m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
753 /* just join the two chains */
754 m->m_next = n;
755 return;
756 }
757 /* splat the data from one into the other */
758 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
759 (u_int)n->m_len);
760 m->m_len += n->m_len;
761 n = m_free(n);
762 }
763 }
764
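/*
 * Trim req_len bytes of data from the chain: from the head if req_len
 * is positive, from the tail if it is negative.  The packet header
 * length is adjusted when present.
 */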
765 void
766 m_adj(mp, req_len)
767 struct mbuf *mp;
768 int req_len;
769 {
770 register int len = req_len;
771 register struct mbuf *m;
772 register count;
773
774 if ((m = mp) == NULL)
775 return;
776 if (len >= 0) {
777 /*
778 * Trim from head.
779 */
780 while (m != NULL && len > 0) {
781 if (m->m_len <= len) {
782 len -= m->m_len;
783 m->m_len = 0;
784 m = m->m_next;
785 } else {
786 m->m_len -= len;
787 m->m_data += len;
788 len = 0;
789 }
790 }
791 m = mp;
792 if (m->m_flags & M_PKTHDR)
793 m->m_pkthdr.len -= (req_len - len);
794 } else {
795 /*
796 * Trim from tail. Scan the mbuf chain,
797 * calculating its length and finding the last mbuf.
798 * If the adjustment only affects this mbuf, then just
799 * adjust and return. Otherwise, rescan and truncate
800 * after the remaining size.
801 */
802 len = -len;
803 count = 0;
804 for (;;) {
805 count += m->m_len;
806 if (m->m_next == (struct mbuf *)0)
807 break;
808 m = m->m_next;
809 }
810 if (m->m_len >= len) {
811 m->m_len -= len;
812 m = mp;
813 if (m->m_flags & M_PKTHDR)
814 m->m_pkthdr.len -= len;
815 return;
816 }
817 count -= len;
818 if (count < 0)
819 count = 0;
820 /*
821 * Correct length for chain is "count".
822 * Find the mbuf with last data, adjust its length,
823 * and toss data from remaining mbufs on chain.
824 */
825 m = mp;
826 if (m->m_flags & M_PKTHDR)
827 m->m_pkthdr.len = count;
828 for (; m; m = m->m_next) {
829 if (m->m_len >= count) {
830 m->m_len = count;
831 break;
832 }
833 count -= m->m_len;
834 }
835 while (m = m->m_next)
836 m->m_len = 0;
837 }
838 }
839
840 /*
841 * Rearrange an mbuf chain so that len bytes are contiguous
842 * and in the data area of an mbuf (so that mtod and dtom
843 * will work for a structure of size len). Returns the resulting
844 * mbuf chain on success, frees it and returns null on failure.
845 * If there is room, it will add up to max_protohdr-len extra bytes to the
846 * contiguous region in an attempt to avoid being called next time.
847 */
848 int MPFail;
849
850 struct mbuf *
851 m_pullup(n, len)
852 register struct mbuf *n;
853 int len;
854 {
855 register struct mbuf *m;
856 register int count;
857 int space;
858
859 /*
860 * If first mbuf has no cluster, and has room for len bytes
861 * without shifting current data, pullup into it,
862 * otherwise allocate a new mbuf to prepend to the chain.
863 */
864 if ((n->m_flags & M_EXT) == 0 &&
865 n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
866 if (n->m_len >= len)
867 return (n);
868 m = n;
869 n = n->m_next;
870 len -= m->m_len;
871 } else {
872 if (len > MHLEN)
873 goto bad;
874 MGET(m, M_DONTWAIT, n->m_type);
875 if (m == 0)
876 goto bad;
877 m->m_len = 0;
878 if (n->m_flags & M_PKTHDR) {
879 M_COPY_PKTHDR(m, n);
880 n->m_flags &= ~M_PKTHDR;
881 }
882 }
883 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
884 do {
885 count = min(min(max(len, max_protohdr), space), n->m_len);
886 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
887 (unsigned)count);
888 len -= count;
889 m->m_len += count;
890 n->m_len -= count;
891 space -= count;
892 if (n->m_len)
893 n->m_data += count;
894 else
895 n = m_free(n);
896 } while (len > 0 && n);
897 if (len > 0) {
898 (void) m_free(m);
899 goto bad;
900 }
901 m->m_next = n;
902 return (m);
903 bad:
904 m_freem(n);
905 MPFail++;
906 return (0);
907 }
908
909 /*
910 * Partition an mbuf chain in two pieces, returning the tail --
911 * all but the first len0 bytes. In case of failure, it returns NULL and
912 * attempts to restore the chain to its original state.
913 */
914 struct mbuf *
915 m_split(m0, len0, wait)
916 register struct mbuf *m0;
917 int len0, wait;
918 {
919 register struct mbuf *m, *n;
920 unsigned len = len0, remain;
921
922 for (m = m0; m && len > m->m_len; m = m->m_next)
923 len -= m->m_len;
924 if (m == 0)
925 return (0);
926 remain = m->m_len - len;
927 if (m0->m_flags & M_PKTHDR) {
928 MGETHDR(n, wait, m0->m_type);
929 if (n == 0)
930 return (0);
931 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
932 n->m_pkthdr.len = m0->m_pkthdr.len - len0;
933 m0->m_pkthdr.len = len0;
934 if (m->m_flags & M_EXT)
935 goto extpacket;
936 if (remain > MHLEN) {
937 /* m can't be the lead packet */
938 MH_ALIGN(n, 0);
939 n->m_next = m_split(m, len, wait);
940 if (n->m_next == 0) {
941 (void) m_free(n);
942 return (0);
943 } else
944 return (n);
945 } else
946 MH_ALIGN(n, remain);
947 } else if (remain == 0) {
948 n = m->m_next;
949 m->m_next = 0;
950 return (n);
951 } else {
952 MGET(n, wait, m->m_type);
953 if (n == 0)
954 return (0);
955 M_ALIGN(n, remain);
956 }
957 extpacket:
958 if (m->m_flags & M_EXT) {
959 n->m_flags |= M_EXT;
960 MBUF_LOCK();
961 n->m_ext = m->m_ext;
962 insque((queue_t)&n->m_ext.ext_refs, (queue_t)&m->m_ext.ext_refs);
963 MBUF_UNLOCK();
964 n->m_data = m->m_data + len;
965 } else {
966 bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
967 }
968 n->m_len = remain;
969 m->m_len = len;
970 n->m_next = m->m_next;
971 m->m_next = 0;
972 return (n);
973 }
974 /*
975 * Routine to copy from device local memory into mbufs.
976 */
977 struct mbuf *
978 m_devget(buf, totlen, off0, ifp, copy)
979 char *buf;
980 int totlen, off0;
981 struct ifnet *ifp;
982 void (*copy)();
983 {
984 register struct mbuf *m;
985 struct mbuf *top = 0, **mp = &top;
986 register int off = off0, len;
987 register char *cp;
988 char *epkt;
989
990 cp = buf;
991 epkt = cp + totlen;
992 if (off) {
993 /*
994 * If 'off' is non-zero, packet is trailer-encapsulated,
995 * so we have to skip the type and length fields.
996 */
997 cp += off + 2 * sizeof(u_int16_t);
998 totlen -= 2 * sizeof(u_int16_t);
999 }
1000 MGETHDR(m, M_DONTWAIT, MT_DATA);
1001 if (m == 0)
1002 return (0);
1003 m->m_pkthdr.rcvif = ifp;
1004 m->m_pkthdr.len = totlen;
1005 m->m_len = MHLEN;
1006
1007 while (totlen > 0) {
1008 if (top) {
1009 MGET(m, M_DONTWAIT, MT_DATA);
1010 if (m == 0) {
1011 m_freem(top);
1012 return (0);
1013 }
1014 m->m_len = MLEN;
1015 }
1016 len = min(totlen, epkt - cp);
1017 if (len >= MINCLSIZE) {
1018 MCLGET(m, M_DONTWAIT);
1019 if (m->m_flags & M_EXT)
1020 m->m_len = len = min(len, MCLBYTES);
1021 else {
1022 /* give up when it's out of cluster mbufs */
1023 if (top)
1024 m_freem(top);
1025 m_freem(m);
1026 return (0);
1027 }
1028 } else {
1029 /*
1030 * Place initial small packet/header at end of mbuf.
1031 */
1032 if (len < m->m_len) {
1033 if (top == 0 && len + max_linkhdr <= m->m_len)
1034 m->m_data += max_linkhdr;
1035 m->m_len = len;
1036 } else
1037 len = m->m_len;
1038 }
1039 if (copy)
1040 copy(cp, mtod(m, caddr_t), (unsigned)len);
1041 else
1042 bcopy(cp, mtod(m, caddr_t), (unsigned)len);
1043 cp += len;
1044 *mp = m;
1045 mp = &m->m_next;
1046 totlen -= len;
1047 if (cp == epkt)
1048 cp = buf;
1049 }
1050 return (top);
1051 }
1052
1053 /*
1054 * Cluster freelist allocation check. The mbuf lock must be held.
1055 * Ensure hysteresis between hi/lo.
1056 */
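/*
 * Illustrative numbers (not from the original sources): with MINCL = 16,
 * nmbclusters = 512, m_clusters = 100 and m_clfree = 20, this returns
 * (100 >> 1) - 20 = 30, i.e. grow the pool until at least half of the
 * allocated clusters are free again.
 */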
1057 static int
1058 m_howmany()
1059 {
1060 register int i;
1061
1062 /* Under minimum */
1063 if (mbstat.m_clusters < MINCL)
1064 return (MINCL - mbstat.m_clusters);
1065 /* Too few (free < 1/2 total) and not over maximum */
1066 if (mbstat.m_clusters < nmbclusters &&
1067 (i = ((mbstat.m_clusters >> 1) - mbstat.m_clfree)) > 0)
1068 return i;
1069 return 0;
1070 }
1071
1072
1073 /*
1074 * Copy data from a buffer back into the indicated mbuf chain,
1075 * starting "off" bytes from the beginning, extending the mbuf
1076 * chain if necessary.
1077 */
1078 void
1079 m_copyback(m0, off, len, cp)
1080 struct mbuf *m0;
1081 register int off;
1082 register int len;
1083 caddr_t cp;
1084 {
1085 register int mlen;
1086 register struct mbuf *m = m0, *n;
1087 int totlen = 0;
1088
1089 if (m0 == 0)
1090 return;
1091 while (off > (mlen = m->m_len)) {
1092 off -= mlen;
1093 totlen += mlen;
1094 if (m->m_next == 0) {
1095 n = m_getclr(M_DONTWAIT, m->m_type);
1096 if (n == 0)
1097 goto out;
1098 n->m_len = min(MLEN, len + off);
1099 m->m_next = n;
1100 }
1101 m = m->m_next;
1102 }
1103 while (len > 0) {
1104 mlen = min (m->m_len - off, len);
1105 bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
1106 cp += mlen;
1107 len -= mlen;
1108 mlen += off;
1109 off = 0;
1110 totlen += mlen;
1111 if (len == 0)
1112 break;
1113 if (m->m_next == 0) {
1114 n = m_get(M_DONTWAIT, m->m_type);
1115 if (n == 0)
1116 break;
1117 n->m_len = min(MLEN, len);
1118 m->m_next = n;
1119 }
1120 m = m->m_next;
1121 }
1122 out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
1123 m->m_pkthdr.len = totlen;
1124 }
1125
1126
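/*
 * Convert a virtual address within a mapped cluster to its physical
 * address, using the per-page physical addresses that m_clalloc()
 * recorded in mcl_paddr.  Returns 0 if the address is not a cluster
 * address or its page has not been backed yet.
 */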
1127 char *mcl_to_paddr(register char *addr) {
1128 register int base_phys;
1129
1130 if (addr < (char *)mbutl || addr >= (char *)embutl)
1131 return (0);
1132 base_phys = mcl_paddr[(addr - (char *)mbutl) >> PAGE_SHIFT];
1133
1134 if (base_phys == 0)
1135 return (0);
1136 return ((char *)((int)base_phys | ((int)addr & PAGE_MASK)));
1137 }
1138
1139 /*
1140 * Dup the mbuf chain passed in. The whole thing. No cute additional cruft.
1141 * And really copy the thing. That way, we don't "precompute" checksums
1142 * for unsuspecting consumers.
1143 * Assumption: m->m_nextpkt == 0.
1144 * Trick: for small packets, don't dup into a cluster. That way received
1145 * packets don't take up too much room in the sockbuf (cf. sbspace()).
1146 */
1147 int MDFail;
1148
1149 struct mbuf *
1150 m_dup(register struct mbuf *m, int how)
1151 { register struct mbuf *n, **np;
1152 struct mbuf *top;
1153 int copyhdr = 0;
1154
1155 np = &top;
1156 top = 0;
1157 if (m->m_flags & M_PKTHDR)
1158 copyhdr = 1;
1159
1160 /*
1161 * Quick check: if we have one mbuf and its data fits in an
1162 * mbuf with packet header, just copy and go.
1163 */
1164 if (m->m_next == NULL)
1165 { /* Then just move the data into an mbuf and be done... */
1166 if (copyhdr)
1167 { if (m->m_pkthdr.len <= MHLEN)
1168 { if ((n = m_gethdr(how, m->m_type)) == NULL)
1169 return(NULL);
1170 n->m_len = m->m_len;
1171 n->m_flags |= (m->m_flags & M_COPYFLAGS);
1172 n->m_pkthdr.len = m->m_pkthdr.len;
1173 n->m_pkthdr.rcvif = m->m_pkthdr.rcvif;
1174 n->m_pkthdr.header = NULL;
1175 n->m_pkthdr.aux = NULL;
1176 bcopy(m->m_data, n->m_data, m->m_pkthdr.len);
1177 return(n);
1178 }
1179 } else if (m->m_len <= MLEN)
1180 { if ((n = m_get(how, m->m_type)) == NULL)
1181 return(NULL);
1182 bcopy(m->m_data, n->m_data, m->m_len);
1183 n->m_len = m->m_len;
1184 return(n);
1185 }
1186 }
1187 while (m)
1188 {
1189 #if BLUE_DEBUG
1190 kprintf("<%x: %x, %x, %x\n", m, m->m_flags, m->m_len,
1191 m->m_data);
1192 #endif
1193 if (copyhdr)
1194 n = m_gethdr(how, m->m_type);
1195 else
1196 n = m_get(how, m->m_type);
1197 if (n == 0)
1198 goto nospace;
1199 if (m->m_flags & M_EXT)
1200 { MCLGET(n, how);
1201 if ((n->m_flags & M_EXT) == 0)
1202 goto nospace;
1203 }
1204 *np = n;
1205 if (copyhdr)
1206 { /* Don't use M_COPY_PKTHDR: preserve m_data */
1207 n->m_pkthdr = m->m_pkthdr;
1208 n->m_flags |= (m->m_flags & M_COPYFLAGS);
1209 copyhdr = 0;
1210 if ((n->m_flags & M_EXT) == 0)
1211 n->m_data = n->m_pktdat;
1212 }
1213 n->m_len = m->m_len;
1214 /*
1215 * Get the dup on the same bdry as the original
1216 * Assume that the two mbufs have the same offset to data area
1217 * (up to word bdries)
1218 */
1219 bcopy(mtod(m, caddr_t), mtod(n, caddr_t), (unsigned)n->m_len);
1220 m = m->m_next;
1221 np = &n->m_next;
1222 #if BLUE_DEBUG
1223 kprintf(">%x: %x, %x, %x\n", n, n->m_flags, n->m_len,
1224 n->m_data);
1225 #endif
1226 }
1227
1228 if (top == 0)
1229 MDFail++;
1230 return (top);
1231 nospace:
1232 m_freem(top);
1233 MDFail++;
1234 return (0);
1235 }
1236
1237 #if 0
1238 #include <sys/sysctl.h>
1239
1240 static int mhog_num = 0;
1241 static struct mbuf *mhog_chain = 0;
1242 static int mhog_wait = 1;
1243
1244 static int
1245 sysctl_mhog_num SYSCTL_HANDLER_ARGS
1246 {
1247 int old = mhog_num;
1248 int error;
1249
1250 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
1251 if (!error && req->newptr) {
1252 int i;
1253 struct mbuf *m;
1254
1255 if (mhog_chain) {
1256 m_freem(mhog_chain);
1257 mhog_chain = 0;
1258 }
1259
1260 for (i = 0; i < mhog_num; i++) {
1261 MGETHDR(m, mhog_wait ? M_WAIT : M_DONTWAIT, MT_DATA);
1262 if (m == 0)
1263 break;
1264
1265 MCLGET(m, mhog_wait ? M_WAIT : M_DONTWAIT);
1266 if ((m->m_flags & M_EXT) == 0) {
1267 m_free(m);
1268 m = 0;
1269 break;
1270 }
1271 m->m_next = mhog_chain;
1272 mhog_chain = m;
1273 }
1274 mhog_num = i;
1275 }
1276
1277 return error;
1278 }
1279
1280 SYSCTL_NODE(_kern_ipc, OID_AUTO, mhog, CTLFLAG_RW, 0, "mbuf hog");
1281
1282 SYSCTL_PROC(_kern_ipc_mhog, OID_AUTO, cluster, CTLTYPE_INT|CTLFLAG_RW,
1283 &mhog_num, 0, &sysctl_mhog_num, "I", "");
1284 SYSCTL_INT(_kern_ipc_mhog, OID_AUTO, wait, CTLFLAG_RW, &mhog_wait,
1285 0, "");
1286 #endif
1287