]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/bpf.c
xnu-4570.31.3.tar.gz
[apple/xnu.git] / bsd / net / bpf.c
1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/in_pcb.h>
112 #include <netinet/in_var.h>
113 #include <netinet/ip_var.h>
114 #include <netinet/tcp.h>
115 #include <netinet/tcp_var.h>
116 #include <netinet/udp.h>
117 #include <netinet/udp_var.h>
118 #include <netinet/if_ether.h>
119 #include <sys/kernel.h>
120 #include <sys/sysctl.h>
121 #include <net/firewire.h>
122
123 #include <miscfs/devfs/devfs.h>
124 #include <net/dlil.h>
125 #include <net/pktap.h>
126
127 #include <kern/locks.h>
128 #include <kern/thread_call.h>
129 #include <libkern/section_keywords.h>
130
131 #if CONFIG_MACF_NET
132 #include <security/mac_framework.h>
133 #endif /* MAC_NET */
134
135 extern int tvtohz(struct timeval *);
136
137 #define BPF_BUFSIZE 4096
138 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
139
140
141 #define PRINET 26 /* interruptible */
142
143 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
144
145 /*
146 * The default read buffer size is patchable.
147 */
148 static unsigned int bpf_bufsize = BPF_BUFSIZE;
149 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
150 &bpf_bufsize, 0, "");
151 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
152 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
153 &bpf_maxbufsize, 0, "");
154 static unsigned int bpf_maxdevices = 256;
155 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
156 &bpf_maxdevices, 0, "");
157 /*
158 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
159 * On OS X it is off by default, so a process needs to use the ioctl
160 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
161 */
162 #if CONFIG_EMBEDDED
163 static unsigned int bpf_wantpktap = 1;
164 #else
165 static unsigned int bpf_wantpktap = 0;
166 #endif
167 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
168 &bpf_wantpktap, 0, "");
169
170 static int bpf_debug = 0;
171 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
172 &bpf_debug, 0, "");
173
174 /*
175 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
176 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
177 */
178 static struct bpf_if *bpf_iflist;
179 #ifdef __APPLE__
180 /*
181 * BSD now stores the bpf_d in the dev_t which is a struct
182 * on their system. Our dev_t is an int, so we still store
183 * the bpf_d in a separate table indexed by minor device #.
184 *
185 * The value stored in bpf_dtab[n] represents one of three states:
186 * 0: device not opened
187 * 1: device opening or closing
188 * other: device <n> opened with pointer to storage
189 */
190 static struct bpf_d **bpf_dtab = NULL;
191 static unsigned int bpf_dtab_size = 0;
192 static unsigned int nbpfilter = 0;
193
194 decl_lck_mtx_data(static, bpf_mlock_data);
195 static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
196 static lck_grp_t *bpf_mlock_grp;
197 static lck_grp_attr_t *bpf_mlock_grp_attr;
198 static lck_attr_t *bpf_mlock_attr;
199
200 #endif /* __APPLE__ */
201
202 static int bpf_allocbufs(struct bpf_d *);
203 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
204 static int bpf_detachd(struct bpf_d *d, int);
205 static void bpf_freed(struct bpf_d *);
206 static int bpf_movein(struct uio *, int,
207 struct mbuf **, struct sockaddr *, int *);
208 static int bpf_setif(struct bpf_d *, ifnet_t ifp);
209 static void bpf_timed_out(void *, void *);
210 static void bpf_wakeup(struct bpf_d *);
211 static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
212 static void reset_d(struct bpf_d *);
213 static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
214 static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
215 static int bpf_setdlt(struct bpf_d *, u_int);
216 static int bpf_set_traffic_class(struct bpf_d *, int);
217 static void bpf_set_packet_service_class(struct mbuf *, int);
218
219 static void bpf_acquire_d(struct bpf_d *);
220 static void bpf_release_d(struct bpf_d *);
221
222 static int bpf_devsw_installed;
223
224 void bpf_init(void *unused);
225 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
226
227 /*
228 * Darwin differs from BSD here, the following are static
229 * on BSD and not static on Darwin.
230 */
231 d_open_t bpfopen;
232 d_close_t bpfclose;
233 d_read_t bpfread;
234 d_write_t bpfwrite;
235 ioctl_fcn_t bpfioctl;
236 select_fcn_t bpfselect;
237
238
239 /* Darwin's cdevsw struct differs slightly from BSDs */
240 #define CDEV_MAJOR 23
241 static struct cdevsw bpf_cdevsw = {
242 /* open */ bpfopen,
243 /* close */ bpfclose,
244 /* read */ bpfread,
245 /* write */ bpfwrite,
246 /* ioctl */ bpfioctl,
247 /* stop */ eno_stop,
248 /* reset */ eno_reset,
249 /* tty */ NULL,
250 /* select */ bpfselect,
251 /* mmap */ eno_mmap,
252 /* strategy*/ eno_strat,
253 /* getc */ eno_getc,
254 /* putc */ eno_putc,
255 /* type */ 0
256 };
257
258 #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
259
260 static int
261 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
262 {
263 struct mbuf *m;
264 int error;
265 int len;
266 uint8_t sa_family;
267 int hlen;
268
269 switch (linktype) {
270
271 #if SLIP
272 case DLT_SLIP:
273 sa_family = AF_INET;
274 hlen = 0;
275 break;
276 #endif /* SLIP */
277
278 case DLT_EN10MB:
279 sa_family = AF_UNSPEC;
280 /* XXX Would MAXLINKHDR be better? */
281 hlen = sizeof(struct ether_header);
282 break;
283
284 #if FDDI
285 case DLT_FDDI:
286 #if defined(__FreeBSD__) || defined(__bsdi__)
287 sa_family = AF_IMPLINK;
288 hlen = 0;
289 #else
290 sa_family = AF_UNSPEC;
291 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
292 hlen = 24;
293 #endif
294 break;
295 #endif /* FDDI */
296
297 case DLT_RAW:
298 case DLT_NULL:
299 sa_family = AF_UNSPEC;
300 hlen = 0;
301 break;
302
303 #ifdef __FreeBSD__
304 case DLT_ATM_RFC1483:
305 /*
306 * en atm driver requires 4-byte atm pseudo header.
307 * though it isn't standard, vpi:vci needs to be
308 * specified anyway.
309 */
310 sa_family = AF_UNSPEC;
311 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
312 break;
313 #endif
314
315 case DLT_PPP:
316 sa_family = AF_UNSPEC;
317 hlen = 4; /* This should match PPP_HDRLEN */
318 break;
319
320 case DLT_APPLE_IP_OVER_IEEE1394:
321 sa_family = AF_UNSPEC;
322 hlen = sizeof(struct firewire_header);
323 break;
324
325 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
326 sa_family = AF_IEEE80211;
327 hlen = 0;
328 break;
329
330 case DLT_IEEE802_11_RADIO:
331 sa_family = AF_IEEE80211;
332 hlen = 0;
333 break;
334
335 default:
336 return (EIO);
337 }
338
339 // LP64todo - fix this!
340 len = uio_resid(uio);
341 *datlen = len - hlen;
342 if ((unsigned)len > MCLBYTES)
343 return (EIO);
344
345 if (sockp) {
346 /*
347 * Build a sockaddr based on the data link layer type.
348 * We do this at this level because the ethernet header
349 * is copied directly into the data field of the sockaddr.
350 * In the case of SLIP, there is no header and the packet
351 * is forwarded as is.
352 * Also, we are careful to leave room at the front of the mbuf
353 * for the link level header.
354 */
355 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
356 return (EIO);
357 }
358 sockp->sa_family = sa_family;
359 } else {
360 /*
361 * We're directly sending the packet data supplied by
362 * the user; we don't need to make room for the link
363 * header, and don't need the header length value any
364 * more, so set it to 0.
365 */
366 hlen = 0;
367 }
368
369 MGETHDR(m, M_WAIT, MT_DATA);
370 if (m == 0)
371 return (ENOBUFS);
372 if ((unsigned)len > MHLEN) {
373 MCLGET(m, M_WAIT);
374 if ((m->m_flags & M_EXT) == 0) {
375 error = ENOBUFS;
376 goto bad;
377 }
378 }
379 m->m_pkthdr.len = m->m_len = len;
380 m->m_pkthdr.rcvif = NULL;
381 *mp = m;
382
383 /*
384 * Make room for link header.
385 */
386 if (hlen != 0) {
387 m->m_pkthdr.len -= hlen;
388 m->m_len -= hlen;
389 m->m_data += hlen; /* XXX */
390 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
391 if (error)
392 goto bad;
393 }
394 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
395 if (error)
396 goto bad;
397
398 /* Check for multicast destination */
399 switch (linktype) {
400 case DLT_EN10MB: {
401 struct ether_header *eh = mtod(m, struct ether_header *);
402
403 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
404 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
405 m->m_flags |= M_BCAST;
406 else
407 m->m_flags |= M_MCAST;
408 }
409 break;
410 }
411 }
412
413 return 0;
414 bad:
415 m_freem(m);
416 return (error);
417 }
418
419 #ifdef __APPLE__
420
421 /*
422 * The dynamic addition of a new device node must block all processes that
423 * are opening the last device so that no process will get an unexpected
424 * ENOENT
425 */
426 static void
427 bpf_make_dev_t(int maj)
428 {
429 static int bpf_growing = 0;
430 unsigned int cur_size = nbpfilter, i;
431
432 if (nbpfilter >= bpf_maxdevices)
433 return;
434
435 while (bpf_growing) {
436 /* Wait until new device has been created */
437 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
438 }
439 if (nbpfilter > cur_size) {
440 /* other thread grew it already */
441 return;
442 }
443 bpf_growing = 1;
444
445 /* need to grow bpf_dtab first */
446 if (nbpfilter == bpf_dtab_size) {
447 int new_dtab_size;
448 struct bpf_d **new_dtab = NULL;
449 struct bpf_d **old_dtab = NULL;
450
451 new_dtab_size = bpf_dtab_size + NBPFILTER;
452 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
453 if (new_dtab == 0) {
454 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
455 goto done;
456 }
457 if (bpf_dtab) {
458 bcopy(bpf_dtab, new_dtab,
459 sizeof(struct bpf_d *) * bpf_dtab_size);
460 }
461 bzero(new_dtab + bpf_dtab_size,
462 sizeof(struct bpf_d *) * NBPFILTER);
463 old_dtab = bpf_dtab;
464 bpf_dtab = new_dtab;
465 bpf_dtab_size = new_dtab_size;
466 if (old_dtab != NULL)
467 _FREE(old_dtab, M_DEVBUF);
468 }
469 i = nbpfilter++;
470 (void) devfs_make_node(makedev(maj, i),
471 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
472 "bpf%d", i);
473 done:
474 bpf_growing = 0;
475 wakeup((caddr_t)&bpf_growing);
476 }
477
478 #endif
479
480 /*
481 * Attach file to the bpf interface, i.e. make d listen on bp.
482 */
483 static errno_t
484 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
485 {
486 int first = bp->bif_dlist == NULL;
487 int error = 0;
488
489 /*
490 * Point d at bp, and add d to the interface's list of listeners.
491 * Finally, point the driver's bpf cookie at the interface so
492 * it will divert packets to bpf.
493 */
494 d->bd_bif = bp;
495 d->bd_next = bp->bif_dlist;
496 bp->bif_dlist = d;
497
498 /*
499 * Take a reference on the device even if an error is returned
500 * because we keep the device in the interface's list of listeners
501 */
502 bpf_acquire_d(d);
503
504 if (first) {
505 /* Find the default bpf entry for this ifp */
506 if (bp->bif_ifp->if_bpf == NULL) {
507 struct bpf_if *tmp, *primary = NULL;
508
509 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
510 if (tmp->bif_ifp == bp->bif_ifp) {
511 primary = tmp;
512 break;
513 }
514 }
515 bp->bif_ifp->if_bpf = primary;
516 }
517 /* Only call dlil_set_bpf_tap for primary dlt */
518 if (bp->bif_ifp->if_bpf == bp)
519 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);
520
521 if (bp->bif_tap != NULL)
522 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
523 }
524
525 /*
526 * Reset the detach flags in case we previously detached an interface
527 */
528 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
529
530 if (bp->bif_dlt == DLT_PKTAP) {
531 d->bd_flags |= BPF_FINALIZE_PKTAP;
532 } else {
533 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
534 }
535 return error;
536 }
537
538 /*
539 * Detach a file from its interface.
540 *
541 * Return 1 if was closed by some thread, 0 otherwise
542 */
543 static int
544 bpf_detachd(struct bpf_d *d, int closing)
545 {
546 struct bpf_d **p;
547 struct bpf_if *bp;
548 struct ifnet *ifp;
549
550 /*
551 * Some other thread already detached
552 */
553 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
554 goto done;
555 /*
556 * This thread is doing the detach
557 */
558 d->bd_flags |= BPF_DETACHING;
559
560 ifp = d->bd_bif->bif_ifp;
561 bp = d->bd_bif;
562
563 if (bpf_debug != 0)
564 printf("%s: %llx %s%s\n",
565 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
566 if_name(ifp), closing ? " closing" : "");
567
568 /* Remove d from the interface's descriptor list. */
569 p = &bp->bif_dlist;
570 while (*p != d) {
571 p = &(*p)->bd_next;
572 if (*p == 0)
573 panic("bpf_detachd: descriptor not in list");
574 }
575 *p = (*p)->bd_next;
576 if (bp->bif_dlist == 0) {
577 /*
578 * Let the driver know that there are no more listeners.
579 */
580 /* Only call dlil_set_bpf_tap for primary dlt */
581 if (bp->bif_ifp->if_bpf == bp)
582 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
583 if (bp->bif_tap)
584 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
585
586 for (bp = bpf_iflist; bp; bp = bp->bif_next)
587 if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
588 break;
589 if (bp == NULL)
590 ifp->if_bpf = NULL;
591 }
592 d->bd_bif = NULL;
593 /*
594 * Check if this descriptor had requested promiscuous mode.
595 * If so, turn it off.
596 */
597 if (d->bd_promisc) {
598 d->bd_promisc = 0;
599 lck_mtx_unlock(bpf_mlock);
600 if (ifnet_set_promiscuous(ifp, 0)) {
601 /*
602 * Something is really wrong if we were able to put
603 * the driver into promiscuous mode, but can't
604 * take it out.
605 * Most likely the network interface is gone.
606 */
607 printf("%s: ifnet_set_promiscuous failed\n", __func__);
608 }
609 lck_mtx_lock(bpf_mlock);
610 }
611
612 /*
613 * Wake up other thread that are waiting for this thread to finish
614 * detaching
615 */
616 d->bd_flags &= ~BPF_DETACHING;
617 d->bd_flags |= BPF_DETACHED;
618 /*
619 * Note that We've kept the reference because we may have dropped
620 * the lock when turning off promiscuous mode
621 */
622 bpf_release_d(d);
623
624 done:
625 /*
626 * When closing makes sure no other thread refer to the bpf_d
627 */
628 if (bpf_debug != 0)
629 printf("%s: %llx done\n",
630 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
631 /*
632 * Let the caller know the bpf_d is closed
633 */
634 if ((d->bd_flags & BPF_CLOSING))
635 return (1);
636 else
637 return (0);
638 }
639
640
641 /*
642 * Start asynchronous timer, if necessary.
643 * Must be called with bpf_mlock held.
644 */
645 static void
646 bpf_start_timer(struct bpf_d *d)
647 {
648 uint64_t deadline;
649 struct timeval tv;
650
651 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
652 tv.tv_sec = d->bd_rtout / hz;
653 tv.tv_usec = (d->bd_rtout % hz) * tick;
654
655 clock_interval_to_deadline(
656 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
657 NSEC_PER_USEC, &deadline);
658 /*
659 * The state is BPF_IDLE, so the timer hasn't
660 * been started yet, and hasn't gone off yet;
661 * there is no thread call scheduled, so this
662 * won't change the schedule.
663 *
664 * XXX - what if, by the time it gets entered,
665 * the deadline has already passed?
666 */
667 thread_call_enter_delayed(d->bd_thread_call, deadline);
668 d->bd_state = BPF_WAITING;
669 }
670 }
671
672 /*
673 * Cancel asynchronous timer.
674 * Must be called with bpf_mlock held.
675 */
676 static boolean_t
677 bpf_stop_timer(struct bpf_d *d)
678 {
679 /*
680 * If the timer has already gone off, this does nothing.
681 * Our caller is expected to set d->bd_state to BPF_IDLE,
682 * with the bpf_mlock, after we are called. bpf_timed_out()
683 * also grabs bpf_mlock, so, if the timer has gone off and
684 * bpf_timed_out() hasn't finished, it's waiting for the
685 * lock; when this thread releases the lock, it will
686 * find the state is BPF_IDLE, and just release the
687 * lock and return.
688 */
689 return (thread_call_cancel(d->bd_thread_call));
690 }
691
692 void
693 bpf_acquire_d(struct bpf_d *d)
694 {
695 void *lr_saved = __builtin_return_address(0);
696
697 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
698
699 d->bd_refcnt += 1;
700
701 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
702 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
703 }
704
705 void
706 bpf_release_d(struct bpf_d *d)
707 {
708 void *lr_saved = __builtin_return_address(0);
709
710 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
711
712 if (d->bd_refcnt <= 0)
713 panic("%s: %p refcnt <= 0", __func__, d);
714
715 d->bd_refcnt -= 1;
716
717 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
718 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
719
720 if (d->bd_refcnt == 0) {
721 /* Assert the device is detached */
722 if ((d->bd_flags & BPF_DETACHED) == 0)
723 panic("%s: %p BPF_DETACHED not set", __func__, d);
724
725 _FREE(d, M_DEVBUF);
726 }
727 }
728
729 /*
730 * Open ethernet device. Returns ENXIO for illegal minor device number,
731 * EBUSY if file is open by another process.
732 */
733 /* ARGSUSED */
734 int
735 bpfopen(dev_t dev, int flags, __unused int fmt,
736 __unused struct proc *p)
737 {
738 struct bpf_d *d;
739
740 lck_mtx_lock(bpf_mlock);
741 if ((unsigned int) minor(dev) >= nbpfilter) {
742 lck_mtx_unlock(bpf_mlock);
743 return (ENXIO);
744 }
745 /*
746 * New device nodes are created on demand when opening the last one.
747 * The programming model is for processes to loop on the minor starting at 0
748 * as long as EBUSY is returned. The loop stops when either the open succeeds or
749 * an error other that EBUSY is returned. That means that bpf_make_dev_t() must
750 * block all processes that are opening the last node. If not all
751 * processes are blocked, they could unexpectedly get ENOENT and abort their
752 * opening loop.
753 */
754 if ((unsigned int) minor(dev) == (nbpfilter - 1))
755 bpf_make_dev_t(major(dev));
756
757 /*
758 * Each minor can be opened by only one process. If the requested
759 * minor is in use, return EBUSY.
760 *
761 * Important: bpfopen() and bpfclose() have to check and set the status of a device
762 * in the same lockin context otherwise the device may be leaked because the vnode use count
763 * will be unpextectly greater than 1 when close() is called.
764 */
765 if (bpf_dtab[minor(dev)] == 0) {
766 bpf_dtab[minor(dev)] = (void *)1; /* Mark opening */
767 } else {
768 lck_mtx_unlock(bpf_mlock);
769 return (EBUSY);
770 }
771 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
772 M_WAIT | M_ZERO);
773 if (d == NULL) {
774 /* this really is a catastrophic failure */
775 printf("bpfopen: malloc bpf_d failed\n");
776 bpf_dtab[minor(dev)] = NULL;
777 lck_mtx_unlock(bpf_mlock);
778 return ENOMEM;
779 }
780
781 /* Mark "in use" and do most initialization. */
782 bpf_acquire_d(d);
783 d->bd_bufsize = bpf_bufsize;
784 d->bd_sig = SIGIO;
785 d->bd_seesent = 1;
786 d->bd_oflags = flags;
787 d->bd_state = BPF_IDLE;
788 d->bd_traffic_class = SO_TC_BE;
789 d->bd_flags |= BPF_DETACHED;
790 if (bpf_wantpktap)
791 d->bd_flags |= BPF_WANT_PKTAP;
792 else
793 d->bd_flags &= ~BPF_WANT_PKTAP;
794 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
795 if (d->bd_thread_call == NULL) {
796 printf("bpfopen: malloc thread call failed\n");
797 bpf_dtab[minor(dev)] = NULL;
798 bpf_release_d(d);
799 lck_mtx_unlock(bpf_mlock);
800
801 return (ENOMEM);
802 }
803 #if CONFIG_MACF_NET
804 mac_bpfdesc_label_init(d);
805 mac_bpfdesc_label_associate(kauth_cred_get(), d);
806 #endif
807 bpf_dtab[minor(dev)] = d; /* Mark opened */
808 lck_mtx_unlock(bpf_mlock);
809
810 return (0);
811 }
812
813 /*
814 * Close the descriptor by detaching it from its interface,
815 * deallocating its buffers, and marking it free.
816 */
817 /* ARGSUSED */
818 int
819 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
820 __unused struct proc *p)
821 {
822 struct bpf_d *d;
823
824 /* Take BPF lock to ensure no other thread is using the device */
825 lck_mtx_lock(bpf_mlock);
826
827 d = bpf_dtab[minor(dev)];
828 if (d == 0 || d == (void *)1) {
829 lck_mtx_unlock(bpf_mlock);
830 return (ENXIO);
831 }
832
833 /*
834 * Other threads may call bpd_detachd() if we drop the bpf_mlock
835 */
836 d->bd_flags |= BPF_CLOSING;
837
838 if (bpf_debug != 0)
839 printf("%s: %llx\n",
840 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
841
842 bpf_dtab[minor(dev)] = (void *)1; /* Mark closing */
843
844 /*
845 * Deal with any in-progress timeouts.
846 */
847 switch (d->bd_state) {
848 case BPF_IDLE:
849 /*
850 * Not waiting for a timeout, and no timeout happened.
851 */
852 break;
853
854 case BPF_WAITING:
855 /*
856 * Waiting for a timeout.
857 * Cancel any timer that has yet to go off,
858 * and mark the state as "closing".
859 * Then drop the lock to allow any timers that
860 * *have* gone off to run to completion, and wait
861 * for them to finish.
862 */
863 if (!bpf_stop_timer(d)) {
864 /*
865 * There was no pending call, so the call must
866 * have been in progress. Wait for the call to
867 * complete; we have to drop the lock while
868 * waiting. to let the in-progrss call complete
869 */
870 d->bd_state = BPF_DRAINING;
871 while (d->bd_state == BPF_DRAINING)
872 msleep((caddr_t)d, bpf_mlock, PRINET,
873 "bpfdraining", NULL);
874 }
875 d->bd_state = BPF_IDLE;
876 break;
877
878 case BPF_TIMED_OUT:
879 /*
880 * Timer went off, and the timeout routine finished.
881 */
882 d->bd_state = BPF_IDLE;
883 break;
884
885 case BPF_DRAINING:
886 /*
887 * Another thread is blocked on a close waiting for
888 * a timeout to finish.
889 * This "shouldn't happen", as the first thread to enter
890 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
891 * all subsequent threads should see that and fail with
892 * ENXIO.
893 */
894 panic("Two threads blocked in a BPF close");
895 break;
896 }
897
898 if (d->bd_bif)
899 bpf_detachd(d, 1);
900 selthreadclear(&d->bd_sel);
901 #if CONFIG_MACF_NET
902 mac_bpfdesc_label_destroy(d);
903 #endif
904 thread_call_free(d->bd_thread_call);
905
906 while (d->bd_hbuf_read)
907 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
908
909 bpf_freed(d);
910
911 /* Mark free in same context as bpfopen comes to check */
912 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
913
914 bpf_release_d(d);
915
916 lck_mtx_unlock(bpf_mlock);
917
918 return (0);
919 }
920
921
922 #define BPF_SLEEP bpf_sleep
923
924 static int
925 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
926 {
927 u_int64_t abstime = 0;
928
929 if(timo)
930 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
931
932 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
933 }
934
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length and the packet count of the new store buffer, and
 * leave the free slot empty.
 *
 * Panics if the hold buffer is currently being read (bd_hbuf_read set).
 *
 * d is a pointer to the descriptor.  It is evaluated several times, so
 * it must not have side effects.  The expansion is a single statement:
 * use it as "ROTATE_BUFFERS(d);", which is also safe as the body of an
 * unbraced if/else.
 */
#define ROTATE_BUFFERS(d) do {					\
	if ((d)->bd_hbuf_read)					\
		panic("rotating bpf buffers during read");	\
	(d)->bd_hbuf = (d)->bd_sbuf;				\
	(d)->bd_hlen = (d)->bd_slen;				\
	(d)->bd_hcnt = (d)->bd_scnt;				\
	(d)->bd_sbuf = (d)->bd_fbuf;				\
	(d)->bd_slen = 0;					\
	(d)->bd_scnt = 0;					\
	(d)->bd_fbuf = NULL;					\
} while (0)
950 /*
951 * bpfread - read next chunk of packets from buffers
952 */
953 int
954 bpfread(dev_t dev, struct uio *uio, int ioflag)
955 {
956 struct bpf_d *d;
957 caddr_t hbuf;
958 int timed_out, hbuf_len;
959 int error;
960 int flags;
961
962 lck_mtx_lock(bpf_mlock);
963
964 d = bpf_dtab[minor(dev)];
965 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
966 lck_mtx_unlock(bpf_mlock);
967 return (ENXIO);
968 }
969
970 bpf_acquire_d(d);
971
972 /*
973 * Restrict application to use a buffer the same size as
974 * as kernel buffers.
975 */
976 if (uio_resid(uio) != d->bd_bufsize) {
977 bpf_release_d(d);
978 lck_mtx_unlock(bpf_mlock);
979 return (EINVAL);
980 }
981
982 if (d->bd_state == BPF_WAITING)
983 bpf_stop_timer(d);
984
985 timed_out = (d->bd_state == BPF_TIMED_OUT);
986 d->bd_state = BPF_IDLE;
987
988 while (d->bd_hbuf_read)
989 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
990
991 if ((d->bd_flags & BPF_CLOSING) != 0) {
992 bpf_release_d(d);
993 lck_mtx_unlock(bpf_mlock);
994 return (ENXIO);
995 }
996 /*
997 * If the hold buffer is empty, then do a timed sleep, which
998 * ends when the timeout expires or when enough packets
999 * have arrived to fill the store buffer.
1000 */
1001 while (d->bd_hbuf == 0) {
1002 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
1003 && d->bd_slen != 0) {
1004 /*
1005 * We're in immediate mode, or are reading
1006 * in non-blocking mode, or a timer was
1007 * started before the read (e.g., by select()
1008 * or poll()) and has expired and a packet(s)
1009 * either arrived since the previous
1010 * read or arrived while we were asleep.
1011 * Rotate the buffers and return what's here.
1012 */
1013 ROTATE_BUFFERS(d);
1014 break;
1015 }
1016
1017 /*
1018 * No data is available, check to see if the bpf device
1019 * is still pointed at a real interface. If not, return
1020 * ENXIO so that the userland process knows to rebind
1021 * it before using it again.
1022 */
1023 if (d->bd_bif == NULL) {
1024 bpf_release_d(d);
1025 lck_mtx_unlock(bpf_mlock);
1026 return (ENXIO);
1027 }
1028 if (ioflag & IO_NDELAY) {
1029 bpf_release_d(d);
1030 lck_mtx_unlock(bpf_mlock);
1031 return (EWOULDBLOCK);
1032 }
1033 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
1034 d->bd_rtout);
1035 /*
1036 * Make sure device is still opened
1037 */
1038 if ((d->bd_flags & BPF_CLOSING) != 0) {
1039 bpf_release_d(d);
1040 lck_mtx_unlock(bpf_mlock);
1041 return (ENXIO);
1042 }
1043
1044 while (d->bd_hbuf_read)
1045 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1046
1047 if ((d->bd_flags & BPF_CLOSING) != 0) {
1048 bpf_release_d(d);
1049 lck_mtx_unlock(bpf_mlock);
1050 return (ENXIO);
1051 }
1052
1053 if (error == EINTR || error == ERESTART) {
1054 if (d->bd_hbuf != NULL) {
1055 /*
1056 * Because we msleep, the hold buffer might
1057 * be filled when we wake up. Avoid rotating
1058 * in this case.
1059 */
1060 break;
1061 }
1062 if (d->bd_slen != 0) {
1063 /*
1064 * Sometimes we may be interrupted often and
1065 * the sleep above will not timeout.
1066 * Regardless, we should rotate the buffers
1067 * if there's any new data pending and
1068 * return it.
1069 */
1070 ROTATE_BUFFERS(d);
1071 break;
1072 }
1073 bpf_release_d(d);
1074 lck_mtx_unlock(bpf_mlock);
1075 if (error == ERESTART) {
1076 printf("%s: %llx ERESTART to EINTR\n",
1077 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1078 error = EINTR;
1079 }
1080 return (error);
1081 }
1082 if (error == EWOULDBLOCK) {
1083 /*
1084 * On a timeout, return what's in the buffer,
1085 * which may be nothing. If there is something
1086 * in the store buffer, we can rotate the buffers.
1087 */
1088 if (d->bd_hbuf)
1089 /*
1090 * We filled up the buffer in between
1091 * getting the timeout and arriving
1092 * here, so we don't need to rotate.
1093 */
1094 break;
1095
1096 if (d->bd_slen == 0) {
1097 bpf_release_d(d);
1098 lck_mtx_unlock(bpf_mlock);
1099 return (0);
1100 }
1101 ROTATE_BUFFERS(d);
1102 break;
1103 }
1104 }
1105 /*
1106 * At this point, we know we have something in the hold slot.
1107 */
1108
1109 /*
1110 * Set the hold buffer read. So we do not
1111 * rotate the buffers until the hold buffer
1112 * read is complete. Also to avoid issues resulting
1113 * from page faults during disk sleep (<rdar://problem/13436396>).
1114 */
1115 d->bd_hbuf_read = 1;
1116 hbuf = d->bd_hbuf;
1117 hbuf_len = d->bd_hlen;
1118 flags = d->bd_flags;
1119 lck_mtx_unlock(bpf_mlock);
1120
1121 #ifdef __APPLE__
1122 /*
1123 * Before we move data to userland, we fill out the extended
1124 * header fields.
1125 */
1126 if (flags & BPF_EXTENDED_HDR) {
1127 char *p;
1128
1129 p = hbuf;
1130 while (p < hbuf + hbuf_len) {
1131 struct bpf_hdr_ext *ehp;
1132 uint32_t flowid;
1133 struct so_procinfo soprocinfo;
1134 int found = 0;
1135
1136 ehp = (struct bpf_hdr_ext *)(void *)p;
1137 if ((flowid = ehp->bh_flowid)) {
1138 if (ehp->bh_proto == IPPROTO_TCP)
1139 found = inp_findinpcb_procinfo(&tcbinfo,
1140 flowid, &soprocinfo);
1141 else if (ehp->bh_proto == IPPROTO_UDP)
1142 found = inp_findinpcb_procinfo(&udbinfo,
1143 flowid, &soprocinfo);
1144 if (found == 1) {
1145 ehp->bh_pid = soprocinfo.spi_pid;
1146 proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN);
1147 }
1148 ehp->bh_flowid = 0;
1149 }
1150
1151 if (flags & BPF_FINALIZE_PKTAP) {
1152 struct pktap_header *pktaphdr;
1153
1154 pktaphdr = (struct pktap_header *)(void *)
1155 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1156
1157 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1158 pktap_finalize_proc_info(pktaphdr);
1159
1160 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1161 ehp->bh_tstamp.tv_sec =
1162 pktaphdr->pth_tstamp.tv_sec;
1163 ehp->bh_tstamp.tv_usec =
1164 pktaphdr->pth_tstamp.tv_usec;
1165 }
1166 }
1167 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1168 }
1169 } else if (flags & BPF_FINALIZE_PKTAP) {
1170 char *p;
1171
1172 p = hbuf;
1173 while (p < hbuf + hbuf_len) {
1174 struct bpf_hdr *hp;
1175 struct pktap_header *pktaphdr;
1176
1177 hp = (struct bpf_hdr *)(void *)p;
1178 pktaphdr = (struct pktap_header *)(void *)
1179 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1180
1181 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1182 pktap_finalize_proc_info(pktaphdr);
1183
1184 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1185 hp->bh_tstamp.tv_sec =
1186 pktaphdr->pth_tstamp.tv_sec;
1187 hp->bh_tstamp.tv_usec =
1188 pktaphdr->pth_tstamp.tv_usec;
1189 }
1190
1191 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1192 }
1193 }
1194 #endif
1195
1196 /*
1197 * Move data from hold buffer into user space.
1198 * We know the entire buffer is transferred since
1199 * we checked above that the read buffer is bpf_bufsize bytes.
1200 */
1201 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1202
1203 lck_mtx_lock(bpf_mlock);
1204 /*
1205 * Make sure device is still opened
1206 */
1207 if ((d->bd_flags & BPF_CLOSING) != 0) {
1208 bpf_release_d(d);
1209 lck_mtx_unlock(bpf_mlock);
1210 return (ENXIO);
1211 }
1212
1213 d->bd_hbuf_read = 0;
1214 d->bd_fbuf = d->bd_hbuf;
1215 d->bd_hbuf = NULL;
1216 d->bd_hlen = 0;
1217 d->bd_hcnt = 0;
1218 wakeup((caddr_t)d);
1219
1220 bpf_release_d(d);
1221 lck_mtx_unlock(bpf_mlock);
1222 return (error);
1223
1224 }
1225
1226
1227 /*
1228 * If there are processes sleeping on this descriptor, wake them up.
1229 */
1230 static void
1231 bpf_wakeup(struct bpf_d *d)
1232 {
1233 if (d->bd_state == BPF_WAITING) {
1234 bpf_stop_timer(d);
1235 d->bd_state = BPF_IDLE;
1236 }
1237 wakeup((caddr_t)d);
1238 if (d->bd_async && d->bd_sig && d->bd_sigio)
1239 pgsigio(d->bd_sigio, d->bd_sig);
1240
1241 selwakeup(&d->bd_sel);
1242 if ((d->bd_flags & BPF_KNOTE))
1243 KNOTE(&d->bd_sel.si_note, 1);
1244 }
1245
1246
1247 static void
1248 bpf_timed_out(void *arg, __unused void *dummy)
1249 {
1250 struct bpf_d *d = (struct bpf_d *)arg;
1251
1252 lck_mtx_lock(bpf_mlock);
1253 if (d->bd_state == BPF_WAITING) {
1254 /*
1255 * There's a select or kqueue waiting for this; if there's
1256 * now stuff to read, wake it up.
1257 */
1258 d->bd_state = BPF_TIMED_OUT;
1259 if (d->bd_slen != 0)
1260 bpf_wakeup(d);
1261 } else if (d->bd_state == BPF_DRAINING) {
1262 /*
1263 * A close is waiting for this to finish.
1264 * Mark it as finished, and wake the close up.
1265 */
1266 d->bd_state = BPF_IDLE;
1267 bpf_wakeup(d);
1268 }
1269 lck_mtx_unlock(bpf_mlock);
1270 }
1271
1272
1273
1274
1275
1276 /* keep in sync with bpf_movein above: */
1277 #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1278
1279 int
1280 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1281 {
1282 struct bpf_d *d;
1283 struct ifnet *ifp;
1284 struct mbuf *m = NULL;
1285 int error;
1286 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1287 int datlen = 0;
1288 int bif_dlt;
1289 int bd_hdrcmplt;
1290
1291 lck_mtx_lock(bpf_mlock);
1292
1293 d = bpf_dtab[minor(dev)];
1294 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1295 lck_mtx_unlock(bpf_mlock);
1296 return (ENXIO);
1297 }
1298
1299 bpf_acquire_d(d);
1300
1301 if (d->bd_bif == 0) {
1302 bpf_release_d(d);
1303 lck_mtx_unlock(bpf_mlock);
1304 return (ENXIO);
1305 }
1306
1307 ifp = d->bd_bif->bif_ifp;
1308
1309 if ((ifp->if_flags & IFF_UP) == 0) {
1310 bpf_release_d(d);
1311 lck_mtx_unlock(bpf_mlock);
1312 return (ENETDOWN);
1313 }
1314 if (uio_resid(uio) == 0) {
1315 bpf_release_d(d);
1316 lck_mtx_unlock(bpf_mlock);
1317 return (0);
1318 }
1319 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1320
1321 /*
1322 * fix for PR-6849527
1323 * geting variables onto stack before dropping lock for bpf_movein()
1324 */
1325 bif_dlt = (int)d->bd_bif->bif_dlt;
1326 bd_hdrcmplt = d->bd_hdrcmplt;
1327
1328 /* bpf_movein allocating mbufs; drop lock */
1329 lck_mtx_unlock(bpf_mlock);
1330
1331 error = bpf_movein(uio, bif_dlt, &m,
1332 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1333 &datlen);
1334
1335 /* take the lock again */
1336 lck_mtx_lock(bpf_mlock);
1337 if (error) {
1338 bpf_release_d(d);
1339 lck_mtx_unlock(bpf_mlock);
1340 return (error);
1341 }
1342
1343 /* verify the device is still open */
1344 if ((d->bd_flags & BPF_CLOSING) != 0) {
1345 bpf_release_d(d);
1346 lck_mtx_unlock(bpf_mlock);
1347 m_freem(m);
1348 return (ENXIO);
1349 }
1350
1351 if (d->bd_bif == NULL) {
1352 bpf_release_d(d);
1353 lck_mtx_unlock(bpf_mlock);
1354 m_free(m);
1355 return (ENXIO);
1356 }
1357
1358 if ((unsigned)datlen > ifp->if_mtu) {
1359 bpf_release_d(d);
1360 lck_mtx_unlock(bpf_mlock);
1361 m_freem(m);
1362 return (EMSGSIZE);
1363 }
1364
1365
1366 #if CONFIG_MACF_NET
1367 mac_mbuf_label_associate_bpfdesc(d, m);
1368 #endif
1369
1370 bpf_set_packet_service_class(m, d->bd_traffic_class);
1371
1372 lck_mtx_unlock(bpf_mlock);
1373
1374 /*
1375 * The driver frees the mbuf.
1376 */
1377 if (d->bd_hdrcmplt) {
1378 if (d->bd_bif->bif_send)
1379 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1380 else
1381 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1382 } else {
1383 error = dlil_output(ifp, PF_INET, m, NULL,
1384 (struct sockaddr *)dst_buf, 0, NULL);
1385 }
1386
1387 lck_mtx_lock(bpf_mlock);
1388 bpf_release_d(d);
1389 lck_mtx_unlock(bpf_mlock);
1390
1391 return (error);
1392 }
1393
1394 /*
1395 * Reset a descriptor by flushing its packet buffer and clearing the
1396 * receive and drop counts.
1397 */
1398 static void
1399 reset_d(struct bpf_d *d)
1400 {
1401 if (d->bd_hbuf_read)
1402 panic("resetting buffers during read");
1403
1404 if (d->bd_hbuf) {
1405 /* Free the hold buffer. */
1406 d->bd_fbuf = d->bd_hbuf;
1407 d->bd_hbuf = NULL;
1408 }
1409 d->bd_slen = 0;
1410 d->bd_hlen = 0;
1411 d->bd_scnt = 0;
1412 d->bd_hcnt = 0;
1413 d->bd_rcount = 0;
1414 d->bd_dcount = 0;
1415 }
1416
1417 /*
1418 * FIONREAD Check for read packet available.
1419 * SIOCGIFADDR Get interface address - convenient hook to driver.
1420 * BIOCGBLEN Get buffer len [for read()].
1421 * BIOCSETF Set ethernet read filter.
1422 * BIOCFLUSH Flush read packet buffer.
1423 * BIOCPROMISC Put interface into promiscuous mode.
1424 * BIOCGDLT Get link layer type.
1425 * BIOCGETIF Get interface name.
1426 * BIOCSETIF Set interface.
1427 * BIOCSRTIMEOUT Set read timeout.
1428 * BIOCGRTIMEOUT Get read timeout.
1429 * BIOCGSTATS Get packet stats.
1430 * BIOCIMMEDIATE Set immediate mode.
1431 * BIOCVERSION Get filter language version.
1432 * BIOCGHDRCMPLT Get "header already complete" flag
1433 * BIOCSHDRCMPLT Set "header already complete" flag
1434 * BIOCGSEESENT Get "see packets sent" flag
1435 * BIOCSSEESENT Set "see packets sent" flag
1436 * BIOCSETTC Set traffic class.
1437 * BIOCGETTC Get traffic class.
1438 * BIOCSEXTHDR Set "extended header" flag
1439 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1440 * BIOCGHEADDROP Get "head-drop" flag
1441 */
1442 /* ARGSUSED */
1443 int
1444 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1445 struct proc *p)
1446 {
1447 struct bpf_d *d;
1448 int error = 0;
1449 u_int int_arg;
1450 struct ifreq ifr;
1451
1452 lck_mtx_lock(bpf_mlock);
1453
1454 d = bpf_dtab[minor(dev)];
1455 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1456 lck_mtx_unlock(bpf_mlock);
1457 return (ENXIO);
1458 }
1459
1460 bpf_acquire_d(d);
1461
1462 if (d->bd_state == BPF_WAITING)
1463 bpf_stop_timer(d);
1464 d->bd_state = BPF_IDLE;
1465
1466 switch (cmd) {
1467
1468 default:
1469 error = EINVAL;
1470 break;
1471
1472 /*
1473 * Check for read packet available.
1474 */
1475 case FIONREAD: /* int */
1476 {
1477 int n;
1478
1479 n = d->bd_slen;
1480 if (d->bd_hbuf && d->bd_hbuf_read == 0)
1481 n += d->bd_hlen;
1482
1483 bcopy(&n, addr, sizeof (n));
1484 break;
1485 }
1486
1487 case SIOCGIFADDR: /* struct ifreq */
1488 {
1489 struct ifnet *ifp;
1490
1491 if (d->bd_bif == 0)
1492 error = EINVAL;
1493 else {
1494 ifp = d->bd_bif->bif_ifp;
1495 error = ifnet_ioctl(ifp, 0, cmd, addr);
1496 }
1497 break;
1498 }
1499
1500 /*
1501 * Get buffer len [for read()].
1502 */
1503 case BIOCGBLEN: /* u_int */
1504 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1505 break;
1506
1507 /*
1508 * Set buffer length.
1509 */
1510 case BIOCSBLEN: /* u_int */
1511 if (d->bd_bif != 0)
1512 error = EINVAL;
1513 else {
1514 u_int size;
1515
1516 bcopy(addr, &size, sizeof (size));
1517
1518 /*
1519 * Allow larger buffer in head drop mode with the
1520 * assumption the capture is in standby mode to
1521 * keep a cache of recent traffic
1522 */
1523 if (d->bd_headdrop != 0 && size > 2 * bpf_maxbufsize)
1524 size = 2 * bpf_maxbufsize;
1525 else if (size > bpf_maxbufsize)
1526 size = bpf_maxbufsize;
1527 else if (size < BPF_MINBUFSIZE)
1528 size = BPF_MINBUFSIZE;
1529 bcopy(&size, addr, sizeof (size));
1530 d->bd_bufsize = size;
1531 }
1532 break;
1533
1534 /*
1535 * Set link layer read filter.
1536 */
1537 case BIOCSETF32:
1538 case BIOCSETFNR32: { /* struct bpf_program32 */
1539 struct bpf_program32 prg32;
1540
1541 bcopy(addr, &prg32, sizeof (prg32));
1542 error = bpf_setf(d, prg32.bf_len,
1543 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1544 break;
1545 }
1546
1547 case BIOCSETF64:
1548 case BIOCSETFNR64: { /* struct bpf_program64 */
1549 struct bpf_program64 prg64;
1550
1551 bcopy(addr, &prg64, sizeof (prg64));
1552 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1553 break;
1554 }
1555
1556 /*
1557 * Flush read packet buffer.
1558 */
1559 case BIOCFLUSH:
1560 while (d->bd_hbuf_read) {
1561 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1562 }
1563 if ((d->bd_flags & BPF_CLOSING) != 0) {
1564 error = ENXIO;
1565 break;
1566 }
1567 reset_d(d);
1568 break;
1569
1570 /*
1571 * Put interface into promiscuous mode.
1572 */
1573 case BIOCPROMISC:
1574 if (d->bd_bif == 0) {
1575 /*
1576 * No interface attached yet.
1577 */
1578 error = EINVAL;
1579 break;
1580 }
1581 if (d->bd_promisc == 0) {
1582 lck_mtx_unlock(bpf_mlock);
1583 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1584 lck_mtx_lock(bpf_mlock);
1585 if (error == 0)
1586 d->bd_promisc = 1;
1587 }
1588 break;
1589
1590 /*
1591 * Get device parameters.
1592 */
1593 case BIOCGDLT: /* u_int */
1594 if (d->bd_bif == 0)
1595 error = EINVAL;
1596 else
1597 bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1598 break;
1599
1600 /*
1601 * Get a list of supported data link types.
1602 */
1603 case BIOCGDLTLIST: /* struct bpf_dltlist */
1604 if (d->bd_bif == NULL) {
1605 error = EINVAL;
1606 } else {
1607 error = bpf_getdltlist(d, addr, p);
1608 }
1609 break;
1610
1611 /*
1612 * Set data link type.
1613 */
1614 case BIOCSDLT: /* u_int */
1615 if (d->bd_bif == NULL) {
1616 error = EINVAL;
1617 } else {
1618 u_int dlt;
1619
1620 bcopy(addr, &dlt, sizeof (dlt));
1621
1622 if (dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
1623 printf("BIOCSDLT downgrade DLT_PKTAP to DLT_RAW\n");
1624 dlt = DLT_RAW;
1625 }
1626 error = bpf_setdlt(d, dlt);
1627 }
1628 break;
1629
1630 /*
1631 * Get interface name.
1632 */
1633 case BIOCGETIF: /* struct ifreq */
1634 if (d->bd_bif == 0)
1635 error = EINVAL;
1636 else {
1637 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1638
1639 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1640 sizeof (ifr.ifr_name), "%s", if_name(ifp));
1641 }
1642 break;
1643
1644 /*
1645 * Set interface.
1646 */
1647 case BIOCSETIF: { /* struct ifreq */
1648 ifnet_t ifp;
1649
1650 bcopy(addr, &ifr, sizeof (ifr));
1651 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1652 ifp = ifunit(ifr.ifr_name);
1653 if (ifp == NULL)
1654 error = ENXIO;
1655 else
1656 error = bpf_setif(d, ifp);
1657 break;
1658 }
1659
1660 /*
1661 * Set read timeout.
1662 */
1663 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
1664 struct user32_timeval _tv;
1665 struct timeval tv;
1666
1667 bcopy(addr, &_tv, sizeof (_tv));
1668 tv.tv_sec = _tv.tv_sec;
1669 tv.tv_usec = _tv.tv_usec;
1670
1671 /*
1672 * Subtract 1 tick from tvtohz() since this isn't
1673 * a one-shot timer.
1674 */
1675 if ((error = itimerfix(&tv)) == 0)
1676 d->bd_rtout = tvtohz(&tv) - 1;
1677 break;
1678 }
1679
1680 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
1681 struct user64_timeval _tv;
1682 struct timeval tv;
1683
1684 bcopy(addr, &_tv, sizeof (_tv));
1685 tv.tv_sec = _tv.tv_sec;
1686 tv.tv_usec = _tv.tv_usec;
1687
1688 /*
1689 * Subtract 1 tick from tvtohz() since this isn't
1690 * a one-shot timer.
1691 */
1692 if ((error = itimerfix(&tv)) == 0)
1693 d->bd_rtout = tvtohz(&tv) - 1;
1694 break;
1695 }
1696
1697 /*
1698 * Get read timeout.
1699 */
1700 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1701 struct user32_timeval tv;
1702
1703 bzero(&tv, sizeof (tv));
1704 tv.tv_sec = d->bd_rtout / hz;
1705 tv.tv_usec = (d->bd_rtout % hz) * tick;
1706 bcopy(&tv, addr, sizeof (tv));
1707 break;
1708 }
1709
1710 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1711 struct user64_timeval tv;
1712
1713 bzero(&tv, sizeof (tv));
1714 tv.tv_sec = d->bd_rtout / hz;
1715 tv.tv_usec = (d->bd_rtout % hz) * tick;
1716 bcopy(&tv, addr, sizeof (tv));
1717 break;
1718 }
1719
1720 /*
1721 * Get packet stats.
1722 */
1723 case BIOCGSTATS: { /* struct bpf_stat */
1724 struct bpf_stat bs;
1725
1726 bzero(&bs, sizeof (bs));
1727 bs.bs_recv = d->bd_rcount;
1728 bs.bs_drop = d->bd_dcount;
1729 bcopy(&bs, addr, sizeof (bs));
1730 break;
1731 }
1732
1733 /*
1734 * Set immediate mode.
1735 */
1736 case BIOCIMMEDIATE: /* u_int */
1737 d->bd_immediate = *(u_int *)(void *)addr;
1738 break;
1739
1740 case BIOCVERSION: { /* struct bpf_version */
1741 struct bpf_version bv;
1742
1743 bzero(&bv, sizeof (bv));
1744 bv.bv_major = BPF_MAJOR_VERSION;
1745 bv.bv_minor = BPF_MINOR_VERSION;
1746 bcopy(&bv, addr, sizeof (bv));
1747 break;
1748 }
1749
1750 /*
1751 * Get "header already complete" flag
1752 */
1753 case BIOCGHDRCMPLT: /* u_int */
1754 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1755 break;
1756
1757 /*
1758 * Set "header already complete" flag
1759 */
1760 case BIOCSHDRCMPLT: /* u_int */
1761 bcopy(addr, &int_arg, sizeof (int_arg));
1762 d->bd_hdrcmplt = int_arg ? 1 : 0;
1763 break;
1764
1765 /*
1766 * Get "see sent packets" flag
1767 */
1768 case BIOCGSEESENT: /* u_int */
1769 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1770 break;
1771
1772 /*
1773 * Set "see sent packets" flag
1774 */
1775 case BIOCSSEESENT: /* u_int */
1776 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1777 break;
1778
1779 /*
1780 * Set traffic service class
1781 */
1782 case BIOCSETTC: { /* int */
1783 int tc;
1784
1785 bcopy(addr, &tc, sizeof (int));
1786 error = bpf_set_traffic_class(d, tc);
1787 break;
1788 }
1789
1790 /*
1791 * Get traffic service class
1792 */
1793 case BIOCGETTC: /* int */
1794 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1795 break;
1796
1797 case FIONBIO: /* Non-blocking I/O; int */
1798 break;
1799
1800 case FIOASYNC: /* Send signal on receive packets; int */
1801 bcopy(addr, &d->bd_async, sizeof (int));
1802 break;
1803 #ifndef __APPLE__
1804 case FIOSETOWN:
1805 error = fsetown(*(int *)addr, &d->bd_sigio);
1806 break;
1807
1808 case FIOGETOWN:
1809 *(int *)addr = fgetown(d->bd_sigio);
1810 break;
1811
1812 /* This is deprecated, FIOSETOWN should be used instead. */
1813 case TIOCSPGRP:
1814 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1815 break;
1816
1817 /* This is deprecated, FIOGETOWN should be used instead. */
1818 case TIOCGPGRP:
1819 *(int *)addr = -fgetown(d->bd_sigio);
1820 break;
1821 #endif
1822 case BIOCSRSIG: { /* Set receive signal; u_int */
1823 u_int sig;
1824
1825 bcopy(addr, &sig, sizeof (u_int));
1826
1827 if (sig >= NSIG)
1828 error = EINVAL;
1829 else
1830 d->bd_sig = sig;
1831 break;
1832 }
1833 case BIOCGRSIG: /* u_int */
1834 bcopy(&d->bd_sig, addr, sizeof (u_int));
1835 break;
1836 #ifdef __APPLE__
1837 case BIOCSEXTHDR: /* u_int */
1838 bcopy(addr, &int_arg, sizeof (int_arg));
1839 if (int_arg)
1840 d->bd_flags |= BPF_EXTENDED_HDR;
1841 else
1842 d->bd_flags &= ~BPF_EXTENDED_HDR;
1843 break;
1844
1845 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
1846 ifnet_t ifp;
1847 struct bpf_if *bp;
1848
1849 bcopy(addr, &ifr, sizeof (ifr));
1850 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1851 ifp = ifunit(ifr.ifr_name);
1852 if (ifp == NULL) {
1853 error = ENXIO;
1854 break;
1855 }
1856 ifr.ifr_intval = 0;
1857 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1858 struct bpf_d *bpf_d;
1859
1860 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
1861 continue;
1862 for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) {
1863 ifr.ifr_intval += 1;
1864 }
1865 }
1866 bcopy(&ifr, addr, sizeof (ifr));
1867 break;
1868 }
1869 case BIOCGWANTPKTAP: /* u_int */
1870 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
1871 bcopy(&int_arg, addr, sizeof (int_arg));
1872 break;
1873
1874 case BIOCSWANTPKTAP: /* u_int */
1875 bcopy(addr, &int_arg, sizeof (int_arg));
1876 if (int_arg)
1877 d->bd_flags |= BPF_WANT_PKTAP;
1878 else
1879 d->bd_flags &= ~BPF_WANT_PKTAP;
1880 break;
1881 #endif
1882
1883 case BIOCSHEADDROP:
1884 bcopy(addr, &int_arg, sizeof (int_arg));
1885 d->bd_headdrop = int_arg ? 1 : 0;
1886 break;
1887
1888 case BIOCGHEADDROP:
1889 bcopy(&d->bd_headdrop, addr, sizeof (int));
1890 break;
1891 }
1892
1893 bpf_release_d(d);
1894 lck_mtx_unlock(bpf_mlock);
1895
1896 return (error);
1897 }
1898
1899 /*
1900 * Set d's packet filter program to fp. If this file already has a filter,
1901 * free it and replace it. Returns EINVAL for bogus requests.
1902 */
1903 static int
1904 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
1905 u_long cmd)
1906 {
1907 struct bpf_insn *fcode, *old;
1908 u_int flen, size;
1909
1910 while (d->bd_hbuf_read)
1911 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1912
1913 if ((d->bd_flags & BPF_CLOSING) != 0)
1914 return (ENXIO);
1915
1916 old = d->bd_filter;
1917 if (bf_insns == USER_ADDR_NULL) {
1918 if (bf_len != 0)
1919 return (EINVAL);
1920 d->bd_filter = NULL;
1921 reset_d(d);
1922 if (old != 0)
1923 FREE((caddr_t)old, M_DEVBUF);
1924 return (0);
1925 }
1926 flen = bf_len;
1927 if (flen > BPF_MAXINSNS)
1928 return (EINVAL);
1929
1930 size = flen * sizeof(struct bpf_insn);
1931 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
1932 #ifdef __APPLE__
1933 if (fcode == NULL)
1934 return (ENOBUFS);
1935 #endif
1936 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1937 bpf_validate(fcode, (int)flen)) {
1938 d->bd_filter = fcode;
1939
1940 if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
1941 reset_d(d);
1942
1943 if (old != 0)
1944 FREE((caddr_t)old, M_DEVBUF);
1945
1946 return (0);
1947 }
1948 FREE((caddr_t)fcode, M_DEVBUF);
1949 return (EINVAL);
1950 }
1951
1952 /*
1953 * Detach a file from its current interface (if attached at all) and attach
1954 * to the interface indicated by the name stored in ifr.
1955 * Return an errno or 0.
1956 */
1957 static int
1958 bpf_setif(struct bpf_d *d, ifnet_t theywant)
1959 {
1960 struct bpf_if *bp;
1961 int error;
1962
1963 while (d->bd_hbuf_read)
1964 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1965
1966 if ((d->bd_flags & BPF_CLOSING) != 0)
1967 return (ENXIO);
1968
1969 /*
1970 * Look through attached interfaces for the named one.
1971 */
1972 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1973 struct ifnet *ifp = bp->bif_ifp;
1974
1975 if (ifp == 0 || ifp != theywant)
1976 continue;
1977 /*
1978 * Do not use DLT_PKTAP, unless requested explicitly
1979 */
1980 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
1981 continue;
1982 /*
1983 * Skip the coprocessor interface
1984 */
1985 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp))
1986 continue;
1987 /*
1988 * We found the requested interface.
1989 * Allocate the packet buffers.
1990 */
1991 error = bpf_allocbufs(d);
1992 if (error != 0)
1993 return (error);
1994 /*
1995 * Detach if attached to something else.
1996 */
1997 if (bp != d->bd_bif) {
1998 if (d->bd_bif != NULL) {
1999 if (bpf_detachd(d, 0) != 0)
2000 return (ENXIO);
2001 }
2002 if (bpf_attachd(d, bp) != 0)
2003 return (ENXIO);
2004 }
2005 reset_d(d);
2006 return (0);
2007 }
2008 /* Not found. */
2009 return (ENXIO);
2010 }
2011
2012
2013
2014 /*
2015 * Get a list of available data link type of the interface.
2016 */
2017 static int
2018 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2019 {
2020 u_int n;
2021 int error;
2022 struct ifnet *ifp;
2023 struct bpf_if *bp;
2024 user_addr_t dlist;
2025 struct bpf_dltlist bfl;
2026
2027 bcopy(addr, &bfl, sizeof (bfl));
2028 if (proc_is64bit(p)) {
2029 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2030 } else {
2031 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2032 }
2033
2034 ifp = d->bd_bif->bif_ifp;
2035 n = 0;
2036 error = 0;
2037
2038 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2039 if (bp->bif_ifp != ifp)
2040 continue;
2041 /*
2042 * Do not use DLT_PKTAP, unless requested explicitly
2043 */
2044 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2045 continue;
2046 if (dlist != USER_ADDR_NULL) {
2047 if (n >= bfl.bfl_len) {
2048 return (ENOMEM);
2049 }
2050 error = copyout(&bp->bif_dlt, dlist,
2051 sizeof (bp->bif_dlt));
2052 if (error != 0)
2053 break;
2054 dlist += sizeof (bp->bif_dlt);
2055 }
2056 n++;
2057 }
2058 bfl.bfl_len = n;
2059 bcopy(&bfl, addr, sizeof (bfl));
2060
2061 return (error);
2062 }
2063
2064 /*
2065 * Set the data link type of a BPF instance.
2066 */
2067 static int
2068 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2069 {
2070 int error, opromisc;
2071 struct ifnet *ifp;
2072 struct bpf_if *bp;
2073
2074 if (d->bd_bif->bif_dlt == dlt)
2075 return (0);
2076
2077 while (d->bd_hbuf_read)
2078 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2079
2080 if ((d->bd_flags & BPF_CLOSING) != 0)
2081 return (ENXIO);
2082
2083 ifp = d->bd_bif->bif_ifp;
2084 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2085 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2086 /*
2087 * Do not use DLT_PKTAP, unless requested explicitly
2088 */
2089 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2090 continue;
2091 }
2092 break;
2093 }
2094 }
2095 if (bp != NULL) {
2096 opromisc = d->bd_promisc;
2097 if (bpf_detachd(d, 0) != 0)
2098 return (ENXIO);
2099 error = bpf_attachd(d, bp);
2100 if (error) {
2101 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2102 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
2103 return error;
2104 }
2105 reset_d(d);
2106 if (opromisc) {
2107 lck_mtx_unlock(bpf_mlock);
2108 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2109 lck_mtx_lock(bpf_mlock);
2110 if (error) {
2111 printf("%s: ifpromisc %s%d failed (%d)\n",
2112 __func__, ifnet_name(bp->bif_ifp),
2113 ifnet_unit(bp->bif_ifp), error);
2114 } else {
2115 d->bd_promisc = 1;
2116 }
2117 }
2118 }
2119 return (bp == NULL ? EINVAL : 0);
2120 }
2121
2122 static int
2123 bpf_set_traffic_class(struct bpf_d *d, int tc)
2124 {
2125 int error = 0;
2126
2127 if (!SO_VALID_TC(tc))
2128 error = EINVAL;
2129 else
2130 d->bd_traffic_class = tc;
2131
2132 return (error);
2133 }
2134
2135 static void
2136 bpf_set_packet_service_class(struct mbuf *m, int tc)
2137 {
2138 if (!(m->m_flags & M_PKTHDR))
2139 return;
2140
2141 VERIFY(SO_VALID_TC(tc));
2142 (void) m_set_service_class(m, so_tc2msc(tc));
2143 }
2144
2145 /*
2146 * Support for select()
2147 *
2148 * Return true iff the specific operation will not block indefinitely.
2149 * Otherwise, return false but make a note that a selwakeup() must be done.
2150 */
2151 int
2152 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2153 {
2154 struct bpf_d *d;
2155 int ret = 0;
2156
2157 lck_mtx_lock(bpf_mlock);
2158
2159 d = bpf_dtab[minor(dev)];
2160 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
2161 lck_mtx_unlock(bpf_mlock);
2162 return (ENXIO);
2163 }
2164
2165 bpf_acquire_d(d);
2166
2167 if (d->bd_bif == NULL) {
2168 bpf_release_d(d);
2169 lck_mtx_unlock(bpf_mlock);
2170 return (ENXIO);
2171 }
2172
2173 while (d->bd_hbuf_read)
2174 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2175
2176 if ((d->bd_flags & BPF_CLOSING) != 0) {
2177 bpf_release_d(d);
2178 lck_mtx_unlock(bpf_mlock);
2179 return (ENXIO);
2180 }
2181
2182 switch (which) {
2183 case FREAD:
2184 if (d->bd_hlen != 0 ||
2185 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
2186 d->bd_slen != 0))
2187 ret = 1; /* read has data to return */
2188 else {
2189 /*
2190 * Read has no data to return.
2191 * Make the select wait, and start a timer if
2192 * necessary.
2193 */
2194 selrecord(p, &d->bd_sel, wql);
2195 bpf_start_timer(d);
2196 }
2197 break;
2198
2199 case FWRITE:
2200 ret = 1; /* can't determine whether a write would block */
2201 break;
2202 }
2203
2204 bpf_release_d(d);
2205 lck_mtx_unlock(bpf_mlock);
2206
2207 return (ret);
2208 }
2209
2210
2211 /*
2212 * Support for kevent() system call. Register EVFILT_READ filters and
2213 * reject all others.
2214 */
2215 int bpfkqfilter(dev_t dev, struct knote *kn);
2216 static void filt_bpfdetach(struct knote *);
2217 static int filt_bpfread(struct knote *, long);
2218 static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
2219 static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
2220
2221 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2222 .f_isfd = 1,
2223 .f_detach = filt_bpfdetach,
2224 .f_event = filt_bpfread,
2225 .f_touch = filt_bpftouch,
2226 .f_process = filt_bpfprocess,
2227 };
2228
2229 static int
2230 filt_bpfread_common(struct knote *kn, struct bpf_d *d)
2231 {
2232 int ready = 0;
2233
2234 if (d->bd_immediate) {
2235 /*
2236 * If there's data in the hold buffer, it's the
2237 * amount of data a read will return.
2238 *
2239 * If there's no data in the hold buffer, but
2240 * there's data in the store buffer, a read will
2241 * immediately rotate the store buffer to the
2242 * hold buffer, the amount of data in the store
2243 * buffer is the amount of data a read will
2244 * return.
2245 *
2246 * If there's no data in either buffer, we're not
2247 * ready to read.
2248 */
2249 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read)
2250 ? d->bd_slen : d->bd_hlen);
2251 int64_t lowwat = 1;
2252 if (kn->kn_sfflags & NOTE_LOWAT)
2253 {
2254 if (kn->kn_sdata > d->bd_bufsize)
2255 lowwat = d->bd_bufsize;
2256 else if (kn->kn_sdata > lowwat)
2257 lowwat = kn->kn_sdata;
2258 }
2259 ready = (kn->kn_data >= lowwat);
2260 } else {
2261 /*
2262 * If there's data in the hold buffer, it's the
2263 * amount of data a read will return.
2264 *
2265 * If there's no data in the hold buffer, but
2266 * there's data in the store buffer, if the
2267 * timer has expired a read will immediately
2268 * rotate the store buffer to the hold buffer,
2269 * so the amount of data in the store buffer is
2270 * the amount of data a read will return.
2271 *
2272 * If there's no data in either buffer, or there's
2273 * no data in the hold buffer and the timer hasn't
2274 * expired, we're not ready to read.
2275 */
2276 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) && d->bd_state == BPF_TIMED_OUT ?
2277 d->bd_slen : d->bd_hlen);
2278 ready = (kn->kn_data > 0);
2279 }
2280 if (!ready)
2281 bpf_start_timer(d);
2282
2283 return (ready);
2284 }
2285
2286 int
2287 bpfkqfilter(dev_t dev, struct knote *kn)
2288 {
2289 struct bpf_d *d;
2290 int res;
2291
2292 /*
2293 * Is this device a bpf?
2294 */
2295 if (major(dev) != CDEV_MAJOR ||
2296 kn->kn_filter != EVFILT_READ) {
2297 kn->kn_flags = EV_ERROR;
2298 kn->kn_data = EINVAL;
2299 return 0;
2300 }
2301
2302 lck_mtx_lock(bpf_mlock);
2303
2304 d = bpf_dtab[minor(dev)];
2305
2306 if (d == 0 ||
2307 d == (void *)1 ||
2308 d->bd_bif == NULL ||
2309 (d->bd_flags & BPF_CLOSING) != 0) {
2310 lck_mtx_unlock(bpf_mlock);
2311 kn->kn_flags = EV_ERROR;
2312 kn->kn_data = ENXIO;
2313 return 0;
2314 }
2315
2316 kn->kn_hook = d;
2317 kn->kn_filtid = EVFILTID_BPFREAD;
2318 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2319 d->bd_flags |= BPF_KNOTE;
2320
2321 /* capture the current state */
2322 res = filt_bpfread_common(kn, d);
2323
2324 lck_mtx_unlock(bpf_mlock);
2325
2326 return (res);
2327 }
2328
2329 static void
2330 filt_bpfdetach(struct knote *kn)
2331 {
2332 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2333
2334 lck_mtx_lock(bpf_mlock);
2335 if (d->bd_flags & BPF_KNOTE) {
2336 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2337 d->bd_flags &= ~BPF_KNOTE;
2338 }
2339 lck_mtx_unlock(bpf_mlock);
2340 }
2341
2342 static int
2343 filt_bpfread(struct knote *kn, long hint)
2344 {
2345 #pragma unused(hint)
2346 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2347
2348 return filt_bpfread_common(kn, d);
2349 }
2350
2351 static int
2352 filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
2353 {
2354 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2355 int res;
2356
2357 lck_mtx_lock(bpf_mlock);
2358
2359 /* save off the lowat threshold and flag */
2360 kn->kn_sdata = kev->data;
2361 kn->kn_sfflags = kev->fflags;
2362 if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
2363 kn->kn_udata = kev->udata;
2364
2365 /* output data will be re-generated here */
2366 res = filt_bpfread_common(kn, d);
2367
2368 lck_mtx_unlock(bpf_mlock);
2369
2370 return res;
2371 }
2372
2373 static int
2374 filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
2375 {
2376 #pragma unused(data)
2377 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2378 int res;
2379
2380 lck_mtx_lock(bpf_mlock);
2381 res = filt_bpfread_common(kn, d);
2382 if (res) {
2383 *kev = kn->kn_kevent;
2384 }
2385 lck_mtx_unlock(bpf_mlock);
2386
2387 return res;
2388 }
2389
2390 /*
2391 * Copy data from an mbuf chain into a buffer. This code is derived
2392 * from m_copydata in kern/uipc_mbuf.c.
2393 */
2394 static void
2395 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2396 {
2397 u_int count;
2398 u_char *dst;
2399
2400 dst = dst_arg;
2401 while (len > 0) {
2402 if (m == 0)
2403 panic("bpf_mcopy");
2404 count = min(m->m_len, len);
2405 bcopy(mbuf_data(m), dst, count);
2406 m = m->m_next;
2407 dst += count;
2408 len -= count;
2409 }
2410 }
2411
2412 static inline void
2413 bpf_tap_imp(
2414 ifnet_t ifp,
2415 u_int32_t dlt,
2416 struct bpf_packet *bpf_pkt,
2417 int outbound)
2418 {
2419 struct bpf_d *d;
2420 u_int slen;
2421 struct bpf_if *bp;
2422
2423 /*
2424 * It's possible that we get here after the bpf descriptor has been
2425 * detached from the interface; in such a case we simply return.
2426 * Lock ordering is important since we can be called asynchronously
2427 * (from IOKit) to process an inbound packet; when that happens
2428 * we would have been holding its "gateLock" and will be acquiring
2429 * "bpf_mlock" upon entering this routine. Due to that, we release
2430 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2431 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2432 * when a ifnet_set_promiscuous request simultaneously collides with
2433 * an inbound packet being passed into the tap callback.
2434 */
2435 lck_mtx_lock(bpf_mlock);
2436 if (ifp->if_bpf == NULL) {
2437 lck_mtx_unlock(bpf_mlock);
2438 return;
2439 }
2440 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2441 if (bp->bif_ifp != ifp) {
2442 /* wrong interface */
2443 bp = NULL;
2444 break;
2445 }
2446 if (dlt == 0 || bp->bif_dlt == dlt) {
2447 /* tapping default DLT or DLT matches */
2448 break;
2449 }
2450 }
2451 if (bp == NULL) {
2452 goto done;
2453 }
2454 for (d = bp->bif_dlist; d; d = d->bd_next) {
2455 if (outbound && !d->bd_seesent)
2456 continue;
2457 ++d->bd_rcount;
2458 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2459 bpf_pkt->bpfp_total_length, 0);
2460 if (slen != 0) {
2461 #if CONFIG_MACF_NET
2462 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
2463 continue;
2464 #endif
2465 catchpacket(d, bpf_pkt, slen, outbound);
2466 }
2467 }
2468
2469 done:
2470 lck_mtx_unlock(bpf_mlock);
2471 }
2472
2473 static inline void
2474 bpf_tap_mbuf(
2475 ifnet_t ifp,
2476 u_int32_t dlt,
2477 mbuf_t m,
2478 void* hdr,
2479 size_t hlen,
2480 int outbound)
2481 {
2482 struct bpf_packet bpf_pkt;
2483 struct mbuf *m0;
2484
2485 if (ifp->if_bpf == NULL) {
2486 /* quickly check without taking lock */
2487 return;
2488 }
2489 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2490 bpf_pkt.bpfp_mbuf = m;
2491 bpf_pkt.bpfp_total_length = 0;
2492 for (m0 = m; m0 != NULL; m0 = m0->m_next)
2493 bpf_pkt.bpfp_total_length += m0->m_len;
2494 bpf_pkt.bpfp_header = hdr;
2495 if (hdr != NULL) {
2496 bpf_pkt.bpfp_total_length += hlen;
2497 bpf_pkt.bpfp_header_length = hlen;
2498 } else {
2499 bpf_pkt.bpfp_header_length = 0;
2500 }
2501 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2502 }
2503
2504 void
2505 bpf_tap_out(
2506 ifnet_t ifp,
2507 u_int32_t dlt,
2508 mbuf_t m,
2509 void* hdr,
2510 size_t hlen)
2511 {
2512 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2513 }
2514
2515 void
2516 bpf_tap_in(
2517 ifnet_t ifp,
2518 u_int32_t dlt,
2519 mbuf_t m,
2520 void* hdr,
2521 size_t hlen)
2522 {
2523 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2524 }
2525
2526 /* Callback registered with Ethernet driver. */
2527 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2528 {
2529 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2530
2531 return 0;
2532 }
2533
2534
2535 static void
2536 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2537 {
2538 /* copy the optional header */
2539 if (pkt->bpfp_header_length != 0) {
2540 size_t count = min(len, pkt->bpfp_header_length);
2541 bcopy(pkt->bpfp_header, dst, count);
2542 len -= count;
2543 dst += count;
2544 }
2545 if (len == 0) {
2546 /* nothing past the header */
2547 return;
2548 }
2549 /* copy the packet */
2550 switch (pkt->bpfp_type) {
2551 case BPF_PACKET_TYPE_MBUF:
2552 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2553 break;
2554 default:
2555 break;
2556 }
2557 }
2558
2559 /*
2560 * Move the packet data from interface memory (pkt) into the
2561 * store buffer. Return 1 if it's time to wakeup a listener (buffer full),
2562 * otherwise 0.
2563 */
2564 static void
2565 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
2566 u_int snaplen, int outbound)
2567 {
2568 struct bpf_hdr *hp;
2569 struct bpf_hdr_ext *ehp;
2570 int totlen, curlen;
2571 int hdrlen, caplen;
2572 int do_wakeup = 0;
2573 u_char *payload;
2574 struct timeval tv;
2575
2576 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
2577 d->bd_bif->bif_hdrlen;
2578 /*
2579 * Figure out how many bytes to move. If the packet is
2580 * greater or equal to the snapshot length, transfer that
2581 * much. Otherwise, transfer the whole packet (unless
2582 * we hit the buffer size limit).
2583 */
2584 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
2585 if (totlen > d->bd_bufsize)
2586 totlen = d->bd_bufsize;
2587
2588 /*
2589 * Round up the end of the previous packet to the next longword.
2590 */
2591 curlen = BPF_WORDALIGN(d->bd_slen);
2592 if (curlen + totlen > d->bd_bufsize) {
2593 /*
2594 * This packet will overflow the storage buffer.
2595 * Rotate the buffers if we can, then wakeup any
2596 * pending reads.
2597 *
2598 * We cannot rotate buffers if a read is in progress
2599 * so drop the packet
2600 */
2601 if (d->bd_hbuf_read) {
2602 ++d->bd_dcount;
2603 return;
2604 }
2605
2606 if (d->bd_fbuf == NULL) {
2607 if (d->bd_headdrop == 0) {
2608 /*
2609 * We haven't completed the previous read yet,
2610 * so drop the packet.
2611 */
2612 ++d->bd_dcount;
2613 return;
2614 }
2615 /*
2616 * Drop the hold buffer as it contains older packets
2617 */
2618 d->bd_dcount += d->bd_hcnt;
2619 d->bd_fbuf = d->bd_hbuf;
2620 ROTATE_BUFFERS(d);
2621 } else {
2622 ROTATE_BUFFERS(d);
2623 }
2624 do_wakeup = 1;
2625 curlen = 0;
2626 }
2627 else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
2628 /*
2629 * Immediate mode is set, or the read timeout has
2630 * already expired during a select call. A packet
2631 * arrived, so the reader should be woken up.
2632 */
2633 do_wakeup = 1;
2634
2635 /*
2636 * Append the bpf header.
2637 */
2638 microtime(&tv);
2639 if (d->bd_flags & BPF_EXTENDED_HDR) {
2640 struct mbuf *m;
2641
2642 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
2643 ? pkt->bpfp_mbuf : NULL;
2644 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
2645 memset(ehp, 0, sizeof(*ehp));
2646 ehp->bh_tstamp.tv_sec = tv.tv_sec;
2647 ehp->bh_tstamp.tv_usec = tv.tv_usec;
2648
2649 ehp->bh_datalen = pkt->bpfp_total_length;
2650 ehp->bh_hdrlen = hdrlen;
2651 caplen = ehp->bh_caplen = totlen - hdrlen;
2652 if (m == NULL) {
2653 if (outbound) {
2654 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2655 } else {
2656 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2657 }
2658 } else if (outbound) {
2659 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2660
2661 /* only do lookups on non-raw INPCB */
2662 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
2663 PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
2664 (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
2665 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
2666 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
2667 ehp->bh_proto = m->m_pkthdr.pkt_proto;
2668 }
2669 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
2670 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT)
2671 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
2672 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ)
2673 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
2674 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT)
2675 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
2676 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
2677 ehp->bh_unsent_bytes =
2678 m->m_pkthdr.bufstatus_if;
2679 ehp->bh_unsent_snd =
2680 m->m_pkthdr.bufstatus_sndbuf;
2681 }
2682 } else
2683 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2684 payload = (u_char *)ehp + hdrlen;
2685 } else {
2686 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
2687 hp->bh_tstamp.tv_sec = tv.tv_sec;
2688 hp->bh_tstamp.tv_usec = tv.tv_usec;
2689 hp->bh_datalen = pkt->bpfp_total_length;
2690 hp->bh_hdrlen = hdrlen;
2691 caplen = hp->bh_caplen = totlen - hdrlen;
2692 payload = (u_char *)hp + hdrlen;
2693 }
2694 /*
2695 * Copy the packet data into the store buffer and update its length.
2696 */
2697 copy_bpf_packet(pkt, payload, caplen);
2698 d->bd_slen = curlen + totlen;
2699 d->bd_scnt += 1;
2700
2701 if (do_wakeup)
2702 bpf_wakeup(d);
2703 }
2704
2705 /*
2706 * Initialize all nonzero fields of a descriptor.
2707 */
2708 static int
2709 bpf_allocbufs(struct bpf_d *d)
2710 {
2711 if (d->bd_sbuf != NULL) {
2712 FREE(d->bd_sbuf, M_DEVBUF);
2713 d->bd_sbuf = NULL;
2714 }
2715 if (d->bd_hbuf != NULL) {
2716 FREE(d->bd_hbuf, M_DEVBUF);
2717 d->bd_hbuf = NULL;
2718 }
2719 if (d->bd_fbuf != NULL) {
2720 FREE(d->bd_fbuf, M_DEVBUF);
2721 d->bd_fbuf = NULL;
2722 }
2723
2724 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2725 if (d->bd_fbuf == NULL)
2726 return (ENOBUFS);
2727
2728 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2729 if (d->bd_sbuf == NULL) {
2730 FREE(d->bd_fbuf, M_DEVBUF);
2731 d->bd_fbuf = NULL;
2732 return (ENOBUFS);
2733 }
2734 d->bd_slen = 0;
2735 d->bd_hlen = 0;
2736 d->bd_scnt = 0;
2737 d->bd_hcnt = 0;
2738 return (0);
2739 }
2740
2741 /*
2742 * Free buffers currently in use by a descriptor.
2743 * Called on close.
2744 */
2745 static void
2746 bpf_freed(struct bpf_d *d)
2747 {
2748 /*
2749 * We don't need to lock out interrupts since this descriptor has
2750 * been detached from its interface and it yet hasn't been marked
2751 * free.
2752 */
2753 if (d->bd_hbuf_read)
2754 panic("bpf buffer freed during read");
2755
2756 if (d->bd_sbuf != 0) {
2757 FREE(d->bd_sbuf, M_DEVBUF);
2758 if (d->bd_hbuf != 0)
2759 FREE(d->bd_hbuf, M_DEVBUF);
2760 if (d->bd_fbuf != 0)
2761 FREE(d->bd_fbuf, M_DEVBUF);
2762 }
2763 if (d->bd_filter)
2764 FREE((caddr_t)d->bd_filter, M_DEVBUF);
2765 }
2766
2767 /*
2768 * Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *)
2769 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
2770 * size of the link header (variable length headers not yet supported).
2771 */
2772 void
2773 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2774 {
2775 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
2776 }
2777
2778 errno_t
2779 bpf_attach(
2780 ifnet_t ifp,
2781 u_int32_t dlt,
2782 u_int32_t hdrlen,
2783 bpf_send_func send,
2784 bpf_tap_func tap)
2785 {
2786 struct bpf_if *bp;
2787 struct bpf_if *bp_new;
2788 struct bpf_if *bp_before_first = NULL;
2789 struct bpf_if *bp_first = NULL;
2790 struct bpf_if *bp_last = NULL;
2791 boolean_t found;
2792
2793 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
2794 M_WAIT | M_ZERO);
2795 if (bp_new == 0)
2796 panic("bpfattach");
2797
2798 lck_mtx_lock(bpf_mlock);
2799
2800 /*
2801 * Check if this interface/dlt is already attached. Remember the
2802 * first and last attachment for this interface, as well as the
2803 * element before the first attachment.
2804 */
2805 found = FALSE;
2806 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
2807 if (bp->bif_ifp != ifp) {
2808 if (bp_first != NULL) {
2809 /* no more elements for this interface */
2810 break;
2811 }
2812 bp_before_first = bp;
2813 } else {
2814 if (bp->bif_dlt == dlt) {
2815 found = TRUE;
2816 break;
2817 }
2818 if (bp_first == NULL) {
2819 bp_first = bp;
2820 }
2821 bp_last = bp;
2822 }
2823 }
2824 if (found) {
2825 lck_mtx_unlock(bpf_mlock);
2826 printf("bpfattach - %s with dlt %d is already attached\n",
2827 if_name(ifp), dlt);
2828 FREE(bp_new, M_DEVBUF);
2829 return EEXIST;
2830 }
2831
2832 bp_new->bif_ifp = ifp;
2833 bp_new->bif_dlt = dlt;
2834 bp_new->bif_send = send;
2835 bp_new->bif_tap = tap;
2836
2837 if (bp_first == NULL) {
2838 /* No other entries for this ifp */
2839 bp_new->bif_next = bpf_iflist;
2840 bpf_iflist = bp_new;
2841 }
2842 else {
2843 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
2844 /* Make this the first entry for this interface */
2845 if (bp_before_first != NULL) {
2846 /* point the previous to us */
2847 bp_before_first->bif_next = bp_new;
2848 } else {
2849 /* we're the new head */
2850 bpf_iflist = bp_new;
2851 }
2852 bp_new->bif_next = bp_first;
2853 } else {
2854 /* Add this after the last entry for this interface */
2855 bp_new->bif_next = bp_last->bif_next;
2856 bp_last->bif_next = bp_new;
2857 }
2858 }
2859
2860 /*
2861 * Compute the length of the bpf header. This is not necessarily
2862 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
2863 * that the network layer header begins on a longword boundary (for
2864 * performance reasons and to alleviate alignment restrictions).
2865 */
2866 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
2867 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
2868 sizeof(struct bpf_hdr_ext)) - hdrlen;
2869
2870 /* Take a reference on the interface */
2871 ifnet_reference(ifp);
2872
2873 lck_mtx_unlock(bpf_mlock);
2874
2875 #ifndef __APPLE__
2876 if (bootverbose)
2877 printf("bpf: %s attached\n", if_name(ifp));
2878 #endif
2879
2880 return 0;
2881 }
2882
2883 /*
2884 * Detach bpf from an interface. This involves detaching each descriptor
2885 * associated with the interface, and leaving bd_bif NULL. Notify each
2886 * descriptor as it's detached so that any sleepers wake up and get
2887 * ENXIO.
2888 */
2889 void
2890 bpfdetach(struct ifnet *ifp)
2891 {
2892 struct bpf_if *bp, *bp_prev, *bp_next;
2893 struct bpf_d *d;
2894
2895 if (bpf_debug != 0)
2896 printf("%s: %s\n", __func__, if_name(ifp));
2897
2898 lck_mtx_lock(bpf_mlock);
2899
2900 /*
2901 * Build the list of devices attached to that interface
2902 * that we need to free while keeping the lock to maintain
2903 * the integrity of the interface list
2904 */
2905 bp_prev = NULL;
2906 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
2907 bp_next = bp->bif_next;
2908
2909 if (ifp != bp->bif_ifp) {
2910 bp_prev = bp;
2911 continue;
2912 }
2913 /* Unlink from the interface list */
2914 if (bp_prev)
2915 bp_prev->bif_next = bp->bif_next;
2916 else
2917 bpf_iflist = bp->bif_next;
2918
2919 /* Detach the devices attached to the interface */
2920 while ((d = bp->bif_dlist) != NULL) {
2921 /*
2922 * Take an extra reference to prevent the device
2923 * from being freed when bpf_detachd() releases
2924 * the reference for the interface list
2925 */
2926 bpf_acquire_d(d);
2927 bpf_detachd(d, 0);
2928 bpf_wakeup(d);
2929 bpf_release_d(d);
2930 }
2931 ifnet_release(ifp);
2932 }
2933
2934 lck_mtx_unlock(bpf_mlock);
2935 }
2936
2937 void
2938 bpf_init(__unused void *unused)
2939 {
2940 #ifdef __APPLE__
2941 int i;
2942 int maj;
2943
2944 if (bpf_devsw_installed == 0) {
2945 bpf_devsw_installed = 1;
2946 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
2947 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
2948 bpf_mlock_attr = lck_attr_alloc_init();
2949 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
2950 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
2951 if (maj == -1) {
2952 if (bpf_mlock_attr)
2953 lck_attr_free(bpf_mlock_attr);
2954 if (bpf_mlock_grp)
2955 lck_grp_free(bpf_mlock_grp);
2956 if (bpf_mlock_grp_attr)
2957 lck_grp_attr_free(bpf_mlock_grp_attr);
2958
2959 bpf_mlock = NULL;
2960 bpf_mlock_attr = NULL;
2961 bpf_mlock_grp = NULL;
2962 bpf_mlock_grp_attr = NULL;
2963 bpf_devsw_installed = 0;
2964 printf("bpf_init: failed to allocate a major number!\n");
2965 return;
2966 }
2967
2968 for (i = 0 ; i < NBPFILTER; i++)
2969 bpf_make_dev_t(maj);
2970 }
2971 #else
2972 cdevsw_add(&bpf_cdevsw);
2973 #endif
2974 }
2975
#ifndef __APPLE__
/*
 * Compiled only when __APPLE__ is not defined.
 * NOTE(review): this names bpf_drvinit, while the init routine defined
 * just above is bpf_init -- confirm the intended symbol.
 */
SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
#endif
2979
2980 #if CONFIG_MACF_NET
2981 struct label *
2982 mac_bpfdesc_label_get(struct bpf_d *d)
2983 {
2984
2985 return (d->bd_label);
2986 }
2987
2988 void
2989 mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
2990 {
2991
2992 d->bd_label = label;
2993 }
2994 #endif