1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/in_pcb.h>
112 #include <netinet/in_var.h>
113 #include <netinet/ip_var.h>
114 #include <netinet/tcp.h>
115 #include <netinet/tcp_var.h>
116 #include <netinet/udp.h>
117 #include <netinet/udp_var.h>
118 #include <netinet/if_ether.h>
119 #include <sys/kernel.h>
120 #include <sys/sysctl.h>
121 #include <net/firewire.h>
122
123 #include <miscfs/devfs/devfs.h>
124 #include <net/dlil.h>
125 #include <net/pktap.h>
126
127 #include <kern/locks.h>
128 #include <kern/thread_call.h>
129 #include <libkern/section_keywords.h>
130
131 #if CONFIG_MACF_NET
132 #include <security/mac_framework.h>
133 #endif /* CONFIG_MACF_NET */
134
135 extern int tvtohz(struct timeval *);
136
137 #define BPF_BUFSIZE 4096
138 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
139
140
141 #define PRINET 26 /* interruptible */
142
143 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
144
145 /*
146 * The default read buffer size is patchable.
147 */
148 static unsigned int bpf_bufsize = BPF_BUFSIZE;
149 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
150 &bpf_bufsize, 0, "");
151 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
152 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
153 &bpf_maxbufsize, 0, "");
154 static unsigned int bpf_maxdevices = 256;
155 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
156 &bpf_maxdevices, 0, "");
157 /*
158 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
159 * On OS X it is off by default, so a process needs to issue the
160 * BIOCSWANTPKTAP ioctl explicitly to be able to use DLT_PKTAP.
161 */
162 #if CONFIG_EMBEDDED
163 static unsigned int bpf_wantpktap = 1;
164 #else
165 static unsigned int bpf_wantpktap = 0;
166 #endif
167 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
168 &bpf_wantpktap, 0, "");
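/*
 * Illustrative sketch (not part of the original source): a userland process
 * that wants DLT_PKTAP opts in with the BIOCSWANTPKTAP ioctl before
 * selecting the data link type; "fd" is assumed to be an already-open
 * /dev/bpfN descriptor:
 *
 *	u_int want = 1;
 *
 *	if (ioctl(fd, BIOCSWANTPKTAP, &want) == -1)
 *		err(1, "BIOCSWANTPKTAP");
 */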
169
170 static int bpf_debug = 0;
171 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
172 &bpf_debug, 0, "");
173
174 /*
175 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
176 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
177 */
178 static struct bpf_if *bpf_iflist;
179 #ifdef __APPLE__
180 /*
181 * BSD now stores the bpf_d in the dev_t which is a struct
182 * on their system. Our dev_t is an int, so we still store
183 * the bpf_d in a separate table indexed by minor device #.
184 *
185 * The value stored in bpf_dtab[n] represents one of three states:
186 * 0: device not opened
187 * 1: device opening or closing
188 * other: device <n> opened with pointer to storage
189 */
190 static struct bpf_d **bpf_dtab = NULL;
191 static unsigned int bpf_dtab_size = 0;
192 static unsigned int nbpfilter = 0;
193
194 decl_lck_mtx_data(static, bpf_mlock_data);
195 static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
196 static lck_grp_t *bpf_mlock_grp;
197 static lck_grp_attr_t *bpf_mlock_grp_attr;
198 static lck_attr_t *bpf_mlock_attr;
199
200 #endif /* __APPLE__ */
201
202 static int bpf_allocbufs(struct bpf_d *);
203 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
204 static int bpf_detachd(struct bpf_d *d, int);
205 static void bpf_freed(struct bpf_d *);
206 static int bpf_movein(struct uio *, int,
207 struct mbuf **, struct sockaddr *, int *);
208 static int bpf_setif(struct bpf_d *, ifnet_t ifp);
209 static void bpf_timed_out(void *, void *);
210 static void bpf_wakeup(struct bpf_d *);
211 static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
212 static void reset_d(struct bpf_d *);
213 static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
214 static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
215 static int bpf_setdlt(struct bpf_d *, u_int);
216 static int bpf_set_traffic_class(struct bpf_d *, int);
217 static void bpf_set_packet_service_class(struct mbuf *, int);
218
219 static void bpf_acquire_d(struct bpf_d *);
220 static void bpf_release_d(struct bpf_d *);
221
222 static int bpf_devsw_installed;
223
224 void bpf_init(void *unused);
225 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
226
227 /*
228 * Darwin differs from BSD here: the following are static
229 * on BSD but not static on Darwin.
230 */
231 d_open_t bpfopen;
232 d_close_t bpfclose;
233 d_read_t bpfread;
234 d_write_t bpfwrite;
235 ioctl_fcn_t bpfioctl;
236 select_fcn_t bpfselect;
237
238
239 /* Darwin's cdevsw struct differs slightly from BSDs */
240 #define CDEV_MAJOR 23
241 static struct cdevsw bpf_cdevsw = {
242 /* open */ bpfopen,
243 /* close */ bpfclose,
244 /* read */ bpfread,
245 /* write */ bpfwrite,
246 /* ioctl */ bpfioctl,
247 /* stop */ eno_stop,
248 /* reset */ eno_reset,
249 /* tty */ NULL,
250 /* select */ bpfselect,
251 /* mmap */ eno_mmap,
252 /* strategy*/ eno_strat,
253 /* getc */ eno_getc,
254 /* putc */ eno_putc,
255 /* type */ 0
256 };
257
258 #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
259
260 static int
261 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
262 {
263 struct mbuf *m;
264 int error;
265 int len;
266 uint8_t sa_family;
267 int hlen;
268
269 switch (linktype) {
270
271 #if SLIP
272 case DLT_SLIP:
273 sa_family = AF_INET;
274 hlen = 0;
275 break;
276 #endif /* SLIP */
277
278 case DLT_EN10MB:
279 sa_family = AF_UNSPEC;
280 /* XXX Would MAXLINKHDR be better? */
281 hlen = sizeof(struct ether_header);
282 break;
283
284 #if FDDI
285 case DLT_FDDI:
286 #if defined(__FreeBSD__) || defined(__bsdi__)
287 sa_family = AF_IMPLINK;
288 hlen = 0;
289 #else
290 sa_family = AF_UNSPEC;
291 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
292 hlen = 24;
293 #endif
294 break;
295 #endif /* FDDI */
296
297 case DLT_RAW:
298 case DLT_NULL:
299 sa_family = AF_UNSPEC;
300 hlen = 0;
301 break;
302
303 #ifdef __FreeBSD__
304 case DLT_ATM_RFC1483:
305 /*
306 * en atm driver requires 4-byte atm pseudo header.
307 * though it isn't standard, vpi:vci needs to be
308 * specified anyway.
309 */
310 sa_family = AF_UNSPEC;
311 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
312 break;
313 #endif
314
315 case DLT_PPP:
316 sa_family = AF_UNSPEC;
317 hlen = 4; /* This should match PPP_HDRLEN */
318 break;
319
320 case DLT_APPLE_IP_OVER_IEEE1394:
321 sa_family = AF_UNSPEC;
322 hlen = sizeof(struct firewire_header);
323 break;
324
325 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
326 sa_family = AF_IEEE80211;
327 hlen = 0;
328 break;
329
330 case DLT_IEEE802_11_RADIO:
331 sa_family = AF_IEEE80211;
332 hlen = 0;
333 break;
334
335 default:
336 return (EIO);
337 }
338
339 // LP64todo - fix this!
340 len = uio_resid(uio);
341 *datlen = len - hlen;
342 if ((unsigned)len > MCLBYTES)
343 return (EIO);
344
345 if (sockp) {
346 /*
347 * Build a sockaddr based on the data link layer type.
348 * We do this at this level because the ethernet header
349 * is copied directly into the data field of the sockaddr.
350 * In the case of SLIP, there is no header and the packet
351 * is forwarded as is.
352 * Also, we are careful to leave room at the front of the mbuf
353 * for the link level header.
354 */
355 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
356 return (EIO);
357 }
358 sockp->sa_family = sa_family;
359 } else {
360 /*
361 * We're directly sending the packet data supplied by
362 * the user; we don't need to make room for the link
363 * header, and don't need the header length value any
364 * more, so set it to 0.
365 */
366 hlen = 0;
367 }
368
369 MGETHDR(m, M_WAIT, MT_DATA);
370 if (m == 0)
371 return (ENOBUFS);
372 if ((unsigned)len > MHLEN) {
373 MCLGET(m, M_WAIT);
374 if ((m->m_flags & M_EXT) == 0) {
375 error = ENOBUFS;
376 goto bad;
377 }
378 }
379 m->m_pkthdr.len = m->m_len = len;
380 m->m_pkthdr.rcvif = NULL;
381 *mp = m;
382
383 /*
384 * Make room for link header.
385 */
386 if (hlen != 0) {
387 m->m_pkthdr.len -= hlen;
388 m->m_len -= hlen;
389 m->m_data += hlen; /* XXX */
390 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
391 if (error)
392 goto bad;
393 }
394 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
395 if (error)
396 goto bad;
397
398 /* Check for multicast destination */
399 switch (linktype) {
400 case DLT_EN10MB: {
401 struct ether_header *eh = mtod(m, struct ether_header *);
402
403 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
404 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
405 m->m_flags |= M_BCAST;
406 else
407 m->m_flags |= M_MCAST;
408 }
409 break;
410 }
411 }
412
413 return 0;
414 bad:
415 m_freem(m);
416 return (error);
417 }
418
419 #ifdef __APPLE__
420
421 /*
422 * The dynamic addition of a new device node must block all processes that
423 * are opening the last device so that no process will get an unexpected
424 * ENOENT
425 */
426 static void
427 bpf_make_dev_t(int maj)
428 {
429 static int bpf_growing = 0;
430 unsigned int cur_size = nbpfilter, i;
431
432 if (nbpfilter >= bpf_maxdevices)
433 return;
434
435 while (bpf_growing) {
436 /* Wait until new device has been created */
437 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
438 }
439 if (nbpfilter > cur_size) {
440 /* other thread grew it already */
441 return;
442 }
443 bpf_growing = 1;
444
445 /* need to grow bpf_dtab first */
446 if (nbpfilter == bpf_dtab_size) {
447 int new_dtab_size;
448 struct bpf_d **new_dtab = NULL;
449 struct bpf_d **old_dtab = NULL;
450
451 new_dtab_size = bpf_dtab_size + NBPFILTER;
452 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
453 if (new_dtab == 0) {
454 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
455 goto done;
456 }
457 if (bpf_dtab) {
458 bcopy(bpf_dtab, new_dtab,
459 sizeof(struct bpf_d *) * bpf_dtab_size);
460 }
461 bzero(new_dtab + bpf_dtab_size,
462 sizeof(struct bpf_d *) * NBPFILTER);
463 old_dtab = bpf_dtab;
464 bpf_dtab = new_dtab;
465 bpf_dtab_size = new_dtab_size;
466 if (old_dtab != NULL)
467 _FREE(old_dtab, M_DEVBUF);
468 }
469 i = nbpfilter++;
470 (void) devfs_make_node(makedev(maj, i),
471 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
472 "bpf%d", i);
473 done:
474 bpf_growing = 0;
475 wakeup((caddr_t)&bpf_growing);
476 }
477
478 #endif
479
480 /*
481 * Attach file to the bpf interface, i.e. make d listen on bp.
482 */
483 static errno_t
484 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
485 {
486 int first = bp->bif_dlist == NULL;
487 int error = 0;
488
489 /*
490 * Point d at bp, and add d to the interface's list of listeners.
491 * Finally, point the driver's bpf cookie at the interface so
492 * it will divert packets to bpf.
493 */
494 d->bd_bif = bp;
495 d->bd_next = bp->bif_dlist;
496 bp->bif_dlist = d;
497
498 /*
499 * Take a reference on the device even if an error is returned
500 * because we keep the device in the interface's list of listeners
501 */
502 bpf_acquire_d(d);
503
504 if (first) {
505 /* Find the default bpf entry for this ifp */
506 if (bp->bif_ifp->if_bpf == NULL) {
507 struct bpf_if *tmp, *primary = NULL;
508
509 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
510 if (tmp->bif_ifp == bp->bif_ifp) {
511 primary = tmp;
512 break;
513 }
514 }
515 bp->bif_ifp->if_bpf = primary;
516 }
517 /* Only call dlil_set_bpf_tap for primary dlt */
518 if (bp->bif_ifp->if_bpf == bp)
519 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);
520
521 if (bp->bif_tap != NULL)
522 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
523 }
524
525 /*
526 * Reset the detach flags in case we previously detached an interface
527 */
528 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
529
530 if (bp->bif_dlt == DLT_PKTAP) {
531 d->bd_flags |= BPF_FINALIZE_PKTAP;
532 } else {
533 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
534 }
535 return error;
536 }
537
538 /*
539 * Detach a file from its interface.
540 *
541 * Return 1 if it was closed by some thread, 0 otherwise
542 */
543 static int
544 bpf_detachd(struct bpf_d *d, int closing)
545 {
546 struct bpf_d **p;
547 struct bpf_if *bp;
548 struct ifnet *ifp;
549
550 int bpf_closed = d->bd_flags & BPF_CLOSING;
551 /*
552 * Some other thread already detached
553 */
554 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
555 goto done;
556 /*
557 * This thread is doing the detach
558 */
559 d->bd_flags |= BPF_DETACHING;
560
561 ifp = d->bd_bif->bif_ifp;
562 bp = d->bd_bif;
563
564 if (bpf_debug != 0)
565 printf("%s: %llx %s%s\n",
566 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
567 if_name(ifp), closing ? " closing" : "");
568
569 /* Remove d from the interface's descriptor list. */
570 p = &bp->bif_dlist;
571 while (*p != d) {
572 p = &(*p)->bd_next;
573 if (*p == 0)
574 panic("bpf_detachd: descriptor not in list");
575 }
576 *p = (*p)->bd_next;
577 if (bp->bif_dlist == 0) {
578 /*
579 * Let the driver know that there are no more listeners.
580 */
581 /* Only call dlil_set_bpf_tap for primary dlt */
582 if (bp->bif_ifp->if_bpf == bp)
583 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
584 if (bp->bif_tap)
585 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
586
587 for (bp = bpf_iflist; bp; bp = bp->bif_next)
588 if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
589 break;
590 if (bp == NULL)
591 ifp->if_bpf = NULL;
592 }
593 d->bd_bif = NULL;
594 /*
595 * Check if this descriptor had requested promiscuous mode.
596 * If so, turn it off.
597 */
598 if (d->bd_promisc) {
599 d->bd_promisc = 0;
600 lck_mtx_unlock(bpf_mlock);
601 if (ifnet_set_promiscuous(ifp, 0)) {
602 /*
603 * Something is really wrong if we were able to put
604 * the driver into promiscuous mode, but can't
605 * take it out.
606 * Most likely the network interface is gone.
607 */
608 printf("%s: ifnet_set_promiscuous failed\n", __func__);
609 }
610 lck_mtx_lock(bpf_mlock);
611 }
612
613 /*
614 * Wake up other threads that are waiting for this thread to finish
615 * detaching
616 */
617 d->bd_flags &= ~BPF_DETACHING;
618 d->bd_flags |= BPF_DETACHED;
619
620 /* Refresh the local variable as d could have been modified */
621 bpf_closed = d->bd_flags & BPF_CLOSING;
622 /*
623 * Note that we've kept the reference because we may have dropped
624 * the lock when turning off promiscuous mode
625 */
626 bpf_release_d(d);
627
628 done:
629 /*
630 * When closing, make sure no other thread refers to the bpf_d
631 */
632 if (bpf_debug != 0)
633 printf("%s: %llx done\n",
634 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
635 /*
636 * Let the caller know the bpf_d is closed
637 */
638 if (bpf_closed)
639 return (1);
640 else
641 return (0);
642 }
643
644
645 /*
646 * Start asynchronous timer, if necessary.
647 * Must be called with bpf_mlock held.
648 */
649 static void
650 bpf_start_timer(struct bpf_d *d)
651 {
652 uint64_t deadline;
653 struct timeval tv;
654
655 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
656 tv.tv_sec = d->bd_rtout / hz;
657 tv.tv_usec = (d->bd_rtout % hz) * tick;
658
659 clock_interval_to_deadline(
660 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
661 NSEC_PER_USEC, &deadline);
662 /*
663 * The state is BPF_IDLE, so the timer hasn't
664 * been started yet, and hasn't gone off yet;
665 * there is no thread call scheduled, so this
666 * won't change the schedule.
667 *
668 * XXX - what if, by the time it gets entered,
669 * the deadline has already passed?
670 */
671 thread_call_enter_delayed(d->bd_thread_call, deadline);
672 d->bd_state = BPF_WAITING;
673 }
674 }
675
676 /*
677 * Cancel asynchronous timer.
678 * Must be called with bpf_mlock held.
679 */
680 static boolean_t
681 bpf_stop_timer(struct bpf_d *d)
682 {
683 /*
684 * If the timer has already gone off, this does nothing.
685 * Our caller is expected to set d->bd_state to BPF_IDLE,
686 * with the bpf_mlock, after we are called. bpf_timed_out()
687 * also grabs bpf_mlock, so, if the timer has gone off and
688 * bpf_timed_out() hasn't finished, it's waiting for the
689 * lock; when this thread releases the lock, it will
690 * find the state is BPF_IDLE, and just release the
691 * lock and return.
692 */
693 return (thread_call_cancel(d->bd_thread_call));
694 }
695
696 void
697 bpf_acquire_d(struct bpf_d *d)
698 {
699 void *lr_saved = __builtin_return_address(0);
700
701 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
702
703 d->bd_refcnt += 1;
704
705 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
706 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
707 }
708
709 void
710 bpf_release_d(struct bpf_d *d)
711 {
712 void *lr_saved = __builtin_return_address(0);
713
714 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
715
716 if (d->bd_refcnt <= 0)
717 panic("%s: %p refcnt <= 0", __func__, d);
718
719 d->bd_refcnt -= 1;
720
721 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
722 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
723
724 if (d->bd_refcnt == 0) {
725 /* Assert the device is detached */
726 if ((d->bd_flags & BPF_DETACHED) == 0)
727 panic("%s: %p BPF_DETACHED not set", __func__, d);
728
729 _FREE(d, M_DEVBUF);
730 }
731 }
732
733 /*
734 * Open ethernet device. Returns ENXIO for illegal minor device number,
735 * EBUSY if file is open by another process.
736 */
737 /* ARGSUSED */
738 int
739 bpfopen(dev_t dev, int flags, __unused int fmt,
740 __unused struct proc *p)
741 {
742 struct bpf_d *d;
743
744 lck_mtx_lock(bpf_mlock);
745 if ((unsigned int) minor(dev) >= nbpfilter) {
746 lck_mtx_unlock(bpf_mlock);
747 return (ENXIO);
748 }
749 /*
750 * New device nodes are created on demand when opening the last one.
751 * The programming model is for processes to loop on the minor starting at 0
752 * as long as EBUSY is returned. The loop stops when either the open succeeds or
753 * an error other than EBUSY is returned. That means that bpf_make_dev_t() must
754 * block all processes that are opening the last node. If not all
755 * processes are blocked, they could unexpectedly get ENOENT and abort their
756 * opening loop.
757 */
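/*
 * Illustrative sketch (not part of the original source) of that userland
 * programming model; the bound of 256 matches the default bpf_maxdevices,
 * and the loop stops on any error other than EBUSY:
 *
 *	int fd = -1;
 *	char name[16];
 *
 *	for (int i = 0; i < 256 && fd == -1; i++) {
 *		snprintf(name, sizeof (name), "/dev/bpf%d", i);
 *		fd = open(name, O_RDWR);
 *		if (fd == -1 && errno != EBUSY)
 *			break;
 *	}
 */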
758 if ((unsigned int) minor(dev) == (nbpfilter - 1))
759 bpf_make_dev_t(major(dev));
760
761 /*
762 * Each minor can be opened by only one process. If the requested
763 * minor is in use, return EBUSY.
764 *
765 * Important: bpfopen() and bpfclose() have to check and set the status of a device
766 * in the same locking context, otherwise the device may be leaked because the vnode use count
767 * will be unexpectedly greater than 1 when close() is called.
768 */
769 if (bpf_dtab[minor(dev)] == 0) {
770 bpf_dtab[minor(dev)] = (void *)1; /* Mark opening */
771 } else {
772 lck_mtx_unlock(bpf_mlock);
773 return (EBUSY);
774 }
775 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
776 M_WAIT | M_ZERO);
777 if (d == NULL) {
778 /* this really is a catastrophic failure */
779 printf("bpfopen: malloc bpf_d failed\n");
780 bpf_dtab[minor(dev)] = NULL;
781 lck_mtx_unlock(bpf_mlock);
782 return ENOMEM;
783 }
784
785 /* Mark "in use" and do most initialization. */
786 bpf_acquire_d(d);
787 d->bd_bufsize = bpf_bufsize;
788 d->bd_sig = SIGIO;
789 d->bd_seesent = 1;
790 d->bd_oflags = flags;
791 d->bd_state = BPF_IDLE;
792 d->bd_traffic_class = SO_TC_BE;
793 d->bd_flags |= BPF_DETACHED;
794 if (bpf_wantpktap)
795 d->bd_flags |= BPF_WANT_PKTAP;
796 else
797 d->bd_flags &= ~BPF_WANT_PKTAP;
798 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
799 if (d->bd_thread_call == NULL) {
800 printf("bpfopen: malloc thread call failed\n");
801 bpf_dtab[minor(dev)] = NULL;
802 bpf_release_d(d);
803 lck_mtx_unlock(bpf_mlock);
804
805 return (ENOMEM);
806 }
807 #if CONFIG_MACF_NET
808 mac_bpfdesc_label_init(d);
809 mac_bpfdesc_label_associate(kauth_cred_get(), d);
810 #endif
811 bpf_dtab[minor(dev)] = d; /* Mark opened */
812 lck_mtx_unlock(bpf_mlock);
813
814 return (0);
815 }
816
817 /*
818 * Close the descriptor by detaching it from its interface,
819 * deallocating its buffers, and marking it free.
820 */
821 /* ARGSUSED */
822 int
823 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
824 __unused struct proc *p)
825 {
826 struct bpf_d *d;
827
828 /* Take BPF lock to ensure no other thread is using the device */
829 lck_mtx_lock(bpf_mlock);
830
831 d = bpf_dtab[minor(dev)];
832 if (d == 0 || d == (void *)1) {
833 lck_mtx_unlock(bpf_mlock);
834 return (ENXIO);
835 }
836
837 /*
838 * Other threads may call bpd_detachd() if we drop the bpf_mlock
839 */
840 d->bd_flags |= BPF_CLOSING;
841
842 if (bpf_debug != 0)
843 printf("%s: %llx\n",
844 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
845
846 bpf_dtab[minor(dev)] = (void *)1; /* Mark closing */
847
848 /*
849 * Deal with any in-progress timeouts.
850 */
851 switch (d->bd_state) {
852 case BPF_IDLE:
853 /*
854 * Not waiting for a timeout, and no timeout happened.
855 */
856 break;
857
858 case BPF_WAITING:
859 /*
860 * Waiting for a timeout.
861 * Cancel any timer that has yet to go off,
862 * and mark the state as "closing".
863 * Then drop the lock to allow any timers that
864 * *have* gone off to run to completion, and wait
865 * for them to finish.
866 */
867 if (!bpf_stop_timer(d)) {
868 /*
869 * There was no pending call, so the call must
870 * have been in progress. Wait for the call to
871 * complete; we have to drop the lock while
872 * waiting, to let the in-progress call complete.
873 */
874 d->bd_state = BPF_DRAINING;
875 while (d->bd_state == BPF_DRAINING)
876 msleep((caddr_t)d, bpf_mlock, PRINET,
877 "bpfdraining", NULL);
878 }
879 d->bd_state = BPF_IDLE;
880 break;
881
882 case BPF_TIMED_OUT:
883 /*
884 * Timer went off, and the timeout routine finished.
885 */
886 d->bd_state = BPF_IDLE;
887 break;
888
889 case BPF_DRAINING:
890 /*
891 * Another thread is blocked on a close waiting for
892 * a timeout to finish.
893 * This "shouldn't happen", as the first thread to enter
894 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
895 * all subsequent threads should see that and fail with
896 * ENXIO.
897 */
898 panic("Two threads blocked in a BPF close");
899 break;
900 }
901
902 if (d->bd_bif)
903 bpf_detachd(d, 1);
904 selthreadclear(&d->bd_sel);
905 #if CONFIG_MACF_NET
906 mac_bpfdesc_label_destroy(d);
907 #endif
908 thread_call_free(d->bd_thread_call);
909
910 while (d->bd_hbuf_read)
911 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
912
913 bpf_freed(d);
914
915 /* Mark free in same context as bpfopen comes to check */
916 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
917
918 bpf_release_d(d);
919
920 lck_mtx_unlock(bpf_mlock);
921
922 return (0);
923 }
924
925
926 #define BPF_SLEEP bpf_sleep
927
928 static int
929 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
930 {
931 u_int64_t abstime = 0;
932
933 if(timo)
934 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
935
936 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
937 }
938
939 /*
940 * Rotate the packet buffers in descriptor d. Move the store buffer
941 * into the hold slot, and the free buffer into the store slot.
942 * Zero the length of the new store buffer.
943 */
944 #define ROTATE_BUFFERS(d) \
945 if (d->bd_hbuf_read) \
946 panic("rotating bpf buffers during read"); \
947 (d)->bd_hbuf = (d)->bd_sbuf; \
948 (d)->bd_hlen = (d)->bd_slen; \
949 (d)->bd_hcnt = (d)->bd_scnt; \
950 (d)->bd_sbuf = (d)->bd_fbuf; \
951 (d)->bd_slen = 0; \
952 (d)->bd_scnt = 0; \
953 (d)->bd_fbuf = NULL;
954 /*
955 * bpfread - read next chunk of packets from buffers
956 */
957 int
958 bpfread(dev_t dev, struct uio *uio, int ioflag)
959 {
960 struct bpf_d *d;
961 caddr_t hbuf;
962 int timed_out, hbuf_len;
963 int error;
964 int flags;
965
966 lck_mtx_lock(bpf_mlock);
967
968 d = bpf_dtab[minor(dev)];
969 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
970 lck_mtx_unlock(bpf_mlock);
971 return (ENXIO);
972 }
973
974 bpf_acquire_d(d);
975
976 /*
977 * Restrict application to use a buffer the same size as
978 * the kernel buffers.
979 */
980 if (uio_resid(uio) != d->bd_bufsize) {
981 bpf_release_d(d);
982 lck_mtx_unlock(bpf_mlock);
983 return (EINVAL);
984 }
985
986 if (d->bd_state == BPF_WAITING)
987 bpf_stop_timer(d);
988
989 timed_out = (d->bd_state == BPF_TIMED_OUT);
990 d->bd_state = BPF_IDLE;
991
992 while (d->bd_hbuf_read)
993 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
994
995 if ((d->bd_flags & BPF_CLOSING) != 0) {
996 bpf_release_d(d);
997 lck_mtx_unlock(bpf_mlock);
998 return (ENXIO);
999 }
1000 /*
1001 * If the hold buffer is empty, then do a timed sleep, which
1002 * ends when the timeout expires or when enough packets
1003 * have arrived to fill the store buffer.
1004 */
1005 while (d->bd_hbuf == 0) {
1006 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
1007 && d->bd_slen != 0) {
1008 /*
1009 * We're in immediate mode, or are reading
1010 * in non-blocking mode, or a timer was
1011 * started before the read (e.g., by select()
1012 * or poll()) and has expired and a packet(s)
1013 * either arrived since the previous
1014 * read or arrived while we were asleep.
1015 * Rotate the buffers and return what's here.
1016 */
1017 ROTATE_BUFFERS(d);
1018 break;
1019 }
1020
1021 /*
1022 * No data is available, check to see if the bpf device
1023 * is still pointed at a real interface. If not, return
1024 * ENXIO so that the userland process knows to rebind
1025 * it before using it again.
1026 */
1027 if (d->bd_bif == NULL) {
1028 bpf_release_d(d);
1029 lck_mtx_unlock(bpf_mlock);
1030 return (ENXIO);
1031 }
1032 if (ioflag & IO_NDELAY) {
1033 bpf_release_d(d);
1034 lck_mtx_unlock(bpf_mlock);
1035 return (EWOULDBLOCK);
1036 }
1037 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
1038 d->bd_rtout);
1039 /*
1040 * Make sure device is still opened
1041 */
1042 if ((d->bd_flags & BPF_CLOSING) != 0) {
1043 bpf_release_d(d);
1044 lck_mtx_unlock(bpf_mlock);
1045 return (ENXIO);
1046 }
1047
1048 while (d->bd_hbuf_read)
1049 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1050
1051 if ((d->bd_flags & BPF_CLOSING) != 0) {
1052 bpf_release_d(d);
1053 lck_mtx_unlock(bpf_mlock);
1054 return (ENXIO);
1055 }
1056
1057 if (error == EINTR || error == ERESTART) {
1058 if (d->bd_hbuf != NULL) {
1059 /*
1060 * Because we msleep, the hold buffer might
1061 * be filled when we wake up. Avoid rotating
1062 * in this case.
1063 */
1064 break;
1065 }
1066 if (d->bd_slen != 0) {
1067 /*
1068 * Sometimes we may be interrupted often and
1069 * the sleep above will not timeout.
1070 * Regardless, we should rotate the buffers
1071 * if there's any new data pending and
1072 * return it.
1073 */
1074 ROTATE_BUFFERS(d);
1075 break;
1076 }
1077 bpf_release_d(d);
1078 lck_mtx_unlock(bpf_mlock);
1079 if (error == ERESTART) {
1080 printf("%s: %llx ERESTART to EINTR\n",
1081 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1082 error = EINTR;
1083 }
1084 return (error);
1085 }
1086 if (error == EWOULDBLOCK) {
1087 /*
1088 * On a timeout, return what's in the buffer,
1089 * which may be nothing. If there is something
1090 * in the store buffer, we can rotate the buffers.
1091 */
1092 if (d->bd_hbuf)
1093 /*
1094 * We filled up the buffer in between
1095 * getting the timeout and arriving
1096 * here, so we don't need to rotate.
1097 */
1098 break;
1099
1100 if (d->bd_slen == 0) {
1101 bpf_release_d(d);
1102 lck_mtx_unlock(bpf_mlock);
1103 return (0);
1104 }
1105 ROTATE_BUFFERS(d);
1106 break;
1107 }
1108 }
1109 /*
1110 * At this point, we know we have something in the hold slot.
1111 */
1112
1113 /*
1114 * Set the hold buffer read flag so we do not
1115 * rotate the buffers until the hold buffer
1116 * read is complete, and to avoid issues resulting
1117 * from page faults during disk sleep (<rdar://problem/13436396>).
1118 */
1119 d->bd_hbuf_read = 1;
1120 hbuf = d->bd_hbuf;
1121 hbuf_len = d->bd_hlen;
1122 flags = d->bd_flags;
1123 lck_mtx_unlock(bpf_mlock);
1124
1125 #ifdef __APPLE__
1126 /*
1127 * Before we move data to userland, we fill out the extended
1128 * header fields.
1129 */
1130 if (flags & BPF_EXTENDED_HDR) {
1131 char *p;
1132
1133 p = hbuf;
1134 while (p < hbuf + hbuf_len) {
1135 struct bpf_hdr_ext *ehp;
1136 uint32_t flowid;
1137 struct so_procinfo soprocinfo;
1138 int found = 0;
1139
1140 ehp = (struct bpf_hdr_ext *)(void *)p;
1141 if ((flowid = ehp->bh_flowid)) {
1142 if (ehp->bh_proto == IPPROTO_TCP)
1143 found = inp_findinpcb_procinfo(&tcbinfo,
1144 flowid, &soprocinfo);
1145 else if (ehp->bh_proto == IPPROTO_UDP)
1146 found = inp_findinpcb_procinfo(&udbinfo,
1147 flowid, &soprocinfo);
1148 if (found == 1) {
1149 ehp->bh_pid = soprocinfo.spi_pid;
1150 proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN);
1151 }
1152 ehp->bh_flowid = 0;
1153 }
1154
1155 if (flags & BPF_FINALIZE_PKTAP) {
1156 struct pktap_header *pktaphdr;
1157
1158 pktaphdr = (struct pktap_header *)(void *)
1159 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1160
1161 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1162 pktap_finalize_proc_info(pktaphdr);
1163
1164 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1165 ehp->bh_tstamp.tv_sec =
1166 pktaphdr->pth_tstamp.tv_sec;
1167 ehp->bh_tstamp.tv_usec =
1168 pktaphdr->pth_tstamp.tv_usec;
1169 }
1170 }
1171 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1172 }
1173 } else if (flags & BPF_FINALIZE_PKTAP) {
1174 char *p;
1175
1176 p = hbuf;
1177 while (p < hbuf + hbuf_len) {
1178 struct bpf_hdr *hp;
1179 struct pktap_header *pktaphdr;
1180
1181 hp = (struct bpf_hdr *)(void *)p;
1182 pktaphdr = (struct pktap_header *)(void *)
1183 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1184
1185 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1186 pktap_finalize_proc_info(pktaphdr);
1187
1188 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1189 hp->bh_tstamp.tv_sec =
1190 pktaphdr->pth_tstamp.tv_sec;
1191 hp->bh_tstamp.tv_usec =
1192 pktaphdr->pth_tstamp.tv_usec;
1193 }
1194
1195 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1196 }
1197 }
1198 #endif
1199
1200 /*
1201 * Move data from hold buffer into user space.
1202 * We know the entire buffer is transferred since
1203 * we checked above that the read buffer is bpf_bufsize bytes.
1204 */
1205 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1206
1207 lck_mtx_lock(bpf_mlock);
1208 /*
1209 * Make sure device is still opened
1210 */
1211 if ((d->bd_flags & BPF_CLOSING) != 0) {
1212 bpf_release_d(d);
1213 lck_mtx_unlock(bpf_mlock);
1214 return (ENXIO);
1215 }
1216
1217 d->bd_hbuf_read = 0;
1218 d->bd_fbuf = d->bd_hbuf;
1219 d->bd_hbuf = NULL;
1220 d->bd_hlen = 0;
1221 d->bd_hcnt = 0;
1222 wakeup((caddr_t)d);
1223
1224 bpf_release_d(d);
1225 lck_mtx_unlock(bpf_mlock);
1226 return (error);
1227
1228 }
1229
1230
1231 /*
1232 * If there are processes sleeping on this descriptor, wake them up.
1233 */
1234 static void
1235 bpf_wakeup(struct bpf_d *d)
1236 {
1237 if (d->bd_state == BPF_WAITING) {
1238 bpf_stop_timer(d);
1239 d->bd_state = BPF_IDLE;
1240 }
1241 wakeup((caddr_t)d);
1242 if (d->bd_async && d->bd_sig && d->bd_sigio)
1243 pgsigio(d->bd_sigio, d->bd_sig);
1244
1245 selwakeup(&d->bd_sel);
1246 if ((d->bd_flags & BPF_KNOTE))
1247 KNOTE(&d->bd_sel.si_note, 1);
1248 }
1249
1250
1251 static void
1252 bpf_timed_out(void *arg, __unused void *dummy)
1253 {
1254 struct bpf_d *d = (struct bpf_d *)arg;
1255
1256 lck_mtx_lock(bpf_mlock);
1257 if (d->bd_state == BPF_WAITING) {
1258 /*
1259 * There's a select or kqueue waiting for this; if there's
1260 * now stuff to read, wake it up.
1261 */
1262 d->bd_state = BPF_TIMED_OUT;
1263 if (d->bd_slen != 0)
1264 bpf_wakeup(d);
1265 } else if (d->bd_state == BPF_DRAINING) {
1266 /*
1267 * A close is waiting for this to finish.
1268 * Mark it as finished, and wake the close up.
1269 */
1270 d->bd_state = BPF_IDLE;
1271 bpf_wakeup(d);
1272 }
1273 lck_mtx_unlock(bpf_mlock);
1274 }
1275
1276
1277
1278
1279
1280 /* keep in sync with bpf_movein above: */
1281 #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1282
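/*
 * Illustrative sketch (not part of the original source): with the
 * "header already complete" flag set (BIOCSHDRCMPLT), the caller writes a
 * complete link-layer frame that is transmitted as-is; without it,
 * bpf_movein() below copies the leading link-level header into a sockaddr
 * (dst_buf) and the frame is rebuilt on output. "frame" and "frame_len"
 * are hypothetical userland variables:
 *
 *	u_int on = 1;
 *
 *	ioctl(fd, BIOCSHDRCMPLT, &on);
 *	write(fd, frame, frame_len);
 */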
1283 int
1284 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1285 {
1286 struct bpf_d *d;
1287 struct ifnet *ifp;
1288 struct mbuf *m = NULL;
1289 int error;
1290 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1291 int datlen = 0;
1292 int bif_dlt;
1293 int bd_hdrcmplt;
1294
1295 lck_mtx_lock(bpf_mlock);
1296
1297 d = bpf_dtab[minor(dev)];
1298 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1299 lck_mtx_unlock(bpf_mlock);
1300 return (ENXIO);
1301 }
1302
1303 bpf_acquire_d(d);
1304
1305 if (d->bd_bif == 0) {
1306 bpf_release_d(d);
1307 lck_mtx_unlock(bpf_mlock);
1308 return (ENXIO);
1309 }
1310
1311 ifp = d->bd_bif->bif_ifp;
1312
1313 if ((ifp->if_flags & IFF_UP) == 0) {
1314 bpf_release_d(d);
1315 lck_mtx_unlock(bpf_mlock);
1316 return (ENETDOWN);
1317 }
1318 if (uio_resid(uio) == 0) {
1319 bpf_release_d(d);
1320 lck_mtx_unlock(bpf_mlock);
1321 return (0);
1322 }
1323 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1324
1325 /*
1326 * fix for PR-6849527
1327 * getting variables onto the stack before dropping the lock for bpf_movein()
1328 */
1329 bif_dlt = (int)d->bd_bif->bif_dlt;
1330 bd_hdrcmplt = d->bd_hdrcmplt;
1331
1332 /* bpf_movein() allocates mbufs; drop the lock */
1333 lck_mtx_unlock(bpf_mlock);
1334
1335 error = bpf_movein(uio, bif_dlt, &m,
1336 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1337 &datlen);
1338
1339 /* take the lock again */
1340 lck_mtx_lock(bpf_mlock);
1341 if (error) {
1342 bpf_release_d(d);
1343 lck_mtx_unlock(bpf_mlock);
1344 return (error);
1345 }
1346
1347 /* verify the device is still open */
1348 if ((d->bd_flags & BPF_CLOSING) != 0) {
1349 bpf_release_d(d);
1350 lck_mtx_unlock(bpf_mlock);
1351 m_freem(m);
1352 return (ENXIO);
1353 }
1354
1355 if (d->bd_bif == NULL) {
1356 bpf_release_d(d);
1357 lck_mtx_unlock(bpf_mlock);
1358 m_free(m);
1359 return (ENXIO);
1360 }
1361
1362 if ((unsigned)datlen > ifp->if_mtu) {
1363 bpf_release_d(d);
1364 lck_mtx_unlock(bpf_mlock);
1365 m_freem(m);
1366 return (EMSGSIZE);
1367 }
1368
1369
1370 #if CONFIG_MACF_NET
1371 mac_mbuf_label_associate_bpfdesc(d, m);
1372 #endif
1373
1374 bpf_set_packet_service_class(m, d->bd_traffic_class);
1375
1376 lck_mtx_unlock(bpf_mlock);
1377
1378 /*
1379 * The driver frees the mbuf.
1380 */
1381 if (d->bd_hdrcmplt) {
1382 if (d->bd_bif->bif_send)
1383 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1384 else
1385 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1386 } else {
1387 error = dlil_output(ifp, PF_INET, m, NULL,
1388 (struct sockaddr *)dst_buf, 0, NULL);
1389 }
1390
1391 lck_mtx_lock(bpf_mlock);
1392 bpf_release_d(d);
1393 lck_mtx_unlock(bpf_mlock);
1394
1395 return (error);
1396 }
1397
1398 /*
1399 * Reset a descriptor by flushing its packet buffer and clearing the
1400 * receive and drop counts.
1401 */
1402 static void
1403 reset_d(struct bpf_d *d)
1404 {
1405 if (d->bd_hbuf_read)
1406 panic("resetting buffers during read");
1407
1408 if (d->bd_hbuf) {
1409 /* Free the hold buffer. */
1410 d->bd_fbuf = d->bd_hbuf;
1411 d->bd_hbuf = NULL;
1412 }
1413 d->bd_slen = 0;
1414 d->bd_hlen = 0;
1415 d->bd_scnt = 0;
1416 d->bd_hcnt = 0;
1417 d->bd_rcount = 0;
1418 d->bd_dcount = 0;
1419 }
1420
1421 /*
1422 * FIONREAD Check for read packet available.
1423 * SIOCGIFADDR Get interface address - convenient hook to driver.
1424 * BIOCGBLEN Get buffer len [for read()].
1425 * BIOCSETF Set ethernet read filter.
1426 * BIOCFLUSH Flush read packet buffer.
1427 * BIOCPROMISC Put interface into promiscuous mode.
1428 * BIOCGDLT Get link layer type.
1429 * BIOCGETIF Get interface name.
1430 * BIOCSETIF Set interface.
1431 * BIOCSRTIMEOUT Set read timeout.
1432 * BIOCGRTIMEOUT Get read timeout.
1433 * BIOCGSTATS Get packet stats.
1434 * BIOCIMMEDIATE Set immediate mode.
1435 * BIOCVERSION Get filter language version.
1436 * BIOCGHDRCMPLT Get "header already complete" flag
1437 * BIOCSHDRCMPLT Set "header already complete" flag
1438 * BIOCGSEESENT Get "see packets sent" flag
1439 * BIOCSSEESENT Set "see packets sent" flag
1440 * BIOCSETTC Set traffic class.
1441 * BIOCGETTC Get traffic class.
1442 * BIOCSEXTHDR Set "extended header" flag
1443 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1444 * BIOCGHEADDROP Get "head-drop" flag
1445 */
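/*
 * Illustrative sketch (not part of the original source): a minimal userland
 * capture setup using these ioctls; "fd" is an open /dev/bpfN descriptor,
 * "en0" is only an example interface name, and error handling is omitted:
 *
 *	struct ifreq ifr;
 *	u_int bufsize, on = 1;
 *
 *	ioctl(fd, BIOCGBLEN, &bufsize);
 *	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &on);
 *
 * Subsequent read() calls must use a buffer of exactly "bufsize" bytes
 * (see the check in bpfread() above).
 */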
1446 /* ARGSUSED */
1447 int
1448 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1449 struct proc *p)
1450 {
1451 struct bpf_d *d;
1452 int error = 0;
1453 u_int int_arg;
1454 struct ifreq ifr;
1455
1456 lck_mtx_lock(bpf_mlock);
1457
1458 d = bpf_dtab[minor(dev)];
1459 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1460 lck_mtx_unlock(bpf_mlock);
1461 return (ENXIO);
1462 }
1463
1464 bpf_acquire_d(d);
1465
1466 if (d->bd_state == BPF_WAITING)
1467 bpf_stop_timer(d);
1468 d->bd_state = BPF_IDLE;
1469
1470 switch (cmd) {
1471
1472 default:
1473 error = EINVAL;
1474 break;
1475
1476 /*
1477 * Check for read packet available.
1478 */
1479 case FIONREAD: /* int */
1480 {
1481 int n;
1482
1483 n = d->bd_slen;
1484 if (d->bd_hbuf && d->bd_hbuf_read == 0)
1485 n += d->bd_hlen;
1486
1487 bcopy(&n, addr, sizeof (n));
1488 break;
1489 }
1490
1491 case SIOCGIFADDR: /* struct ifreq */
1492 {
1493 struct ifnet *ifp;
1494
1495 if (d->bd_bif == 0)
1496 error = EINVAL;
1497 else {
1498 ifp = d->bd_bif->bif_ifp;
1499 error = ifnet_ioctl(ifp, 0, cmd, addr);
1500 }
1501 break;
1502 }
1503
1504 /*
1505 * Get buffer len [for read()].
1506 */
1507 case BIOCGBLEN: /* u_int */
1508 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1509 break;
1510
1511 /*
1512 * Set buffer length.
1513 */
1514 case BIOCSBLEN: /* u_int */
1515 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING))
1516 error = EINVAL;
1517 else {
1518 u_int size;
1519
1520 bcopy(addr, &size, sizeof (size));
1521
1522 /*
1523 * Allow larger buffer in head drop mode with the
1524 * assumption the capture is in standby mode to
1525 * keep a cache of recent traffic
1526 */
1527 if (d->bd_headdrop != 0 && size > 2 * bpf_maxbufsize)
1528 size = 2 * bpf_maxbufsize;
1529 else if (size > bpf_maxbufsize)
1530 size = bpf_maxbufsize;
1531 else if (size < BPF_MINBUFSIZE)
1532 size = BPF_MINBUFSIZE;
1533 bcopy(&size, addr, sizeof (size));
1534 d->bd_bufsize = size;
1535 }
1536 break;
1537
1538 /*
1539 * Set link layer read filter.
1540 */
1541 case BIOCSETF32:
1542 case BIOCSETFNR32: { /* struct bpf_program32 */
1543 struct bpf_program32 prg32;
1544
1545 bcopy(addr, &prg32, sizeof (prg32));
1546 error = bpf_setf(d, prg32.bf_len,
1547 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1548 break;
1549 }
1550
1551 case BIOCSETF64:
1552 case BIOCSETFNR64: { /* struct bpf_program64 */
1553 struct bpf_program64 prg64;
1554
1555 bcopy(addr, &prg64, sizeof (prg64));
1556 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1557 break;
1558 }
1559
1560 /*
1561 * Flush read packet buffer.
1562 */
1563 case BIOCFLUSH:
1564 while (d->bd_hbuf_read) {
1565 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1566 }
1567 if ((d->bd_flags & BPF_CLOSING) != 0) {
1568 error = ENXIO;
1569 break;
1570 }
1571 reset_d(d);
1572 break;
1573
1574 /*
1575 * Put interface into promiscuous mode.
1576 */
1577 case BIOCPROMISC:
1578 if (d->bd_bif == 0) {
1579 /*
1580 * No interface attached yet.
1581 */
1582 error = EINVAL;
1583 break;
1584 }
1585 if (d->bd_promisc == 0) {
1586 lck_mtx_unlock(bpf_mlock);
1587 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1588 lck_mtx_lock(bpf_mlock);
1589 if (error == 0)
1590 d->bd_promisc = 1;
1591 }
1592 break;
1593
1594 /*
1595 * Get device parameters.
1596 */
1597 case BIOCGDLT: /* u_int */
1598 if (d->bd_bif == 0)
1599 error = EINVAL;
1600 else
1601 bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1602 break;
1603
1604 /*
1605 * Get a list of supported data link types.
1606 */
1607 case BIOCGDLTLIST: /* struct bpf_dltlist */
1608 if (d->bd_bif == NULL) {
1609 error = EINVAL;
1610 } else {
1611 error = bpf_getdltlist(d, addr, p);
1612 }
1613 break;
1614
1615 /*
1616 * Set data link type.
1617 */
1618 case BIOCSDLT: /* u_int */
1619 if (d->bd_bif == NULL) {
1620 error = EINVAL;
1621 } else {
1622 u_int dlt;
1623
1624 bcopy(addr, &dlt, sizeof (dlt));
1625
1626 if (dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
1627 printf("BIOCSDLT downgrade DLT_PKTAP to DLT_RAW\n");
1628 dlt = DLT_RAW;
1629 }
1630 error = bpf_setdlt(d, dlt);
1631 }
1632 break;
1633
1634 /*
1635 * Get interface name.
1636 */
1637 case BIOCGETIF: /* struct ifreq */
1638 if (d->bd_bif == 0)
1639 error = EINVAL;
1640 else {
1641 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1642
1643 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1644 sizeof (ifr.ifr_name), "%s", if_name(ifp));
1645 }
1646 break;
1647
1648 /*
1649 * Set interface.
1650 */
1651 case BIOCSETIF: { /* struct ifreq */
1652 ifnet_t ifp;
1653
1654 bcopy(addr, &ifr, sizeof (ifr));
1655 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1656 ifp = ifunit(ifr.ifr_name);
1657 if (ifp == NULL)
1658 error = ENXIO;
1659 else
1660 error = bpf_setif(d, ifp);
1661 break;
1662 }
1663
1664 /*
1665 * Set read timeout.
1666 */
1667 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
1668 struct user32_timeval _tv;
1669 struct timeval tv;
1670
1671 bcopy(addr, &_tv, sizeof (_tv));
1672 tv.tv_sec = _tv.tv_sec;
1673 tv.tv_usec = _tv.tv_usec;
1674
1675 /*
1676 * Subtract 1 tick from tvtohz() since this isn't
1677 * a one-shot timer.
1678 */
1679 if ((error = itimerfix(&tv)) == 0)
1680 d->bd_rtout = tvtohz(&tv) - 1;
1681 break;
1682 }
1683
1684 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
1685 struct user64_timeval _tv;
1686 struct timeval tv;
1687
1688 bcopy(addr, &_tv, sizeof (_tv));
1689 tv.tv_sec = _tv.tv_sec;
1690 tv.tv_usec = _tv.tv_usec;
1691
1692 /*
1693 * Subtract 1 tick from tvtohz() since this isn't
1694 * a one-shot timer.
1695 */
1696 if ((error = itimerfix(&tv)) == 0)
1697 d->bd_rtout = tvtohz(&tv) - 1;
1698 break;
1699 }
1700
1701 /*
1702 * Get read timeout.
1703 */
1704 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1705 struct user32_timeval tv;
1706
1707 bzero(&tv, sizeof (tv));
1708 tv.tv_sec = d->bd_rtout / hz;
1709 tv.tv_usec = (d->bd_rtout % hz) * tick;
1710 bcopy(&tv, addr, sizeof (tv));
1711 break;
1712 }
1713
1714 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1715 struct user64_timeval tv;
1716
1717 bzero(&tv, sizeof (tv));
1718 tv.tv_sec = d->bd_rtout / hz;
1719 tv.tv_usec = (d->bd_rtout % hz) * tick;
1720 bcopy(&tv, addr, sizeof (tv));
1721 break;
1722 }
1723
1724 /*
1725 * Get packet stats.
1726 */
1727 case BIOCGSTATS: { /* struct bpf_stat */
1728 struct bpf_stat bs;
1729
1730 bzero(&bs, sizeof (bs));
1731 bs.bs_recv = d->bd_rcount;
1732 bs.bs_drop = d->bd_dcount;
1733 bcopy(&bs, addr, sizeof (bs));
1734 break;
1735 }
1736
1737 /*
1738 * Set immediate mode.
1739 */
1740 case BIOCIMMEDIATE: /* u_int */
1741 d->bd_immediate = *(u_int *)(void *)addr;
1742 break;
1743
1744 case BIOCVERSION: { /* struct bpf_version */
1745 struct bpf_version bv;
1746
1747 bzero(&bv, sizeof (bv));
1748 bv.bv_major = BPF_MAJOR_VERSION;
1749 bv.bv_minor = BPF_MINOR_VERSION;
1750 bcopy(&bv, addr, sizeof (bv));
1751 break;
1752 }
1753
1754 /*
1755 * Get "header already complete" flag
1756 */
1757 case BIOCGHDRCMPLT: /* u_int */
1758 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1759 break;
1760
1761 /*
1762 * Set "header already complete" flag
1763 */
1764 case BIOCSHDRCMPLT: /* u_int */
1765 bcopy(addr, &int_arg, sizeof (int_arg));
1766 d->bd_hdrcmplt = int_arg ? 1 : 0;
1767 break;
1768
1769 /*
1770 * Get "see sent packets" flag
1771 */
1772 case BIOCGSEESENT: /* u_int */
1773 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1774 break;
1775
1776 /*
1777 * Set "see sent packets" flag
1778 */
1779 case BIOCSSEESENT: /* u_int */
1780 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1781 break;
1782
1783 /*
1784 * Set traffic service class
1785 */
1786 case BIOCSETTC: { /* int */
1787 int tc;
1788
1789 bcopy(addr, &tc, sizeof (int));
1790 error = bpf_set_traffic_class(d, tc);
1791 break;
1792 }
1793
1794 /*
1795 * Get traffic service class
1796 */
1797 case BIOCGETTC: /* int */
1798 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1799 break;
1800
1801 case FIONBIO: /* Non-blocking I/O; int */
1802 break;
1803
1804 case FIOASYNC: /* Send signal on receive packets; int */
1805 bcopy(addr, &d->bd_async, sizeof (int));
1806 break;
1807 #ifndef __APPLE__
1808 case FIOSETOWN:
1809 error = fsetown(*(int *)addr, &d->bd_sigio);
1810 break;
1811
1812 case FIOGETOWN:
1813 *(int *)addr = fgetown(d->bd_sigio);
1814 break;
1815
1816 /* This is deprecated, FIOSETOWN should be used instead. */
1817 case TIOCSPGRP:
1818 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1819 break;
1820
1821 /* This is deprecated, FIOGETOWN should be used instead. */
1822 case TIOCGPGRP:
1823 *(int *)addr = -fgetown(d->bd_sigio);
1824 break;
1825 #endif
1826 case BIOCSRSIG: { /* Set receive signal; u_int */
1827 u_int sig;
1828
1829 bcopy(addr, &sig, sizeof (u_int));
1830
1831 if (sig >= NSIG)
1832 error = EINVAL;
1833 else
1834 d->bd_sig = sig;
1835 break;
1836 }
1837 case BIOCGRSIG: /* u_int */
1838 bcopy(&d->bd_sig, addr, sizeof (u_int));
1839 break;
1840 #ifdef __APPLE__
1841 case BIOCSEXTHDR: /* u_int */
1842 bcopy(addr, &int_arg, sizeof (int_arg));
1843 if (int_arg)
1844 d->bd_flags |= BPF_EXTENDED_HDR;
1845 else
1846 d->bd_flags &= ~BPF_EXTENDED_HDR;
1847 break;
1848
1849 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
1850 ifnet_t ifp;
1851 struct bpf_if *bp;
1852
1853 bcopy(addr, &ifr, sizeof (ifr));
1854 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1855 ifp = ifunit(ifr.ifr_name);
1856 if (ifp == NULL) {
1857 error = ENXIO;
1858 break;
1859 }
1860 ifr.ifr_intval = 0;
1861 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1862 struct bpf_d *bpf_d;
1863
1864 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
1865 continue;
1866 for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) {
1867 ifr.ifr_intval += 1;
1868 }
1869 }
1870 bcopy(&ifr, addr, sizeof (ifr));
1871 break;
1872 }
1873 case BIOCGWANTPKTAP: /* u_int */
1874 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
1875 bcopy(&int_arg, addr, sizeof (int_arg));
1876 break;
1877
1878 case BIOCSWANTPKTAP: /* u_int */
1879 bcopy(addr, &int_arg, sizeof (int_arg));
1880 if (int_arg)
1881 d->bd_flags |= BPF_WANT_PKTAP;
1882 else
1883 d->bd_flags &= ~BPF_WANT_PKTAP;
1884 break;
1885 #endif
1886
1887 case BIOCSHEADDROP:
1888 bcopy(addr, &int_arg, sizeof (int_arg));
1889 d->bd_headdrop = int_arg ? 1 : 0;
1890 break;
1891
1892 case BIOCGHEADDROP:
1893 bcopy(&d->bd_headdrop, addr, sizeof (int));
1894 break;
1895 }
1896
1897 bpf_release_d(d);
1898 lck_mtx_unlock(bpf_mlock);
1899
1900 return (error);
1901 }
1902
1903 /*
1904 * Set d's packet filter program to fp. If this file already has a filter,
1905 * free it and replace it. Returns EINVAL for bogus requests.
1906 */
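/*
 * Illustrative sketch (not part of the original source): userland installs
 * a filter with BIOCSETF and a struct bpf_program; for example, a single
 * accept-everything instruction ("fd" is an open /dev/bpfN descriptor):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *
 *	if (ioctl(fd, BIOCSETF, &prog) == -1)
 *		err(1, "BIOCSETF");
 */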
1907 static int
1908 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
1909 u_long cmd)
1910 {
1911 struct bpf_insn *fcode, *old;
1912 u_int flen, size;
1913
1914 while (d->bd_hbuf_read)
1915 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1916
1917 if ((d->bd_flags & BPF_CLOSING) != 0)
1918 return (ENXIO);
1919
1920 old = d->bd_filter;
1921 if (bf_insns == USER_ADDR_NULL) {
1922 if (bf_len != 0)
1923 return (EINVAL);
1924 d->bd_filter = NULL;
1925 reset_d(d);
1926 if (old != 0)
1927 FREE((caddr_t)old, M_DEVBUF);
1928 return (0);
1929 }
1930 flen = bf_len;
1931 if (flen > BPF_MAXINSNS)
1932 return (EINVAL);
1933
1934 size = flen * sizeof(struct bpf_insn);
1935 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
1936 #ifdef __APPLE__
1937 if (fcode == NULL)
1938 return (ENOBUFS);
1939 #endif
1940 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1941 bpf_validate(fcode, (int)flen)) {
1942 d->bd_filter = fcode;
1943
1944 if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
1945 reset_d(d);
1946
1947 if (old != 0)
1948 FREE((caddr_t)old, M_DEVBUF);
1949
1950 return (0);
1951 }
1952 FREE((caddr_t)fcode, M_DEVBUF);
1953 return (EINVAL);
1954 }
1955
1956 /*
1957 * Detach a file from its current interface (if attached at all) and attach
1958 * to the interface indicated by the name stored in ifr.
1959 * Return an errno or 0.
1960 */
1961 static int
1962 bpf_setif(struct bpf_d *d, ifnet_t theywant)
1963 {
1964 struct bpf_if *bp;
1965 int error;
1966
1967 while (d->bd_hbuf_read)
1968 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1969
1970 if ((d->bd_flags & BPF_CLOSING) != 0)
1971 return (ENXIO);
1972
1973 /*
1974 * Look through attached interfaces for the named one.
1975 */
1976 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1977 struct ifnet *ifp = bp->bif_ifp;
1978
1979 if (ifp == 0 || ifp != theywant)
1980 continue;
1981 /*
1982 * Do not use DLT_PKTAP, unless requested explicitly
1983 */
1984 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
1985 continue;
1986 /*
1987 * Skip the coprocessor interface
1988 */
1989 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp))
1990 continue;
1991 /*
1992 * We found the requested interface.
1993 * Allocate the packet buffers.
1994 */
1995 error = bpf_allocbufs(d);
1996 if (error != 0)
1997 return (error);
1998 /*
1999 * Detach if attached to something else.
2000 */
2001 if (bp != d->bd_bif) {
2002 if (d->bd_bif != NULL) {
2003 if (bpf_detachd(d, 0) != 0)
2004 return (ENXIO);
2005 }
2006 if (bpf_attachd(d, bp) != 0)
2007 return (ENXIO);
2008 }
2009 reset_d(d);
2010 return (0);
2011 }
2012 /* Not found. */
2013 return (ENXIO);
2014 }
2015
2016
2017
2018 /*
2019 * Get a list of the available data link types of the interface.
2020 */
2021 static int
2022 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2023 {
2024 u_int n;
2025 int error;
2026 struct ifnet *ifp;
2027 struct bpf_if *bp;
2028 user_addr_t dlist;
2029 struct bpf_dltlist bfl;
2030
2031 bcopy(addr, &bfl, sizeof (bfl));
2032 if (proc_is64bit(p)) {
2033 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2034 } else {
2035 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2036 }
2037
2038 ifp = d->bd_bif->bif_ifp;
2039 n = 0;
2040 error = 0;
2041
2042 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2043 if (bp->bif_ifp != ifp)
2044 continue;
2045 /*
2046 * Do not use DLT_PKTAP, unless requested explicitly
2047 */
2048 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2049 continue;
2050 if (dlist != USER_ADDR_NULL) {
2051 if (n >= bfl.bfl_len) {
2052 return (ENOMEM);
2053 }
2054 error = copyout(&bp->bif_dlt, dlist,
2055 sizeof (bp->bif_dlt));
2056 if (error != 0)
2057 break;
2058 dlist += sizeof (bp->bif_dlt);
2059 }
2060 n++;
2061 }
2062 bfl.bfl_len = n;
2063 bcopy(&bfl, addr, sizeof (bfl));
2064
2065 return (error);
2066 }
2067
2068 /*
2069 * Set the data link type of a BPF instance.
2070 */
2071 static int
2072 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2073 {
2074 int error, opromisc;
2075 struct ifnet *ifp;
2076 struct bpf_if *bp;
2077
2078 if (d->bd_bif->bif_dlt == dlt)
2079 return (0);
2080
2081 while (d->bd_hbuf_read)
2082 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2083
2084 if ((d->bd_flags & BPF_CLOSING) != 0)
2085 return (ENXIO);
2086
2087 ifp = d->bd_bif->bif_ifp;
2088 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2089 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2090 /*
2091 * Do not use DLT_PKTAP, unless requested explicitly
2092 */
2093 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2094 continue;
2095 }
2096 break;
2097 }
2098 }
2099 if (bp != NULL) {
2100 opromisc = d->bd_promisc;
2101 if (bpf_detachd(d, 0) != 0)
2102 return (ENXIO);
2103 error = bpf_attachd(d, bp);
2104 if (error) {
2105 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2106 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
2107 return error;
2108 }
2109 reset_d(d);
2110 if (opromisc) {
2111 lck_mtx_unlock(bpf_mlock);
2112 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2113 lck_mtx_lock(bpf_mlock);
2114 if (error) {
2115 printf("%s: ifpromisc %s%d failed (%d)\n",
2116 __func__, ifnet_name(bp->bif_ifp),
2117 ifnet_unit(bp->bif_ifp), error);
2118 } else {
2119 d->bd_promisc = 1;
2120 }
2121 }
2122 }
2123 return (bp == NULL ? EINVAL : 0);
2124 }
2125
2126 static int
2127 bpf_set_traffic_class(struct bpf_d *d, int tc)
2128 {
2129 int error = 0;
2130
2131 if (!SO_VALID_TC(tc))
2132 error = EINVAL;
2133 else
2134 d->bd_traffic_class = tc;
2135
2136 return (error);
2137 }
2138
2139 static void
2140 bpf_set_packet_service_class(struct mbuf *m, int tc)
2141 {
2142 if (!(m->m_flags & M_PKTHDR))
2143 return;
2144
2145 VERIFY(SO_VALID_TC(tc));
2146 (void) m_set_service_class(m, so_tc2msc(tc));
2147 }
2148
2149 /*
2150 * Support for select()
2151 *
2152 * Return true iff the specific operation will not block indefinitely.
2153 * Otherwise, return false but make a note that a selwakeup() must be done.
2154 */
2155 int
2156 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2157 {
2158 struct bpf_d *d;
2159 int ret = 0;
2160
2161 lck_mtx_lock(bpf_mlock);
2162
2163 d = bpf_dtab[minor(dev)];
2164 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
2165 lck_mtx_unlock(bpf_mlock);
2166 return (ENXIO);
2167 }
2168
2169 bpf_acquire_d(d);
2170
2171 if (d->bd_bif == NULL) {
2172 bpf_release_d(d);
2173 lck_mtx_unlock(bpf_mlock);
2174 return (ENXIO);
2175 }
2176
2177 while (d->bd_hbuf_read)
2178 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2179
2180 if ((d->bd_flags & BPF_CLOSING) != 0) {
2181 bpf_release_d(d);
2182 lck_mtx_unlock(bpf_mlock);
2183 return (ENXIO);
2184 }
2185
2186 switch (which) {
2187 case FREAD:
2188 if (d->bd_hlen != 0 ||
2189 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
2190 d->bd_slen != 0))
2191 ret = 1; /* read has data to return */
2192 else {
2193 /*
2194 * Read has no data to return.
2195 * Make the select wait, and start a timer if
2196 * necessary.
2197 */
2198 selrecord(p, &d->bd_sel, wql);
2199 bpf_start_timer(d);
2200 }
2201 break;
2202
2203 case FWRITE:
2204 ret = 1; /* can't determine whether a write would block */
2205 break;
2206 }
2207
2208 bpf_release_d(d);
2209 lck_mtx_unlock(bpf_mlock);
2210
2211 return (ret);
2212 }
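
/*
 * Illustrative userland sketch (not part of this file): blocking in
 * select() until the descriptor is readable, which is decided by
 * bpfselect() above.  With BIOCIMMEDIATE set, readability is reported
 * as soon as the store buffer holds any data; otherwise the caller
 * waits for a buffer rotation or for the read timeout (BIOCSRTIMEOUT)
 * to expire.  Error handling is elided; buf/buf_len are assumed to
 * match the buffer size reported by BIOCGBLEN.
 *
 *	u_int enable = 1;
 *	fd_set rfds;
 *
 *	(void) ioctl(bpf_fd, BIOCIMMEDIATE, &enable);
 *	FD_ZERO(&rfds);
 *	FD_SET(bpf_fd, &rfds);
 *	if (select(bpf_fd + 1, &rfds, NULL, NULL, NULL) > 0)
 *		(void) read(bpf_fd, buf, buf_len);
 */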
2213
2214
2215 /*
2216 * Support for kevent() system call. Register EVFILT_READ filters and
2217 * reject all others.
2218 */
2219 int bpfkqfilter(dev_t dev, struct knote *kn);
2220 static void filt_bpfdetach(struct knote *);
2221 static int filt_bpfread(struct knote *, long);
2222 static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
2223 static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
2224
2225 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2226 .f_isfd = 1,
2227 .f_detach = filt_bpfdetach,
2228 .f_event = filt_bpfread,
2229 .f_touch = filt_bpftouch,
2230 .f_process = filt_bpfprocess,
2231 };
2232
2233 static int
2234 filt_bpfread_common(struct knote *kn, struct bpf_d *d)
2235 {
2236 int ready = 0;
2237
2238 if (d->bd_immediate) {
2239 /*
2240 * If there's data in the hold buffer, it's the
2241 * amount of data a read will return.
2242 *
2243 * If there's no data in the hold buffer, but
2244 * there's data in the store buffer, a read will
2245 * immediately rotate the store buffer to the
2246 * hold buffer, so the amount of data in the store
2247 * buffer is the amount of data a read will
2248 * return.
2249 *
2250 * If there's no data in either buffer, we're not
2251 * ready to read.
2252 */
2253 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read)
2254 ? d->bd_slen : d->bd_hlen);
2255 int64_t lowwat = 1;
2256 if (kn->kn_sfflags & NOTE_LOWAT)
2257 {
2258 if (kn->kn_sdata > d->bd_bufsize)
2259 lowwat = d->bd_bufsize;
2260 else if (kn->kn_sdata > lowwat)
2261 lowwat = kn->kn_sdata;
2262 }
2263 ready = (kn->kn_data >= lowwat);
2264 } else {
2265 /*
2266 * If there's data in the hold buffer, it's the
2267 * amount of data a read will return.
2268 *
2269 * If there's no data in the hold buffer, but
2270 * there's data in the store buffer, if the
2271 * timer has expired a read will immediately
2272 * rotate the store buffer to the hold buffer,
2273 * so the amount of data in the store buffer is
2274 * the amount of data a read will return.
2275 *
2276 * If there's no data in either buffer, or there's
2277 * no data in the hold buffer and the timer hasn't
2278 * expired, we're not ready to read.
2279 */
2280 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) && d->bd_state == BPF_TIMED_OUT ?
2281 d->bd_slen : d->bd_hlen);
2282 ready = (kn->kn_data > 0);
2283 }
2284 if (!ready)
2285 bpf_start_timer(d);
2286
2287 return (ready);
2288 }
2289
2290 int
2291 bpfkqfilter(dev_t dev, struct knote *kn)
2292 {
2293 struct bpf_d *d;
2294 int res;
2295
2296 /*
2297 * Is this device a bpf?
2298 */
2299 if (major(dev) != CDEV_MAJOR ||
2300 kn->kn_filter != EVFILT_READ) {
2301 kn->kn_flags = EV_ERROR;
2302 kn->kn_data = EINVAL;
2303 return 0;
2304 }
2305
2306 lck_mtx_lock(bpf_mlock);
2307
2308 d = bpf_dtab[minor(dev)];
2309
2310 if (d == 0 ||
2311 d == (void *)1 ||
2312 d->bd_bif == NULL ||
2313 (d->bd_flags & BPF_CLOSING) != 0) {
2314 lck_mtx_unlock(bpf_mlock);
2315 kn->kn_flags = EV_ERROR;
2316 kn->kn_data = ENXIO;
2317 return 0;
2318 }
2319
2320 kn->kn_hook = d;
2321 kn->kn_filtid = EVFILTID_BPFREAD;
2322 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2323 d->bd_flags |= BPF_KNOTE;
2324
2325 /* capture the current state */
2326 res = filt_bpfread_common(kn, d);
2327
2328 lck_mtx_unlock(bpf_mlock);
2329
2330 return (res);
2331 }
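
/*
 * Illustrative userland sketch (not part of this file): registering an
 * EVFILT_READ filter on a bpf descriptor, which is attached by
 * bpfkqfilter() and evaluated by filt_bpfread_common() above.  With
 * NOTE_LOWAT, the data argument raises the readiness threshold in
 * immediate mode (capped at the descriptor's buffer size).  Error
 * handling is elided.
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, bpf_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 1024, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 *	if (kevent(kq, NULL, 0, &kev, 1, NULL) > 0)
 *		(void) read(bpf_fd, buf, buf_len);
 */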
2332
2333 static void
2334 filt_bpfdetach(struct knote *kn)
2335 {
2336 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2337
2338 lck_mtx_lock(bpf_mlock);
2339 if (d->bd_flags & BPF_KNOTE) {
2340 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2341 d->bd_flags &= ~BPF_KNOTE;
2342 }
2343 lck_mtx_unlock(bpf_mlock);
2344 }
2345
2346 static int
2347 filt_bpfread(struct knote *kn, long hint)
2348 {
2349 #pragma unused(hint)
2350 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2351
2352 return filt_bpfread_common(kn, d);
2353 }
2354
2355 static int
2356 filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
2357 {
2358 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2359 int res;
2360
2361 lck_mtx_lock(bpf_mlock);
2362
2363 /* save off the lowat threshold and flag */
2364 kn->kn_sdata = kev->data;
2365 kn->kn_sfflags = kev->fflags;
2366 if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
2367 kn->kn_udata = kev->udata;
2368
2369 /* output data will be re-generated here */
2370 res = filt_bpfread_common(kn, d);
2371
2372 lck_mtx_unlock(bpf_mlock);
2373
2374 return res;
2375 }
2376
2377 static int
2378 filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
2379 {
2380 #pragma unused(data)
2381 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2382 int res;
2383
2384 lck_mtx_lock(bpf_mlock);
2385 res = filt_bpfread_common(kn, d);
2386 if (res) {
2387 *kev = kn->kn_kevent;
2388 }
2389 lck_mtx_unlock(bpf_mlock);
2390
2391 return res;
2392 }
2393
2394 /*
2395 * Copy data from an mbuf chain into a buffer. This code is derived
2396 * from m_copydata in kern/uipc_mbuf.c.
2397 */
2398 static void
2399 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2400 {
2401 u_int count;
2402 u_char *dst;
2403
2404 dst = dst_arg;
2405 while (len > 0) {
2406 if (m == 0)
2407 panic("bpf_mcopy");
2408 count = min(m->m_len, len);
2409 bcopy(mbuf_data(m), dst, count);
2410 m = m->m_next;
2411 dst += count;
2412 len -= count;
2413 }
2414 }
2415
2416 static inline void
2417 bpf_tap_imp(
2418 ifnet_t ifp,
2419 u_int32_t dlt,
2420 struct bpf_packet *bpf_pkt,
2421 int outbound)
2422 {
2423 struct bpf_d *d;
2424 u_int slen;
2425 struct bpf_if *bp;
2426
2427 /*
2428 * It's possible that we get here after the bpf descriptor has been
2429 * detached from the interface; in such a case we simply return.
2430 * Lock ordering is important since we can be called asynchronously
2431 * (from IOKit) to process an inbound packet; when that happens
2432 * we would have been holding its "gateLock" and will be acquiring
2433 * "bpf_mlock" upon entering this routine. Due to that, we release
2434 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2435 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2436 * when an ifnet_set_promiscuous request collides with
2437 * an inbound packet being passed into the tap callback.
2438 */
2439 lck_mtx_lock(bpf_mlock);
2440 if (ifp->if_bpf == NULL) {
2441 lck_mtx_unlock(bpf_mlock);
2442 return;
2443 }
2444 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2445 if (bp->bif_ifp != ifp) {
2446 /* wrong interface */
2447 bp = NULL;
2448 break;
2449 }
2450 if (dlt == 0 || bp->bif_dlt == dlt) {
2451 /* tapping default DLT or DLT matches */
2452 break;
2453 }
2454 }
2455 if (bp == NULL) {
2456 goto done;
2457 }
2458 for (d = bp->bif_dlist; d; d = d->bd_next) {
2459 if (outbound && !d->bd_seesent)
2460 continue;
2461 ++d->bd_rcount;
2462 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2463 bpf_pkt->bpfp_total_length, 0);
2464 if (slen != 0) {
2465 #if CONFIG_MACF_NET
2466 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
2467 continue;
2468 #endif
2469 catchpacket(d, bpf_pkt, slen, outbound);
2470 }
2471 }
2472
2473 done:
2474 lck_mtx_unlock(bpf_mlock);
2475 }
2476
2477 static inline void
2478 bpf_tap_mbuf(
2479 ifnet_t ifp,
2480 u_int32_t dlt,
2481 mbuf_t m,
2482 void* hdr,
2483 size_t hlen,
2484 int outbound)
2485 {
2486 struct bpf_packet bpf_pkt;
2487 struct mbuf *m0;
2488
2489 if (ifp->if_bpf == NULL) {
2490 /* quickly check without taking lock */
2491 return;
2492 }
2493 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2494 bpf_pkt.bpfp_mbuf = m;
2495 bpf_pkt.bpfp_total_length = 0;
2496 for (m0 = m; m0 != NULL; m0 = m0->m_next)
2497 bpf_pkt.bpfp_total_length += m0->m_len;
2498 bpf_pkt.bpfp_header = hdr;
2499 if (hdr != NULL) {
2500 bpf_pkt.bpfp_total_length += hlen;
2501 bpf_pkt.bpfp_header_length = hlen;
2502 } else {
2503 bpf_pkt.bpfp_header_length = 0;
2504 }
2505 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2506 }
2507
2508 void
2509 bpf_tap_out(
2510 ifnet_t ifp,
2511 u_int32_t dlt,
2512 mbuf_t m,
2513 void* hdr,
2514 size_t hlen)
2515 {
2516 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2517 }
2518
2519 void
2520 bpf_tap_in(
2521 ifnet_t ifp,
2522 u_int32_t dlt,
2523 mbuf_t m,
2524 void* hdr,
2525 size_t hlen)
2526 {
2527 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2528 }
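
/*
 * Illustrative sketch (not part of this file): a hypothetical driver
 * feeding its traffic to BPF through the bpf_tap_out()/bpf_tap_in()
 * entry points above.  Passing 0 as the DLT taps the interface's
 * default data link type; the optional header argument lets a driver
 * prepend link-layer framing that is not part of the mbuf chain.
 *
 *	// transmit path, before handing the packet to the hardware
 *	bpf_tap_out(ifp, 0, m, NULL, 0);
 *
 *	// receive path, before passing the packet up the stack
 *	bpf_tap_in(ifp, 0, m, NULL, 0);
 */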
2529
2530 /* Callback registered with Ethernet driver. */
2531 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2532 {
2533 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2534
2535 return 0;
2536 }
2537
2538
2539 static void
2540 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2541 {
2542 /* copy the optional header */
2543 if (pkt->bpfp_header_length != 0) {
2544 size_t count = min(len, pkt->bpfp_header_length);
2545 bcopy(pkt->bpfp_header, dst, count);
2546 len -= count;
2547 dst += count;
2548 }
2549 if (len == 0) {
2550 /* nothing past the header */
2551 return;
2552 }
2553 /* copy the packet */
2554 switch (pkt->bpfp_type) {
2555 case BPF_PACKET_TYPE_MBUF:
2556 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2557 break;
2558 default:
2559 break;
2560 }
2561 }
2562
2563 /*
2564 * Move the packet data from interface memory (pkt) into the
2565 * store buffer.  Wake up any pending reads when the buffers are
2566 * rotated, or when immediate mode is set or the read timeout has expired.
2567 */
2568 static void
2569 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
2570 u_int snaplen, int outbound)
2571 {
2572 struct bpf_hdr *hp;
2573 struct bpf_hdr_ext *ehp;
2574 int totlen, curlen;
2575 int hdrlen, caplen;
2576 int do_wakeup = 0;
2577 u_char *payload;
2578 struct timeval tv;
2579
2580 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
2581 d->bd_bif->bif_hdrlen;
2582 /*
2583 * Figure out how many bytes to move. If the packet is
2584 * greater than or equal to the snapshot length, transfer that
2585 * much. Otherwise, transfer the whole packet (unless
2586 * we hit the buffer size limit).
2587 */
2588 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
2589 if (totlen > d->bd_bufsize)
2590 totlen = d->bd_bufsize;
2591
2592 if (hdrlen > totlen)
2593 return;
2594
2595 /*
2596 * Round up the end of the previous packet to the next longword.
2597 */
2598 curlen = BPF_WORDALIGN(d->bd_slen);
2599 if (curlen + totlen > d->bd_bufsize) {
2600 /*
2601 * This packet will overflow the storage buffer.
2602 * Rotate the buffers if we can, then wakeup any
2603 * pending reads.
2604 *
2605 * We cannot rotate buffers if a read is in progress,
2606 * so drop the packet.
2607 */
2608 if (d->bd_hbuf_read) {
2609 ++d->bd_dcount;
2610 return;
2611 }
2612
2613 if (d->bd_fbuf == NULL) {
2614 if (d->bd_headdrop == 0) {
2615 /*
2616 * We haven't completed the previous read yet,
2617 * so drop the packet.
2618 */
2619 ++d->bd_dcount;
2620 return;
2621 }
2622 /*
2623 * Drop the hold buffer as it contains older packets
2624 */
2625 d->bd_dcount += d->bd_hcnt;
2626 d->bd_fbuf = d->bd_hbuf;
2627 ROTATE_BUFFERS(d);
2628 } else {
2629 ROTATE_BUFFERS(d);
2630 }
2631 do_wakeup = 1;
2632 curlen = 0;
2633 }
2634 else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
2635 /*
2636 * Immediate mode is set, or the read timeout has
2637 * already expired during a select call. A packet
2638 * arrived, so the reader should be woken up.
2639 */
2640 do_wakeup = 1;
2641
2642 /*
2643 * Append the bpf header.
2644 */
2645 microtime(&tv);
2646 if (d->bd_flags & BPF_EXTENDED_HDR) {
2647 struct mbuf *m;
2648
2649 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
2650 ? pkt->bpfp_mbuf : NULL;
2651 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
2652 memset(ehp, 0, sizeof(*ehp));
2653 ehp->bh_tstamp.tv_sec = tv.tv_sec;
2654 ehp->bh_tstamp.tv_usec = tv.tv_usec;
2655
2656 ehp->bh_datalen = pkt->bpfp_total_length;
2657 ehp->bh_hdrlen = hdrlen;
2658 caplen = ehp->bh_caplen = totlen - hdrlen;
2659 if (m == NULL) {
2660 if (outbound) {
2661 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2662 } else {
2663 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2664 }
2665 } else if (outbound) {
2666 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2667
2668 /* only do lookups on non-raw INPCB */
2669 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
2670 PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
2671 (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
2672 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
2673 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
2674 ehp->bh_proto = m->m_pkthdr.pkt_proto;
2675 }
2676 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
2677 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT)
2678 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
2679 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ)
2680 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
2681 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT)
2682 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
2683 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
2684 ehp->bh_unsent_bytes =
2685 m->m_pkthdr.bufstatus_if;
2686 ehp->bh_unsent_snd =
2687 m->m_pkthdr.bufstatus_sndbuf;
2688 }
2689 } else
2690 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2691 payload = (u_char *)ehp + hdrlen;
2692 } else {
2693 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
2694 hp->bh_tstamp.tv_sec = tv.tv_sec;
2695 hp->bh_tstamp.tv_usec = tv.tv_usec;
2696 hp->bh_datalen = pkt->bpfp_total_length;
2697 hp->bh_hdrlen = hdrlen;
2698 caplen = hp->bh_caplen = totlen - hdrlen;
2699 payload = (u_char *)hp + hdrlen;
2700 }
2701 /*
2702 * Copy the packet data into the store buffer and update its length.
2703 */
2704 copy_bpf_packet(pkt, payload, caplen);
2705 d->bd_slen = curlen + totlen;
2706 d->bd_scnt += 1;
2707
2708 if (do_wakeup)
2709 bpf_wakeup(d);
2710 }
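
/*
 * Illustrative userland sketch (not part of this file): walking the
 * records that catchpacket() lays down in the store buffer once they
 * are returned by read().  Each record starts with a bpf_hdr (or a
 * bpf_hdr_ext when extended headers are enabled for the descriptor)
 * and is padded to a BPF_WORDALIGN boundary, exactly as computed
 * above.  handle_packet() is a hypothetical consumer.
 *
 *	char *p = buf;
 *	ssize_t n = read(bpf_fd, buf, buf_len);
 *
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *
 *		handle_packet((u_char *)p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */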
2711
2712 /*
2713 * Allocate (or reallocate) the packet buffers of a descriptor.
2714 */
2715 static int
2716 bpf_allocbufs(struct bpf_d *d)
2717 {
2718 if (d->bd_sbuf != NULL) {
2719 FREE(d->bd_sbuf, M_DEVBUF);
2720 d->bd_sbuf = NULL;
2721 }
2722 if (d->bd_hbuf != NULL) {
2723 FREE(d->bd_hbuf, M_DEVBUF);
2724 d->bd_hbuf = NULL;
2725 }
2726 if (d->bd_fbuf != NULL) {
2727 FREE(d->bd_fbuf, M_DEVBUF);
2728 d->bd_fbuf = NULL;
2729 }
2730
2731 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2732 if (d->bd_fbuf == NULL)
2733 return (ENOBUFS);
2734
2735 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2736 if (d->bd_sbuf == NULL) {
2737 FREE(d->bd_fbuf, M_DEVBUF);
2738 d->bd_fbuf = NULL;
2739 return (ENOBUFS);
2740 }
2741 d->bd_slen = 0;
2742 d->bd_hlen = 0;
2743 d->bd_scnt = 0;
2744 d->bd_hcnt = 0;
2745 return (0);
2746 }
2747
2748 /*
2749 * Free buffers currently in use by a descriptor.
2750 * Called on close.
2751 */
2752 static void
2753 bpf_freed(struct bpf_d *d)
2754 {
2755 /*
2756 * We don't need to lock out interrupts since this descriptor has
2757 * been detached from its interface and has not yet been marked
2758 * free.
2759 */
2760 if (d->bd_hbuf_read)
2761 panic("bpf buffer freed during read");
2762
2763 if (d->bd_sbuf != 0) {
2764 FREE(d->bd_sbuf, M_DEVBUF);
2765 if (d->bd_hbuf != 0)
2766 FREE(d->bd_hbuf, M_DEVBUF);
2767 if (d->bd_fbuf != 0)
2768 FREE(d->bd_fbuf, M_DEVBUF);
2769 }
2770 if (d->bd_filter)
2771 FREE((caddr_t)d->bd_filter, M_DEVBUF);
2772 }
2773
2774 /*
2775 * Attach an interface to bpf.  ifp is the interface to attach; dlt is
2776 * the link layer type; hdrlen is the fixed size of the link header
2777 * (variable length headers not yet supported).
2778 */
2779 void
2780 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2781 {
2782 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
2783 }
2784
2785 errno_t
2786 bpf_attach(
2787 ifnet_t ifp,
2788 u_int32_t dlt,
2789 u_int32_t hdrlen,
2790 bpf_send_func send,
2791 bpf_tap_func tap)
2792 {
2793 struct bpf_if *bp;
2794 struct bpf_if *bp_new;
2795 struct bpf_if *bp_before_first = NULL;
2796 struct bpf_if *bp_first = NULL;
2797 struct bpf_if *bp_last = NULL;
2798 boolean_t found;
2799
2800 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
2801 M_WAIT | M_ZERO);
2802 if (bp_new == 0)
2803 panic("bpfattach");
2804
2805 lck_mtx_lock(bpf_mlock);
2806
2807 /*
2808 * Check if this interface/dlt is already attached. Remember the
2809 * first and last attachment for this interface, as well as the
2810 * element before the first attachment.
2811 */
2812 found = FALSE;
2813 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
2814 if (bp->bif_ifp != ifp) {
2815 if (bp_first != NULL) {
2816 /* no more elements for this interface */
2817 break;
2818 }
2819 bp_before_first = bp;
2820 } else {
2821 if (bp->bif_dlt == dlt) {
2822 found = TRUE;
2823 break;
2824 }
2825 if (bp_first == NULL) {
2826 bp_first = bp;
2827 }
2828 bp_last = bp;
2829 }
2830 }
2831 if (found) {
2832 lck_mtx_unlock(bpf_mlock);
2833 printf("bpfattach - %s with dlt %d is already attached\n",
2834 if_name(ifp), dlt);
2835 FREE(bp_new, M_DEVBUF);
2836 return EEXIST;
2837 }
2838
2839 bp_new->bif_ifp = ifp;
2840 bp_new->bif_dlt = dlt;
2841 bp_new->bif_send = send;
2842 bp_new->bif_tap = tap;
2843
2844 if (bp_first == NULL) {
2845 /* No other entries for this ifp */
2846 bp_new->bif_next = bpf_iflist;
2847 bpf_iflist = bp_new;
2848 }
2849 else {
2850 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
2851 /* Make this the first entry for this interface */
2852 if (bp_before_first != NULL) {
2853 /* point the previous to us */
2854 bp_before_first->bif_next = bp_new;
2855 } else {
2856 /* we're the new head */
2857 bpf_iflist = bp_new;
2858 }
2859 bp_new->bif_next = bp_first;
2860 } else {
2861 /* Add this after the last entry for this interface */
2862 bp_new->bif_next = bp_last->bif_next;
2863 bp_last->bif_next = bp_new;
2864 }
2865 }
2866
2867 /*
2868 * Compute the length of the bpf header. This is not necessarily
2869 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
2870 * that the network layer header begins on a longword boundary (for
2871 * performance reasons and to alleviate alignment restrictions).
2872 */
2873 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
2874 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
2875 sizeof(struct bpf_hdr_ext)) - hdrlen;
2876
2877 /* Take a reference on the interface */
2878 ifnet_reference(ifp);
2879
2880 lck_mtx_unlock(bpf_mlock);
2881
2882 #ifndef __APPLE__
2883 if (bootverbose)
2884 printf("bpf: %s attached\n", if_name(ifp));
2885 #endif
2886
2887 return 0;
2888 }
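
/*
 * Illustrative sketch (not part of this file): how a hypothetical
 * Ethernet-like driver would register with BPF using the entry points
 * above.  The DLT and header length are assumptions for an Ethernet
 * interface; my_bpf_send/my_bpf_tap stand in for optional
 * driver-supplied callbacks used for writes to the device and for
 * tap on/off notifications.
 *
 *	// legacy form: default DLT, no callbacks
 *	bpfattach(ifp, DLT_EN10MB, sizeof (struct ether_header));
 *
 *	// extended form with optional callbacks
 *	(void) bpf_attach(ifp, DLT_EN10MB, sizeof (struct ether_header),
 *	    my_bpf_send, my_bpf_tap);
 */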
2889
2890 /*
2891 * Detach bpf from an interface. This involves detaching each descriptor
2892 * associated with the interface, and leaving bd_bif NULL. Notify each
2893 * descriptor as it's detached so that any sleepers wake up and get
2894 * ENXIO.
2895 */
2896 void
2897 bpfdetach(struct ifnet *ifp)
2898 {
2899 struct bpf_if *bp, *bp_prev, *bp_next;
2900 struct bpf_d *d;
2901
2902 if (bpf_debug != 0)
2903 printf("%s: %s\n", __func__, if_name(ifp));
2904
2905 lck_mtx_lock(bpf_mlock);
2906
2907 /*
2908 * Detach the descriptors attached to this interface and unlink its
2909 * bpf_if entries, keeping the lock held throughout to maintain
2910 * the integrity of the interface list.
2911 */
2912 bp_prev = NULL;
2913 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
2914 bp_next = bp->bif_next;
2915
2916 if (ifp != bp->bif_ifp) {
2917 bp_prev = bp;
2918 continue;
2919 }
2920 /* Unlink from the interface list */
2921 if (bp_prev)
2922 bp_prev->bif_next = bp->bif_next;
2923 else
2924 bpf_iflist = bp->bif_next;
2925
2926 /* Detach the devices attached to the interface */
2927 while ((d = bp->bif_dlist) != NULL) {
2928 /*
2929 * Take an extra reference to prevent the device
2930 * from being freed when bpf_detachd() releases
2931 * the reference for the interface list
2932 */
2933 bpf_acquire_d(d);
2934 bpf_detachd(d, 0);
2935 bpf_wakeup(d);
2936 bpf_release_d(d);
2937 }
2938 ifnet_release(ifp);
2939 }
2940
2941 lck_mtx_unlock(bpf_mlock);
2942 }
2943
2944 void
2945 bpf_init(__unused void *unused)
2946 {
2947 #ifdef __APPLE__
2948 int i;
2949 int maj;
2950
2951 if (bpf_devsw_installed == 0) {
2952 bpf_devsw_installed = 1;
2953 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
2954 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
2955 bpf_mlock_attr = lck_attr_alloc_init();
2956 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
2957 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
2958 if (maj == -1) {
2959 if (bpf_mlock_attr)
2960 lck_attr_free(bpf_mlock_attr);
2961 if (bpf_mlock_grp)
2962 lck_grp_free(bpf_mlock_grp);
2963 if (bpf_mlock_grp_attr)
2964 lck_grp_attr_free(bpf_mlock_grp_attr);
2965
2966 bpf_mlock = NULL;
2967 bpf_mlock_attr = NULL;
2968 bpf_mlock_grp = NULL;
2969 bpf_mlock_grp_attr = NULL;
2970 bpf_devsw_installed = 0;
2971 printf("bpf_init: failed to allocate a major number!\n");
2972 return;
2973 }
2974
2975 for (i = 0 ; i < NBPFILTER; i++)
2976 bpf_make_dev_t(maj);
2977 }
2978 #else
2979 cdevsw_add(&bpf_cdevsw);
2980 #endif
2981 }
2982
2983 #ifndef __APPLE__
2984 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
2985 #endif
2986
2987 #if CONFIG_MACF_NET
2988 struct label *
2989 mac_bpfdesc_label_get(struct bpf_d *d)
2990 {
2991
2992 return (d->bd_label);
2993 }
2994
2995 void
2996 mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
2997 {
2998
2999 d->bd_label = label;
3000 }
3001 #endif