[apple/xnu.git] bsd/net/bpf.c (xnu-4903.270.47)
1 /*
2 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130
131 #include <kern/locks.h>
132 #include <kern/thread_call.h>
133 #include <libkern/section_keywords.h>
134
135 #if CONFIG_MACF_NET
136 #include <security/mac_framework.h>
137 #endif /* MAC_NET */
138
139 #include <os/log.h>
140
141 extern int tvtohz(struct timeval *);
142
143 #define BPF_BUFSIZE 4096
144 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
145
146 #define PRINET 26 /* interruptible */
147
148 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
149 #define ESP_HDR_SIZE sizeof(struct newesp)
150
151 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
152
153 /*
154 * The default read buffer size is patchable.
155 */
156 static unsigned int bpf_bufsize = BPF_BUFSIZE;
157 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
158 &bpf_bufsize, 0, "");
159 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
160 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
161 &bpf_maxbufsize, 0, "");
162 static unsigned int bpf_maxdevices = 256;
163 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
164 &bpf_maxdevices, 0, "");
165 /*
166  * bpf_wantpktap controls the default visibility of DLT_PKTAP.
167  * On OS X it is off by default, so a process needs to use the BIOCSWANTPKTAP
168  * ioctl explicitly to be able to use DLT_PKTAP.
169 */
170 #if CONFIG_EMBEDDED
171 static unsigned int bpf_wantpktap = 1;
172 #else
173 static unsigned int bpf_wantpktap = 0;
174 #endif
175 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
176 &bpf_wantpktap, 0, "");
177
178 static int bpf_debug = 0;
179 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
180 &bpf_debug, 0, "");
181
182 /*
183 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
184  * bpf_dtab holds pointers to the descriptors, indexed by minor device #
185 */
186 static struct bpf_if *bpf_iflist;
187 #ifdef __APPLE__
188 /*
189 * BSD now stores the bpf_d in the dev_t which is a struct
190 * on their system. Our dev_t is an int, so we still store
191 * the bpf_d in a separate table indexed by minor device #.
192 *
193  * The value stored in bpf_dtab[n] represents one of three states:
194 * NULL: device not opened
195 * BPF_DEV_RESERVED: device opening or closing
196 * other: device <n> opened with pointer to storage
197 */
198 #define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
199 static struct bpf_d **bpf_dtab = NULL;
200 static unsigned int bpf_dtab_size = 0;
201 static unsigned int nbpfilter = 0;
202
203 decl_lck_mtx_data(static, bpf_mlock_data);
204 static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
205 static lck_grp_t *bpf_mlock_grp;
206 static lck_grp_attr_t *bpf_mlock_grp_attr;
207 static lck_attr_t *bpf_mlock_attr;
208
209 #endif /* __APPLE__ */
210
211 static int bpf_allocbufs(struct bpf_d *);
212 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
213 static int bpf_detachd(struct bpf_d *d, int);
214 static void bpf_freed(struct bpf_d *);
215 static int bpf_movein(struct uio *, int,
216 struct mbuf **, struct sockaddr *, int *);
217 static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
218 static void bpf_timed_out(void *, void *);
219 static void bpf_wakeup(struct bpf_d *);
220 static u_int get_pkt_trunc_len(u_char *, u_int);
221 static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
222 static void reset_d(struct bpf_d *);
223 static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
224 static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
225 static int bpf_setdlt(struct bpf_d *, u_int);
226 static int bpf_set_traffic_class(struct bpf_d *, int);
227 static void bpf_set_packet_service_class(struct mbuf *, int);
228
229 static void bpf_acquire_d(struct bpf_d *);
230 static void bpf_release_d(struct bpf_d *);
231
232 static int bpf_devsw_installed;
233
234 void bpf_init(void *unused);
235 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
236
237 /*
238  * Darwin differs from BSD here: the following are static
239  * on BSD but not static on Darwin.
240 */
241 d_open_t bpfopen;
242 d_close_t bpfclose;
243 d_read_t bpfread;
244 d_write_t bpfwrite;
245 ioctl_fcn_t bpfioctl;
246 select_fcn_t bpfselect;
247
248 /* Darwin's cdevsw struct differs slightly from BSDs */
249 #define CDEV_MAJOR 23
250 static struct cdevsw bpf_cdevsw = {
251 /* open */ bpfopen,
252 /* close */ bpfclose,
253 /* read */ bpfread,
254 /* write */ bpfwrite,
255 /* ioctl */ bpfioctl,
256 /* stop */ eno_stop,
257 /* reset */ eno_reset,
258 /* tty */ NULL,
259 /* select */ bpfselect,
260 /* mmap */ eno_mmap,
261 /* strategy */ eno_strat,
262 /* getc */ eno_getc,
263 /* putc */ eno_putc,
264 /* type */ 0
265 };
266
267 #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
268
269 static int
270 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
271 struct sockaddr *sockp, int *datlen)
272 {
273 struct mbuf *m;
274 int error;
275 int len;
276 uint8_t sa_family;
277 int hlen;
278
279 switch (linktype) {
280 #if SLIP
281 case DLT_SLIP:
282 sa_family = AF_INET;
283 hlen = 0;
284 break;
285 #endif /* SLIP */
286
287 case DLT_EN10MB:
288 sa_family = AF_UNSPEC;
289 /* XXX Would MAXLINKHDR be better? */
290 hlen = sizeof(struct ether_header);
291 break;
292
293 #if FDDI
294 case DLT_FDDI:
295 #if defined(__FreeBSD__) || defined(__bsdi__)
296 sa_family = AF_IMPLINK;
297 hlen = 0;
298 #else
299 sa_family = AF_UNSPEC;
300 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
301 hlen = 24;
302 #endif
303 break;
304 #endif /* FDDI */
305
306 case DLT_RAW:
307 case DLT_NULL:
308 sa_family = AF_UNSPEC;
309 hlen = 0;
310 break;
311
312 #ifdef __FreeBSD__
313 case DLT_ATM_RFC1483:
314 /*
315 * en atm driver requires 4-byte atm pseudo header.
316 * though it isn't standard, vpi:vci needs to be
317 * specified anyway.
318 */
319 sa_family = AF_UNSPEC;
320 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
321 break;
322 #endif
323
324 case DLT_PPP:
325 sa_family = AF_UNSPEC;
326 hlen = 4; /* This should match PPP_HDRLEN */
327 break;
328
329 case DLT_APPLE_IP_OVER_IEEE1394:
330 sa_family = AF_UNSPEC;
331 hlen = sizeof(struct firewire_header);
332 break;
333
334 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
335 sa_family = AF_IEEE80211;
336 hlen = 0;
337 break;
338
339 case DLT_IEEE802_11_RADIO:
340 sa_family = AF_IEEE80211;
341 hlen = 0;
342 break;
343
344 default:
345 return EIO;
346 }
347
348 // LP64todo - fix this!
349 len = uio_resid(uio);
350 *datlen = len - hlen;
351 if ((unsigned)len > MCLBYTES) {
352 return EIO;
353 }
354
355 if (sockp) {
356 /*
357 * Build a sockaddr based on the data link layer type.
358 * We do this at this level because the ethernet header
359 * is copied directly into the data field of the sockaddr.
360 * In the case of SLIP, there is no header and the packet
361 * is forwarded as is.
362 * Also, we are careful to leave room at the front of the mbuf
363 * for the link level header.
364 */
365 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
366 return EIO;
367 }
368 sockp->sa_family = sa_family;
369 } else {
370 /*
371 * We're directly sending the packet data supplied by
372 * the user; we don't need to make room for the link
373 * header, and don't need the header length value any
374 * more, so set it to 0.
375 */
376 hlen = 0;
377 }
378
379 MGETHDR(m, M_WAIT, MT_DATA);
380 if (m == 0) {
381 return ENOBUFS;
382 }
383 if ((unsigned)len > MHLEN) {
384 MCLGET(m, M_WAIT);
385 if ((m->m_flags & M_EXT) == 0) {
386 error = ENOBUFS;
387 goto bad;
388 }
389 }
390 m->m_pkthdr.len = m->m_len = len;
391 m->m_pkthdr.rcvif = NULL;
392 *mp = m;
393
394 /*
395 * Make room for link header.
396 */
397 if (hlen != 0) {
398 m->m_pkthdr.len -= hlen;
399 m->m_len -= hlen;
400 m->m_data += hlen; /* XXX */
401 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
402 if (error) {
403 goto bad;
404 }
405 }
406 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
407 if (error) {
408 goto bad;
409 }
410
411 /* Check for multicast destination */
412 switch (linktype) {
413 case DLT_EN10MB: {
414 struct ether_header *eh;
415
416 eh = mtod(m, struct ether_header *);
417 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
418 if (_ether_cmp(etherbroadcastaddr,
419 eh->ether_dhost) == 0) {
420 m->m_flags |= M_BCAST;
421 } else {
422 m->m_flags |= M_MCAST;
423 }
424 }
425 break;
426 }
427 }
428
429 return 0;
430 bad:
431 m_freem(m);
432 return error;
433 }
434
435 #ifdef __APPLE__
436
437 /*
438 * The dynamic addition of a new device node must block all processes that
439 * are opening the last device so that no process will get an unexpected
440 * ENOENT
441 */
442 static void
443 bpf_make_dev_t(int maj)
444 {
445 static int bpf_growing = 0;
446 unsigned int cur_size = nbpfilter, i;
447
448 if (nbpfilter >= bpf_maxdevices) {
449 return;
450 }
451
452 while (bpf_growing) {
453 /* Wait until new device has been created */
454 (void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
455 }
456 if (nbpfilter > cur_size) {
457 /* other thread grew it already */
458 return;
459 }
460 bpf_growing = 1;
461
462 /* need to grow bpf_dtab first */
463 if (nbpfilter == bpf_dtab_size) {
464 int new_dtab_size;
465 struct bpf_d **new_dtab = NULL;
466 struct bpf_d **old_dtab = NULL;
467
468 new_dtab_size = bpf_dtab_size + NBPFILTER;
469 new_dtab = (struct bpf_d **)_MALLOC(
470 sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
471 if (new_dtab == 0) {
472 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
473 goto done;
474 }
475 if (bpf_dtab) {
476 bcopy(bpf_dtab, new_dtab,
477 sizeof(struct bpf_d *) * bpf_dtab_size);
478 }
479 bzero(new_dtab + bpf_dtab_size,
480 sizeof(struct bpf_d *) * NBPFILTER);
481 old_dtab = bpf_dtab;
482 bpf_dtab = new_dtab;
483 bpf_dtab_size = new_dtab_size;
484 if (old_dtab != NULL) {
485 _FREE(old_dtab, M_DEVBUF);
486 }
487 }
488 i = nbpfilter++;
489 (void) devfs_make_node(makedev(maj, i),
490 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
491 "bpf%d", i);
492 done:
493 bpf_growing = 0;
494 wakeup((caddr_t)&bpf_growing);
495 }
496
497 #endif
498
499 /*
500 * Attach file to the bpf interface, i.e. make d listen on bp.
501 */
502 static errno_t
503 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
504 {
505 int first = bp->bif_dlist == NULL;
506 int error = 0;
507
508 /*
509 * Point d at bp, and add d to the interface's list of listeners.
510 * Finally, point the driver's bpf cookie at the interface so
511 * it will divert packets to bpf.
512 */
513 d->bd_bif = bp;
514 d->bd_next = bp->bif_dlist;
515 bp->bif_dlist = d;
516
517 /*
518 * Take a reference on the device even if an error is returned
519 * because we keep the device in the interface's list of listeners
520 */
521 bpf_acquire_d(d);
522
523 if (first) {
524 /* Find the default bpf entry for this ifp */
525 if (bp->bif_ifp->if_bpf == NULL) {
526 struct bpf_if *tmp, *primary = NULL;
527
528 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
529 if (tmp->bif_ifp == bp->bif_ifp) {
530 primary = tmp;
531 break;
532 }
533 }
534 bp->bif_ifp->if_bpf = primary;
535 }
536 /* Only call dlil_set_bpf_tap for primary dlt */
537 if (bp->bif_ifp->if_bpf == bp) {
538 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
539 bpf_tap_callback);
540 }
541
542 if (bp->bif_tap != NULL) {
543 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
544 BPF_TAP_INPUT_OUTPUT);
545 }
546 }
547
548 /*
549 * Reset the detach flags in case we previously detached an interface
550 */
551 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
552
553 if (bp->bif_dlt == DLT_PKTAP) {
554 d->bd_flags |= BPF_FINALIZE_PKTAP;
555 } else {
556 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
557 }
558 return error;
559 }
560
561 /*
562 * Detach a file from its interface.
563 *
564  * Return 1 if it was closed by some thread, 0 otherwise
565 */
566 static int
567 bpf_detachd(struct bpf_d *d, int closing)
568 {
569 struct bpf_d **p;
570 struct bpf_if *bp;
571 struct ifnet *ifp;
572
573 int bpf_closed = d->bd_flags & BPF_CLOSING;
574 /*
575 * Some other thread already detached
576 */
577 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
578 goto done;
579 }
580 /*
581 * This thread is doing the detach
582 */
583 d->bd_flags |= BPF_DETACHING;
584
585 ifp = d->bd_bif->bif_ifp;
586 bp = d->bd_bif;
587
588 if (bpf_debug != 0) {
589 printf("%s: %llx %s%s\n",
590 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
591 if_name(ifp), closing ? " closing" : "");
592 }
593
594 /* Remove d from the interface's descriptor list. */
595 p = &bp->bif_dlist;
596 while (*p != d) {
597 p = &(*p)->bd_next;
598 if (*p == 0) {
599 panic("bpf_detachd: descriptor not in list");
600 }
601 }
602 *p = (*p)->bd_next;
603 if (bp->bif_dlist == 0) {
604 /*
605 * Let the driver know that there are no more listeners.
606 */
607 /* Only call dlil_set_bpf_tap for primary dlt */
608 if (bp->bif_ifp->if_bpf == bp) {
609 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
610 }
611 if (bp->bif_tap) {
612 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
613 }
614
615 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
616 if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
617 break;
618 }
619 }
620 if (bp == NULL) {
621 ifp->if_bpf = NULL;
622 }
623 }
624 d->bd_bif = NULL;
625 /*
626 * Check if this descriptor had requested promiscuous mode.
627 * If so, turn it off.
628 */
629 if (d->bd_promisc) {
630 d->bd_promisc = 0;
631 lck_mtx_unlock(bpf_mlock);
632 if (ifnet_set_promiscuous(ifp, 0)) {
633 /*
634 * Something is really wrong if we were able to put
635 * the driver into promiscuous mode, but can't
636 * take it out.
637 * Most likely the network interface is gone.
638 */
639 printf("%s: ifnet_set_promiscuous failed\n", __func__);
640 }
641 lck_mtx_lock(bpf_mlock);
642 }
643
644 /*
645  * Wake up other threads that are waiting for this thread to finish
646  * detaching.
647 */
648 d->bd_flags &= ~BPF_DETACHING;
649 d->bd_flags |= BPF_DETACHED;
650
651 /* Refresh the local variable as d could have been modified */
652 bpf_closed = d->bd_flags & BPF_CLOSING;
653 /*
654  * Note that we've kept the reference because we may have dropped
655  * the lock when turning off promiscuous mode.
656 */
657 bpf_release_d(d);
658
659 done:
660 /*
661  * When closing, make sure no other thread refers to the bpf_d
662 */
663 if (bpf_debug != 0) {
664 printf("%s: %llx done\n",
665 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
666 }
667 /*
668 * Let the caller know the bpf_d is closed
669 */
670 if (bpf_closed) {
671 return 1;
672 } else {
673 return 0;
674 }
675 }
676
677 /*
678 * Start asynchronous timer, if necessary.
679 * Must be called with bpf_mlock held.
680 */
681 static void
682 bpf_start_timer(struct bpf_d *d)
683 {
684 uint64_t deadline;
685 struct timeval tv;
686
687 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
688 tv.tv_sec = d->bd_rtout / hz;
689 tv.tv_usec = (d->bd_rtout % hz) * tick;
690
691 clock_interval_to_deadline(
692 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
693 NSEC_PER_USEC, &deadline);
694 /*
695 * The state is BPF_IDLE, so the timer hasn't
696 * been started yet, and hasn't gone off yet;
697 * there is no thread call scheduled, so this
698 * won't change the schedule.
699 *
700 * XXX - what if, by the time it gets entered,
701 * the deadline has already passed?
702 */
703 thread_call_enter_delayed(d->bd_thread_call, deadline);
704 d->bd_state = BPF_WAITING;
705 }
706 }
707
708 /*
709 * Cancel asynchronous timer.
710 * Must be called with bpf_mlock held.
711 */
712 static boolean_t
713 bpf_stop_timer(struct bpf_d *d)
714 {
715 /*
716 * If the timer has already gone off, this does nothing.
717 * Our caller is expected to set d->bd_state to BPF_IDLE,
718 * with the bpf_mlock, after we are called. bpf_timed_out()
719 * also grabs bpf_mlock, so, if the timer has gone off and
720 * bpf_timed_out() hasn't finished, it's waiting for the
721 * lock; when this thread releases the lock, it will
722 * find the state is BPF_IDLE, and just release the
723 * lock and return.
724 */
725 return thread_call_cancel(d->bd_thread_call);
726 }
727
728 void
729 bpf_acquire_d(struct bpf_d *d)
730 {
731 void *lr_saved = __builtin_return_address(0);
732
733 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
734
735 d->bd_refcnt += 1;
736
737 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
738 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
739 }
740
741 void
742 bpf_release_d(struct bpf_d *d)
743 {
744 void *lr_saved = __builtin_return_address(0);
745
746 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
747
748 if (d->bd_refcnt <= 0) {
749 panic("%s: %p refcnt <= 0", __func__, d);
750 }
751
752 d->bd_refcnt -= 1;
753
754 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
755 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
756
757 if (d->bd_refcnt == 0) {
758 /* Assert the device is detached */
759 if ((d->bd_flags & BPF_DETACHED) == 0) {
760 panic("%s: %p BPF_DETACHED not set", __func__, d);
761 }
762
763 _FREE(d, M_DEVBUF);
764 }
765 }
766
767 /*
768  * Open bpf device. Returns ENXIO for illegal minor device number,
769 * EBUSY if file is open by another process.
770 */
771 /* ARGSUSED */
772 int
773 bpfopen(dev_t dev, int flags, __unused int fmt,
774 struct proc *p)
775 {
776 struct bpf_d *d;
777
778 lck_mtx_lock(bpf_mlock);
779 if ((unsigned int) minor(dev) >= nbpfilter) {
780 lck_mtx_unlock(bpf_mlock);
781 return ENXIO;
782 }
783 /*
784 * New device nodes are created on demand when opening the last one.
785 * The programming model is for processes to loop on the minor starting
786 * at 0 as long as EBUSY is returned. The loop stops when either the
787  * open succeeds or an error other than EBUSY is returned. That means
788 * that bpf_make_dev_t() must block all processes that are opening the
789 * last node. If not all processes are blocked, they could unexpectedly
790 * get ENOENT and abort their opening loop.
791 */
792 if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
793 bpf_make_dev_t(major(dev));
794 }
795
796 /*
797 * Each minor can be opened by only one process. If the requested
798 * minor is in use, return EBUSY.
799 *
800 * Important: bpfopen() and bpfclose() have to check and set the status
801  * of a device in the same locking context, otherwise the device may be
802  * leaked because the vnode use count will be unexpectedly greater than 1
803 * when close() is called.
804 */
805 if (bpf_dtab[minor(dev)] == NULL) {
806 /* Reserve while opening */
807 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
808 } else {
809 lck_mtx_unlock(bpf_mlock);
810 return EBUSY;
811 }
812 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
813 M_WAIT | M_ZERO);
814 if (d == NULL) {
815 /* this really is a catastrophic failure */
816 printf("bpfopen: malloc bpf_d failed\n");
817 bpf_dtab[minor(dev)] = NULL;
818 lck_mtx_unlock(bpf_mlock);
819 return ENOMEM;
820 }
821
822 /* Mark "in use" and do most initialization. */
823 bpf_acquire_d(d);
824 d->bd_bufsize = bpf_bufsize;
825 d->bd_sig = SIGIO;
826 d->bd_seesent = 1;
827 d->bd_oflags = flags;
828 d->bd_state = BPF_IDLE;
829 d->bd_traffic_class = SO_TC_BE;
830 d->bd_flags |= BPF_DETACHED;
831 if (bpf_wantpktap) {
832 d->bd_flags |= BPF_WANT_PKTAP;
833 } else {
834 d->bd_flags &= ~BPF_WANT_PKTAP;
835 }
836 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
837 if (d->bd_thread_call == NULL) {
838 printf("bpfopen: malloc thread call failed\n");
839 bpf_dtab[minor(dev)] = NULL;
840 bpf_release_d(d);
841 lck_mtx_unlock(bpf_mlock);
842
843 return ENOMEM;
844 }
845 d->bd_opened_by = p;
846 uuid_generate(d->bd_uuid);
847
848 #if CONFIG_MACF_NET
849 mac_bpfdesc_label_init(d);
850 mac_bpfdesc_label_associate(kauth_cred_get(), d);
851 #endif
852 bpf_dtab[minor(dev)] = d; /* Mark opened */
853 lck_mtx_unlock(bpf_mlock);
854
855 return 0;
856 }
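
/*
 * A minimal userland sketch of the open pattern described above: loop over
 * the minors starting at 0 while open(2) fails with EBUSY. The helper name
 * open_bpf_device() and the bound of 256 (the default bpf_maxdevices) are
 * only assumptions for the example.
 *
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	static int
 *	open_bpf_device(void)
 *	{
 *		char path[32];
 *		int i;
 *
 *		for (i = 0; i < 256; i++) {
 *			int fd;
 *
 *			snprintf(path, sizeof(path), "/dev/bpf%d", i);
 *			fd = open(path, O_RDWR);
 *			if (fd >= 0)
 *				return fd;	// found an unused minor
 *			if (errno != EBUSY)
 *				return -1;	// e.g. ENOENT: stop looping
 *		}
 *		return -1;
 *	}
 */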
857
858 /*
859 * Close the descriptor by detaching it from its interface,
860 * deallocating its buffers, and marking it free.
861 */
862 /* ARGSUSED */
863 int
864 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
865 __unused struct proc *p)
866 {
867 struct bpf_d *d;
868
869 /* Take BPF lock to ensure no other thread is using the device */
870 lck_mtx_lock(bpf_mlock);
871
872 d = bpf_dtab[minor(dev)];
873 if (d == NULL || d == BPF_DEV_RESERVED) {
874 lck_mtx_unlock(bpf_mlock);
875 return ENXIO;
876 }
877
878 /*
879  * Other threads may call bpf_detachd() if we drop the bpf_mlock
880 */
881 d->bd_flags |= BPF_CLOSING;
882
883 if (bpf_debug != 0) {
884 printf("%s: %llx\n",
885 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
886 }
887
888 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
889
890 /*
891 * Deal with any in-progress timeouts.
892 */
893 switch (d->bd_state) {
894 case BPF_IDLE:
895 /*
896 * Not waiting for a timeout, and no timeout happened.
897 */
898 break;
899
900 case BPF_WAITING:
901 /*
902 * Waiting for a timeout.
903 * Cancel any timer that has yet to go off,
904 * and mark the state as "closing".
905 * Then drop the lock to allow any timers that
906 * *have* gone off to run to completion, and wait
907 * for them to finish.
908 */
909 if (!bpf_stop_timer(d)) {
910 /*
911 * There was no pending call, so the call must
912 * have been in progress. Wait for the call to
913 * complete; we have to drop the lock while
914  * waiting to let the in-progress call complete.
915 */
916 d->bd_state = BPF_DRAINING;
917 while (d->bd_state == BPF_DRAINING) {
918 msleep((caddr_t)d, bpf_mlock, PRINET,
919 "bpfdraining", NULL);
920 }
921 }
922 d->bd_state = BPF_IDLE;
923 break;
924
925 case BPF_TIMED_OUT:
926 /*
927 * Timer went off, and the timeout routine finished.
928 */
929 d->bd_state = BPF_IDLE;
930 break;
931
932 case BPF_DRAINING:
933 /*
934 * Another thread is blocked on a close waiting for
935 * a timeout to finish.
936 * This "shouldn't happen", as the first thread to enter
937  * bpfclose() will set bpf_dtab[minor(dev)] to BPF_DEV_RESERVED, and
938 * all subsequent threads should see that and fail with
939 * ENXIO.
940 */
941 panic("Two threads blocked in a BPF close");
942 break;
943 }
944
945 if (d->bd_bif) {
946 bpf_detachd(d, 1);
947 }
948 selthreadclear(&d->bd_sel);
949 #if CONFIG_MACF_NET
950 mac_bpfdesc_label_destroy(d);
951 #endif
952 thread_call_free(d->bd_thread_call);
953
954 while (d->bd_hbuf_read != 0) {
955 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
956 }
957
958 bpf_freed(d);
959
960 /* Mark free in same context as bpfopen comes to check */
961 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
962
963 bpf_release_d(d);
964
965 lck_mtx_unlock(bpf_mlock);
966
967 return 0;
968 }
969
970 #define BPF_SLEEP bpf_sleep
971
972 static int
973 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
974 {
975 u_int64_t abstime = 0;
976
977 if (timo != 0) {
978 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
979 }
980
981 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
982 }
983
984 static void
985 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
986 {
987 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
988 struct pktap_v2_hdr *pktap_v2_hdr;
989
990 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
991
992 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
993 pktap_v2_finalize_proc_info(pktap_v2_hdr);
994 }
995 } else {
996 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
997 pktap_finalize_proc_info(pktaphdr);
998 }
999
1000 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1001 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1002 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1003 }
1004 }
1005 }
1006
1007 /*
1008 * Rotate the packet buffers in descriptor d. Move the store buffer
1009 * into the hold slot, and the free buffer into the store slot.
1010 * Zero the length of the new store buffer.
1011 */
1012 #define ROTATE_BUFFERS(d) \
1013 if (d->bd_hbuf_read != 0) \
1014 panic("rotating bpf buffers during read"); \
1015 (d)->bd_hbuf = (d)->bd_sbuf; \
1016 (d)->bd_hlen = (d)->bd_slen; \
1017 (d)->bd_hcnt = (d)->bd_scnt; \
1018 (d)->bd_sbuf = (d)->bd_fbuf; \
1019 (d)->bd_slen = 0; \
1020 (d)->bd_scnt = 0; \
1021 (d)->bd_fbuf = NULL;
1022 /*
1023 * bpfread - read next chunk of packets from buffers
1024 */
1025 int
1026 bpfread(dev_t dev, struct uio *uio, int ioflag)
1027 {
1028 struct bpf_d *d;
1029 caddr_t hbuf;
1030 int timed_out, hbuf_len;
1031 int error;
1032 int flags;
1033
1034 lck_mtx_lock(bpf_mlock);
1035
1036 d = bpf_dtab[minor(dev)];
1037 if (d == NULL || d == BPF_DEV_RESERVED ||
1038 (d->bd_flags & BPF_CLOSING) != 0) {
1039 lck_mtx_unlock(bpf_mlock);
1040 return ENXIO;
1041 }
1042
1043 bpf_acquire_d(d);
1044
1045 /*
1046  * Restrict the application to use a buffer the same size as
1047  * the kernel buffers.
1048 */
1049 if (uio_resid(uio) != d->bd_bufsize) {
1050 bpf_release_d(d);
1051 lck_mtx_unlock(bpf_mlock);
1052 return EINVAL;
1053 }
1054
1055 if (d->bd_state == BPF_WAITING) {
1056 bpf_stop_timer(d);
1057 }
1058
1059 timed_out = (d->bd_state == BPF_TIMED_OUT);
1060 d->bd_state = BPF_IDLE;
1061
1062 while (d->bd_hbuf_read != 0) {
1063 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1064 }
1065
1066 if ((d->bd_flags & BPF_CLOSING) != 0) {
1067 bpf_release_d(d);
1068 lck_mtx_unlock(bpf_mlock);
1069 return ENXIO;
1070 }
1071 /*
1072 * If the hold buffer is empty, then do a timed sleep, which
1073 * ends when the timeout expires or when enough packets
1074 * have arrived to fill the store buffer.
1075 */
1076 while (d->bd_hbuf == 0) {
1077 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1078 d->bd_slen != 0) {
1079 /*
1080 * We're in immediate mode, or are reading
1081 * in non-blocking mode, or a timer was
1082 * started before the read (e.g., by select()
1083 * or poll()) and has expired and a packet(s)
1084 * either arrived since the previous
1085 * read or arrived while we were asleep.
1086 * Rotate the buffers and return what's here.
1087 */
1088 ROTATE_BUFFERS(d);
1089 break;
1090 }
1091
1092 /*
1093 * No data is available, check to see if the bpf device
1094 * is still pointed at a real interface. If not, return
1095 * ENXIO so that the userland process knows to rebind
1096 * it before using it again.
1097 */
1098 if (d->bd_bif == NULL) {
1099 bpf_release_d(d);
1100 lck_mtx_unlock(bpf_mlock);
1101 return ENXIO;
1102 }
1103 if (ioflag & IO_NDELAY) {
1104 bpf_release_d(d);
1105 lck_mtx_unlock(bpf_mlock);
1106 return EWOULDBLOCK;
1107 }
1108 error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1109 /*
1110 * Make sure device is still opened
1111 */
1112 if ((d->bd_flags & BPF_CLOSING) != 0) {
1113 bpf_release_d(d);
1114 lck_mtx_unlock(bpf_mlock);
1115 return ENXIO;
1116 }
1117
1118 while (d->bd_hbuf_read != 0) {
1119 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1120 NULL);
1121 }
1122
1123 if ((d->bd_flags & BPF_CLOSING) != 0) {
1124 bpf_release_d(d);
1125 lck_mtx_unlock(bpf_mlock);
1126 return ENXIO;
1127 }
1128
1129 if (error == EINTR || error == ERESTART) {
1130 if (d->bd_hbuf != NULL) {
1131 /*
1132 * Because we msleep, the hold buffer might
1133 * be filled when we wake up. Avoid rotating
1134 * in this case.
1135 */
1136 break;
1137 }
1138 if (d->bd_slen != 0) {
1139 /*
1140 * Sometimes we may be interrupted often and
1141 * the sleep above will not timeout.
1142 * Regardless, we should rotate the buffers
1143 * if there's any new data pending and
1144 * return it.
1145 */
1146 ROTATE_BUFFERS(d);
1147 break;
1148 }
1149 bpf_release_d(d);
1150 lck_mtx_unlock(bpf_mlock);
1151 if (error == ERESTART) {
1152 printf("%s: %llx ERESTART to EINTR\n",
1153 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1154 error = EINTR;
1155 }
1156 return error;
1157 }
1158 if (error == EWOULDBLOCK) {
1159 /*
1160 * On a timeout, return what's in the buffer,
1161 * which may be nothing. If there is something
1162 * in the store buffer, we can rotate the buffers.
1163 */
1164 if (d->bd_hbuf) {
1165 /*
1166 * We filled up the buffer in between
1167 * getting the timeout and arriving
1168 * here, so we don't need to rotate.
1169 */
1170 break;
1171 }
1172
1173 if (d->bd_slen == 0) {
1174 bpf_release_d(d);
1175 lck_mtx_unlock(bpf_mlock);
1176 return 0;
1177 }
1178 ROTATE_BUFFERS(d);
1179 break;
1180 }
1181 }
1182 /*
1183 * At this point, we know we have something in the hold slot.
1184 */
1185
1186 /*
1187  * Set the hold buffer read flag so we do not
1188  * rotate the buffers until the hold buffer
1189  * read is complete; this also avoids issues resulting
1190  * from page faults during disk sleep (<rdar://problem/13436396>).
1191 */
1192 d->bd_hbuf_read = 1;
1193 hbuf = d->bd_hbuf;
1194 hbuf_len = d->bd_hlen;
1195 flags = d->bd_flags;
1196 lck_mtx_unlock(bpf_mlock);
1197
1198 #ifdef __APPLE__
1199 /*
1200 * Before we move data to userland, we fill out the extended
1201 * header fields.
1202 */
1203 if (flags & BPF_EXTENDED_HDR) {
1204 char *p;
1205
1206 p = hbuf;
1207 while (p < hbuf + hbuf_len) {
1208 struct bpf_hdr_ext *ehp;
1209 uint32_t flowid;
1210 struct so_procinfo soprocinfo;
1211 int found = 0;
1212
1213 ehp = (struct bpf_hdr_ext *)(void *)p;
1214 if ((flowid = ehp->bh_flowid) != 0) {
1215 if (ehp->bh_proto == IPPROTO_TCP) {
1216 found = inp_findinpcb_procinfo(&tcbinfo,
1217 flowid, &soprocinfo);
1218 } else if (ehp->bh_proto == IPPROTO_UDP) {
1219 found = inp_findinpcb_procinfo(&udbinfo,
1220 flowid, &soprocinfo);
1221 }
1222 if (found == 1) {
1223 ehp->bh_pid = soprocinfo.spi_pid;
1224 proc_name(ehp->bh_pid, ehp->bh_comm,
1225 MAXCOMLEN);
1226 }
1227 ehp->bh_flowid = 0;
1228 }
1229
1230 if (flags & BPF_FINALIZE_PKTAP) {
1231 struct pktap_header *pktaphdr;
1232
1233 pktaphdr = (struct pktap_header *)(void *)
1234 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1235
1236 bpf_finalize_pktap((struct bpf_hdr *) ehp,
1237 pktaphdr);
1238 }
1239 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1240 }
1241 } else if (flags & BPF_FINALIZE_PKTAP) {
1242 char *p;
1243
1244 p = hbuf;
1245 while (p < hbuf + hbuf_len) {
1246 struct bpf_hdr *hp;
1247 struct pktap_header *pktaphdr;
1248
1249 hp = (struct bpf_hdr *)(void *)p;
1250 pktaphdr = (struct pktap_header *)(void *)
1251 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1252
1253 bpf_finalize_pktap(hp, pktaphdr);
1254
1255 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1256 }
1257 }
1258 #endif
1259
1260 /*
1261 * Move data from hold buffer into user space.
1262 * We know the entire buffer is transferred since
1263 * we checked above that the read buffer is bpf_bufsize bytes.
1264 */
1265 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1266
1267 lck_mtx_lock(bpf_mlock);
1268 /*
1269 * Make sure device is still opened
1270 */
1271 if ((d->bd_flags & BPF_CLOSING) != 0) {
1272 bpf_release_d(d);
1273 lck_mtx_unlock(bpf_mlock);
1274 return ENXIO;
1275 }
1276
1277 d->bd_hbuf_read = 0;
1278 d->bd_fbuf = d->bd_hbuf;
1279 d->bd_hbuf = NULL;
1280 d->bd_hlen = 0;
1281 d->bd_hcnt = 0;
1282 wakeup((caddr_t)d);
1283
1284 bpf_release_d(d);
1285 lck_mtx_unlock(bpf_mlock);
1286 return error;
1287 }
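
/*
 * A minimal userland sketch of consuming the buffer returned by read(2).
 * The read buffer must be exactly the size reported by BIOCGBLEN (see the
 * uio_resid() check at the top of bpfread() above); process_packet() is a
 * hypothetical callback used only for the example.
 *
 *	u_int buflen = 0;
 *	ioctl(fd, BIOCGBLEN, &buflen);
 *	char *buf = malloc(buflen);
 *	ssize_t n = read(fd, buf, buflen);
 *	char *p = buf;
 *
 *	while (n > 0 && p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *
 *		process_packet(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */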
1288
1289 /*
1290 * If there are processes sleeping on this descriptor, wake them up.
1291 */
1292 static void
1293 bpf_wakeup(struct bpf_d *d)
1294 {
1295 if (d->bd_state == BPF_WAITING) {
1296 bpf_stop_timer(d);
1297 d->bd_state = BPF_IDLE;
1298 }
1299 wakeup((caddr_t)d);
1300 if (d->bd_async && d->bd_sig && d->bd_sigio) {
1301 pgsigio(d->bd_sigio, d->bd_sig);
1302 }
1303
1304 selwakeup(&d->bd_sel);
1305 if ((d->bd_flags & BPF_KNOTE)) {
1306 KNOTE(&d->bd_sel.si_note, 1);
1307 }
1308 }
1309
1310 static void
1311 bpf_timed_out(void *arg, __unused void *dummy)
1312 {
1313 struct bpf_d *d = (struct bpf_d *)arg;
1314
1315 lck_mtx_lock(bpf_mlock);
1316 if (d->bd_state == BPF_WAITING) {
1317 /*
1318 * There's a select or kqueue waiting for this; if there's
1319 * now stuff to read, wake it up.
1320 */
1321 d->bd_state = BPF_TIMED_OUT;
1322 if (d->bd_slen != 0) {
1323 bpf_wakeup(d);
1324 }
1325 } else if (d->bd_state == BPF_DRAINING) {
1326 /*
1327 * A close is waiting for this to finish.
1328 * Mark it as finished, and wake the close up.
1329 */
1330 d->bd_state = BPF_IDLE;
1331 bpf_wakeup(d);
1332 }
1333 lck_mtx_unlock(bpf_mlock);
1334 }
1335
1336 /* keep in sync with bpf_movein above: */
1337 #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1338
1339 int
1340 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1341 {
1342 struct bpf_d *d;
1343 struct ifnet *ifp;
1344 struct mbuf *m = NULL;
1345 int error;
1346 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1347 int datlen = 0;
1348 int bif_dlt;
1349 int bd_hdrcmplt;
1350
1351 lck_mtx_lock(bpf_mlock);
1352
1353 d = bpf_dtab[minor(dev)];
1354 if (d == NULL || d == BPF_DEV_RESERVED ||
1355 (d->bd_flags & BPF_CLOSING) != 0) {
1356 lck_mtx_unlock(bpf_mlock);
1357 return ENXIO;
1358 }
1359
1360 bpf_acquire_d(d);
1361
1362 if (d->bd_bif == 0) {
1363 bpf_release_d(d);
1364 lck_mtx_unlock(bpf_mlock);
1365 return ENXIO;
1366 }
1367
1368 ifp = d->bd_bif->bif_ifp;
1369
1370 if ((ifp->if_flags & IFF_UP) == 0) {
1371 bpf_release_d(d);
1372 lck_mtx_unlock(bpf_mlock);
1373 return ENETDOWN;
1374 }
1375 if (uio_resid(uio) == 0) {
1376 bpf_release_d(d);
1377 lck_mtx_unlock(bpf_mlock);
1378 return 0;
1379 }
1380 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1381
1382 /*
1383 * fix for PR-6849527
1384  * getting variables onto the stack before dropping the lock for bpf_movein()
1385 */
1386 bif_dlt = (int)d->bd_bif->bif_dlt;
1387 bd_hdrcmplt = d->bd_hdrcmplt;
1388
1389 /* bpf_movein allocating mbufs; drop lock */
1390 lck_mtx_unlock(bpf_mlock);
1391
1392 error = bpf_movein(uio, bif_dlt, &m,
1393 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1394 &datlen);
1395
1396 /* take the lock again */
1397 lck_mtx_lock(bpf_mlock);
1398 if (error) {
1399 bpf_release_d(d);
1400 lck_mtx_unlock(bpf_mlock);
1401 return error;
1402 }
1403
1404 /* verify the device is still open */
1405 if ((d->bd_flags & BPF_CLOSING) != 0) {
1406 bpf_release_d(d);
1407 lck_mtx_unlock(bpf_mlock);
1408 m_freem(m);
1409 return ENXIO;
1410 }
1411
1412 if (d->bd_bif == NULL) {
1413 bpf_release_d(d);
1414 lck_mtx_unlock(bpf_mlock);
1415 m_free(m);
1416 return ENXIO;
1417 }
1418
1419 if ((unsigned)datlen > ifp->if_mtu) {
1420 bpf_release_d(d);
1421 lck_mtx_unlock(bpf_mlock);
1422 m_freem(m);
1423 return EMSGSIZE;
1424 }
1425
1426 #if CONFIG_MACF_NET
1427 mac_mbuf_label_associate_bpfdesc(d, m);
1428 #endif
1429
1430 bpf_set_packet_service_class(m, d->bd_traffic_class);
1431
1432 lck_mtx_unlock(bpf_mlock);
1433
1434 /*
1435 * The driver frees the mbuf.
1436 */
1437 if (d->bd_hdrcmplt) {
1438 if (d->bd_bif->bif_send) {
1439 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1440 } else {
1441 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1442 }
1443 } else {
1444 error = dlil_output(ifp, PF_INET, m, NULL,
1445 (struct sockaddr *)dst_buf, 0, NULL);
1446 }
1447
1448 lck_mtx_lock(bpf_mlock);
1449 bpf_release_d(d);
1450 lck_mtx_unlock(bpf_mlock);
1451
1452 return error;
1453 }
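
/*
 * A minimal userland sketch of injecting a packet through a descriptor
 * bound with BIOCSETIF. With BIOCSHDRCMPLT set to 1 the caller supplies the
 * complete link-layer header; frame and frame_len are placeholders for a
 * fully formed frame whose payload does not exceed the interface MTU (see
 * the EMSGSIZE check above).
 *
 *	u_int one = 1;
 *
 *	ioctl(fd, BIOCSHDRCMPLT, &one);
 *	if (write(fd, frame, frame_len) < 0)
 *		perror("bpf write");
 */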
1454
1455 /*
1456 * Reset a descriptor by flushing its packet buffer and clearing the
1457 * receive and drop counts.
1458 */
1459 static void
1460 reset_d(struct bpf_d *d)
1461 {
1462 if (d->bd_hbuf_read != 0) {
1463 panic("resetting buffers during read");
1464 }
1465
1466 if (d->bd_hbuf) {
1467 /* Free the hold buffer. */
1468 d->bd_fbuf = d->bd_hbuf;
1469 d->bd_hbuf = NULL;
1470 }
1471 d->bd_slen = 0;
1472 d->bd_hlen = 0;
1473 d->bd_scnt = 0;
1474 d->bd_hcnt = 0;
1475 d->bd_rcount = 0;
1476 d->bd_dcount = 0;
1477 }
1478
1479 static struct bpf_d *
1480 bpf_get_device_from_uuid(uuid_t uuid)
1481 {
1482 unsigned int i;
1483
1484 for (i = 0; i < nbpfilter; i++) {
1485 struct bpf_d *d = bpf_dtab[i];
1486
1487 if (d == NULL || d == BPF_DEV_RESERVED ||
1488 (d->bd_flags & BPF_CLOSING) != 0) {
1489 continue;
1490 }
1491 if (uuid_compare(uuid, d->bd_uuid) == 0) {
1492 return d;
1493 }
1494 }
1495
1496 return NULL;
1497 }
1498
1499 /*
1500  * The BIOCSETUP command "atomically" attaches to the interface and
1501  * copies the buffers from another bpf descriptor. This minimizes the risk
1502  * of missing packets because it is done while holding
1503  * the BPF global lock
1504 */
1505 static int
1506 bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1507 {
1508 struct bpf_d *d_from;
1509 int error = 0;
1510
1511 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1512
1513 /*
1514 * Sanity checks
1515 */
1516 d_from = bpf_get_device_from_uuid(uuid_from);
1517 if (d_from == NULL) {
1518 error = ENOENT;
1519 os_log_info(OS_LOG_DEFAULT,
1520 "%s: uuids not found error %d",
1521 __func__, error);
1522 return error;
1523 }
1524 if (d_from->bd_opened_by != d_to->bd_opened_by) {
1525 error = EACCES;
1526 os_log_info(OS_LOG_DEFAULT,
1527 "%s: processes not matching error %d",
1528 __func__, error);
1529 return error;
1530 }
1531
1532 /*
1533 * Prevent any read while copying
1534 */
1535 while (d_to->bd_hbuf_read != 0) {
1536 msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
1537 }
1538 d_to->bd_hbuf_read = 1;
1539
1540 while (d_from->bd_hbuf_read != 0) {
1541 msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
1542 }
1543 d_from->bd_hbuf_read = 1;
1544
1545 /*
1546 * Verify the devices have not been closed
1547 */
1548 if (d_to->bd_flags & BPF_CLOSING) {
1549 error = ENXIO;
1550 os_log_info(OS_LOG_DEFAULT,
1551 "%s: d_to is closing error %d",
1552 __func__, error);
1553 goto done;
1554 }
1555 if (d_from->bd_flags & BPF_CLOSING) {
1556 error = ENXIO;
1557 os_log_info(OS_LOG_DEFAULT,
1558 "%s: d_from is closing error %d",
1559 __func__, error);
1560 goto done;
1561 }
1562
1563 /*
1564 * For now require the same buffer size
1565 */
1566 if (d_from->bd_bufsize != d_to->bd_bufsize) {
1567 error = EINVAL;
1568 os_log_info(OS_LOG_DEFAULT,
1569 "%s: bufsizes not matching error %d",
1570 __func__, error);
1571 goto done;
1572 }
1573
1574 /*
1575 * Attach to the interface
1576 */
1577 error = bpf_setif(d_to, ifp, false, true);
1578 if (error != 0) {
1579 os_log_info(OS_LOG_DEFAULT,
1580 "%s: bpf_setif() failed error %d",
1581 __func__, error);
1582 goto done;
1583 }
1584
1585 /*
1586 * Make sure the buffers are setup as expected by bpf_setif()
1587 */
1588 ASSERT(d_to->bd_hbuf == NULL);
1589 ASSERT(d_to->bd_sbuf != NULL);
1590 ASSERT(d_to->bd_fbuf != NULL);
1591
1592 /*
1593 * Copy the buffers and update the pointers and counts
1594 */
1595 memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
1596 d_to->bd_slen = d_from->bd_slen;
1597 d_to->bd_scnt = d_from->bd_scnt;
1598
1599 if (d_from->bd_hbuf != NULL) {
1600 d_to->bd_hbuf = d_to->bd_fbuf;
1601 d_to->bd_fbuf = NULL;
1602 memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
1603 }
1604 d_to->bd_hlen = d_from->bd_hlen;
1605 d_to->bd_hcnt = d_from->bd_hcnt;
1606
1607 if (bpf_debug > 0) {
1608 os_log_info(OS_LOG_DEFAULT,
1609 "%s: done slen %u scnt %u hlen %u hcnt %u",
1610 __func__, d_to->bd_slen, d_to->bd_scnt,
1611 d_to->bd_hlen, d_to->bd_hcnt);
1612 }
1613 done:
1614 d_from->bd_hbuf_read = 0;
1615 wakeup((caddr_t)d_from);
1616
1617 d_to->bd_hbuf_read = 0;
1618 wakeup((caddr_t)d_to);
1619
1620 return error;
1621 }
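
/*
 * A minimal userland sketch of the BIOCSETUP flow implemented above:
 * fd_from and fd_to are two bpf descriptors opened by the same process,
 * and "en0" is only an example interface name.
 *
 *	struct bpf_setup_args bsa;
 *
 *	ioctl(fd_from, BIOCGETUUID, &bsa.bsa_uuid);
 *	strlcpy(bsa.bsa_ifname, "en0", sizeof(bsa.bsa_ifname));
 *	ioctl(fd_to, BIOCSETUP, &bsa);	// attach fd_to and copy fd_from's buffers
 */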
1622
1623 /*
1624 * FIONREAD Check for read packet available.
1625 * SIOCGIFADDR Get interface address - convenient hook to driver.
1626 * BIOCGBLEN Get buffer len [for read()].
1627  * BIOCSETF Set link layer read filter.
1628 * BIOCFLUSH Flush read packet buffer.
1629 * BIOCPROMISC Put interface into promiscuous mode.
1630 * BIOCGDLT Get link layer type.
1631 * BIOCGETIF Get interface name.
1632 * BIOCSETIF Set interface.
1633 * BIOCSRTIMEOUT Set read timeout.
1634 * BIOCGRTIMEOUT Get read timeout.
1635 * BIOCGSTATS Get packet stats.
1636 * BIOCIMMEDIATE Set immediate mode.
1637 * BIOCVERSION Get filter language version.
1638 * BIOCGHDRCMPLT Get "header already complete" flag
1639 * BIOCSHDRCMPLT Set "header already complete" flag
1640 * BIOCGSEESENT Get "see packets sent" flag
1641 * BIOCSSEESENT Set "see packets sent" flag
1642 * BIOCSETTC Set traffic class.
1643 * BIOCGETTC Get traffic class.
1644 * BIOCSEXTHDR Set "extended header" flag
1645 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1646 * BIOCGHEADDROP Get "head-drop" flag
1647 */
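
/*
 * A minimal userland sketch of a typical capture setup using a few of the
 * ioctls listed above ("en0" is only an example interface name). Note that
 * BIOCSBLEN, if used, must be issued before BIOCSETIF.
 *
 *	struct ifreq ifr;
 *	u_int immediate = 1, buflen = 0;
 *
 *	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);		// bind to the interface
 *	ioctl(fd, BIOCIMMEDIATE, &immediate);	// deliver packets as they arrive
 *	ioctl(fd, BIOCGBLEN, &buflen);		// buffer size to pass to read(2)
 */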
1648 /* ARGSUSED */
1649 int
1650 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1651 struct proc *p)
1652 {
1653 struct bpf_d *d;
1654 int error = 0;
1655 u_int int_arg;
1656 struct ifreq ifr;
1657
1658 lck_mtx_lock(bpf_mlock);
1659
1660 d = bpf_dtab[minor(dev)];
1661 if (d == NULL || d == BPF_DEV_RESERVED ||
1662 (d->bd_flags & BPF_CLOSING) != 0) {
1663 lck_mtx_unlock(bpf_mlock);
1664 return ENXIO;
1665 }
1666
1667 bpf_acquire_d(d);
1668
1669 if (d->bd_state == BPF_WAITING) {
1670 bpf_stop_timer(d);
1671 }
1672 d->bd_state = BPF_IDLE;
1673
1674 switch (cmd) {
1675 default:
1676 error = EINVAL;
1677 break;
1678
1679 /*
1680 * Check for read packet available.
1681 */
1682 case FIONREAD: /* int */
1683 {
1684 int n;
1685
1686 n = d->bd_slen;
1687 if (d->bd_hbuf && d->bd_hbuf_read == 0) {
1688 n += d->bd_hlen;
1689 }
1690
1691 bcopy(&n, addr, sizeof(n));
1692 break;
1693 }
1694
1695 case SIOCGIFADDR: /* struct ifreq */
1696 {
1697 struct ifnet *ifp;
1698
1699 if (d->bd_bif == 0) {
1700 error = EINVAL;
1701 } else {
1702 ifp = d->bd_bif->bif_ifp;
1703 error = ifnet_ioctl(ifp, 0, cmd, addr);
1704 }
1705 break;
1706 }
1707
1708 /*
1709 * Get buffer len [for read()].
1710 */
1711 case BIOCGBLEN: /* u_int */
1712 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1713 break;
1714
1715 /*
1716 * Set buffer length.
1717 */
1718 case BIOCSBLEN: { /* u_int */
1719 u_int size;
1720 unsigned int maxbufsize = bpf_maxbufsize;
1721
1722 /*
1723  * Allow a larger buffer in head-drop mode, with the
1724  * assumption that the reading process may be low priority but
1725  * is interested in the most recent traffic
1726 */
1727 if (d->bd_headdrop != 0) {
1728 maxbufsize = 2 * bpf_maxbufsize;
1729 }
1730
1731 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1732 /*
1733 * Interface already attached, unable to change buffers
1734 */
1735 error = EINVAL;
1736 break;
1737 }
1738 bcopy(addr, &size, sizeof(size));
1739
1740 if (size > maxbufsize) {
1741 d->bd_bufsize = maxbufsize;
1742
1743 os_log_info(OS_LOG_DEFAULT,
1744 "%s bufsize capped to %u from %u",
1745 __func__, d->bd_bufsize, size);
1746 } else if (size < BPF_MINBUFSIZE) {
1747 d->bd_bufsize = BPF_MINBUFSIZE;
1748
1749 os_log_info(OS_LOG_DEFAULT,
1750 "%s bufsize bumped to %u from %u",
1751 __func__, d->bd_bufsize, size);
1752 } else {
1753 d->bd_bufsize = size;
1754 }
1755
1756 /* It's a read/write ioctl */
1757 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1758 break;
1759 }
1760 /*
1761 * Set link layer read filter.
1762 */
1763 case BIOCSETF32:
1764 case BIOCSETFNR32: { /* struct bpf_program32 */
1765 struct bpf_program32 prg32;
1766
1767 bcopy(addr, &prg32, sizeof(prg32));
1768 error = bpf_setf(d, prg32.bf_len,
1769 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1770 break;
1771 }
1772
1773 case BIOCSETF64:
1774 case BIOCSETFNR64: { /* struct bpf_program64 */
1775 struct bpf_program64 prg64;
1776
1777 bcopy(addr, &prg64, sizeof(prg64));
1778 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1779 break;
1780 }
1781
1782 /*
1783 * Flush read packet buffer.
1784 */
1785 case BIOCFLUSH:
1786 while (d->bd_hbuf_read != 0) {
1787 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1788 NULL);
1789 }
1790 if ((d->bd_flags & BPF_CLOSING) != 0) {
1791 error = ENXIO;
1792 break;
1793 }
1794 reset_d(d);
1795 break;
1796
1797 /*
1798 * Put interface into promiscuous mode.
1799 */
1800 case BIOCPROMISC:
1801 if (d->bd_bif == 0) {
1802 /*
1803 * No interface attached yet.
1804 */
1805 error = EINVAL;
1806 break;
1807 }
1808 if (d->bd_promisc == 0) {
1809 lck_mtx_unlock(bpf_mlock);
1810 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1811 lck_mtx_lock(bpf_mlock);
1812 if (error == 0) {
1813 d->bd_promisc = 1;
1814 }
1815 }
1816 break;
1817
1818 /*
1819 * Get device parameters.
1820 */
1821 case BIOCGDLT: /* u_int */
1822 if (d->bd_bif == 0) {
1823 error = EINVAL;
1824 } else {
1825 bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
1826 }
1827 break;
1828
1829 /*
1830 * Get a list of supported data link types.
1831 */
1832 case BIOCGDLTLIST: /* struct bpf_dltlist */
1833 if (d->bd_bif == NULL) {
1834 error = EINVAL;
1835 } else {
1836 error = bpf_getdltlist(d, addr, p);
1837 }
1838 break;
1839
1840 /*
1841 * Set data link type.
1842 */
1843 case BIOCSDLT: /* u_int */
1844 if (d->bd_bif == NULL) {
1845 error = EINVAL;
1846 } else {
1847 u_int dlt;
1848
1849 bcopy(addr, &dlt, sizeof(dlt));
1850
1851 if (dlt == DLT_PKTAP &&
1852 !(d->bd_flags & BPF_WANT_PKTAP)) {
1853 dlt = DLT_RAW;
1854 }
1855 error = bpf_setdlt(d, dlt);
1856 }
1857 break;
1858
1859 /*
1860 * Get interface name.
1861 */
1862 case BIOCGETIF: /* struct ifreq */
1863 if (d->bd_bif == 0) {
1864 error = EINVAL;
1865 } else {
1866 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1867
1868 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1869 sizeof(ifr.ifr_name), "%s", if_name(ifp));
1870 }
1871 break;
1872
1873 /*
1874 * Set interface.
1875 */
1876 case BIOCSETIF: { /* struct ifreq */
1877 ifnet_t ifp;
1878
1879 bcopy(addr, &ifr, sizeof(ifr));
1880 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1881 ifp = ifunit(ifr.ifr_name);
1882 if (ifp == NULL) {
1883 error = ENXIO;
1884 } else {
1885 error = bpf_setif(d, ifp, true, false);
1886 }
1887 break;
1888 }
1889
1890 /*
1891 * Set read timeout.
1892 */
1893 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
1894 struct user32_timeval _tv;
1895 struct timeval tv;
1896
1897 bcopy(addr, &_tv, sizeof(_tv));
1898 tv.tv_sec = _tv.tv_sec;
1899 tv.tv_usec = _tv.tv_usec;
1900
1901 /*
1902 * Subtract 1 tick from tvtohz() since this isn't
1903 * a one-shot timer.
1904 */
1905 if ((error = itimerfix(&tv)) == 0) {
1906 d->bd_rtout = tvtohz(&tv) - 1;
1907 }
1908 break;
1909 }
1910
1911 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
1912 struct user64_timeval _tv;
1913 struct timeval tv;
1914
1915 bcopy(addr, &_tv, sizeof(_tv));
1916 tv.tv_sec = _tv.tv_sec;
1917 tv.tv_usec = _tv.tv_usec;
1918
1919 /*
1920 * Subtract 1 tick from tvtohz() since this isn't
1921 * a one-shot timer.
1922 */
1923 if ((error = itimerfix(&tv)) == 0) {
1924 d->bd_rtout = tvtohz(&tv) - 1;
1925 }
1926 break;
1927 }
1928
1929 /*
1930 * Get read timeout.
1931 */
1932 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1933 struct user32_timeval tv;
1934
1935 bzero(&tv, sizeof(tv));
1936 tv.tv_sec = d->bd_rtout / hz;
1937 tv.tv_usec = (d->bd_rtout % hz) * tick;
1938 bcopy(&tv, addr, sizeof(tv));
1939 break;
1940 }
1941
1942 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1943 struct user64_timeval tv;
1944
1945 bzero(&tv, sizeof(tv));
1946 tv.tv_sec = d->bd_rtout / hz;
1947 tv.tv_usec = (d->bd_rtout % hz) * tick;
1948 bcopy(&tv, addr, sizeof(tv));
1949 break;
1950 }
1951
1952 /*
1953 * Get packet stats.
1954 */
1955 case BIOCGSTATS: { /* struct bpf_stat */
1956 struct bpf_stat bs;
1957
1958 bzero(&bs, sizeof(bs));
1959 bs.bs_recv = d->bd_rcount;
1960 bs.bs_drop = d->bd_dcount;
1961 bcopy(&bs, addr, sizeof(bs));
1962 break;
1963 }
1964
1965 /*
1966 * Set immediate mode.
1967 */
1968 case BIOCIMMEDIATE: /* u_int */
1969 d->bd_immediate = *(u_int *)(void *)addr;
1970 break;
1971
1972 case BIOCVERSION: { /* struct bpf_version */
1973 struct bpf_version bv;
1974
1975 bzero(&bv, sizeof(bv));
1976 bv.bv_major = BPF_MAJOR_VERSION;
1977 bv.bv_minor = BPF_MINOR_VERSION;
1978 bcopy(&bv, addr, sizeof(bv));
1979 break;
1980 }
1981
1982 /*
1983 * Get "header already complete" flag
1984 */
1985 case BIOCGHDRCMPLT: /* u_int */
1986 bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
1987 break;
1988
1989 /*
1990 * Set "header already complete" flag
1991 */
1992 case BIOCSHDRCMPLT: /* u_int */
1993 bcopy(addr, &int_arg, sizeof(int_arg));
1994 d->bd_hdrcmplt = int_arg ? 1 : 0;
1995 break;
1996
1997 /*
1998 * Get "see sent packets" flag
1999 */
2000 case BIOCGSEESENT: /* u_int */
2001 bcopy(&d->bd_seesent, addr, sizeof(u_int));
2002 break;
2003
2004 /*
2005 * Set "see sent packets" flag
2006 */
2007 case BIOCSSEESENT: /* u_int */
2008 bcopy(addr, &d->bd_seesent, sizeof(u_int));
2009 break;
2010
2011 /*
2012 * Set traffic service class
2013 */
2014 case BIOCSETTC: { /* int */
2015 int tc;
2016
2017 bcopy(addr, &tc, sizeof(int));
2018 error = bpf_set_traffic_class(d, tc);
2019 break;
2020 }
2021
2022 /*
2023 * Get traffic service class
2024 */
2025 case BIOCGETTC: /* int */
2026 bcopy(&d->bd_traffic_class, addr, sizeof(int));
2027 break;
2028
2029 case FIONBIO: /* Non-blocking I/O; int */
2030 break;
2031
2032 case FIOASYNC: /* Send signal on receive packets; int */
2033 bcopy(addr, &d->bd_async, sizeof(int));
2034 break;
2035 #ifndef __APPLE__
2036 case FIOSETOWN:
2037 error = fsetown(*(int *)addr, &d->bd_sigio);
2038 break;
2039
2040 case FIOGETOWN:
2041 *(int *)addr = fgetown(d->bd_sigio);
2042 break;
2043
2044 /* This is deprecated, FIOSETOWN should be used instead. */
2045 case TIOCSPGRP:
2046 error = fsetown(-(*(int *)addr), &d->bd_sigio);
2047 break;
2048
2049 /* This is deprecated, FIOGETOWN should be used instead. */
2050 case TIOCGPGRP:
2051 *(int *)addr = -fgetown(d->bd_sigio);
2052 break;
2053 #endif
2054 case BIOCSRSIG: { /* Set receive signal; u_int */
2055 u_int sig;
2056
2057 bcopy(addr, &sig, sizeof(u_int));
2058
2059 if (sig >= NSIG) {
2060 error = EINVAL;
2061 } else {
2062 d->bd_sig = sig;
2063 }
2064 break;
2065 }
2066 case BIOCGRSIG: /* u_int */
2067 bcopy(&d->bd_sig, addr, sizeof(u_int));
2068 break;
2069 #ifdef __APPLE__
2070 case BIOCSEXTHDR: /* u_int */
2071 bcopy(addr, &int_arg, sizeof(int_arg));
2072 if (int_arg) {
2073 d->bd_flags |= BPF_EXTENDED_HDR;
2074 } else {
2075 d->bd_flags &= ~BPF_EXTENDED_HDR;
2076 }
2077 break;
2078
2079 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2080 ifnet_t ifp;
2081 struct bpf_if *bp;
2082
2083 bcopy(addr, &ifr, sizeof(ifr));
2084 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2085 ifp = ifunit(ifr.ifr_name);
2086 if (ifp == NULL) {
2087 error = ENXIO;
2088 break;
2089 }
2090 ifr.ifr_intval = 0;
2091 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2092 struct bpf_d *bpf_d;
2093
2094 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2095 continue;
2096 }
2097 for (bpf_d = bp->bif_dlist; bpf_d;
2098 bpf_d = bpf_d->bd_next) {
2099 ifr.ifr_intval += 1;
2100 }
2101 }
2102 bcopy(&ifr, addr, sizeof(ifr));
2103 break;
2104 }
2105 case BIOCGWANTPKTAP: /* u_int */
2106 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2107 bcopy(&int_arg, addr, sizeof(int_arg));
2108 break;
2109
2110 case BIOCSWANTPKTAP: /* u_int */
2111 bcopy(addr, &int_arg, sizeof(int_arg));
2112 if (int_arg) {
2113 d->bd_flags |= BPF_WANT_PKTAP;
2114 } else {
2115 d->bd_flags &= ~BPF_WANT_PKTAP;
2116 }
2117 break;
2118 #endif
2119
2120 case BIOCSHEADDROP:
2121 bcopy(addr, &int_arg, sizeof(int_arg));
2122 d->bd_headdrop = int_arg ? 1 : 0;
2123 break;
2124
2125 case BIOCGHEADDROP:
2126 bcopy(&d->bd_headdrop, addr, sizeof(int));
2127 break;
2128
2129 case BIOCSTRUNCATE:
2130 bcopy(addr, &int_arg, sizeof(int_arg));
2131 if (int_arg) {
2132 d->bd_flags |= BPF_TRUNCATE;
2133 } else {
2134 d->bd_flags &= ~BPF_TRUNCATE;
2135 }
2136 break;
2137
2138 case BIOCGETUUID:
2139 bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2140 break;
2141
2142 case BIOCSETUP: {
2143 struct bpf_setup_args bsa;
2144 ifnet_t ifp;
2145
2146 bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2147 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2148 ifp = ifunit(bsa.bsa_ifname);
2149 if (ifp == NULL) {
2150 error = ENXIO;
2151 os_log_info(OS_LOG_DEFAULT,
2152 "%s: ifnet not found for %s error %d",
2153 __func__, bsa.bsa_ifname, error);
2154 break;
2155 }
2156
2157 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2158 break;
2159 }
2160 case BIOCSPKTHDRV2:
2161 bcopy(addr, &int_arg, sizeof(int_arg));
2162 if (int_arg != 0) {
2163 d->bd_flags |= BPF_PKTHDRV2;
2164 } else {
2165 d->bd_flags &= ~BPF_PKTHDRV2;
2166 }
2167 break;
2168
2169 case BIOCGPKTHDRV2:
2170 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2171 bcopy(&int_arg, addr, sizeof(int));
2172 break;
2173 }
2174
2175 bpf_release_d(d);
2176 lck_mtx_unlock(bpf_mlock);
2177
2178 return error;
2179 }
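
/*
 * Illustrative userland sketch (not part of this file): the traffic class
 * ioctls handled above can be driven from a process holding an open bpf
 * descriptor.  "/dev/bpf0", the class value 0, and the visibility of
 * BIOCSETTC/BIOCGETTC in the SDK's net/bpf.h are assumptions for the
 * example only.
 */
#if 0
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int
example_set_traffic_class(void)
{
	int fd = open("/dev/bpf0", O_RDWR);	/* assumed free bpf node */
	int tc = 0;				/* assumed valid SO_TC_* value */

	if (fd < 0) {
		return -1;
	}
	if (ioctl(fd, BIOCSETTC, &tc) == -1) {	/* set traffic service class */
		perror("BIOCSETTC");
	}
	if (ioctl(fd, BIOCGETTC, &tc) == 0) {	/* read it back */
		printf("traffic class is %d\n", tc);
	}
	close(fd);
	return 0;
}
#endif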
2180
2181 /*
2182 * Set d's packet filter program. If this file already has a filter,
2183 * free it and replace it. Returns EINVAL for bogus requests.
2184 */
2185 static int
2186 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2187 u_long cmd)
2188 {
2189 struct bpf_insn *fcode, *old;
2190 u_int flen, size;
2191
2192 while (d->bd_hbuf_read != 0) {
2193 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2194 }
2195
2196 if ((d->bd_flags & BPF_CLOSING) != 0) {
2197 return ENXIO;
2198 }
2199
2200 old = d->bd_filter;
2201 if (bf_insns == USER_ADDR_NULL) {
2202 if (bf_len != 0) {
2203 return EINVAL;
2204 }
2205 d->bd_filter = NULL;
2206 reset_d(d);
2207 if (old != 0) {
2208 FREE(old, M_DEVBUF);
2209 }
2210 return 0;
2211 }
2212 flen = bf_len;
2213 if (flen > BPF_MAXINSNS) {
2214 return EINVAL;
2215 }
2216
2217 size = flen * sizeof(struct bpf_insn);
2218 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
2219 #ifdef __APPLE__
2220 if (fcode == NULL) {
2221 return ENOBUFS;
2222 }
2223 #endif
2224 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2225 bpf_validate(fcode, (int)flen)) {
2226 d->bd_filter = fcode;
2227
2228 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
2229 reset_d(d);
2230 }
2231
2232 if (old != 0) {
2233 FREE(old, M_DEVBUF);
2234 }
2235
2236 return 0;
2237 }
2238 FREE(fcode, M_DEVBUF);
2239 return EINVAL;
2240 }
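
/*
 * Illustrative userland sketch (not part of this file): a minimal filter
 * installed with BIOCSETF, which is what ends up in bpf_setf() above.  The
 * program accepts IPv4 frames on an Ethernet-like interface and rejects
 * everything else; the descriptor "fd" is assumed to be an open bpf device.
 */
#if 0
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/ethernet.h>

static int
example_install_filter(int fd)
{
	/* "accept ip" over Ethernet: load the ether type, compare, return */
	struct bpf_insn insns[] = {
		BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 1),
		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),	/* accept whole packet */
		BPF_STMT(BPF_RET + BPF_K, 0),		/* drop */
	};
	struct bpf_program prog;

	prog.bf_len = sizeof(insns) / sizeof(insns[0]);
	prog.bf_insns = insns;

	/* EINVAL if the program does not pass bpf_validate() */
	return ioctl(fd, BIOCSETF, &prog);
}
#endif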
2241
2242 /*
2243 * Detach a file from its current interface (if attached at all) and attach
2244 * to the interface indicated by theywant.
2245 * Return an errno or 0.
2246 */
2247 static int
2248 bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
2249 {
2250 struct bpf_if *bp;
2251 int error;
2252
2253 while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
2254 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2255 }
2256
2257 if ((d->bd_flags & BPF_CLOSING) != 0) {
2258 return ENXIO;
2259 }
2260
2261 /*
2262 * Look through attached interfaces for the named one.
2263 */
2264 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2265 struct ifnet *ifp = bp->bif_ifp;
2266
2267 if (ifp == 0 || ifp != theywant) {
2268 continue;
2269 }
2270 /*
2271 * Do not use DLT_PKTAP, unless requested explicitly
2272 */
2273 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2274 continue;
2275 }
2276 /*
2277 * Skip the coprocessor interface
2278 */
2279 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
2280 continue;
2281 }
2282 /*
2283 * We found the requested interface.
2284 * Allocate the packet buffers.
2285 */
2286 error = bpf_allocbufs(d);
2287 if (error != 0) {
2288 return error;
2289 }
2290 /*
2291 * Detach if attached to something else.
2292 */
2293 if (bp != d->bd_bif) {
2294 if (d->bd_bif != NULL) {
2295 if (bpf_detachd(d, 0) != 0) {
2296 return ENXIO;
2297 }
2298 }
2299 if (bpf_attachd(d, bp) != 0) {
2300 return ENXIO;
2301 }
2302 }
2303 if (do_reset) {
2304 reset_d(d);
2305 }
2306 return 0;
2307 }
2308 /* Not found. */
2309 return ENXIO;
2310 }
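
/*
 * Illustrative userland sketch (not part of this file): binding a bpf
 * descriptor to an interface with BIOCSETIF, the request that reaches
 * bpf_setif() above.  The interface name "en0" is an assumption.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/bpf.h>
#include <string.h>

static int
example_bind_interface(int fd)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));

	/* returns ENXIO through bpf_setif() if the interface is unknown */
	return ioctl(fd, BIOCSETIF, &ifr);
}
#endif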
2311
2312 /*
2313 * Get a list of available data link types of the interface.
2314 */
2315 static int
2316 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2317 {
2318 u_int n;
2319 int error;
2320 struct ifnet *ifp;
2321 struct bpf_if *bp;
2322 user_addr_t dlist;
2323 struct bpf_dltlist bfl;
2324
2325 bcopy(addr, &bfl, sizeof(bfl));
2326 if (proc_is64bit(p)) {
2327 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2328 } else {
2329 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2330 }
2331
2332 ifp = d->bd_bif->bif_ifp;
2333 n = 0;
2334 error = 0;
2335
2336 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2337 if (bp->bif_ifp != ifp) {
2338 continue;
2339 }
2340 /*
2341 * Do not use DLT_PKTAP, unless requested explicitly
2342 */
2343 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2344 continue;
2345 }
2346 if (dlist != USER_ADDR_NULL) {
2347 if (n >= bfl.bfl_len) {
2348 return ENOMEM;
2349 }
2350 error = copyout(&bp->bif_dlt, dlist,
2351 sizeof(bp->bif_dlt));
2352 if (error != 0) {
2353 break;
2354 }
2355 dlist += sizeof(bp->bif_dlt);
2356 }
2357 n++;
2358 }
2359 bfl.bfl_len = n;
2360 bcopy(&bfl, addr, sizeof(bfl));
2361
2362 return error;
2363 }
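
/*
 * Illustrative userland sketch (not part of this file): enumerating the data
 * link types exposed by bpf_getdltlist() above.  A fixed-size array of 32
 * entries is an assumption; ENOMEM from the ioctl means it was too small.
 */
#if 0
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdio.h>

static int
example_list_dlts(int fd)
{
	u_int32_t dlts[32];
	struct bpf_dltlist dl;
	u_int i;

	dl.bfl_len = sizeof(dlts) / sizeof(dlts[0]);
	dl.bfl_list = dlts;

	if (ioctl(fd, BIOCGDLTLIST, &dl) == -1) {
		return -1;
	}
	for (i = 0; i < dl.bfl_len; i++) {
		printf("supported DLT %u\n", dlts[i]);
	}
	return 0;
}
#endif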
2364
2365 /*
2366 * Set the data link type of a BPF instance.
2367 */
2368 static int
2369 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2370 {
2371 int error, opromisc;
2372 struct ifnet *ifp;
2373 struct bpf_if *bp;
2374
2375 if (d->bd_bif->bif_dlt == dlt) {
2376 return 0;
2377 }
2378
2379 while (d->bd_hbuf_read != 0) {
2380 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2381 }
2382
2383 if ((d->bd_flags & BPF_CLOSING) != 0) {
2384 return ENXIO;
2385 }
2386
2387 ifp = d->bd_bif->bif_ifp;
2388 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2389 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2390 /*
2391 * Do not use DLT_PKTAP, unless requested explicitly
2392 */
2393 if (bp->bif_dlt == DLT_PKTAP &&
2394 !(d->bd_flags & BPF_WANT_PKTAP)) {
2395 continue;
2396 }
2397 break;
2398 }
2399 }
2400 if (bp != NULL) {
2401 opromisc = d->bd_promisc;
2402 if (bpf_detachd(d, 0) != 0) {
2403 return ENXIO;
2404 }
2405 error = bpf_attachd(d, bp);
2406 if (error) {
2407 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2408 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2409 error);
2410 return error;
2411 }
2412 reset_d(d);
2413 if (opromisc) {
2414 lck_mtx_unlock(bpf_mlock);
2415 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2416 lck_mtx_lock(bpf_mlock);
2417 if (error) {
2418 printf("%s: ifpromisc %s%d failed (%d)\n",
2419 __func__, ifnet_name(bp->bif_ifp),
2420 ifnet_unit(bp->bif_ifp), error);
2421 } else {
2422 d->bd_promisc = 1;
2423 }
2424 }
2425 }
2426 return bp == NULL ? EINVAL : 0;
2427 }
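
/*
 * Illustrative userland sketch (not part of this file): switching the
 * descriptor to a specific data link type via BIOCSDLT, which is serviced by
 * bpf_setdlt() above.  DLT_RAW is used purely as an example value.
 */
#if 0
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
example_set_dlt(int fd)
{
	u_int dlt = DLT_RAW;	/* assumed to be in the interface's DLT list */

	/* EINVAL if the attached interface does not provide this DLT */
	return ioctl(fd, BIOCSDLT, &dlt);
}
#endif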
2428
2429 static int
2430 bpf_set_traffic_class(struct bpf_d *d, int tc)
2431 {
2432 int error = 0;
2433
2434 if (!SO_VALID_TC(tc)) {
2435 error = EINVAL;
2436 } else {
2437 d->bd_traffic_class = tc;
2438 }
2439
2440 return error;
2441 }
2442
2443 static void
2444 bpf_set_packet_service_class(struct mbuf *m, int tc)
2445 {
2446 if (!(m->m_flags & M_PKTHDR)) {
2447 return;
2448 }
2449
2450 VERIFY(SO_VALID_TC(tc));
2451 (void) m_set_service_class(m, so_tc2msc(tc));
2452 }
2453
2454 /*
2455 * Support for select()
2456 *
2457 * Return true iff the specific operation will not block indefinitely.
2458 * Otherwise, return false but make a note that a selwakeup() must be done.
2459 */
2460 int
2461 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2462 {
2463 struct bpf_d *d;
2464 int ret = 0;
2465
2466 lck_mtx_lock(bpf_mlock);
2467
2468 d = bpf_dtab[minor(dev)];
2469 if (d == NULL || d == BPF_DEV_RESERVED ||
2470 (d->bd_flags & BPF_CLOSING) != 0) {
2471 lck_mtx_unlock(bpf_mlock);
2472 return ENXIO;
2473 }
2474
2475 bpf_acquire_d(d);
2476
2477 if (d->bd_bif == NULL) {
2478 bpf_release_d(d);
2479 lck_mtx_unlock(bpf_mlock);
2480 return ENXIO;
2481 }
2482
2483 while (d->bd_hbuf_read != 0) {
2484 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2485 }
2486
2487 if ((d->bd_flags & BPF_CLOSING) != 0) {
2488 bpf_release_d(d);
2489 lck_mtx_unlock(bpf_mlock);
2490 return ENXIO;
2491 }
2492
2493 switch (which) {
2494 case FREAD:
2495 if (d->bd_hlen != 0 ||
2496 ((d->bd_immediate ||
2497 d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2498 ret = 1; /* read has data to return */
2499 } else {
2500 /*
2501 * Read has no data to return.
2502 * Make the select wait, and start a timer if
2503 * necessary.
2504 */
2505 selrecord(p, &d->bd_sel, wql);
2506 bpf_start_timer(d);
2507 }
2508 break;
2509
2510 case FWRITE:
2511 /* can't determine whether a write would block */
2512 ret = 1;
2513 break;
2514 }
2515
2516 bpf_release_d(d);
2517 lck_mtx_unlock(bpf_mlock);
2518
2519 return ret;
2520 }
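
/*
 * Illustrative userland sketch (not part of this file): waiting for captured
 * data with select(), which lands in bpfselect() above.  Immediate mode and
 * the one-second read timeout are example settings, not requirements.
 */
#if 0
#include <sys/select.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
example_wait_readable(int fd)
{
	u_int immediate = 1;
	struct timeval tmo = { .tv_sec = 1, .tv_usec = 0 };
	fd_set rfds;

	(void) ioctl(fd, BIOCIMMEDIATE, &immediate);	/* wake on first packet */
	(void) ioctl(fd, BIOCSRTIMEOUT, &tmo);		/* bound the wait */

	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);
	/*
	 * bpfselect() records us and arms the read timeout via
	 * bpf_start_timer(); returns > 0 once a read would not block.
	 */
	return select(fd + 1, &rfds, NULL, NULL, NULL);
}
#endif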
2521
2522 /*
2523 * Support for kevent() system call. Register EVFILT_READ filters and
2524 * reject all others.
2525 */
2526 int bpfkqfilter(dev_t dev, struct knote *kn);
2527 static void filt_bpfdetach(struct knote *);
2528 static int filt_bpfread(struct knote *, long);
2529 static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
2530 static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data,
2531 struct kevent_internal_s *kev);
2532
2533 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2534 .f_isfd = 1,
2535 .f_detach = filt_bpfdetach,
2536 .f_event = filt_bpfread,
2537 .f_touch = filt_bpftouch,
2538 .f_process = filt_bpfprocess,
2539 };
2540
2541 static int
2542 filt_bpfread_common(struct knote *kn, struct bpf_d *d)
2543 {
2544 int ready = 0;
2545
2546 if (d->bd_immediate) {
2547 /*
2548 * If there's data in the hold buffer, it's the
2549 * amount of data a read will return.
2550 *
2551 * If there's no data in the hold buffer, but
2552 * there's data in the store buffer, a read will
2553 * immediately rotate the store buffer to the
2554 * hold buffer, the amount of data in the store
2555 * buffer is the amount of data a read will
2556 * return.
2557 *
2558 * If there's no data in either buffer, we're not
2559 * ready to read.
2560 */
2561 kn->kn_data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
2562 d->bd_slen : d->bd_hlen);
2563 int64_t lowwat = 1;
2564 if (kn->kn_sfflags & NOTE_LOWAT) {
2565 if (kn->kn_sdata > d->bd_bufsize) {
2566 lowwat = d->bd_bufsize;
2567 } else if (kn->kn_sdata > lowwat) {
2568 lowwat = kn->kn_sdata;
2569 }
2570 }
2571 ready = (kn->kn_data >= lowwat);
2572 } else {
2573 /*
2574 * If there's data in the hold buffer, it's the
2575 * amount of data a read will return.
2576 *
2577 * If there's no data in the hold buffer, but
2578 * there's data in the store buffer, if the
2579 * timer has expired a read will immediately
2580 * rotate the store buffer to the hold buffer,
2581 * so the amount of data in the store buffer is
2582 * the amount of data a read will return.
2583 *
2584 * If there's no data in either buffer, or there's
2585 * no data in the hold buffer and the timer hasn't
2586 * expired, we're not ready to read.
2587 */
2588 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
2589 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
2590 ready = (kn->kn_data > 0);
2591 }
2592 if (!ready) {
2593 bpf_start_timer(d);
2594 }
2595
2596 return ready;
2597 }
2598
2599 int
2600 bpfkqfilter(dev_t dev, struct knote *kn)
2601 {
2602 struct bpf_d *d;
2603 int res;
2604
2605 /*
2606 * Is this device a bpf?
2607 */
2608 if (major(dev) != CDEV_MAJOR ||
2609 kn->kn_filter != EVFILT_READ) {
2610 kn->kn_flags = EV_ERROR;
2611 kn->kn_data = EINVAL;
2612 return 0;
2613 }
2614
2615 lck_mtx_lock(bpf_mlock);
2616
2617 d = bpf_dtab[minor(dev)];
2618
2619 if (d == NULL || d == BPF_DEV_RESERVED ||
2620 (d->bd_flags & BPF_CLOSING) != 0 ||
2621 d->bd_bif == NULL) {
2622 lck_mtx_unlock(bpf_mlock);
2623 kn->kn_flags = EV_ERROR;
2624 kn->kn_data = ENXIO;
2625 return 0;
2626 }
2627
2628 kn->kn_hook = d;
2629 kn->kn_filtid = EVFILTID_BPFREAD;
2630 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2631 d->bd_flags |= BPF_KNOTE;
2632
2633 /* capture the current state */
2634 res = filt_bpfread_common(kn, d);
2635
2636 lck_mtx_unlock(bpf_mlock);
2637
2638 return res;
2639 }
2640
2641 static void
2642 filt_bpfdetach(struct knote *kn)
2643 {
2644 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2645
2646 lck_mtx_lock(bpf_mlock);
2647 if (d->bd_flags & BPF_KNOTE) {
2648 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2649 d->bd_flags &= ~BPF_KNOTE;
2650 }
2651 lck_mtx_unlock(bpf_mlock);
2652 }
2653
2654 static int
2655 filt_bpfread(struct knote *kn, long hint)
2656 {
2657 #pragma unused(hint)
2658 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2659
2660 return filt_bpfread_common(kn, d);
2661 }
2662
2663 static int
2664 filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
2665 {
2666 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2667 int res;
2668
2669 lck_mtx_lock(bpf_mlock);
2670
2671 /* save off the lowat threshold and flag */
2672 kn->kn_sdata = kev->data;
2673 kn->kn_sfflags = kev->fflags;
2674
2675 /* output data will be re-generated here */
2676 res = filt_bpfread_common(kn, d);
2677
2678 lck_mtx_unlock(bpf_mlock);
2679
2680 return res;
2681 }
2682
2683 static int
2684 filt_bpfprocess(struct knote *kn, struct filt_process_s *data,
2685 struct kevent_internal_s *kev)
2686 {
2687 #pragma unused(data)
2688 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2689 int res;
2690
2691 lck_mtx_lock(bpf_mlock);
2692 res = filt_bpfread_common(kn, d);
2693 if (res) {
2694 *kev = kn->kn_kevent;
2695 }
2696 lck_mtx_unlock(bpf_mlock);
2697
2698 return res;
2699 }
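
/*
 * Illustrative userland sketch (not part of this file): registering a bpf
 * descriptor with kqueue(), the path that exercises the EVFILT_READ filter
 * ops above.  The 1024-byte low-water mark is an example value.
 */
#if 0
#include <sys/event.h>
#include <sys/time.h>
#include <unistd.h>

static int
example_kqueue_wait(int fd)
{
	int kq = kqueue();
	struct kevent change, result;

	if (kq < 0) {
		return -1;
	}
	/* NOTE_LOWAT and data map to kn_sfflags/kn_sdata in filt_bpftouch() */
	EV_SET(&change, fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 1024, NULL);
	if (kevent(kq, &change, 1, NULL, 0, NULL) == -1) {
		close(kq);
		return -1;
	}
	/* result.data is the byte count computed by filt_bpfread_common() */
	return kevent(kq, NULL, 0, &result, 1, NULL);
}
#endif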
2700
2701 /*
2702 * Copy data from an mbuf chain into a buffer. This code is derived
2703 * from m_copydata in kern/uipc_mbuf.c.
2704 */
2705 static void
2706 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2707 {
2708 u_int count;
2709 u_char *dst;
2710
2711 dst = dst_arg;
2712 while (len > 0) {
2713 if (m == 0) {
2714 panic("bpf_mcopy");
2715 }
2716 count = min(m->m_len, len);
2717 bcopy(mbuf_data(m), dst, count);
2718 m = m->m_next;
2719 dst += count;
2720 len -= count;
2721 }
2722 }
2723
2724 static inline void
2725 bpf_tap_imp(
2726 ifnet_t ifp,
2727 u_int32_t dlt,
2728 struct bpf_packet *bpf_pkt,
2729 int outbound)
2730 {
2731 struct bpf_d *d;
2732 u_int slen;
2733 struct bpf_if *bp;
2734
2735 /*
2736 * It's possible that we get here after the bpf descriptor has been
2737 * detached from the interface; in such a case we simply return.
2738 * Lock ordering is important since we can be called asynchronously
2739 * (from IOKit) to process an inbound packet; when that happens
2740 * we would have been holding its "gateLock" and will be acquiring
2741 * "bpf_mlock" upon entering this routine. Due to that, we release
2742 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2743 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2744 * when an ifnet_set_promiscuous request simultaneously collides with
2745 * an inbound packet being passed into the tap callback.
2746 */
2747 lck_mtx_lock(bpf_mlock);
2748 if (ifp->if_bpf == NULL) {
2749 lck_mtx_unlock(bpf_mlock);
2750 return;
2751 }
2752 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2753 if (bp->bif_ifp != ifp) {
2754 /* wrong interface */
2755 bp = NULL;
2756 break;
2757 }
2758 if (dlt == 0 || bp->bif_dlt == dlt) {
2759 /* tapping default DLT or DLT matches */
2760 break;
2761 }
2762 }
2763 if (bp == NULL) {
2764 goto done;
2765 }
2766 for (d = bp->bif_dlist; d; d = d->bd_next) {
2767 struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2768 struct bpf_packet bpf_pkt_tmp;
2769 struct pktap_header_buffer bpfp_header_tmp;
2770
2771 if (outbound && !d->bd_seesent) {
2772 continue;
2773 }
2774
2775 ++d->bd_rcount;
2776 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2777 bpf_pkt->bpfp_total_length, 0);
2778 if (bp->bif_ifp->if_type == IFT_PKTAP &&
2779 bp->bif_dlt == DLT_PKTAP) {
2780 /*
2781 * Need to copy the bpf_pkt because the conversion
2782 * to v2 pktap header modifies the content of the
2783 * bpfp_header
2784 */
2785 if ((d->bd_flags & BPF_PKTHDRV2) &&
2786 bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2787 bpf_pkt_tmp = *bpf_pkt;
2788
2789 bpf_pkt = &bpf_pkt_tmp;
2790
2791 memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2792 bpf_pkt->bpfp_header_length);
2793
2794 bpf_pkt->bpfp_header = &bpfp_header_tmp;
2795
2796 convert_to_pktap_header_to_v2(bpf_pkt,
2797 !!(d->bd_flags & BPF_TRUNCATE));
2798 }
2799
2800 if (d->bd_flags & BPF_TRUNCATE) {
2801 slen = min(slen,
2802 get_pkt_trunc_len((u_char *)bpf_pkt,
2803 bpf_pkt->bpfp_total_length));
2804 }
2805 }
2806 if (slen != 0) {
2807 #if CONFIG_MACF_NET
2808 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0) {
2809 continue;
2810 }
2811 #endif
2812 catchpacket(d, bpf_pkt, slen, outbound);
2813 }
2814 bpf_pkt = bpf_pkt_saved;
2815 }
2816
2817 done:
2818 lck_mtx_unlock(bpf_mlock);
2819 }
2820
2821 static inline void
2822 bpf_tap_mbuf(
2823 ifnet_t ifp,
2824 u_int32_t dlt,
2825 mbuf_t m,
2826 void* hdr,
2827 size_t hlen,
2828 int outbound)
2829 {
2830 struct bpf_packet bpf_pkt;
2831 struct mbuf *m0;
2832
2833 if (ifp->if_bpf == NULL) {
2834 /* quickly check without taking lock */
2835 return;
2836 }
2837 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2838 bpf_pkt.bpfp_mbuf = m;
2839 bpf_pkt.bpfp_total_length = 0;
2840 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
2841 bpf_pkt.bpfp_total_length += m0->m_len;
2842 }
2843 bpf_pkt.bpfp_header = hdr;
2844 if (hdr != NULL) {
2845 bpf_pkt.bpfp_total_length += hlen;
2846 bpf_pkt.bpfp_header_length = hlen;
2847 } else {
2848 bpf_pkt.bpfp_header_length = 0;
2849 }
2850 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2851 }
2852
2853 void
2854 bpf_tap_out(
2855 ifnet_t ifp,
2856 u_int32_t dlt,
2857 mbuf_t m,
2858 void* hdr,
2859 size_t hlen)
2860 {
2861 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2862 }
2863
2864 void
2865 bpf_tap_in(
2866 ifnet_t ifp,
2867 u_int32_t dlt,
2868 mbuf_t m,
2869 void* hdr,
2870 size_t hlen)
2871 {
2872 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2873 }
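
/*
 * Illustrative kernel-side sketch (not part of this file): how a network
 * driver would typically feed frames to the taps above.  "my_ifp" and the
 * mbufs are assumptions; passing dlt 0 taps the interface's default DLT, as
 * handled in bpf_tap_imp().
 */
#if 0
static void
example_driver_tap(ifnet_t my_ifp, mbuf_t tx_frame, mbuf_t rx_frame)
{
	/* outbound frame, link-layer header already in the mbuf chain */
	bpf_tap_out(my_ifp, 0, tx_frame, NULL, 0);

	/* inbound frame */
	bpf_tap_in(my_ifp, 0, rx_frame, NULL, 0);
}
#endif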
2874
2875 /* Callback registered with Ethernet driver. */
2876 static int
2877 bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2878 {
2879 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2880
2881 return 0;
2882 }
2883
2884
2885 static errno_t
2886 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2887 {
2888 errno_t err = 0;
2889 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2890 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2891 } else {
2892 err = EINVAL;
2893 }
2894
2895 return err;
2896 }
2897
2898 static void
2899 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2900 {
2901 /* copy the optional header */
2902 if (pkt->bpfp_header_length != 0) {
2903 size_t count = min(len, pkt->bpfp_header_length);
2904 bcopy(pkt->bpfp_header, dst, count);
2905 len -= count;
2906 dst += count;
2907 }
2908 if (len == 0) {
2909 /* nothing past the header */
2910 return;
2911 }
2912 /* copy the packet */
2913 switch (pkt->bpfp_type) {
2914 case BPF_PACKET_TYPE_MBUF:
2915 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2916 break;
2917 default:
2918 break;
2919 }
2920 }
2921
2922 static uint16_t
2923 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2924 const uint16_t remaining_caplen)
2925 {
2926 /*
2927 * For some reason tcpdump expects to have one byte beyond the ESP header
2928 */
2929 uint16_t trunc_len = ESP_HDR_SIZE + 1;
2930
2931 if (trunc_len > remaining_caplen) {
2932 return remaining_caplen;
2933 }
2934
2935 return trunc_len;
2936 }
2937
2938 static uint16_t
2939 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2940 const uint16_t remaining_caplen)
2941 {
2942 /*
2943 * Include the payload generic header
2944 */
2945 uint16_t trunc_len = ISAKMP_HDR_SIZE;
2946
2947 if (trunc_len > remaining_caplen) {
2948 return remaining_caplen;
2949 }
2950
2951 return trunc_len;
2952 }
2953
2954 static uint16_t
2955 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
2956 const uint16_t remaining_caplen)
2957 {
2958 int err = 0;
2959 uint16_t trunc_len = 0;
2960 char payload[remaining_caplen];
2961
2962 err = bpf_copydata(pkt, off, remaining_caplen, payload);
2963 if (err != 0) {
2964 return remaining_caplen;
2965 }
2966 /*
2967 * There are three cases:
2968 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
2969 * - keep alive: 1 byte payload
2970 * - otherwise it's ESP
2971 */
2972 if (remaining_caplen >= 4 &&
2973 payload[0] == 0 && payload[1] == 0 &&
2974 payload[2] == 0 && payload[3] == 0) {
2975 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
2976 } else if (remaining_caplen == 1) {
2977 trunc_len = 1;
2978 } else {
2979 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
2980 }
2981
2982 if (trunc_len > remaining_caplen) {
2983 return remaining_caplen;
2984 }
2985
2986 return trunc_len;
2987 }
2988
2989 static uint16_t
2990 get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2991 {
2992 int err = 0;
2993 uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
2994
2995 if (trunc_len >= remaining_caplen) {
2996 return remaining_caplen;
2997 }
2998
2999 struct udphdr udphdr;
3000 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
3001 if (err != 0) {
3002 return remaining_caplen;
3003 }
3004
3005 u_short sport, dport;
3006
3007 sport = EXTRACT_SHORT(&udphdr.uh_sport);
3008 dport = EXTRACT_SHORT(&udphdr.uh_dport);
3009
3010 if (dport == PORT_DNS || sport == PORT_DNS) {
3011 /*
3012 * Full UDP payload for DNS
3013 */
3014 trunc_len = remaining_caplen;
3015 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
3016 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
3017 /*
3018 * Full UDP payload for BOOTP and DHCP
3019 */
3020 trunc_len = remaining_caplen;
3021 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
3022 /*
3023 * Return the ISAKMP header
3024 */
3025 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3026 remaining_caplen - sizeof(struct udphdr));
3027 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3028 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3029 remaining_caplen - sizeof(struct udphdr));
3030 }
3031 if (trunc_len >= remaining_caplen) {
3032 return remaining_caplen;
3033 }
3034
3035 return trunc_len;
3036 }
3037
3038 static uint16_t
3039 get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3040 {
3041 int err = 0;
3042 uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
3043 if (trunc_len >= remaining_caplen) {
3044 return remaining_caplen;
3045 }
3046
3047 struct tcphdr tcphdr;
3048 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
3049 if (err != 0) {
3050 return remaining_caplen;
3051 }
3052
3053 u_short sport, dport;
3054 sport = EXTRACT_SHORT(&tcphdr.th_sport);
3055 dport = EXTRACT_SHORT(&tcphdr.th_dport);
3056
3057 if (dport == PORT_DNS || sport == PORT_DNS) {
3058 /*
3059 * Full TCP payload for DNS
3060 */
3061 trunc_len = remaining_caplen;
3062 } else {
3063 trunc_len = tcphdr.th_off << 2;
3064 }
3065 if (trunc_len >= remaining_caplen) {
3066 return remaining_caplen;
3067 }
3068
3069 return trunc_len;
3070 }
3071
3072 static uint16_t
3073 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3074 {
3075 uint16_t trunc_len;
3076
3077 switch (proto) {
3078 case IPPROTO_ICMP: {
3079 /*
3080 * Full ICMP payload
3081 */
3082 trunc_len = remaining_caplen;
3083 break;
3084 }
3085 case IPPROTO_ICMPV6: {
3086 /*
3087 * Full ICMPV6 payload
3088 */
3089 trunc_len = remaining_caplen;
3090 break;
3091 }
3092 case IPPROTO_IGMP: {
3093 /*
3094 * Full IGMP payload
3095 */
3096 trunc_len = remaining_caplen;
3097 break;
3098 }
3099 case IPPROTO_UDP: {
3100 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3101 break;
3102 }
3103 case IPPROTO_TCP: {
3104 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3105 break;
3106 }
3107 case IPPROTO_ESP: {
3108 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3109 break;
3110 }
3111 default: {
3112 /*
3113 * By default we only include the IP header
3114 */
3115 trunc_len = 0;
3116 break;
3117 }
3118 }
3119 if (trunc_len >= remaining_caplen) {
3120 return remaining_caplen;
3121 }
3122
3123 return trunc_len;
3124 }
3125
3126 static uint16_t
3127 get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3128 {
3129 int err = 0;
3130 uint16_t iplen = sizeof(struct ip);
3131 if (iplen >= remaining_caplen) {
3132 return remaining_caplen;
3133 }
3134
3135 struct ip iphdr;
3136 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3137 if (err != 0) {
3138 return remaining_caplen;
3139 }
3140
3141 uint8_t proto = 0;
3142
3143 iplen = iphdr.ip_hl << 2;
3144 if (iplen >= remaining_caplen) {
3145 return remaining_caplen;
3146 }
3147
3148 proto = iphdr.ip_p;
3149 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3150
3151 if (iplen >= remaining_caplen) {
3152 return remaining_caplen;
3153 }
3154
3155 return iplen;
3156 }
3157
3158 static uint16_t
3159 get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3160 {
3161 int err = 0;
3162 uint16_t iplen = sizeof(struct ip6_hdr);
3163 if (iplen >= remaining_caplen) {
3164 return remaining_caplen;
3165 }
3166
3167 struct ip6_hdr ip6hdr;
3168 err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3169 if (err != 0) {
3170 return remaining_caplen;
3171 }
3172
3173 uint8_t proto = 0;
3174
3175 /*
3176 * TBD: process the extension headers
3177 */
3178 proto = ip6hdr.ip6_nxt;
3179 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3180
3181 if (iplen >= remaining_caplen) {
3182 return remaining_caplen;
3183 }
3184
3185 return iplen;
3186 }
3187
3188 static uint16_t
3189 get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
3190 {
3191 int err = 0;
3192 uint16_t ethlen = sizeof(struct ether_header);
3193 if (ethlen >= remaining_caplen) {
3194 return remaining_caplen;
3195 }
3196
3197 struct ether_header eh;
3198 u_short type;
3199 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3200 if (err != 0) {
3201 return remaining_caplen;
3202 }
3203
3204 type = EXTRACT_SHORT(&eh.ether_type);
3205 /* Include full ARP */
3206 if (type == ETHERTYPE_ARP) {
3207 ethlen = remaining_caplen;
3208 } else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
3209 ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
3210 } else {
3211 if (type == ETHERTYPE_IP) {
3212 ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
3213 remaining_caplen);
3214 } else if (type == ETHERTYPE_IPV6) {
3215 ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
3216 remaining_caplen);
3217 }
3218 }
3219 return ethlen;
3220 }
3221
3222 static uint32_t
3223 get_pkt_trunc_len(u_char *p, u_int len)
3224 {
3225 struct bpf_packet *pkt = (struct bpf_packet *)(void *) p;
3226 struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3227 uint32_t out_pkt_len = 0, tlen = 0;
3228 /*
3229 * pktap->pth_frame_pre_length is L2 header length and accounts
3230 * for both pre and pre_adjust.
3231 * pktap->pth_length is sizeof(pktap_header) (excl the pre/pre_adjust)
3232 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
3233 * pre is the offset to the L3 header after the bpfp_header, or length
3234 * of L2 header after bpfp_header, if present.
3235 */
3236 uint32_t pre = pktap->pth_frame_pre_length -
3237 (pkt->bpfp_header_length - pktap->pth_length);
3238
3239 /* Length of the input packet starting from L3 header */
3240 uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
3241 if (pktap->pth_protocol_family == AF_INET ||
3242 pktap->pth_protocol_family == AF_INET6) {
3243 /* Contains L2 header */
3244 if (pre > 0) {
3245 if (pre < sizeof(struct ether_header)) {
3246 goto too_short;
3247 }
3248
3249 out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
3250 } else if (pre == 0) {
3251 if (pktap->pth_protocol_family == AF_INET) {
3252 out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
3253 } else if (pktap->pth_protocol_family == AF_INET6) {
3254 out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
3255 }
3256 } else {
3257 /* Ideally pre should be >= 0. This is an exception */
3258 out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
3259 }
3260 } else {
3261 if (pktap->pth_iftype == IFT_ETHER) {
3262 if (in_pkt_len < sizeof(struct ether_header)) {
3263 goto too_short;
3264 }
3265 /* At most include the Ethernet header and 16 bytes */
3266 out_pkt_len = MIN(sizeof(struct ether_header) + 16,
3267 in_pkt_len);
3268 } else {
3269 /*
3270 * For unknown protocols include at most 16 bytes
3271 */
3272 out_pkt_len = MIN(16, in_pkt_len);
3273 }
3274 }
3275 done:
3276 tlen = pkt->bpfp_header_length + out_pkt_len + pre;
3277 return tlen;
3278 too_short:
3279 out_pkt_len = in_pkt_len;
3280 goto done;
3281 }
3282
3283 /*
3284 * Move the packet data from interface memory (pkt) into the
3285 * store buffer and wake up any pending reader when the buffers are
3286 * rotated, immediate mode is set, or the read timeout has expired.
3287 */
3288 static void
3289 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3290 u_int snaplen, int outbound)
3291 {
3292 struct bpf_hdr *hp;
3293 struct bpf_hdr_ext *ehp;
3294 int totlen, curlen;
3295 int hdrlen, caplen;
3296 int do_wakeup = 0;
3297 u_char *payload;
3298 struct timeval tv;
3299
3300 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3301 d->bd_bif->bif_hdrlen;
3302 /*
3303 * Figure out how many bytes to move. If the packet is
3304 * greater or equal to the snapshot length, transfer that
3305 * much. Otherwise, transfer the whole packet (unless
3306 * we hit the buffer size limit).
3307 */
3308 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
3309 if (totlen > d->bd_bufsize) {
3310 totlen = d->bd_bufsize;
3311 }
3312
3313 if (hdrlen > totlen) {
3314 return;
3315 }
3316
3317 /*
3318 * Round up the end of the previous packet to the next longword.
3319 */
3320 curlen = BPF_WORDALIGN(d->bd_slen);
3321 if (curlen + totlen > d->bd_bufsize) {
3322 /*
3323 * This packet will overflow the storage buffer.
3324 * Rotate the buffers if we can, then wakeup any
3325 * pending reads.
3326 *
3327 * We cannot rotate buffers if a read is in progress
3328 * so drop the packet
3329 */
3330 if (d->bd_hbuf_read != 0) {
3331 ++d->bd_dcount;
3332 return;
3333 }
3334
3335 if (d->bd_fbuf == NULL) {
3336 if (d->bd_headdrop == 0) {
3337 /*
3338 * We haven't completed the previous read yet,
3339 * so drop the packet.
3340 */
3341 ++d->bd_dcount;
3342 return;
3343 }
3344 /*
3345 * Drop the hold buffer as it contains older packets
3346 */
3347 d->bd_dcount += d->bd_hcnt;
3348 d->bd_fbuf = d->bd_hbuf;
3349 ROTATE_BUFFERS(d);
3350 } else {
3351 ROTATE_BUFFERS(d);
3352 }
3353 do_wakeup = 1;
3354 curlen = 0;
3355 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
3356 /*
3357 * Immediate mode is set, or the read timeout has
3358 * already expired during a select call. A packet
3359 * arrived, so the reader should be woken up.
3360 */
3361 do_wakeup = 1;
3362 }
3363
3364 /*
3365 * Append the bpf header.
3366 */
3367 microtime(&tv);
3368 if (d->bd_flags & BPF_EXTENDED_HDR) {
3369 struct mbuf *m;
3370
3371 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
3372 ? pkt->bpfp_mbuf : NULL;
3373 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3374 memset(ehp, 0, sizeof(*ehp));
3375 ehp->bh_tstamp.tv_sec = tv.tv_sec;
3376 ehp->bh_tstamp.tv_usec = tv.tv_usec;
3377
3378 ehp->bh_datalen = pkt->bpfp_total_length;
3379 ehp->bh_hdrlen = hdrlen;
3380 caplen = ehp->bh_caplen = totlen - hdrlen;
3381 if (m == NULL) {
3382 if (outbound) {
3383 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3384 } else {
3385 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3386 }
3387 } else if (outbound) {
3388 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3389
3390 /* only do lookups on non-raw INPCB */
3391 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
3392 PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
3393 (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
3394 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3395 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3396 ehp->bh_proto = m->m_pkthdr.pkt_proto;
3397 }
3398 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
3399 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
3400 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3401 }
3402 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
3403 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
3404 }
3405 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
3406 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3407 }
3408 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3409 ehp->bh_unsent_bytes =
3410 m->m_pkthdr.bufstatus_if;
3411 ehp->bh_unsent_snd =
3412 m->m_pkthdr.bufstatus_sndbuf;
3413 }
3414 } else {
3415 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3416 }
3417 payload = (u_char *)ehp + hdrlen;
3418 } else {
3419 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3420 hp->bh_tstamp.tv_sec = tv.tv_sec;
3421 hp->bh_tstamp.tv_usec = tv.tv_usec;
3422 hp->bh_datalen = pkt->bpfp_total_length;
3423 hp->bh_hdrlen = hdrlen;
3424 caplen = hp->bh_caplen = totlen - hdrlen;
3425 payload = (u_char *)hp + hdrlen;
3426 }
3427 /*
3428 * Copy the packet data into the store buffer and update its length.
3429 */
3430 copy_bpf_packet(pkt, payload, caplen);
3431 d->bd_slen = curlen + totlen;
3432 d->bd_scnt += 1;
3433
3434 if (do_wakeup) {
3435 bpf_wakeup(d);
3436 }
3437 }
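
/*
 * Illustrative userland sketch (not part of this file): consuming the store
 * buffer that catchpacket() fills.  Each read() returns zero or more records,
 * each a bpf_hdr followed by bh_caplen bytes of packet data, padded with
 * BPF_WORDALIGN() exactly as above.  The buffer size comes from BIOCGBLEN.
 */
#if 0
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdlib.h>
#include <unistd.h>

static int
example_read_records(int fd)
{
	u_int bufsize;
	char *buf, *p;
	ssize_t n;

	if (ioctl(fd, BIOCGBLEN, &bufsize) == -1) {
		return -1;
	}
	if ((buf = malloc(bufsize)) == NULL) {
		return -1;
	}
	if ((n = read(fd, buf, bufsize)) > 0) {
		for (p = buf; p < buf + n;) {
			struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
			u_char *pkt = (u_char *)p + bh->bh_hdrlen;

			/* bh->bh_caplen captured bytes start at pkt */
			(void) pkt;
			p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
		}
	}
	free(buf);
	return 0;
}
#endif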
3438
3439 /*
3440 * Initialize all nonzero fields of a descriptor.
3441 */
3442 static int
3443 bpf_allocbufs(struct bpf_d *d)
3444 {
3445 if (d->bd_sbuf != NULL) {
3446 FREE(d->bd_sbuf, M_DEVBUF);
3447 d->bd_sbuf = NULL;
3448 }
3449 if (d->bd_hbuf != NULL) {
3450 FREE(d->bd_hbuf, M_DEVBUF);
3451 d->bd_hbuf = NULL;
3452 }
3453 if (d->bd_fbuf != NULL) {
3454 FREE(d->bd_fbuf, M_DEVBUF);
3455 d->bd_fbuf = NULL;
3456 }
3457
3458 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
3459 if (d->bd_fbuf == NULL) {
3460 return ENOBUFS;
3461 }
3462
3463 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
3464 if (d->bd_sbuf == NULL) {
3465 FREE(d->bd_fbuf, M_DEVBUF);
3466 d->bd_fbuf = NULL;
3467 return ENOBUFS;
3468 }
3469 d->bd_slen = 0;
3470 d->bd_hlen = 0;
3471 d->bd_scnt = 0;
3472 d->bd_hcnt = 0;
3473 return 0;
3474 }
3475
3476 /*
3477 * Free buffers currently in use by a descriptor.
3478 * Called on close.
3479 */
3480 static void
3481 bpf_freed(struct bpf_d *d)
3482 {
3483 /*
3484 * We don't need to lock out interrupts since this descriptor has
3485 * been detached from its interface and it hasn't yet been marked
3486 * free.
3487 */
3488 if (d->bd_hbuf_read != 0) {
3489 panic("bpf buffer freed during read");
3490 }
3491
3492 if (d->bd_sbuf != 0) {
3493 FREE(d->bd_sbuf, M_DEVBUF);
3494 if (d->bd_hbuf != 0) {
3495 FREE(d->bd_hbuf, M_DEVBUF);
3496 }
3497 if (d->bd_fbuf != 0) {
3498 FREE(d->bd_fbuf, M_DEVBUF);
3499 }
3500 }
3501 if (d->bd_filter) {
3502 FREE(d->bd_filter, M_DEVBUF);
3503 }
3504 }
3505
3506 /*
3507 * Attach an interface to bpf. dlt is the link layer type; hdrlen is the
3508 * fixed size of the link header (variable length headers are not yet
3509 * supported).
3510 */
3511 void
3512 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3513 {
3514 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3515 }
3516
3517 errno_t
3518 bpf_attach(
3519 ifnet_t ifp,
3520 u_int32_t dlt,
3521 u_int32_t hdrlen,
3522 bpf_send_func send,
3523 bpf_tap_func tap)
3524 {
3525 struct bpf_if *bp;
3526 struct bpf_if *bp_new;
3527 struct bpf_if *bp_before_first = NULL;
3528 struct bpf_if *bp_first = NULL;
3529 struct bpf_if *bp_last = NULL;
3530 boolean_t found;
3531
3532 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
3533 M_WAIT | M_ZERO);
3534 if (bp_new == 0) {
3535 panic("bpfattach");
3536 }
3537
3538 lck_mtx_lock(bpf_mlock);
3539
3540 /*
3541 * Check if this interface/dlt is already attached. Remember the
3542 * first and last attachment for this interface, as well as the
3543 * element before the first attachment.
3544 */
3545 found = FALSE;
3546 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3547 if (bp->bif_ifp != ifp) {
3548 if (bp_first != NULL) {
3549 /* no more elements for this interface */
3550 break;
3551 }
3552 bp_before_first = bp;
3553 } else {
3554 if (bp->bif_dlt == dlt) {
3555 found = TRUE;
3556 break;
3557 }
3558 if (bp_first == NULL) {
3559 bp_first = bp;
3560 }
3561 bp_last = bp;
3562 }
3563 }
3564 if (found) {
3565 lck_mtx_unlock(bpf_mlock);
3566 printf("bpfattach - %s with dlt %d is already attached\n",
3567 if_name(ifp), dlt);
3568 FREE(bp_new, M_DEVBUF);
3569 return EEXIST;
3570 }
3571
3572 bp_new->bif_ifp = ifp;
3573 bp_new->bif_dlt = dlt;
3574 bp_new->bif_send = send;
3575 bp_new->bif_tap = tap;
3576
3577 if (bp_first == NULL) {
3578 /* No other entries for this ifp */
3579 bp_new->bif_next = bpf_iflist;
3580 bpf_iflist = bp_new;
3581 } else {
3582 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
3583 /* Make this the first entry for this interface */
3584 if (bp_before_first != NULL) {
3585 /* point the previous to us */
3586 bp_before_first->bif_next = bp_new;
3587 } else {
3588 /* we're the new head */
3589 bpf_iflist = bp_new;
3590 }
3591 bp_new->bif_next = bp_first;
3592 } else {
3593 /* Add this after the last entry for this interface */
3594 bp_new->bif_next = bp_last->bif_next;
3595 bp_last->bif_next = bp_new;
3596 }
3597 }
3598
3599 /*
3600 * Compute the length of the bpf header. This is not necessarily
3601 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
3602 * that the network layer header begins on a longword boundary (for
3603 * performance reasons and to alleviate alignment restrictions).
3604 */
3605 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
3606 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
3607 sizeof(struct bpf_hdr_ext)) - hdrlen;
3608
3609 /* Take a reference on the interface */
3610 ifnet_reference(ifp);
3611
3612 lck_mtx_unlock(bpf_mlock);
3613
3614 #ifndef __APPLE__
3615 if (bootverbose) {
3616 printf("bpf: %s attached\n", if_name(ifp));
3617 }
3618 #endif
3619
3620 return 0;
3621 }
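
/*
 * Illustrative kernel-side sketch (not part of this file): a driver
 * registering itself with bpf_attach() above.  "my_ifp" is a hypothetical
 * driver-owned ifnet; an Ethernet driver would pass DLT_EN10MB with the
 * Ethernet header length so the alignment math above computes bif_hdrlen
 * correctly.  NULL callbacks match what bpfattach() passes.
 */
#if 0
static void
example_driver_attach(ifnet_t my_ifp)
{
	/* 14-byte Ethernet header; EEXIST if this ifp/dlt is already attached */
	(void) bpf_attach(my_ifp, DLT_EN10MB, sizeof(struct ether_header),
	    NULL, NULL);
}
#endif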
3622
3623 /*
3624 * Detach bpf from an interface. This involves detaching each descriptor
3625 * associated with the interface, and leaving bd_bif NULL. Notify each
3626 * descriptor as it's detached so that any sleepers wake up and get
3627 * ENXIO.
3628 */
3629 void
3630 bpfdetach(struct ifnet *ifp)
3631 {
3632 struct bpf_if *bp, *bp_prev, *bp_next;
3633 struct bpf_d *d;
3634
3635 if (bpf_debug != 0) {
3636 printf("%s: %s\n", __func__, if_name(ifp));
3637 }
3638
3639 lck_mtx_lock(bpf_mlock);
3640
3641 /*
3642 * Build the list of devices attached to that interface
3643 * that we need to free while keeping the lock to maintain
3644 * the integrity of the interface list
3645 */
3646 bp_prev = NULL;
3647 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
3648 bp_next = bp->bif_next;
3649
3650 if (ifp != bp->bif_ifp) {
3651 bp_prev = bp;
3652 continue;
3653 }
3654 /* Unlink from the interface list */
3655 if (bp_prev) {
3656 bp_prev->bif_next = bp->bif_next;
3657 } else {
3658 bpf_iflist = bp->bif_next;
3659 }
3660
3661 /* Detach the devices attached to the interface */
3662 while ((d = bp->bif_dlist) != NULL) {
3663 /*
3664 * Take an extra reference to prevent the device
3665 * from being freed when bpf_detachd() releases
3666 * the reference for the interface list
3667 */
3668 bpf_acquire_d(d);
3669 bpf_detachd(d, 0);
3670 bpf_wakeup(d);
3671 bpf_release_d(d);
3672 }
3673 ifnet_release(ifp);
3674 }
3675
3676 lck_mtx_unlock(bpf_mlock);
3677 }
3678
3679 void
3680 bpf_init(__unused void *unused)
3681 {
3682 #ifdef __APPLE__
3683 int i;
3684 int maj;
3685
3686 if (bpf_devsw_installed == 0) {
3687 bpf_devsw_installed = 1;
3688 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
3689 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
3690 bpf_mlock_attr = lck_attr_alloc_init();
3691 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
3692 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3693 if (maj == -1) {
3694 if (bpf_mlock_attr) {
3695 lck_attr_free(bpf_mlock_attr);
3696 }
3697 if (bpf_mlock_grp) {
3698 lck_grp_free(bpf_mlock_grp);
3699 }
3700 if (bpf_mlock_grp_attr) {
3701 lck_grp_attr_free(bpf_mlock_grp_attr);
3702 }
3703
3704 bpf_mlock = NULL;
3705 bpf_mlock_attr = NULL;
3706 bpf_mlock_grp = NULL;
3707 bpf_mlock_grp_attr = NULL;
3708 bpf_devsw_installed = 0;
3709 printf("bpf_init: failed to allocate a major number\n");
3710 return;
3711 }
3712
3713 for (i = 0; i < NBPFILTER; i++) {
3714 bpf_make_dev_t(maj);
3715 }
3716 }
3717 #else
3718 cdevsw_add(&bpf_cdevsw);
3719 #endif
3720 }
3721
3722 #ifndef __APPLE__
3723 SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL)
3724 #endif
3725
3726 #if CONFIG_MACF_NET
3727 struct label *
3728 mac_bpfdesc_label_get(struct bpf_d *d)
3729 {
3730 return d->bd_label;
3731 }
3732
3733 void
3734 mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
3735 {
3736 d->bd_label = label;
3737 }
3738 #endif