apple/xnu.git (xnu-6153.41.3) - bsd/net/bpf.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130
131 #include <kern/locks.h>
132 #include <kern/thread_call.h>
133 #include <libkern/section_keywords.h>
134
135 #if CONFIG_MACF_NET
136 #include <security/mac_framework.h>
137 #endif /* MAC_NET */
138
139 #include <os/log.h>
140
141 extern int tvtohz(struct timeval *);
142
143 #define BPF_BUFSIZE 4096
144 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
145
146 #define PRINET 26 /* interruptible */
147
148 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
149 #define ESP_HDR_SIZE sizeof(struct newesp)
150
151 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
152
153 /*
154 * The default read buffer size is patchable.
155 */
156 static unsigned int bpf_bufsize = BPF_BUFSIZE;
157 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
158 &bpf_bufsize, 0, "");
159
160 static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
161 extern const int copysize_limit_panic;
162 #define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
163 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
164 SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
165 &bpf_maxbufsize, 0,
166 sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
167
168 static unsigned int bpf_maxdevices = 256;
169 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
170 &bpf_maxdevices, 0, "");
171 /*
172  * bpf_wantpktap controls the default visibility of DLT_PKTAP.
173  * On OS X it is off by default, so a process needs to use the BIOCSWANTPKTAP
174  * ioctl explicitly to be able to use DLT_PKTAP (see the sketch below).
175 */
176 #if CONFIG_EMBEDDED
177 static unsigned int bpf_wantpktap = 1;
178 #else
179 static unsigned int bpf_wantpktap = 0;
180 #endif
181 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
182 &bpf_wantpktap, 0, "");
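/*
 * Userspace sketch, for illustration only: opting in to DLT_PKTAP as
 * described above.  Assumes a pktap instance such as "pktap0" exists and
 * that "fd" is an open bpf descriptor; error handling is omitted.
 *
 *	u_int want = 1, dlt = DLT_PKTAP;
 *	struct ifreq ifr;
 *
 *	strlcpy(ifr.ifr_name, "pktap0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSWANTPKTAP, &want);	// typically set before BIOCSETIF
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCSDLT, &dlt);
 */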
183
184 static int bpf_debug = 0;
185 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
186 &bpf_debug, 0, "");
187
188 /*
189 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
190  * bpf_dtab holds pointers to the descriptors, indexed by minor device #
191 */
192 static struct bpf_if *bpf_iflist;
193 #ifdef __APPLE__
194 /*
195 * BSD now stores the bpf_d in the dev_t which is a struct
196 * on their system. Our dev_t is an int, so we still store
197 * the bpf_d in a separate table indexed by minor device #.
198 *
199  * The value stored in bpf_dtab[n] represents one of three states:
200 * NULL: device not opened
201 * BPF_DEV_RESERVED: device opening or closing
202 * other: device <n> opened with pointer to storage
203 */
204 #define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
205 static struct bpf_d **bpf_dtab = NULL;
206 static unsigned int bpf_dtab_size = 0;
207 static unsigned int nbpfilter = 0;
208
209 decl_lck_mtx_data(static, bpf_mlock_data);
210 static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
211 static lck_grp_t *bpf_mlock_grp;
212 static lck_grp_attr_t *bpf_mlock_grp_attr;
213 static lck_attr_t *bpf_mlock_attr;
214
215 #endif /* __APPLE__ */
216
217 static int bpf_allocbufs(struct bpf_d *);
218 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
219 static int bpf_detachd(struct bpf_d *d, int);
220 static void bpf_freed(struct bpf_d *);
221 static int bpf_movein(struct uio *, int,
222 struct mbuf **, struct sockaddr *, int *);
223 static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
224 static void bpf_timed_out(void *, void *);
225 static void bpf_wakeup(struct bpf_d *);
226 static u_int get_pkt_trunc_len(u_char *, u_int);
227 static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
228 static void reset_d(struct bpf_d *);
229 static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
230 static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
231 static int bpf_setdlt(struct bpf_d *, u_int);
232 static int bpf_set_traffic_class(struct bpf_d *, int);
233 static void bpf_set_packet_service_class(struct mbuf *, int);
234
235 static void bpf_acquire_d(struct bpf_d *);
236 static void bpf_release_d(struct bpf_d *);
237
238 static int bpf_devsw_installed;
239
240 void bpf_init(void *unused);
241 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
242
243 /*
244 * Darwin differs from BSD here, the following are static
245 * on BSD and not static on Darwin.
246 */
247 d_open_t bpfopen;
248 d_close_t bpfclose;
249 d_read_t bpfread;
250 d_write_t bpfwrite;
251 ioctl_fcn_t bpfioctl;
252 select_fcn_t bpfselect;
253
254 /* Darwin's cdevsw struct differs slightly from BSDs */
255 #define CDEV_MAJOR 23
256 static struct cdevsw bpf_cdevsw = {
257 .d_open = bpfopen,
258 .d_close = bpfclose,
259 .d_read = bpfread,
260 .d_write = bpfwrite,
261 .d_ioctl = bpfioctl,
262 .d_stop = eno_stop,
263 .d_reset = eno_reset,
264 .d_ttys = NULL,
265 .d_select = bpfselect,
266 .d_mmap = eno_mmap,
267 .d_strategy = eno_strat,
268 .d_reserved_1 = eno_getc,
269 .d_reserved_2 = eno_putc,
270 .d_type = 0
271 };
272
273 #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
274
275 static int
276 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
277 struct sockaddr *sockp, int *datlen)
278 {
279 struct mbuf *m;
280 int error;
281 int len;
282 uint8_t sa_family;
283 int hlen;
284
285 switch (linktype) {
286 #if SLIP
287 case DLT_SLIP:
288 sa_family = AF_INET;
289 hlen = 0;
290 break;
291 #endif /* SLIP */
292
293 case DLT_EN10MB:
294 sa_family = AF_UNSPEC;
295 /* XXX Would MAXLINKHDR be better? */
296 hlen = sizeof(struct ether_header);
297 break;
298
299 #if FDDI
300 case DLT_FDDI:
301 #if defined(__FreeBSD__) || defined(__bsdi__)
302 sa_family = AF_IMPLINK;
303 hlen = 0;
304 #else
305 sa_family = AF_UNSPEC;
306 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
307 hlen = 24;
308 #endif
309 break;
310 #endif /* FDDI */
311
312 case DLT_RAW:
313 case DLT_NULL:
314 sa_family = AF_UNSPEC;
315 hlen = 0;
316 break;
317
318 #ifdef __FreeBSD__
319 case DLT_ATM_RFC1483:
320 /*
321 * en atm driver requires 4-byte atm pseudo header.
322 * though it isn't standard, vpi:vci needs to be
323 * specified anyway.
324 */
325 sa_family = AF_UNSPEC;
326 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
327 break;
328 #endif
329
330 case DLT_PPP:
331 sa_family = AF_UNSPEC;
332 hlen = 4; /* This should match PPP_HDRLEN */
333 break;
334
335 case DLT_APPLE_IP_OVER_IEEE1394:
336 sa_family = AF_UNSPEC;
337 hlen = sizeof(struct firewire_header);
338 break;
339
340 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
341 sa_family = AF_IEEE80211;
342 hlen = 0;
343 break;
344
345 case DLT_IEEE802_11_RADIO:
346 sa_family = AF_IEEE80211;
347 hlen = 0;
348 break;
349
350 default:
351 return EIO;
352 }
353
354 // LP64todo - fix this!
355 len = uio_resid(uio);
356 *datlen = len - hlen;
357 if ((unsigned)len > MCLBYTES) {
358 return EIO;
359 }
360
361 if (sockp) {
362 /*
363 * Build a sockaddr based on the data link layer type.
364 * We do this at this level because the ethernet header
365 * is copied directly into the data field of the sockaddr.
366 * In the case of SLIP, there is no header and the packet
367 * is forwarded as is.
368 * Also, we are careful to leave room at the front of the mbuf
369 * for the link level header.
370 */
371 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
372 return EIO;
373 }
374 sockp->sa_family = sa_family;
375 } else {
376 /*
377 * We're directly sending the packet data supplied by
378 * the user; we don't need to make room for the link
379 * header, and don't need the header length value any
380 * more, so set it to 0.
381 */
382 hlen = 0;
383 }
384
385 MGETHDR(m, M_WAIT, MT_DATA);
386 if (m == 0) {
387 return ENOBUFS;
388 }
389 if ((unsigned)len > MHLEN) {
390 MCLGET(m, M_WAIT);
391 if ((m->m_flags & M_EXT) == 0) {
392 error = ENOBUFS;
393 goto bad;
394 }
395 }
396 m->m_pkthdr.len = m->m_len = len;
397 m->m_pkthdr.rcvif = NULL;
398 *mp = m;
399
400 /*
401 * Make room for link header.
402 */
403 if (hlen != 0) {
404 m->m_pkthdr.len -= hlen;
405 m->m_len -= hlen;
406 m->m_data += hlen; /* XXX */
407 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
408 if (error) {
409 goto bad;
410 }
411 }
412 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
413 if (error) {
414 goto bad;
415 }
416
417 /* Check for multicast destination */
418 switch (linktype) {
419 case DLT_EN10MB: {
420 struct ether_header *eh;
421
422 eh = mtod(m, struct ether_header *);
423 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
424 if (_ether_cmp(etherbroadcastaddr,
425 eh->ether_dhost) == 0) {
426 m->m_flags |= M_BCAST;
427 } else {
428 m->m_flags |= M_MCAST;
429 }
430 }
431 break;
432 }
433 }
434
435 return 0;
436 bad:
437 m_freem(m);
438 return error;
439 }
440
441 #ifdef __APPLE__
442
443 /*
444 * The dynamic addition of a new device node must block all processes that
445 * are opening the last device so that no process will get an unexpected
446 * ENOENT
447 */
448 static void
449 bpf_make_dev_t(int maj)
450 {
451 static int bpf_growing = 0;
452 unsigned int cur_size = nbpfilter, i;
453
454 if (nbpfilter >= bpf_maxdevices) {
455 return;
456 }
457
458 while (bpf_growing) {
459 /* Wait until new device has been created */
460 (void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
461 }
462 if (nbpfilter > cur_size) {
463 /* other thread grew it already */
464 return;
465 }
466 bpf_growing = 1;
467
468 /* need to grow bpf_dtab first */
469 if (nbpfilter == bpf_dtab_size) {
470 int new_dtab_size;
471 struct bpf_d **new_dtab = NULL;
472 struct bpf_d **old_dtab = NULL;
473
474 new_dtab_size = bpf_dtab_size + NBPFILTER;
475 new_dtab = (struct bpf_d **)_MALLOC(
476 sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
477 if (new_dtab == 0) {
478 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
479 goto done;
480 }
481 if (bpf_dtab) {
482 bcopy(bpf_dtab, new_dtab,
483 sizeof(struct bpf_d *) * bpf_dtab_size);
484 }
485 bzero(new_dtab + bpf_dtab_size,
486 sizeof(struct bpf_d *) * NBPFILTER);
487 old_dtab = bpf_dtab;
488 bpf_dtab = new_dtab;
489 bpf_dtab_size = new_dtab_size;
490 if (old_dtab != NULL) {
491 _FREE(old_dtab, M_DEVBUF);
492 }
493 }
494 i = nbpfilter++;
495 (void) devfs_make_node(makedev(maj, i),
496 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
497 "bpf%d", i);
498 done:
499 bpf_growing = 0;
500 wakeup((caddr_t)&bpf_growing);
501 }
502
503 #endif
504
505 /*
506 * Attach file to the bpf interface, i.e. make d listen on bp.
507 */
508 static errno_t
509 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
510 {
511 int first = bp->bif_dlist == NULL;
512 int error = 0;
513
514 /*
515 * Point d at bp, and add d to the interface's list of listeners.
516 * Finally, point the driver's bpf cookie at the interface so
517 * it will divert packets to bpf.
518 */
519 d->bd_bif = bp;
520 d->bd_next = bp->bif_dlist;
521 bp->bif_dlist = d;
522
523 /*
524 * Take a reference on the device even if an error is returned
525 * because we keep the device in the interface's list of listeners
526 */
527 bpf_acquire_d(d);
528
529 if (first) {
530 /* Find the default bpf entry for this ifp */
531 if (bp->bif_ifp->if_bpf == NULL) {
532 struct bpf_if *tmp, *primary = NULL;
533
534 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
535 if (tmp->bif_ifp == bp->bif_ifp) {
536 primary = tmp;
537 break;
538 }
539 }
540 bp->bif_ifp->if_bpf = primary;
541 }
542 /* Only call dlil_set_bpf_tap for primary dlt */
543 if (bp->bif_ifp->if_bpf == bp) {
544 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
545 bpf_tap_callback);
546 }
547
548 if (bp->bif_tap != NULL) {
549 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
550 BPF_TAP_INPUT_OUTPUT);
551 }
552 }
553
554 /*
555 * Reset the detach flags in case we previously detached an interface
556 */
557 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
558
559 if (bp->bif_dlt == DLT_PKTAP) {
560 d->bd_flags |= BPF_FINALIZE_PKTAP;
561 } else {
562 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
563 }
564 return error;
565 }
566
567 /*
568 * Detach a file from its interface.
569 *
570  * Return 1 if it was closed by some thread, 0 otherwise
571 */
572 static int
573 bpf_detachd(struct bpf_d *d, int closing)
574 {
575 struct bpf_d **p;
576 struct bpf_if *bp;
577 struct ifnet *ifp;
578
579 int bpf_closed = d->bd_flags & BPF_CLOSING;
580 /*
581 * Some other thread already detached
582 */
583 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
584 goto done;
585 }
586 /*
587 * This thread is doing the detach
588 */
589 d->bd_flags |= BPF_DETACHING;
590
591 ifp = d->bd_bif->bif_ifp;
592 bp = d->bd_bif;
593
594 if (bpf_debug != 0) {
595 printf("%s: %llx %s%s\n",
596 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
597 if_name(ifp), closing ? " closing" : "");
598 }
599
600 /* Remove d from the interface's descriptor list. */
601 p = &bp->bif_dlist;
602 while (*p != d) {
603 p = &(*p)->bd_next;
604 if (*p == 0) {
605 panic("bpf_detachd: descriptor not in list");
606 }
607 }
608 *p = (*p)->bd_next;
609 if (bp->bif_dlist == 0) {
610 /*
611 * Let the driver know that there are no more listeners.
612 */
613 /* Only call dlil_set_bpf_tap for primary dlt */
614 if (bp->bif_ifp->if_bpf == bp) {
615 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
616 }
617 if (bp->bif_tap) {
618 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
619 }
620
621 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
622 if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
623 break;
624 }
625 }
626 if (bp == NULL) {
627 ifp->if_bpf = NULL;
628 }
629 }
630 d->bd_bif = NULL;
631 /*
632 * Check if this descriptor had requested promiscuous mode.
633 * If so, turn it off.
634 */
635 if (d->bd_promisc) {
636 d->bd_promisc = 0;
637 lck_mtx_unlock(bpf_mlock);
638 if (ifnet_set_promiscuous(ifp, 0)) {
639 /*
640 * Something is really wrong if we were able to put
641 * the driver into promiscuous mode, but can't
642 * take it out.
643 * Most likely the network interface is gone.
644 */
645 printf("%s: ifnet_set_promiscuous failed\n", __func__);
646 }
647 lck_mtx_lock(bpf_mlock);
648 }
649
650 /*
651  * Wake up other threads that are waiting for this thread to finish
652 * detaching
653 */
654 d->bd_flags &= ~BPF_DETACHING;
655 d->bd_flags |= BPF_DETACHED;
656
657 /* Refresh the local variable as d could have been modified */
658 bpf_closed = d->bd_flags & BPF_CLOSING;
659 /*
660  * Note that we've kept the reference because we may have dropped
661 * the lock when turning off promiscuous mode
662 */
663 bpf_release_d(d);
664
665 done:
666 /*
667  * When closing, make sure no other thread refers to the bpf_d
668 */
669 if (bpf_debug != 0) {
670 printf("%s: %llx done\n",
671 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
672 }
673 /*
674 * Let the caller know the bpf_d is closed
675 */
676 if (bpf_closed) {
677 return 1;
678 } else {
679 return 0;
680 }
681 }
682
683 /*
684 * Start asynchronous timer, if necessary.
685 * Must be called with bpf_mlock held.
686 */
687 static void
688 bpf_start_timer(struct bpf_d *d)
689 {
690 uint64_t deadline;
691 struct timeval tv;
692
693 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
694 tv.tv_sec = d->bd_rtout / hz;
695 tv.tv_usec = (d->bd_rtout % hz) * tick;
696
697 clock_interval_to_deadline(
698 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
699 NSEC_PER_USEC, &deadline);
700 /*
701 * The state is BPF_IDLE, so the timer hasn't
702 * been started yet, and hasn't gone off yet;
703 * there is no thread call scheduled, so this
704 * won't change the schedule.
705 *
706 * XXX - what if, by the time it gets entered,
707 * the deadline has already passed?
708 */
709 thread_call_enter_delayed(d->bd_thread_call, deadline);
710 d->bd_state = BPF_WAITING;
711 }
712 }
713
714 /*
715 * Cancel asynchronous timer.
716 * Must be called with bpf_mlock held.
717 */
718 static boolean_t
719 bpf_stop_timer(struct bpf_d *d)
720 {
721 /*
722 * If the timer has already gone off, this does nothing.
723 * Our caller is expected to set d->bd_state to BPF_IDLE,
724 * with the bpf_mlock, after we are called. bpf_timed_out()
725 * also grabs bpf_mlock, so, if the timer has gone off and
726 * bpf_timed_out() hasn't finished, it's waiting for the
727 * lock; when this thread releases the lock, it will
728 * find the state is BPF_IDLE, and just release the
729 * lock and return.
730 */
731 return thread_call_cancel(d->bd_thread_call);
732 }
733
734 void
735 bpf_acquire_d(struct bpf_d *d)
736 {
737 void *lr_saved = __builtin_return_address(0);
738
739 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
740
741 d->bd_refcnt += 1;
742
743 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
744 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
745 }
746
747 void
748 bpf_release_d(struct bpf_d *d)
749 {
750 void *lr_saved = __builtin_return_address(0);
751
752 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
753
754 if (d->bd_refcnt <= 0) {
755 panic("%s: %p refcnt <= 0", __func__, d);
756 }
757
758 d->bd_refcnt -= 1;
759
760 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
761 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
762
763 if (d->bd_refcnt == 0) {
764 /* Assert the device is detached */
765 if ((d->bd_flags & BPF_DETACHED) == 0) {
766 panic("%s: %p BPF_DETACHED not set", __func__, d);
767 }
768
769 _FREE(d, M_DEVBUF);
770 }
771 }
772
773 /*
774  * Open the bpf device. Returns ENXIO for an illegal minor device number,
775 * EBUSY if file is open by another process.
776 */
777 /* ARGSUSED */
778 int
779 bpfopen(dev_t dev, int flags, __unused int fmt,
780 struct proc *p)
781 {
782 struct bpf_d *d;
783
784 lck_mtx_lock(bpf_mlock);
785 if ((unsigned int) minor(dev) >= nbpfilter) {
786 lck_mtx_unlock(bpf_mlock);
787 return ENXIO;
788 }
789 /*
790 * New device nodes are created on demand when opening the last one.
791 * The programming model is for processes to loop on the minor starting
792 * at 0 as long as EBUSY is returned. The loop stops when either the
793  * open succeeds or an error other than EBUSY is returned. That means
794 * that bpf_make_dev_t() must block all processes that are opening the
795 * last node. If not all processes are blocked, they could unexpectedly
796  * get ENOENT and abort their opening loop; a usage sketch follows this function.
797 */
798 if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
799 bpf_make_dev_t(major(dev));
800 }
801
802 /*
803 * Each minor can be opened by only one process. If the requested
804 * minor is in use, return EBUSY.
805 *
806 * Important: bpfopen() and bpfclose() have to check and set the status
807  * of a device in the same locking context, otherwise the device may be
808  * leaked because the vnode use count will be unexpectedly greater than 1
809 * when close() is called.
810 */
811 if (bpf_dtab[minor(dev)] == NULL) {
812 /* Reserve while opening */
813 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
814 } else {
815 lck_mtx_unlock(bpf_mlock);
816 return EBUSY;
817 }
818 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
819 M_WAIT | M_ZERO);
820 if (d == NULL) {
821 /* this really is a catastrophic failure */
822 printf("bpfopen: malloc bpf_d failed\n");
823 bpf_dtab[minor(dev)] = NULL;
824 lck_mtx_unlock(bpf_mlock);
825 return ENOMEM;
826 }
827
828 /* Mark "in use" and do most initialization. */
829 bpf_acquire_d(d);
830 d->bd_bufsize = bpf_bufsize;
831 d->bd_sig = SIGIO;
832 d->bd_seesent = 1;
833 d->bd_oflags = flags;
834 d->bd_state = BPF_IDLE;
835 d->bd_traffic_class = SO_TC_BE;
836 d->bd_flags |= BPF_DETACHED;
837 if (bpf_wantpktap) {
838 d->bd_flags |= BPF_WANT_PKTAP;
839 } else {
840 d->bd_flags &= ~BPF_WANT_PKTAP;
841 }
842 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
843 if (d->bd_thread_call == NULL) {
844 printf("bpfopen: malloc thread call failed\n");
845 bpf_dtab[minor(dev)] = NULL;
846 bpf_release_d(d);
847 lck_mtx_unlock(bpf_mlock);
848
849 return ENOMEM;
850 }
851 d->bd_opened_by = p;
852 uuid_generate(d->bd_uuid);
853
854 #if CONFIG_MACF_NET
855 mac_bpfdesc_label_init(d);
856 mac_bpfdesc_label_associate(kauth_cred_get(), d);
857 #endif
858 bpf_dtab[minor(dev)] = d; /* Mark opened */
859 lck_mtx_unlock(bpf_mlock);
860
861 return 0;
862 }
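/*
 * Userspace sketch, for illustration only, of the open loop described in
 * bpfopen() above: iterate over the minors until open(2) succeeds or fails
 * with an error other than EBUSY.  The device count bound is hypothetical.
 *
 *	static int
 *	bpf_open_any(void)
 *	{
 *		char name[32];
 *		int i, fd;
 *
 *		for (i = 0; i < 256; i++) {
 *			snprintf(name, sizeof(name), "/dev/bpf%d", i);
 *			fd = open(name, O_RDWR);
 *			if (fd >= 0 || errno != EBUSY)
 *				return fd;	// success, or an error other than EBUSY
 *		}
 *		return -1;
 *	}
 */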
863
864 /*
865 * Close the descriptor by detaching it from its interface,
866 * deallocating its buffers, and marking it free.
867 */
868 /* ARGSUSED */
869 int
870 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
871 __unused struct proc *p)
872 {
873 struct bpf_d *d;
874
875 /* Take BPF lock to ensure no other thread is using the device */
876 lck_mtx_lock(bpf_mlock);
877
878 d = bpf_dtab[minor(dev)];
879 if (d == NULL || d == BPF_DEV_RESERVED) {
880 lck_mtx_unlock(bpf_mlock);
881 return ENXIO;
882 }
883
884 /*
885  * Other threads may call bpf_detachd() if we drop the bpf_mlock
886 */
887 d->bd_flags |= BPF_CLOSING;
888
889 if (bpf_debug != 0) {
890 printf("%s: %llx\n",
891 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
892 }
893
894 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
895
896 /*
897 * Deal with any in-progress timeouts.
898 */
899 switch (d->bd_state) {
900 case BPF_IDLE:
901 /*
902 * Not waiting for a timeout, and no timeout happened.
903 */
904 break;
905
906 case BPF_WAITING:
907 /*
908 * Waiting for a timeout.
909 * Cancel any timer that has yet to go off,
910 * and mark the state as "closing".
911 * Then drop the lock to allow any timers that
912 * *have* gone off to run to completion, and wait
913 * for them to finish.
914 */
915 if (!bpf_stop_timer(d)) {
916 /*
917 * There was no pending call, so the call must
918 * have been in progress. Wait for the call to
919 * complete; we have to drop the lock while
920  * waiting, to let the in-progress call complete.
921 */
922 d->bd_state = BPF_DRAINING;
923 while (d->bd_state == BPF_DRAINING) {
924 msleep((caddr_t)d, bpf_mlock, PRINET,
925 "bpfdraining", NULL);
926 }
927 }
928 d->bd_state = BPF_IDLE;
929 break;
930
931 case BPF_TIMED_OUT:
932 /*
933 * Timer went off, and the timeout routine finished.
934 */
935 d->bd_state = BPF_IDLE;
936 break;
937
938 case BPF_DRAINING:
939 /*
940 * Another thread is blocked on a close waiting for
941 * a timeout to finish.
942 * This "shouldn't happen", as the first thread to enter
943  * bpfclose() will set bpf_dtab[minor(dev)] to BPF_DEV_RESERVED, and
944 * all subsequent threads should see that and fail with
945 * ENXIO.
946 */
947 panic("Two threads blocked in a BPF close");
948 break;
949 }
950
951 if (d->bd_bif) {
952 bpf_detachd(d, 1);
953 }
954 selthreadclear(&d->bd_sel);
955 #if CONFIG_MACF_NET
956 mac_bpfdesc_label_destroy(d);
957 #endif
958 thread_call_free(d->bd_thread_call);
959
960 while (d->bd_hbuf_read != 0) {
961 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
962 }
963
964 bpf_freed(d);
965
966 /* Mark free in same context as bpfopen comes to check */
967 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
968
969 bpf_release_d(d);
970
971 lck_mtx_unlock(bpf_mlock);
972
973 return 0;
974 }
975
976 #define BPF_SLEEP bpf_sleep
977
978 static int
979 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
980 {
981 u_int64_t abstime = 0;
982
983 if (timo != 0) {
984 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
985 }
986
987 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
988 }
989
990 static void
991 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
992 {
993 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
994 struct pktap_v2_hdr *pktap_v2_hdr;
995
996 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
997
998 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
999 pktap_v2_finalize_proc_info(pktap_v2_hdr);
1000 }
1001 } else {
1002 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1003 pktap_finalize_proc_info(pktaphdr);
1004 }
1005
1006 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1007 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1008 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1009 }
1010 }
1011 }
1012
1013 /*
1014 * Rotate the packet buffers in descriptor d. Move the store buffer
1015 * into the hold slot, and the free buffer into the store slot.
1016 * Zero the length of the new store buffer.
1017 */
1018 #define ROTATE_BUFFERS(d) \
1019 if (d->bd_hbuf_read != 0) \
1020 panic("rotating bpf buffers during read"); \
1021 (d)->bd_hbuf = (d)->bd_sbuf; \
1022 (d)->bd_hlen = (d)->bd_slen; \
1023 (d)->bd_hcnt = (d)->bd_scnt; \
1024 (d)->bd_sbuf = (d)->bd_fbuf; \
1025 (d)->bd_slen = 0; \
1026 (d)->bd_scnt = 0; \
1027 (d)->bd_fbuf = NULL;
1028 /*
1029 * bpfread - read next chunk of packets from buffers
1030 */
1031 int
1032 bpfread(dev_t dev, struct uio *uio, int ioflag)
1033 {
1034 struct bpf_d *d;
1035 caddr_t hbuf;
1036 int timed_out, hbuf_len;
1037 int error;
1038 int flags;
1039
1040 lck_mtx_lock(bpf_mlock);
1041
1042 d = bpf_dtab[minor(dev)];
1043 if (d == NULL || d == BPF_DEV_RESERVED ||
1044 (d->bd_flags & BPF_CLOSING) != 0) {
1045 lck_mtx_unlock(bpf_mlock);
1046 return ENXIO;
1047 }
1048
1049 bpf_acquire_d(d);
1050
1051 /*
1052 * Restrict application to use a buffer the same size as
1053  * the kernel buffers.
1054 */
1055 if (uio_resid(uio) != d->bd_bufsize) {
1056 bpf_release_d(d);
1057 lck_mtx_unlock(bpf_mlock);
1058 return EINVAL;
1059 }
1060
1061 if (d->bd_state == BPF_WAITING) {
1062 bpf_stop_timer(d);
1063 }
1064
1065 timed_out = (d->bd_state == BPF_TIMED_OUT);
1066 d->bd_state = BPF_IDLE;
1067
1068 while (d->bd_hbuf_read != 0) {
1069 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1070 }
1071
1072 if ((d->bd_flags & BPF_CLOSING) != 0) {
1073 bpf_release_d(d);
1074 lck_mtx_unlock(bpf_mlock);
1075 return ENXIO;
1076 }
1077 /*
1078 * If the hold buffer is empty, then do a timed sleep, which
1079 * ends when the timeout expires or when enough packets
1080 * have arrived to fill the store buffer.
1081 */
1082 while (d->bd_hbuf == 0) {
1083 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1084 d->bd_slen != 0) {
1085 /*
1086 * We're in immediate mode, or are reading
1087 * in non-blocking mode, or a timer was
1088 * started before the read (e.g., by select()
1089 * or poll()) and has expired and a packet(s)
1090 * either arrived since the previous
1091 * read or arrived while we were asleep.
1092 * Rotate the buffers and return what's here.
1093 */
1094 ROTATE_BUFFERS(d);
1095 break;
1096 }
1097
1098 /*
1099 * No data is available, check to see if the bpf device
1100 * is still pointed at a real interface. If not, return
1101 * ENXIO so that the userland process knows to rebind
1102 * it before using it again.
1103 */
1104 if (d->bd_bif == NULL) {
1105 bpf_release_d(d);
1106 lck_mtx_unlock(bpf_mlock);
1107 return ENXIO;
1108 }
1109 if (ioflag & IO_NDELAY) {
1110 bpf_release_d(d);
1111 lck_mtx_unlock(bpf_mlock);
1112 return EWOULDBLOCK;
1113 }
1114 error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1115 /*
1116 * Make sure device is still opened
1117 */
1118 if ((d->bd_flags & BPF_CLOSING) != 0) {
1119 bpf_release_d(d);
1120 lck_mtx_unlock(bpf_mlock);
1121 return ENXIO;
1122 }
1123
1124 while (d->bd_hbuf_read != 0) {
1125 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1126 NULL);
1127 }
1128
1129 if ((d->bd_flags & BPF_CLOSING) != 0) {
1130 bpf_release_d(d);
1131 lck_mtx_unlock(bpf_mlock);
1132 return ENXIO;
1133 }
1134
1135 if (error == EINTR || error == ERESTART) {
1136 if (d->bd_hbuf != NULL) {
1137 /*
1138 * Because we msleep, the hold buffer might
1139 * be filled when we wake up. Avoid rotating
1140 * in this case.
1141 */
1142 break;
1143 }
1144 if (d->bd_slen != 0) {
1145 /*
1146 * Sometimes we may be interrupted often and
1147 * the sleep above will not timeout.
1148 * Regardless, we should rotate the buffers
1149 * if there's any new data pending and
1150 * return it.
1151 */
1152 ROTATE_BUFFERS(d);
1153 break;
1154 }
1155 bpf_release_d(d);
1156 lck_mtx_unlock(bpf_mlock);
1157 if (error == ERESTART) {
1158 printf("%s: %llx ERESTART to EINTR\n",
1159 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1160 error = EINTR;
1161 }
1162 return error;
1163 }
1164 if (error == EWOULDBLOCK) {
1165 /*
1166 * On a timeout, return what's in the buffer,
1167 * which may be nothing. If there is something
1168 * in the store buffer, we can rotate the buffers.
1169 */
1170 if (d->bd_hbuf) {
1171 /*
1172 * We filled up the buffer in between
1173 * getting the timeout and arriving
1174 * here, so we don't need to rotate.
1175 */
1176 break;
1177 }
1178
1179 if (d->bd_slen == 0) {
1180 bpf_release_d(d);
1181 lck_mtx_unlock(bpf_mlock);
1182 return 0;
1183 }
1184 ROTATE_BUFFERS(d);
1185 break;
1186 }
1187 }
1188 /*
1189 * At this point, we know we have something in the hold slot.
1190 */
1191
1192 /*
1193  * Set the hold buffer read flag, so we do not
1194  * rotate the buffers until the hold buffer
1195  * read is complete, and to avoid issues resulting
1196  * from page faults during disk sleep (<rdar://problem/13436396>).
1197 */
1198 d->bd_hbuf_read = 1;
1199 hbuf = d->bd_hbuf;
1200 hbuf_len = d->bd_hlen;
1201 flags = d->bd_flags;
1202 lck_mtx_unlock(bpf_mlock);
1203
1204 #ifdef __APPLE__
1205 /*
1206 * Before we move data to userland, we fill out the extended
1207 * header fields.
1208 */
1209 if (flags & BPF_EXTENDED_HDR) {
1210 char *p;
1211
1212 p = hbuf;
1213 while (p < hbuf + hbuf_len) {
1214 struct bpf_hdr_ext *ehp;
1215 uint32_t flowid;
1216 struct so_procinfo soprocinfo;
1217 int found = 0;
1218
1219 ehp = (struct bpf_hdr_ext *)(void *)p;
1220 if ((flowid = ehp->bh_flowid) != 0) {
1221 if (ehp->bh_proto == IPPROTO_TCP) {
1222 found = inp_findinpcb_procinfo(&tcbinfo,
1223 flowid, &soprocinfo);
1224 } else if (ehp->bh_proto == IPPROTO_UDP) {
1225 found = inp_findinpcb_procinfo(&udbinfo,
1226 flowid, &soprocinfo);
1227 }
1228 if (found == 1) {
1229 ehp->bh_pid = soprocinfo.spi_pid;
1230 strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1231 }
1232 ehp->bh_flowid = 0;
1233 }
1234
1235 if (flags & BPF_FINALIZE_PKTAP) {
1236 struct pktap_header *pktaphdr;
1237
1238 pktaphdr = (struct pktap_header *)(void *)
1239 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1240
1241 bpf_finalize_pktap((struct bpf_hdr *) ehp,
1242 pktaphdr);
1243 }
1244 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1245 }
1246 } else if (flags & BPF_FINALIZE_PKTAP) {
1247 char *p;
1248
1249 p = hbuf;
1250 while (p < hbuf + hbuf_len) {
1251 struct bpf_hdr *hp;
1252 struct pktap_header *pktaphdr;
1253
1254 hp = (struct bpf_hdr *)(void *)p;
1255 pktaphdr = (struct pktap_header *)(void *)
1256 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1257
1258 bpf_finalize_pktap(hp, pktaphdr);
1259
1260 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1261 }
1262 }
1263 #endif
1264
1265 /*
1266 * Move data from hold buffer into user space.
1267 * We know the entire buffer is transferred since
1268  * we checked above that the read buffer is d->bd_bufsize bytes.
1269 */
1270 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1271
1272 lck_mtx_lock(bpf_mlock);
1273 /*
1274 * Make sure device is still opened
1275 */
1276 if ((d->bd_flags & BPF_CLOSING) != 0) {
1277 bpf_release_d(d);
1278 lck_mtx_unlock(bpf_mlock);
1279 return ENXIO;
1280 }
1281
1282 d->bd_hbuf_read = 0;
1283 d->bd_fbuf = d->bd_hbuf;
1284 d->bd_hbuf = NULL;
1285 d->bd_hlen = 0;
1286 d->bd_hcnt = 0;
1287 wakeup((caddr_t)d);
1288
1289 bpf_release_d(d);
1290 lck_mtx_unlock(bpf_mlock);
1291 return error;
1292 }
1293
1294 /*
1295 * If there are processes sleeping on this descriptor, wake them up.
1296 */
1297 static void
1298 bpf_wakeup(struct bpf_d *d)
1299 {
1300 if (d->bd_state == BPF_WAITING) {
1301 bpf_stop_timer(d);
1302 d->bd_state = BPF_IDLE;
1303 }
1304 wakeup((caddr_t)d);
1305 if (d->bd_async && d->bd_sig && d->bd_sigio) {
1306 pgsigio(d->bd_sigio, d->bd_sig);
1307 }
1308
1309 selwakeup(&d->bd_sel);
1310 if ((d->bd_flags & BPF_KNOTE)) {
1311 KNOTE(&d->bd_sel.si_note, 1);
1312 }
1313 }
1314
1315 static void
1316 bpf_timed_out(void *arg, __unused void *dummy)
1317 {
1318 struct bpf_d *d = (struct bpf_d *)arg;
1319
1320 lck_mtx_lock(bpf_mlock);
1321 if (d->bd_state == BPF_WAITING) {
1322 /*
1323 * There's a select or kqueue waiting for this; if there's
1324 * now stuff to read, wake it up.
1325 */
1326 d->bd_state = BPF_TIMED_OUT;
1327 if (d->bd_slen != 0) {
1328 bpf_wakeup(d);
1329 }
1330 } else if (d->bd_state == BPF_DRAINING) {
1331 /*
1332 * A close is waiting for this to finish.
1333 * Mark it as finished, and wake the close up.
1334 */
1335 d->bd_state = BPF_IDLE;
1336 bpf_wakeup(d);
1337 }
1338 lck_mtx_unlock(bpf_mlock);
1339 }
1340
1341 /* keep in sync with bpf_movein above: */
1342 #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1343
1344 int
1345 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1346 {
1347 struct bpf_d *d;
1348 struct ifnet *ifp;
1349 struct mbuf *m = NULL;
1350 int error;
1351 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1352 int datlen = 0;
1353 int bif_dlt;
1354 int bd_hdrcmplt;
1355
1356 lck_mtx_lock(bpf_mlock);
1357
1358 d = bpf_dtab[minor(dev)];
1359 if (d == NULL || d == BPF_DEV_RESERVED ||
1360 (d->bd_flags & BPF_CLOSING) != 0) {
1361 lck_mtx_unlock(bpf_mlock);
1362 return ENXIO;
1363 }
1364
1365 bpf_acquire_d(d);
1366
1367 if (d->bd_bif == 0) {
1368 bpf_release_d(d);
1369 lck_mtx_unlock(bpf_mlock);
1370 return ENXIO;
1371 }
1372
1373 ifp = d->bd_bif->bif_ifp;
1374
1375 if ((ifp->if_flags & IFF_UP) == 0) {
1376 bpf_release_d(d);
1377 lck_mtx_unlock(bpf_mlock);
1378 return ENETDOWN;
1379 }
1380 if (uio_resid(uio) == 0) {
1381 bpf_release_d(d);
1382 lck_mtx_unlock(bpf_mlock);
1383 return 0;
1384 }
1385 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1386
1387 /*
1388 * fix for PR-6849527
1389  * getting variables onto stack before dropping lock for bpf_movein()
1390 */
1391 bif_dlt = (int)d->bd_bif->bif_dlt;
1392 bd_hdrcmplt = d->bd_hdrcmplt;
1393
1394 /* bpf_movein allocating mbufs; drop lock */
1395 lck_mtx_unlock(bpf_mlock);
1396
1397 error = bpf_movein(uio, bif_dlt, &m,
1398 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1399 &datlen);
1400
1401 /* take the lock again */
1402 lck_mtx_lock(bpf_mlock);
1403 if (error) {
1404 bpf_release_d(d);
1405 lck_mtx_unlock(bpf_mlock);
1406 return error;
1407 }
1408
1409 /* verify the device is still open */
1410 if ((d->bd_flags & BPF_CLOSING) != 0) {
1411 bpf_release_d(d);
1412 lck_mtx_unlock(bpf_mlock);
1413 m_freem(m);
1414 return ENXIO;
1415 }
1416
1417 if (d->bd_bif == NULL) {
1418 bpf_release_d(d);
1419 lck_mtx_unlock(bpf_mlock);
1420 m_free(m);
1421 return ENXIO;
1422 }
1423
1424 if ((unsigned)datlen > ifp->if_mtu) {
1425 bpf_release_d(d);
1426 lck_mtx_unlock(bpf_mlock);
1427 m_freem(m);
1428 return EMSGSIZE;
1429 }
1430
1431 #if CONFIG_MACF_NET
1432 mac_mbuf_label_associate_bpfdesc(d, m);
1433 #endif
1434
1435 bpf_set_packet_service_class(m, d->bd_traffic_class);
1436
1437 lck_mtx_unlock(bpf_mlock);
1438
1439 /*
1440 * The driver frees the mbuf.
1441 */
1442 if (d->bd_hdrcmplt) {
1443 if (d->bd_bif->bif_send) {
1444 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1445 } else {
1446 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1447 }
1448 } else {
1449 error = dlil_output(ifp, PF_INET, m, NULL,
1450 (struct sockaddr *)dst_buf, 0, NULL);
1451 }
1452
1453 lck_mtx_lock(bpf_mlock);
1454 bpf_release_d(d);
1455 lck_mtx_unlock(bpf_mlock);
1456
1457 return error;
1458 }
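/*
 * Userspace sketch, for illustration only, of injecting a frame through
 * bpfwrite() above.  With the "header complete" flag set, the caller
 * supplies the full link-layer header and the frame is sent unmodified;
 * "frame" and "frame_len" are assumed to be built by the caller.
 *
 *	u_int hdrcmplt = 1;
 *
 *	ioctl(fd, BIOCSHDRCMPLT, &hdrcmplt);
 *	ssize_t n = write(fd, frame, frame_len);
 */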
1459
1460 /*
1461 * Reset a descriptor by flushing its packet buffer and clearing the
1462 * receive and drop counts.
1463 */
1464 static void
1465 reset_d(struct bpf_d *d)
1466 {
1467 if (d->bd_hbuf_read != 0) {
1468 panic("resetting buffers during read");
1469 }
1470
1471 if (d->bd_hbuf) {
1472 /* Free the hold buffer. */
1473 d->bd_fbuf = d->bd_hbuf;
1474 d->bd_hbuf = NULL;
1475 }
1476 d->bd_slen = 0;
1477 d->bd_hlen = 0;
1478 d->bd_scnt = 0;
1479 d->bd_hcnt = 0;
1480 d->bd_rcount = 0;
1481 d->bd_dcount = 0;
1482 }
1483
1484 static struct bpf_d *
1485 bpf_get_device_from_uuid(uuid_t uuid)
1486 {
1487 unsigned int i;
1488
1489 for (i = 0; i < nbpfilter; i++) {
1490 struct bpf_d *d = bpf_dtab[i];
1491
1492 if (d == NULL || d == BPF_DEV_RESERVED ||
1493 (d->bd_flags & BPF_CLOSING) != 0) {
1494 continue;
1495 }
1496 if (uuid_compare(uuid, d->bd_uuid) == 0) {
1497 return d;
1498 }
1499 }
1500
1501 return NULL;
1502 }
1503
1504 /*
1505  * The BIOCSETUP command "atomically" attaches to the interface and
1506  * copies the buffers from another bpf descriptor. This minimizes the
1507  * risk of missing packets because it is done while holding the BPF
1508  * global lock; a usage sketch follows bpf_setup() below.
1509 */
1510 static int
1511 bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1512 {
1513 struct bpf_d *d_from;
1514 int error = 0;
1515
1516 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1517
1518 /*
1519 * Sanity checks
1520 */
1521 d_from = bpf_get_device_from_uuid(uuid_from);
1522 if (d_from == NULL) {
1523 error = ENOENT;
1524 os_log_info(OS_LOG_DEFAULT,
1525 "%s: uuids not found error %d",
1526 __func__, error);
1527 return error;
1528 }
1529 if (d_from->bd_opened_by != d_to->bd_opened_by) {
1530 error = EACCES;
1531 os_log_info(OS_LOG_DEFAULT,
1532 "%s: processes not matching error %d",
1533 __func__, error);
1534 return error;
1535 }
1536
1537 /*
1538 * Prevent any read while copying
1539 */
1540 while (d_to->bd_hbuf_read != 0) {
1541 msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
1542 }
1543 d_to->bd_hbuf_read = 1;
1544
1545 while (d_from->bd_hbuf_read != 0) {
1546 msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
1547 }
1548 d_from->bd_hbuf_read = 1;
1549
1550 /*
1551 * Verify the devices have not been closed
1552 */
1553 if (d_to->bd_flags & BPF_CLOSING) {
1554 error = ENXIO;
1555 os_log_info(OS_LOG_DEFAULT,
1556 "%s: d_to is closing error %d",
1557 __func__, error);
1558 goto done;
1559 }
1560 if (d_from->bd_flags & BPF_CLOSING) {
1561 error = ENXIO;
1562 os_log_info(OS_LOG_DEFAULT,
1563 "%s: d_from is closing error %d",
1564 __func__, error);
1565 goto done;
1566 }
1567
1568 /*
1569 * For now require the same buffer size
1570 */
1571 if (d_from->bd_bufsize != d_to->bd_bufsize) {
1572 error = EINVAL;
1573 os_log_info(OS_LOG_DEFAULT,
1574 "%s: bufsizes not matching error %d",
1575 __func__, error);
1576 goto done;
1577 }
1578
1579 /*
1580 * Attach to the interface
1581 */
1582 error = bpf_setif(d_to, ifp, false, true);
1583 if (error != 0) {
1584 os_log_info(OS_LOG_DEFAULT,
1585 "%s: bpf_setif() failed error %d",
1586 __func__, error);
1587 goto done;
1588 }
1589
1590 /*
1591 * Make sure the buffers are setup as expected by bpf_setif()
1592 */
1593 ASSERT(d_to->bd_hbuf == NULL);
1594 ASSERT(d_to->bd_sbuf != NULL);
1595 ASSERT(d_to->bd_fbuf != NULL);
1596
1597 /*
1598 * Copy the buffers and update the pointers and counts
1599 */
1600 memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
1601 d_to->bd_slen = d_from->bd_slen;
1602 d_to->bd_scnt = d_from->bd_scnt;
1603
1604 if (d_from->bd_hbuf != NULL) {
1605 d_to->bd_hbuf = d_to->bd_fbuf;
1606 d_to->bd_fbuf = NULL;
1607 memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
1608 }
1609 d_to->bd_hlen = d_from->bd_hlen;
1610 d_to->bd_hcnt = d_from->bd_hcnt;
1611
1612 if (bpf_debug > 0) {
1613 os_log_info(OS_LOG_DEFAULT,
1614 "%s: done slen %u scnt %u hlen %u hcnt %u",
1615 __func__, d_to->bd_slen, d_to->bd_scnt,
1616 d_to->bd_hlen, d_to->bd_hcnt);
1617 }
1618 done:
1619 d_from->bd_hbuf_read = 0;
1620 wakeup((caddr_t)d_from);
1621
1622 d_to->bd_hbuf_read = 0;
1623 wakeup((caddr_t)d_to);
1624
1625 return error;
1626 }
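/*
 * Userspace sketch, for illustration only, of the BIOCGETUUID/BIOCSETUP
 * pair handled above.  Both descriptors must be opened by the same process
 * and use the same buffer size; "en0" is an arbitrary example interface.
 *
 *	struct bpf_setup_args bsa;
 *
 *	ioctl(fd_old, BIOCGETUUID, &bsa.bsa_uuid);
 *	strlcpy(bsa.bsa_ifname, "en0", sizeof(bsa.bsa_ifname));
 *	ioctl(fd_new, BIOCSETUP, &bsa);	// attach fd_new and copy fd_old's buffers
 */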
1627
1628 /*
1629 * FIONREAD Check for read packet available.
1630 * SIOCGIFADDR Get interface address - convenient hook to driver.
1631 * BIOCGBLEN Get buffer len [for read()].
1632 * BIOCSETF Set ethernet read filter.
1633 * BIOCFLUSH Flush read packet buffer.
1634 * BIOCPROMISC Put interface into promiscuous mode.
1635 * BIOCGDLT Get link layer type.
1636 * BIOCGETIF Get interface name.
1637 * BIOCSETIF Set interface.
1638 * BIOCSRTIMEOUT Set read timeout.
1639 * BIOCGRTIMEOUT Get read timeout.
1640 * BIOCGSTATS Get packet stats.
1641 * BIOCIMMEDIATE Set immediate mode.
1642 * BIOCVERSION Get filter language version.
1643 * BIOCGHDRCMPLT Get "header already complete" flag
1644 * BIOCSHDRCMPLT Set "header already complete" flag
1645 * BIOCGSEESENT Get "see packets sent" flag
1646 * BIOCSSEESENT Set "see packets sent" flag
1647 * BIOCSETTC Set traffic class.
1648 * BIOCGETTC Get traffic class.
1649 * BIOCSEXTHDR Set "extended header" flag
1650 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1651 * BIOCGHEADDROP Get "head-drop" flag
1652 */
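/*
 * Userspace sketch, for illustration only, of a typical capture setup built
 * from the ioctls listed above, followed by the read loop.  Note that
 * bpfread() requires the read length to equal the buffer size reported by
 * BIOCGBLEN; "en0" is an arbitrary example interface.
 *
 *	struct ifreq ifr;
 *	u_int bufsize, immediate = 1;
 *
 *	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &immediate);
 *	ioctl(fd, BIOCGBLEN, &bufsize);
 *
 *	char *buf = malloc(bufsize);
 *	ssize_t n = read(fd, buf, bufsize);
 *	for (char *p = buf; p < buf + n; ) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *		// packet data is at p + bh->bh_hdrlen, bh->bh_caplen bytes long
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */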
1653 /* ARGSUSED */
1654 int
1655 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1656 struct proc *p)
1657 {
1658 struct bpf_d *d;
1659 int error = 0;
1660 u_int int_arg;
1661 struct ifreq ifr;
1662
1663 lck_mtx_lock(bpf_mlock);
1664
1665 d = bpf_dtab[minor(dev)];
1666 if (d == NULL || d == BPF_DEV_RESERVED ||
1667 (d->bd_flags & BPF_CLOSING) != 0) {
1668 lck_mtx_unlock(bpf_mlock);
1669 return ENXIO;
1670 }
1671
1672 bpf_acquire_d(d);
1673
1674 if (d->bd_state == BPF_WAITING) {
1675 bpf_stop_timer(d);
1676 }
1677 d->bd_state = BPF_IDLE;
1678
1679 switch (cmd) {
1680 default:
1681 error = EINVAL;
1682 break;
1683
1684 /*
1685 * Check for read packet available.
1686 */
1687 case FIONREAD: /* int */
1688 {
1689 int n;
1690
1691 n = d->bd_slen;
1692 if (d->bd_hbuf && d->bd_hbuf_read == 0) {
1693 n += d->bd_hlen;
1694 }
1695
1696 bcopy(&n, addr, sizeof(n));
1697 break;
1698 }
1699
1700 case SIOCGIFADDR: /* struct ifreq */
1701 {
1702 struct ifnet *ifp;
1703
1704 if (d->bd_bif == 0) {
1705 error = EINVAL;
1706 } else {
1707 ifp = d->bd_bif->bif_ifp;
1708 error = ifnet_ioctl(ifp, 0, cmd, addr);
1709 }
1710 break;
1711 }
1712
1713 /*
1714 * Get buffer len [for read()].
1715 */
1716 case BIOCGBLEN: /* u_int */
1717 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1718 break;
1719
1720 /*
1721 * Set buffer length.
1722 */
1723 case BIOCSBLEN: { /* u_int */
1724 u_int size;
1725 unsigned int maxbufsize = bpf_maxbufsize;
1726
1727 /*
1728  * Allow a larger buffer in head drop mode, with the
1729 * assumption the reading process may be low priority but
1730 * is interested in the most recent traffic
1731 */
1732 if (d->bd_headdrop != 0) {
1733 maxbufsize = 2 * bpf_maxbufsize;
1734 }
1735
1736 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1737 /*
1738 * Interface already attached, unable to change buffers
1739 */
1740 error = EINVAL;
1741 break;
1742 }
1743 bcopy(addr, &size, sizeof(size));
1744
1745 if (size > maxbufsize) {
1746 d->bd_bufsize = maxbufsize;
1747
1748 os_log_info(OS_LOG_DEFAULT,
1749 "%s bufsize capped to %u from %u",
1750 __func__, d->bd_bufsize, size);
1751 } else if (size < BPF_MINBUFSIZE) {
1752 d->bd_bufsize = BPF_MINBUFSIZE;
1753
1754 os_log_info(OS_LOG_DEFAULT,
1755 "%s bufsize bumped to %u from %u",
1756 __func__, d->bd_bufsize, size);
1757 } else {
1758 d->bd_bufsize = size;
1759 }
1760
1761 /* It's a read/write ioctl */
1762 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1763 break;
1764 }
1765 /*
1766 * Set link layer read filter.
1767 */
1768 case BIOCSETF32:
1769 case BIOCSETFNR32: { /* struct bpf_program32 */
1770 struct bpf_program32 prg32;
1771
1772 bcopy(addr, &prg32, sizeof(prg32));
1773 error = bpf_setf(d, prg32.bf_len,
1774 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1775 break;
1776 }
1777
1778 case BIOCSETF64:
1779 case BIOCSETFNR64: { /* struct bpf_program64 */
1780 struct bpf_program64 prg64;
1781
1782 bcopy(addr, &prg64, sizeof(prg64));
1783 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1784 break;
1785 }
1786
1787 /*
1788 * Flush read packet buffer.
1789 */
1790 case BIOCFLUSH:
1791 while (d->bd_hbuf_read != 0) {
1792 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1793 NULL);
1794 }
1795 if ((d->bd_flags & BPF_CLOSING) != 0) {
1796 error = ENXIO;
1797 break;
1798 }
1799 reset_d(d);
1800 break;
1801
1802 /*
1803 * Put interface into promiscuous mode.
1804 */
1805 case BIOCPROMISC:
1806 if (d->bd_bif == 0) {
1807 /*
1808 * No interface attached yet.
1809 */
1810 error = EINVAL;
1811 break;
1812 }
1813 if (d->bd_promisc == 0) {
1814 lck_mtx_unlock(bpf_mlock);
1815 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1816 lck_mtx_lock(bpf_mlock);
1817 if (error == 0) {
1818 d->bd_promisc = 1;
1819 }
1820 }
1821 break;
1822
1823 /*
1824 * Get device parameters.
1825 */
1826 case BIOCGDLT: /* u_int */
1827 if (d->bd_bif == 0) {
1828 error = EINVAL;
1829 } else {
1830 bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
1831 }
1832 break;
1833
1834 /*
1835 * Get a list of supported data link types.
1836 */
1837 case BIOCGDLTLIST: /* struct bpf_dltlist */
1838 if (d->bd_bif == NULL) {
1839 error = EINVAL;
1840 } else {
1841 error = bpf_getdltlist(d, addr, p);
1842 }
1843 break;
1844
1845 /*
1846 * Set data link type.
1847 */
1848 case BIOCSDLT: /* u_int */
1849 if (d->bd_bif == NULL) {
1850 error = EINVAL;
1851 } else {
1852 u_int dlt;
1853
1854 bcopy(addr, &dlt, sizeof(dlt));
1855
1856 if (dlt == DLT_PKTAP &&
1857 !(d->bd_flags & BPF_WANT_PKTAP)) {
1858 dlt = DLT_RAW;
1859 }
1860 error = bpf_setdlt(d, dlt);
1861 }
1862 break;
1863
1864 /*
1865 * Get interface name.
1866 */
1867 case BIOCGETIF: /* struct ifreq */
1868 if (d->bd_bif == 0) {
1869 error = EINVAL;
1870 } else {
1871 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1872
1873 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1874 sizeof(ifr.ifr_name), "%s", if_name(ifp));
1875 }
1876 break;
1877
1878 /*
1879 * Set interface.
1880 */
1881 case BIOCSETIF: { /* struct ifreq */
1882 ifnet_t ifp;
1883
1884 bcopy(addr, &ifr, sizeof(ifr));
1885 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1886 ifp = ifunit(ifr.ifr_name);
1887 if (ifp == NULL) {
1888 error = ENXIO;
1889 } else {
1890 error = bpf_setif(d, ifp, true, false);
1891 }
1892 break;
1893 }
1894
1895 /*
1896 * Set read timeout.
1897 */
1898 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
1899 struct user32_timeval _tv;
1900 struct timeval tv;
1901
1902 bcopy(addr, &_tv, sizeof(_tv));
1903 tv.tv_sec = _tv.tv_sec;
1904 tv.tv_usec = _tv.tv_usec;
1905
1906 /*
1907 * Subtract 1 tick from tvtohz() since this isn't
1908 * a one-shot timer.
1909 */
1910 if ((error = itimerfix(&tv)) == 0) {
1911 d->bd_rtout = tvtohz(&tv) - 1;
1912 }
1913 break;
1914 }
1915
1916 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
1917 struct user64_timeval _tv;
1918 struct timeval tv;
1919
1920 bcopy(addr, &_tv, sizeof(_tv));
1921 tv.tv_sec = _tv.tv_sec;
1922 tv.tv_usec = _tv.tv_usec;
1923
1924 /*
1925 * Subtract 1 tick from tvtohz() since this isn't
1926 * a one-shot timer.
1927 */
1928 if ((error = itimerfix(&tv)) == 0) {
1929 d->bd_rtout = tvtohz(&tv) - 1;
1930 }
1931 break;
1932 }
1933
1934 /*
1935 * Get read timeout.
1936 */
1937 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1938 struct user32_timeval tv;
1939
1940 bzero(&tv, sizeof(tv));
1941 tv.tv_sec = d->bd_rtout / hz;
1942 tv.tv_usec = (d->bd_rtout % hz) * tick;
1943 bcopy(&tv, addr, sizeof(tv));
1944 break;
1945 }
1946
1947 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1948 struct user64_timeval tv;
1949
1950 bzero(&tv, sizeof(tv));
1951 tv.tv_sec = d->bd_rtout / hz;
1952 tv.tv_usec = (d->bd_rtout % hz) * tick;
1953 bcopy(&tv, addr, sizeof(tv));
1954 break;
1955 }
1956
1957 /*
1958 * Get packet stats.
1959 */
1960 case BIOCGSTATS: { /* struct bpf_stat */
1961 struct bpf_stat bs;
1962
1963 bzero(&bs, sizeof(bs));
1964 bs.bs_recv = d->bd_rcount;
1965 bs.bs_drop = d->bd_dcount;
1966 bcopy(&bs, addr, sizeof(bs));
1967 break;
1968 }
1969
1970 /*
1971 * Set immediate mode.
1972 */
1973 case BIOCIMMEDIATE: /* u_int */
1974 d->bd_immediate = *(u_int *)(void *)addr;
1975 break;
1976
1977 case BIOCVERSION: { /* struct bpf_version */
1978 struct bpf_version bv;
1979
1980 bzero(&bv, sizeof(bv));
1981 bv.bv_major = BPF_MAJOR_VERSION;
1982 bv.bv_minor = BPF_MINOR_VERSION;
1983 bcopy(&bv, addr, sizeof(bv));
1984 break;
1985 }
1986
1987 /*
1988 * Get "header already complete" flag
1989 */
1990 case BIOCGHDRCMPLT: /* u_int */
1991 bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
1992 break;
1993
1994 /*
1995 * Set "header already complete" flag
1996 */
1997 case BIOCSHDRCMPLT: /* u_int */
1998 bcopy(addr, &int_arg, sizeof(int_arg));
1999 d->bd_hdrcmplt = int_arg ? 1 : 0;
2000 break;
2001
2002 /*
2003 * Get "see sent packets" flag
2004 */
2005 case BIOCGSEESENT: /* u_int */
2006 bcopy(&d->bd_seesent, addr, sizeof(u_int));
2007 break;
2008
2009 /*
2010 * Set "see sent packets" flag
2011 */
2012 case BIOCSSEESENT: /* u_int */
2013 bcopy(addr, &d->bd_seesent, sizeof(u_int));
2014 break;
2015
2016 /*
2017 * Set traffic service class
2018 */
2019 case BIOCSETTC: { /* int */
2020 int tc;
2021
2022 bcopy(addr, &tc, sizeof(int));
2023 error = bpf_set_traffic_class(d, tc);
2024 break;
2025 }
2026
2027 /*
2028 * Get traffic service class
2029 */
2030 case BIOCGETTC: /* int */
2031 bcopy(&d->bd_traffic_class, addr, sizeof(int));
2032 break;
2033
2034 case FIONBIO: /* Non-blocking I/O; int */
2035 break;
2036
2037 case FIOASYNC: /* Send signal on receive packets; int */
2038 bcopy(addr, &d->bd_async, sizeof(int));
2039 break;
2040 #ifndef __APPLE__
2041 case FIOSETOWN:
2042 error = fsetown(*(int *)addr, &d->bd_sigio);
2043 break;
2044
2045 case FIOGETOWN:
2046 *(int *)addr = fgetown(d->bd_sigio);
2047 break;
2048
2049 /* This is deprecated, FIOSETOWN should be used instead. */
2050 case TIOCSPGRP:
2051 error = fsetown(-(*(int *)addr), &d->bd_sigio);
2052 break;
2053
2054 /* This is deprecated, FIOGETOWN should be used instead. */
2055 case TIOCGPGRP:
2056 *(int *)addr = -fgetown(d->bd_sigio);
2057 break;
2058 #endif
2059 case BIOCSRSIG: { /* Set receive signal; u_int */
2060 u_int sig;
2061
2062 bcopy(addr, &sig, sizeof(u_int));
2063
2064 if (sig >= NSIG) {
2065 error = EINVAL;
2066 } else {
2067 d->bd_sig = sig;
2068 }
2069 break;
2070 }
2071 case BIOCGRSIG: /* u_int */
2072 bcopy(&d->bd_sig, addr, sizeof(u_int));
2073 break;
2074 #ifdef __APPLE__
2075 case BIOCSEXTHDR: /* u_int */
2076 bcopy(addr, &int_arg, sizeof(int_arg));
2077 if (int_arg) {
2078 d->bd_flags |= BPF_EXTENDED_HDR;
2079 } else {
2080 d->bd_flags &= ~BPF_EXTENDED_HDR;
2081 }
2082 break;
2083
2084 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2085 ifnet_t ifp;
2086 struct bpf_if *bp;
2087
2088 bcopy(addr, &ifr, sizeof(ifr));
2089 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2090 ifp = ifunit(ifr.ifr_name);
2091 if (ifp == NULL) {
2092 error = ENXIO;
2093 break;
2094 }
2095 ifr.ifr_intval = 0;
2096 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2097 struct bpf_d *bpf_d;
2098
2099 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2100 continue;
2101 }
2102 for (bpf_d = bp->bif_dlist; bpf_d;
2103 bpf_d = bpf_d->bd_next) {
2104 ifr.ifr_intval += 1;
2105 }
2106 }
2107 bcopy(&ifr, addr, sizeof(ifr));
2108 break;
2109 }
2110 case BIOCGWANTPKTAP: /* u_int */
2111 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2112 bcopy(&int_arg, addr, sizeof(int_arg));
2113 break;
2114
2115 case BIOCSWANTPKTAP: /* u_int */
2116 bcopy(addr, &int_arg, sizeof(int_arg));
2117 if (int_arg) {
2118 d->bd_flags |= BPF_WANT_PKTAP;
2119 } else {
2120 d->bd_flags &= ~BPF_WANT_PKTAP;
2121 }
2122 break;
2123 #endif
2124
2125 case BIOCSHEADDROP:
2126 bcopy(addr, &int_arg, sizeof(int_arg));
2127 d->bd_headdrop = int_arg ? 1 : 0;
2128 break;
2129
2130 case BIOCGHEADDROP:
2131 bcopy(&d->bd_headdrop, addr, sizeof(int));
2132 break;
2133
2134 case BIOCSTRUNCATE:
2135 bcopy(addr, &int_arg, sizeof(int_arg));
2136 if (int_arg) {
2137 d->bd_flags |= BPF_TRUNCATE;
2138 } else {
2139 d->bd_flags &= ~BPF_TRUNCATE;
2140 }
2141 break;
2142
2143 case BIOCGETUUID:
2144 bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2145 break;
2146
2147 case BIOCSETUP: {
2148 struct bpf_setup_args bsa;
2149 ifnet_t ifp;
2150
2151 bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2152 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2153 ifp = ifunit(bsa.bsa_ifname);
2154 if (ifp == NULL) {
2155 error = ENXIO;
2156 os_log_info(OS_LOG_DEFAULT,
2157 "%s: ifnet not found for %s error %d",
2158 __func__, bsa.bsa_ifname, error);
2159 break;
2160 }
2161
2162 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2163 break;
2164 }
2165 case BIOCSPKTHDRV2:
2166 bcopy(addr, &int_arg, sizeof(int_arg));
2167 if (int_arg != 0) {
2168 d->bd_flags |= BPF_PKTHDRV2;
2169 } else {
2170 d->bd_flags &= ~BPF_PKTHDRV2;
2171 }
2172 break;
2173
2174 case BIOCGPKTHDRV2:
2175 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2176 bcopy(&int_arg, addr, sizeof(int));
2177 break;
2178 }
2179
2180 bpf_release_d(d);
2181 lck_mtx_unlock(bpf_mlock);
2182
2183 return error;
2184 }
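
/*
 * Illustrative userspace sketch (not part of this file's build): how a
 * process might exercise a couple of the flag ioctls handled above, on a
 * hypothetical descriptor "fd" obtained by opening a /dev/bpfN node.
 *
 *	u_int see_sent = 0;	// do not capture packets sent by this host
 *	if (ioctl(fd, BIOCSSEESENT, &see_sent) == -1)
 *		warn("BIOCSSEESENT");
 *
 *	int head_drop = 1;	// drop the hold buffer (oldest packets) when both buffers are full
 *	if (ioctl(fd, BIOCSHEADDROP, &head_drop) == -1)
 *		warn("BIOCSHEADDROP");
 */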
2185
2186 /*
2187 * Set d's packet filter program to the given instructions. If this file
2188 * already has a filter, free it and replace it. Returns EINVAL for bogus requests.
2189 */
2190 static int
2191 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2192 u_long cmd)
2193 {
2194 struct bpf_insn *fcode, *old;
2195 u_int flen, size;
2196
2197 while (d->bd_hbuf_read != 0) {
2198 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2199 }
2200
2201 if ((d->bd_flags & BPF_CLOSING) != 0) {
2202 return ENXIO;
2203 }
2204
2205 old = d->bd_filter;
2206 if (bf_insns == USER_ADDR_NULL) {
2207 if (bf_len != 0) {
2208 return EINVAL;
2209 }
2210 d->bd_filter = NULL;
2211 reset_d(d);
2212 if (old != 0) {
2213 FREE(old, M_DEVBUF);
2214 }
2215 return 0;
2216 }
2217 flen = bf_len;
2218 if (flen > BPF_MAXINSNS) {
2219 return EINVAL;
2220 }
2221
2222 size = flen * sizeof(struct bpf_insn);
2223 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
2224 #ifdef __APPLE__
2225 if (fcode == NULL) {
2226 return ENOBUFS;
2227 }
2228 #endif
2229 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2230 bpf_validate(fcode, (int)flen)) {
2231 d->bd_filter = fcode;
2232
2233 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
2234 reset_d(d);
2235 }
2236
2237 if (old != 0) {
2238 FREE(old, M_DEVBUF);
2239 }
2240
2241 return 0;
2242 }
2243 FREE(fcode, M_DEVBUF);
2244 return EINVAL;
2245 }
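
/*
 * Illustrative userspace sketch (not part of this file's build): installing
 * a trivial "accept everything" program via BIOCSETF, the ioctl that ends
 * up in bpf_setf() above. Assumes a descriptor "fd" already opened on a
 * /dev/bpfN node.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),	// accept the whole packet
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = sizeof(insns) / sizeof(insns[0]),
 *		.bf_insns = insns,
 *	};
 *	if (ioctl(fd, BIOCSETF, &prog) == -1)
 *		err(1, "BIOCSETF");
 */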
2246
2247 /*
2248 * Detach a file from its current interface (if attached at all) and attach
2249 * to the interface indicated by theywant.
2250 * Return an errno or 0.
2251 */
2252 static int
2253 bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
2254 {
2255 struct bpf_if *bp;
2256 int error;
2257
2258 while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
2259 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2260 }
2261
2262 if ((d->bd_flags & BPF_CLOSING) != 0) {
2263 return ENXIO;
2264 }
2265
2266 /*
2267 * Look through attached interfaces for the named one.
2268 */
2269 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2270 struct ifnet *ifp = bp->bif_ifp;
2271
2272 if (ifp == 0 || ifp != theywant) {
2273 continue;
2274 }
2275 /*
2276 * Do not use DLT_PKTAP unless requested explicitly
2277 */
2278 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2279 continue;
2280 }
2281 /*
2282 * Skip the coprocessor interface
2283 */
2284 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
2285 continue;
2286 }
2287 /*
2288 * We found the requested interface.
2289 * Allocate the packet buffers.
2290 */
2291 error = bpf_allocbufs(d);
2292 if (error != 0) {
2293 return error;
2294 }
2295 /*
2296 * Detach if attached to something else.
2297 */
2298 if (bp != d->bd_bif) {
2299 if (d->bd_bif != NULL) {
2300 if (bpf_detachd(d, 0) != 0) {
2301 return ENXIO;
2302 }
2303 }
2304 if (bpf_attachd(d, bp) != 0) {
2305 return ENXIO;
2306 }
2307 }
2308 if (do_reset) {
2309 reset_d(d);
2310 }
2311 return 0;
2312 }
2313 /* Not found. */
2314 return ENXIO;
2315 }
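
/*
 * Illustrative userspace sketch (not part of this file's build): binding a
 * descriptor to an interface by name with BIOCSETIF; the ioctl handler
 * resolves the name and calls bpf_setif() above. "en0" is only an example.
 *
 *	struct ifreq ifr;
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *	if (ioctl(fd, BIOCSETIF, &ifr) == -1)
 *		err(1, "BIOCSETIF");
 */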
2316
2317 /*
2318 * Get a list of the available data link types of the interface.
2319 */
2320 static int
2321 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2322 {
2323 u_int n;
2324 int error;
2325 struct ifnet *ifp;
2326 struct bpf_if *bp;
2327 user_addr_t dlist;
2328 struct bpf_dltlist bfl;
2329
2330 bcopy(addr, &bfl, sizeof(bfl));
2331 if (proc_is64bit(p)) {
2332 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2333 } else {
2334 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2335 }
2336
2337 ifp = d->bd_bif->bif_ifp;
2338 n = 0;
2339 error = 0;
2340
2341 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2342 if (bp->bif_ifp != ifp) {
2343 continue;
2344 }
2345 /*
2346 * Do not use DLT_PKTAP unless requested explicitly
2347 */
2348 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2349 continue;
2350 }
2351 if (dlist != USER_ADDR_NULL) {
2352 if (n >= bfl.bfl_len) {
2353 return ENOMEM;
2354 }
2355 error = copyout(&bp->bif_dlt, dlist,
2356 sizeof(bp->bif_dlt));
2357 if (error != 0) {
2358 break;
2359 }
2360 dlist += sizeof(bp->bif_dlt);
2361 }
2362 n++;
2363 }
2364 bfl.bfl_len = n;
2365 bcopy(&bfl, addr, sizeof(bfl));
2366
2367 return error;
2368 }
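
/*
 * Illustrative userspace sketch (not part of this file's build): retrieving
 * the list built above with BIOCGDLTLIST. A first call with bfl_list left
 * NULL only reports the count in bfl_len, so the caller can size the array.
 *
 *	struct bpf_dltlist bfl;
 *	memset(&bfl, 0, sizeof(bfl));
 *	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)
 *		err(1, "BIOCGDLTLIST");
 *	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
 *	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)
 *		err(1, "BIOCGDLTLIST");
 *	// bfl.bfl_list[0 .. bfl.bfl_len - 1] now holds the DLT_* values
 */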
2369
2370 /*
2371 * Set the data link type of a BPF instance.
2372 */
2373 static int
2374 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2375 {
2376 int error, opromisc;
2377 struct ifnet *ifp;
2378 struct bpf_if *bp;
2379
2380 if (d->bd_bif->bif_dlt == dlt) {
2381 return 0;
2382 }
2383
2384 while (d->bd_hbuf_read != 0) {
2385 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2386 }
2387
2388 if ((d->bd_flags & BPF_CLOSING) != 0) {
2389 return ENXIO;
2390 }
2391
2392 ifp = d->bd_bif->bif_ifp;
2393 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2394 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2395 /*
2396 * Do not use DLT_PKTAP unless requested explicitly
2397 */
2398 if (bp->bif_dlt == DLT_PKTAP &&
2399 !(d->bd_flags & BPF_WANT_PKTAP)) {
2400 continue;
2401 }
2402 break;
2403 }
2404 }
2405 if (bp != NULL) {
2406 opromisc = d->bd_promisc;
2407 if (bpf_detachd(d, 0) != 0) {
2408 return ENXIO;
2409 }
2410 error = bpf_attachd(d, bp);
2411 if (error) {
2412 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2413 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2414 error);
2415 return error;
2416 }
2417 reset_d(d);
2418 if (opromisc) {
2419 lck_mtx_unlock(bpf_mlock);
2420 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2421 lck_mtx_lock(bpf_mlock);
2422 if (error) {
2423 printf("%s: ifpromisc %s%d failed (%d)\n",
2424 __func__, ifnet_name(bp->bif_ifp),
2425 ifnet_unit(bp->bif_ifp), error);
2426 } else {
2427 d->bd_promisc = 1;
2428 }
2429 }
2430 }
2431 return bp == NULL ? EINVAL : 0;
2432 }
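
/*
 * Illustrative userspace sketch (not part of this file's build): switching
 * the descriptor to one of the DLTs reported by BIOCGDLTLIST, which is
 * handled by bpf_setdlt() above.
 *
 *	u_int dlt = DLT_RAW;	// example value; must be one the interface supports
 *	if (ioctl(fd, BIOCSDLT, &dlt) == -1)
 *		err(1, "BIOCSDLT");
 */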
2433
2434 static int
2435 bpf_set_traffic_class(struct bpf_d *d, int tc)
2436 {
2437 int error = 0;
2438
2439 if (!SO_VALID_TC(tc)) {
2440 error = EINVAL;
2441 } else {
2442 d->bd_traffic_class = tc;
2443 }
2444
2445 return error;
2446 }
2447
2448 static void
2449 bpf_set_packet_service_class(struct mbuf *m, int tc)
2450 {
2451 if (!(m->m_flags & M_PKTHDR)) {
2452 return;
2453 }
2454
2455 VERIFY(SO_VALID_TC(tc));
2456 (void) m_set_service_class(m, so_tc2msc(tc));
2457 }
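
/*
 * Illustrative userspace sketch (not part of this file's build): selecting
 * the traffic service class applied to packets subsequently written to the
 * descriptor, validated by bpf_set_traffic_class() above. SO_TC_VO is just
 * an example; any class accepted by SO_VALID_TC() works.
 *
 *	int tc = SO_TC_VO;
 *	if (ioctl(fd, BIOCSETTC, &tc) == -1)
 *		err(1, "BIOCSETTC");
 */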
2458
2459 /*
2460 * Support for select()
2461 *
2462 * Return true iff the specific operation will not block indefinitely.
2463 * Otherwise, return false but make a note that a selwakeup() must be done.
2464 */
2465 int
2466 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2467 {
2468 struct bpf_d *d;
2469 int ret = 0;
2470
2471 lck_mtx_lock(bpf_mlock);
2472
2473 d = bpf_dtab[minor(dev)];
2474 if (d == NULL || d == BPF_DEV_RESERVED ||
2475 (d->bd_flags & BPF_CLOSING) != 0) {
2476 lck_mtx_unlock(bpf_mlock);
2477 return ENXIO;
2478 }
2479
2480 bpf_acquire_d(d);
2481
2482 if (d->bd_bif == NULL) {
2483 bpf_release_d(d);
2484 lck_mtx_unlock(bpf_mlock);
2485 return ENXIO;
2486 }
2487
2488 while (d->bd_hbuf_read != 0) {
2489 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2490 }
2491
2492 if ((d->bd_flags & BPF_CLOSING) != 0) {
2493 bpf_release_d(d);
2494 lck_mtx_unlock(bpf_mlock);
2495 return ENXIO;
2496 }
2497
2498 switch (which) {
2499 case FREAD:
2500 if (d->bd_hlen != 0 ||
2501 ((d->bd_immediate ||
2502 d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2503 ret = 1; /* read has data to return */
2504 } else {
2505 /*
2506 * Read has no data to return.
2507 * Make the select wait, and start a timer if
2508 * necessary.
2509 */
2510 selrecord(p, &d->bd_sel, wql);
2511 bpf_start_timer(d);
2512 }
2513 break;
2514
2515 case FWRITE:
2516 /* can't determine whether a write would block */
2517 ret = 1;
2518 break;
2519 }
2520
2521 bpf_release_d(d);
2522 lck_mtx_unlock(bpf_mlock);
2523
2524 return ret;
2525 }
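
/*
 * Illustrative userspace sketch (not part of this file's build): waiting for
 * captured data with select(), which lands in bpfselect() above. With
 * immediate mode enabled the descriptor becomes readable as soon as the
 * store buffer holds any data.
 *
 *	u_int imm = 1;
 *	(void) ioctl(fd, BIOCIMMEDIATE, &imm);
 *
 *	fd_set rfds;
 *	FD_ZERO(&rfds);
 *	FD_SET(fd, &rfds);
 *	if (select(fd + 1, &rfds, NULL, NULL, NULL) > 0 && FD_ISSET(fd, &rfds)) {
 *		// a read() of the capture buffer will not block
 *	}
 */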
2526
2527 /*
2528 * Support for kevent() system call. Register EVFILT_READ filters and
2529 * reject all others.
2530 */
2531 int bpfkqfilter(dev_t dev, struct knote *kn);
2532 static void filt_bpfdetach(struct knote *);
2533 static int filt_bpfread(struct knote *, long);
2534 static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
2535 static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
2536
2537 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2538 .f_isfd = 1,
2539 .f_detach = filt_bpfdetach,
2540 .f_event = filt_bpfread,
2541 .f_touch = filt_bpftouch,
2542 .f_process = filt_bpfprocess,
2543 };
2544
2545 static int
2546 filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
2547 {
2548 int ready = 0;
2549 int64_t data = 0;
2550
2551 if (d->bd_immediate) {
2552 /*
2553 * If there's data in the hold buffer, it's the
2554 * amount of data a read will return.
2555 *
2556 * If there's no data in the hold buffer, but
2557 * there's data in the store buffer, a read will
2558 * immediately rotate the store buffer to the
2559 * hold buffer, so the amount of data in the store
2560 * buffer is the amount of data a read will
2561 * return.
2562 *
2563 * If there's no data in either buffer, we're not
2564 * ready to read.
2565 */
2566 data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
2567 d->bd_slen : d->bd_hlen);
2568 int64_t lowwat = knote_low_watermark(kn);
2569 if (lowwat > d->bd_bufsize) {
2570 lowwat = d->bd_bufsize;
2571 }
2572 ready = (data >= lowwat);
2573 } else {
2574 /*
2575 * If there's data in the hold buffer, it's the
2576 * amount of data a read will return.
2577 *
2578 * If there's no data in the hold buffer, but
2579 * there's data in the store buffer, if the
2580 * timer has expired a read will immediately
2581 * rotate the store buffer to the hold buffer,
2582 * so the amount of data in the store buffer is
2583 * the amount of data a read will return.
2584 *
2585 * If there's no data in either buffer, or there's
2586 * no data in the hold buffer and the timer hasn't
2587 * expired, we're not ready to read.
2588 */
2589 data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
2590 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
2591 ready = (data > 0);
2592 }
2593 if (!ready) {
2594 bpf_start_timer(d);
2595 } else if (kev) {
2596 knote_fill_kevent(kn, kev, data);
2597 }
2598
2599 return ready;
2600 }
2601
2602 int
2603 bpfkqfilter(dev_t dev, struct knote *kn)
2604 {
2605 struct bpf_d *d;
2606 int res;
2607
2608 /*
2609 * Is this device a bpf?
2610 */
2611 if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
2612 knote_set_error(kn, EINVAL);
2613 return 0;
2614 }
2615
2616 lck_mtx_lock(bpf_mlock);
2617
2618 d = bpf_dtab[minor(dev)];
2619
2620 if (d == NULL || d == BPF_DEV_RESERVED ||
2621 (d->bd_flags & BPF_CLOSING) != 0 ||
2622 d->bd_bif == NULL) {
2623 lck_mtx_unlock(bpf_mlock);
2624 knote_set_error(kn, ENXIO);
2625 return 0;
2626 }
2627
2628 kn->kn_hook = d;
2629 kn->kn_filtid = EVFILTID_BPFREAD;
2630 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2631 d->bd_flags |= BPF_KNOTE;
2632
2633 /* capture the current state */
2634 res = filt_bpfread_common(kn, NULL, d);
2635
2636 lck_mtx_unlock(bpf_mlock);
2637
2638 return res;
2639 }
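
/*
 * Illustrative userspace sketch (not part of this file's build): the
 * equivalent kevent() registration served by bpfkqfilter() above; only
 * EVFILT_READ is accepted, and a low-water mark may optionally be set
 * with NOTE_LOWAT.
 *
 *	int kq = kqueue();
 *	struct kevent kev;
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 *
 *	struct kevent out;
 *	if (kevent(kq, NULL, 0, &out, 1, NULL) == 1) {
 *		// out.data approximates the number of bytes a read() would return
 *	}
 */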
2640
2641 static void
2642 filt_bpfdetach(struct knote *kn)
2643 {
2644 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2645
2646 lck_mtx_lock(bpf_mlock);
2647 if (d->bd_flags & BPF_KNOTE) {
2648 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2649 d->bd_flags &= ~BPF_KNOTE;
2650 }
2651 lck_mtx_unlock(bpf_mlock);
2652 }
2653
2654 static int
2655 filt_bpfread(struct knote *kn, long hint)
2656 {
2657 #pragma unused(hint)
2658 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2659
2660 return filt_bpfread_common(kn, NULL, d);
2661 }
2662
2663 static int
2664 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
2665 {
2666 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2667 int res;
2668
2669 lck_mtx_lock(bpf_mlock);
2670
2671 /* save off the lowat threshold and flag */
2672 kn->kn_sdata = kev->data;
2673 kn->kn_sfflags = kev->fflags;
2674
2675 /* output data will be re-generated here */
2676 res = filt_bpfread_common(kn, NULL, d);
2677
2678 lck_mtx_unlock(bpf_mlock);
2679
2680 return res;
2681 }
2682
2683 static int
2684 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2685 {
2686 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2687 int res;
2688
2689 lck_mtx_lock(bpf_mlock);
2690 res = filt_bpfread_common(kn, kev, d);
2691 lck_mtx_unlock(bpf_mlock);
2692
2693 return res;
2694 }
2695
2696 /*
2697 * Copy data from an mbuf chain into a buffer. This code is derived
2698 * from m_copydata in kern/uipc_mbuf.c.
2699 */
2700 static void
2701 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2702 {
2703 u_int count;
2704 u_char *dst;
2705
2706 dst = dst_arg;
2707 while (len > 0) {
2708 if (m == 0) {
2709 panic("bpf_mcopy");
2710 }
2711 count = min(m->m_len, len);
2712 bcopy(mbuf_data(m), dst, count);
2713 m = m->m_next;
2714 dst += count;
2715 len -= count;
2716 }
2717 }
2718
2719 static inline void
2720 bpf_tap_imp(
2721 ifnet_t ifp,
2722 u_int32_t dlt,
2723 struct bpf_packet *bpf_pkt,
2724 int outbound)
2725 {
2726 struct bpf_d *d;
2727 u_int slen;
2728 struct bpf_if *bp;
2729
2730 /*
2731 * It's possible that we get here after the bpf descriptor has been
2732 * detached from the interface; in such a case we simply return.
2733 * Lock ordering is important since we can be called asynchronously
2734 * (from IOKit) to process an inbound packet; when that happens
2735 * we would have been holding its "gateLock" and will be acquiring
2736 * "bpf_mlock" upon entering this routine. Due to that, we release
2737 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2738 * acquire "gateLock" in IOKit), in order to avoid a deadlock
2739 * when an ifnet_set_promiscuous request simultaneously collides with
2740 * an inbound packet being passed into the tap callback.
2741 */
2742 lck_mtx_lock(bpf_mlock);
2743 if (ifp->if_bpf == NULL) {
2744 lck_mtx_unlock(bpf_mlock);
2745 return;
2746 }
2747 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2748 if (bp->bif_ifp != ifp) {
2749 /* wrong interface */
2750 bp = NULL;
2751 break;
2752 }
2753 if (dlt == 0 || bp->bif_dlt == dlt) {
2754 /* tapping default DLT or DLT matches */
2755 break;
2756 }
2757 }
2758 if (bp == NULL) {
2759 goto done;
2760 }
2761 for (d = bp->bif_dlist; d; d = d->bd_next) {
2762 struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2763 struct bpf_packet bpf_pkt_tmp;
2764 struct pktap_header_buffer bpfp_header_tmp;
2765
2766 if (outbound && !d->bd_seesent) {
2767 continue;
2768 }
2769
2770 ++d->bd_rcount;
2771 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2772 bpf_pkt->bpfp_total_length, 0);
2773 if (bp->bif_ifp->if_type == IFT_PKTAP &&
2774 bp->bif_dlt == DLT_PKTAP) {
2775 /*
2776 * Need to copy the bpf_pkt because the conversion
2777 * to v2 pktap header modifies the content of the
2778 * bpfp_header
2779 */
2780 if ((d->bd_flags & BPF_PKTHDRV2) &&
2781 bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2782 bpf_pkt_tmp = *bpf_pkt;
2783
2784 bpf_pkt = &bpf_pkt_tmp;
2785
2786 memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2787 bpf_pkt->bpfp_header_length);
2788
2789 bpf_pkt->bpfp_header = &bpfp_header_tmp;
2790
2791 convert_to_pktap_header_to_v2(bpf_pkt,
2792 !!(d->bd_flags & BPF_TRUNCATE));
2793 }
2794
2795 if (d->bd_flags & BPF_TRUNCATE) {
2796 slen = min(slen,
2797 get_pkt_trunc_len((u_char *)bpf_pkt,
2798 bpf_pkt->bpfp_total_length));
2799 }
2800 }
2801 if (slen != 0) {
2802 #if CONFIG_MACF_NET
2803 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0) {
2804 continue;
2805 }
2806 #endif
2807 catchpacket(d, bpf_pkt, slen, outbound);
2808 }
2809 bpf_pkt = bpf_pkt_saved;
2810 }
2811
2812 done:
2813 lck_mtx_unlock(bpf_mlock);
2814 }
2815
2816 static inline void
2817 bpf_tap_mbuf(
2818 ifnet_t ifp,
2819 u_int32_t dlt,
2820 mbuf_t m,
2821 void* hdr,
2822 size_t hlen,
2823 int outbound)
2824 {
2825 struct bpf_packet bpf_pkt;
2826 struct mbuf *m0;
2827
2828 if (ifp->if_bpf == NULL) {
2829 /* quickly check without taking lock */
2830 return;
2831 }
2832 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2833 bpf_pkt.bpfp_mbuf = m;
2834 bpf_pkt.bpfp_total_length = 0;
2835 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
2836 bpf_pkt.bpfp_total_length += m0->m_len;
2837 }
2838 bpf_pkt.bpfp_header = hdr;
2839 if (hdr != NULL) {
2840 bpf_pkt.bpfp_total_length += hlen;
2841 bpf_pkt.bpfp_header_length = hlen;
2842 } else {
2843 bpf_pkt.bpfp_header_length = 0;
2844 }
2845 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2846 }
2847
2848 void
2849 bpf_tap_out(
2850 ifnet_t ifp,
2851 u_int32_t dlt,
2852 mbuf_t m,
2853 void* hdr,
2854 size_t hlen)
2855 {
2856 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2857 }
2858
2859 void
2860 bpf_tap_in(
2861 ifnet_t ifp,
2862 u_int32_t dlt,
2863 mbuf_t m,
2864 void* hdr,
2865 size_t hlen)
2866 {
2867 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2868 }
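
/*
 * Illustrative driver-side sketch (assumptions, not taken from this file):
 * a driver that registered with bpf_attach() can tap the packets it moves,
 * which funnels into bpf_tap_mbuf() above. Passing 0 for the DLT taps the
 * interface's default attachment.
 *
 *	// input path, once the mbuf chain is fully assembled
 *	bpf_tap_in(ifp, 0, m, NULL, 0);
 *
 *	// output path, just before handing the mbuf to the hardware
 *	bpf_tap_out(ifp, 0, m, NULL, 0);
 */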
2869
2870 /* Callback registered with Ethernet driver. */
2871 static int
2872 bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2873 {
2874 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2875
2876 return 0;
2877 }
2878
2879
2880 static errno_t
2881 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2882 {
2883 errno_t err = 0;
2884 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2885 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2886 } else {
2887 err = EINVAL;
2888 }
2889
2890 return err;
2891 }
2892
2893 static void
2894 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2895 {
2896 /* copy the optional header */
2897 if (pkt->bpfp_header_length != 0) {
2898 size_t count = min(len, pkt->bpfp_header_length);
2899 bcopy(pkt->bpfp_header, dst, count);
2900 len -= count;
2901 dst += count;
2902 }
2903 if (len == 0) {
2904 /* nothing past the header */
2905 return;
2906 }
2907 /* copy the packet */
2908 switch (pkt->bpfp_type) {
2909 case BPF_PACKET_TYPE_MBUF:
2910 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2911 break;
2912 default:
2913 break;
2914 }
2915 }
2916
2917 static uint16_t
2918 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2919 const uint16_t remaining_caplen)
2920 {
2921 /*
2922 * For some reason tcpdump expects one byte beyond the ESP header
2923 */
2924 uint16_t trunc_len = ESP_HDR_SIZE + 1;
2925
2926 if (trunc_len > remaining_caplen) {
2927 return remaining_caplen;
2928 }
2929
2930 return trunc_len;
2931 }
2932
2933 static uint16_t
2934 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2935 const uint16_t remaining_caplen)
2936 {
2937 /*
2938 * Include the generic payload header
2939 */
2940 uint16_t trunc_len = ISAKMP_HDR_SIZE;
2941
2942 if (trunc_len > remaining_caplen) {
2943 return remaining_caplen;
2944 }
2945
2946 return trunc_len;
2947 }
2948
2949 static uint16_t
2950 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
2951 const uint16_t remaining_caplen)
2952 {
2953 int err = 0;
2954 uint16_t trunc_len = 0;
2955 char payload[remaining_caplen];
2956
2957 err = bpf_copydata(pkt, off, remaining_caplen, payload);
2958 if (err != 0) {
2959 return remaining_caplen;
2960 }
2961 /*
2962 * There are three cases:
2963 * - IKE: the payload starts with a 4-byte header of zeros before the ISAKMP header
2964 * - keep-alive: 1 byte payload
2965 * - otherwise it's ESP
2966 */
2967 if (remaining_caplen >= 4 &&
2968 payload[0] == 0 && payload[1] == 0 &&
2969 payload[2] == 0 && payload[3] == 0) {
2970 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
2971 } else if (remaining_caplen == 1) {
2972 trunc_len = 1;
2973 } else {
2974 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
2975 }
2976
2977 if (trunc_len > remaining_caplen) {
2978 return remaining_caplen;
2979 }
2980
2981 return trunc_len;
2982 }
2983
2984 static uint16_t
2985 get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2986 {
2987 int err = 0;
2988 uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
2989
2990 if (trunc_len >= remaining_caplen) {
2991 return remaining_caplen;
2992 }
2993
2994 struct udphdr udphdr;
2995 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
2996 if (err != 0) {
2997 return remaining_caplen;
2998 }
2999
3000 u_short sport, dport;
3001
3002 sport = EXTRACT_SHORT(&udphdr.uh_sport);
3003 dport = EXTRACT_SHORT(&udphdr.uh_dport);
3004
3005 if (dport == PORT_DNS || sport == PORT_DNS) {
3006 /*
3007 * Full UDP payload for DNS
3008 */
3009 trunc_len = remaining_caplen;
3010 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
3011 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
3012 /*
3013 * Full UDP payload for BOOTP and DHCP
3014 */
3015 trunc_len = remaining_caplen;
3016 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
3017 /*
3018 * Return the ISAKMP header
3019 */
3020 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3021 remaining_caplen - sizeof(struct udphdr));
3022 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3023 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3024 remaining_caplen - sizeof(struct udphdr));
3025 }
3026 if (trunc_len >= remaining_caplen) {
3027 return remaining_caplen;
3028 }
3029
3030 return trunc_len;
3031 }
3032
3033 static uint16_t
3034 get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3035 {
3036 int err = 0;
3037 uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
3038 if (trunc_len >= remaining_caplen) {
3039 return remaining_caplen;
3040 }
3041
3042 struct tcphdr tcphdr;
3043 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
3044 if (err != 0) {
3045 return remaining_caplen;
3046 }
3047
3048 u_short sport, dport;
3049 sport = EXTRACT_SHORT(&tcphdr.th_sport);
3050 dport = EXTRACT_SHORT(&tcphdr.th_dport);
3051
3052 if (dport == PORT_DNS || sport == PORT_DNS) {
3053 /*
3054 * Full TCP payload for DNS
3055 */
3056 trunc_len = remaining_caplen;
3057 } else {
3058 trunc_len = tcphdr.th_off << 2;
3059 }
3060 if (trunc_len >= remaining_caplen) {
3061 return remaining_caplen;
3062 }
3063
3064 return trunc_len;
3065 }
3066
3067 static uint16_t
3068 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3069 {
3070 uint16_t trunc_len;
3071
3072 switch (proto) {
3073 case IPPROTO_ICMP: {
3074 /*
3075 * Full ICMP payload
3076 */
3077 trunc_len = remaining_caplen;
3078 break;
3079 }
3080 case IPPROTO_ICMPV6: {
3081 /*
3082 * Full ICMPv6 payload
3083 */
3084 trunc_len = remaining_caplen;
3085 break;
3086 }
3087 case IPPROTO_IGMP: {
3088 /*
3089 * Full IGMP payload
3090 */
3091 trunc_len = remaining_caplen;
3092 break;
3093 }
3094 case IPPROTO_UDP: {
3095 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3096 break;
3097 }
3098 case IPPROTO_TCP: {
3099 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3100 break;
3101 }
3102 case IPPROTO_ESP: {
3103 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3104 break;
3105 }
3106 default: {
3107 /*
3108 * By default we only include the IP header
3109 */
3110 trunc_len = 0;
3111 break;
3112 }
3113 }
3114 if (trunc_len >= remaining_caplen) {
3115 return remaining_caplen;
3116 }
3117
3118 return trunc_len;
3119 }
3120
3121 static uint16_t
3122 get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3123 {
3124 int err = 0;
3125 uint16_t iplen = sizeof(struct ip);
3126 if (iplen >= remaining_caplen) {
3127 return remaining_caplen;
3128 }
3129
3130 struct ip iphdr;
3131 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3132 if (err != 0) {
3133 return remaining_caplen;
3134 }
3135
3136 uint8_t proto = 0;
3137
3138 iplen = iphdr.ip_hl << 2;
3139 if (iplen >= remaining_caplen) {
3140 return remaining_caplen;
3141 }
3142
3143 proto = iphdr.ip_p;
3144 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3145
3146 if (iplen >= remaining_caplen) {
3147 return remaining_caplen;
3148 }
3149
3150 return iplen;
3151 }
3152
3153 static uint16_t
3154 get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3155 {
3156 int err = 0;
3157 uint16_t iplen = sizeof(struct ip6_hdr);
3158 if (iplen >= remaining_caplen) {
3159 return remaining_caplen;
3160 }
3161
3162 struct ip6_hdr ip6hdr;
3163 err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3164 if (err != 0) {
3165 return remaining_caplen;
3166 }
3167
3168 uint8_t proto = 0;
3169
3170 /*
3171 * TBD: process the extension headers
3172 */
3173 proto = ip6hdr.ip6_nxt;
3174 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3175
3176 if (iplen >= remaining_caplen) {
3177 return remaining_caplen;
3178 }
3179
3180 return iplen;
3181 }
3182
3183 static uint16_t
3184 get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
3185 {
3186 int err = 0;
3187 uint16_t ethlen = sizeof(struct ether_header);
3188 if (ethlen >= remaining_caplen) {
3189 return remaining_caplen;
3190 }
3191
3192 struct ether_header eh;
3193 u_short type;
3194 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3195 if (err != 0) {
3196 return remaining_caplen;
3197 }
3198
3199 type = EXTRACT_SHORT(&eh.ether_type);
3200 /* Include full ARP */
3201 if (type == ETHERTYPE_ARP) {
3202 ethlen = remaining_caplen;
3203 } else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
3204 ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
3205 } else {
3206 if (type == ETHERTYPE_IP) {
3207 ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
3208 remaining_caplen);
3209 } else if (type == ETHERTYPE_IPV6) {
3210 ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
3211 remaining_caplen);
3212 }
3213 }
3214 return ethlen;
3215 }
3216
3217 static uint32_t
3218 get_pkt_trunc_len(u_char *p, u_int len)
3219 {
3220 struct bpf_packet *pkt = (struct bpf_packet *)(void *) p;
3221 struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3222 uint32_t out_pkt_len = 0, tlen = 0;
3223 /*
3224 * pktap->pth_frame_pre_length is the L2 header length and accounts
3225 * for both pre and pre_adjust.
3226 * pktap->pth_length is sizeof(pktap_header) (excluding pre/pre_adjust).
3227 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust).
3228 * pre is the offset of the L3 header past the bpfp_header, i.e. the
3229 * length of the L2 header following the bpfp_header, if present.
3230 */
3231 int32_t pre = pktap->pth_frame_pre_length -
3232 (pkt->bpfp_header_length - pktap->pth_length);
3233
3234 /* Length of the input packet starting from L3 header */
3235 uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
3236 if (pktap->pth_protocol_family == AF_INET ||
3237 pktap->pth_protocol_family == AF_INET6) {
3238 /* Contains L2 header */
3239 if (pre > 0) {
3240 if (pre < (int32_t)sizeof(struct ether_header)) {
3241 goto too_short;
3242 }
3243
3244 out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
3245 } else if (pre == 0) {
3246 if (pktap->pth_protocol_family == AF_INET) {
3247 out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
3248 } else if (pktap->pth_protocol_family == AF_INET6) {
3249 out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
3250 }
3251 } else {
3252 /* Ideally pre should be >= 0. This is an exception */
3253 out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
3254 }
3255 } else {
3256 if (pktap->pth_iftype == IFT_ETHER) {
3257 if (in_pkt_len < sizeof(struct ether_header)) {
3258 goto too_short;
3259 }
3260 /* At most include the Ethernet header and 16 bytes */
3261 out_pkt_len = MIN(sizeof(struct ether_header) + 16,
3262 in_pkt_len);
3263 } else {
3264 /*
3265 * For unknown protocols include at most 16 bytes
3266 */
3267 out_pkt_len = MIN(16, in_pkt_len);
3268 }
3269 }
3270 done:
3271 tlen = pkt->bpfp_header_length + out_pkt_len + pre;
3272 return tlen;
3273 too_short:
3274 out_pkt_len = in_pkt_len;
3275 goto done;
3276 }
3277
3278 /*
3279 * Move the packet data from interface memory (pkt) into the
3280 * store buffer. Wake up pending readers when the store buffer fills,
3281 * or right away in immediate mode or after a read timeout has expired.
3282 */
3283 static void
3284 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3285 u_int snaplen, int outbound)
3286 {
3287 struct bpf_hdr *hp;
3288 struct bpf_hdr_ext *ehp;
3289 int totlen, curlen;
3290 int hdrlen, caplen;
3291 int do_wakeup = 0;
3292 u_char *payload;
3293 struct timeval tv;
3294
3295 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3296 d->bd_bif->bif_hdrlen;
3297 /*
3298 * Figure out how many bytes to move. If the packet is
3299 * greater or equal to the snapshot length, transfer that
3300 * much. Otherwise, transfer the whole packet (unless
3301 * we hit the buffer size limit).
3302 */
3303 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
3304 if (totlen > d->bd_bufsize) {
3305 totlen = d->bd_bufsize;
3306 }
3307
3308 if (hdrlen > totlen) {
3309 return;
3310 }
3311
3312 /*
3313 * Round up the end of the previous packet to the next longword.
3314 */
3315 curlen = BPF_WORDALIGN(d->bd_slen);
3316 if (curlen + totlen > d->bd_bufsize) {
3317 /*
3318 * This packet will overflow the storage buffer.
3319 * Rotate the buffers if we can, then wakeup any
3320 * pending reads.
3321 *
3322 * We cannot rotate buffers if a read is in progress
3323 * so drop the packet
3324 */
3325 if (d->bd_hbuf_read != 0) {
3326 ++d->bd_dcount;
3327 return;
3328 }
3329
3330 if (d->bd_fbuf == NULL) {
3331 if (d->bd_headdrop == 0) {
3332 /*
3333 * We haven't completed the previous read yet,
3334 * so drop the packet.
3335 */
3336 ++d->bd_dcount;
3337 return;
3338 }
3339 /*
3340 * Drop the hold buffer as it contains older packets
3341 */
3342 d->bd_dcount += d->bd_hcnt;
3343 d->bd_fbuf = d->bd_hbuf;
3344 ROTATE_BUFFERS(d);
3345 } else {
3346 ROTATE_BUFFERS(d);
3347 }
3348 do_wakeup = 1;
3349 curlen = 0;
3350 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
3351 /*
3352 * Immediate mode is set, or the read timeout has
3353 * already expired during a select call. A packet
3354 * arrived, so the reader should be woken up.
3355 */
3356 do_wakeup = 1;
3357 }
3358
3359 /*
3360 * Append the bpf header.
3361 */
3362 microtime(&tv);
3363 if (d->bd_flags & BPF_EXTENDED_HDR) {
3364 struct mbuf *m;
3365
3366 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
3367 ? pkt->bpfp_mbuf : NULL;
3368 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3369 memset(ehp, 0, sizeof(*ehp));
3370 ehp->bh_tstamp.tv_sec = tv.tv_sec;
3371 ehp->bh_tstamp.tv_usec = tv.tv_usec;
3372
3373 ehp->bh_datalen = pkt->bpfp_total_length;
3374 ehp->bh_hdrlen = hdrlen;
3375 caplen = ehp->bh_caplen = totlen - hdrlen;
3376 if (m == NULL) {
3377 if (outbound) {
3378 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3379 } else {
3380 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3381 }
3382 } else if (outbound) {
3383 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3384
3385 /* only do lookups on non-raw INPCB */
3386 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
3387 PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
3388 (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
3389 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3390 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3391 ehp->bh_proto = m->m_pkthdr.pkt_proto;
3392 }
3393 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
3394 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
3395 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3396 }
3397 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
3398 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
3399 }
3400 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
3401 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3402 }
3403 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3404 ehp->bh_unsent_bytes =
3405 m->m_pkthdr.bufstatus_if;
3406 ehp->bh_unsent_snd =
3407 m->m_pkthdr.bufstatus_sndbuf;
3408 }
3409 } else {
3410 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3411 }
3412 payload = (u_char *)ehp + hdrlen;
3413 } else {
3414 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3415 hp->bh_tstamp.tv_sec = tv.tv_sec;
3416 hp->bh_tstamp.tv_usec = tv.tv_usec;
3417 hp->bh_datalen = pkt->bpfp_total_length;
3418 hp->bh_hdrlen = hdrlen;
3419 caplen = hp->bh_caplen = totlen - hdrlen;
3420 payload = (u_char *)hp + hdrlen;
3421 }
3422 /*
3423 * Copy the packet data into the store buffer and update its length.
3424 */
3425 copy_bpf_packet(pkt, payload, caplen);
3426 d->bd_slen = curlen + totlen;
3427 d->bd_scnt += 1;
3428
3429 if (do_wakeup) {
3430 bpf_wakeup(d);
3431 }
3432 }
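
/*
 * Illustrative userspace sketch (not part of this file's build): consuming
 * the records laid out by catchpacket() above. Each record is a bpf_hdr
 * (or bpf_hdr_ext when BIOCSEXTHDR is set) followed by the captured bytes,
 * padded with BPF_WORDALIGN so the next header starts on a longword
 * boundary. Assumes "fd", "buf" and "buflen" (sized via BIOCGBLEN) were set
 * up by the caller; "handle_packet" is a hypothetical callback.
 *
 *	ssize_t n = read(fd, buf, buflen);
 *	char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *		handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */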
3433
3434 /*
3435 * Initialize all nonzero fields of a descriptor.
3436 */
3437 static int
3438 bpf_allocbufs(struct bpf_d *d)
3439 {
3440 if (d->bd_sbuf != NULL) {
3441 FREE(d->bd_sbuf, M_DEVBUF);
3442 d->bd_sbuf = NULL;
3443 }
3444 if (d->bd_hbuf != NULL) {
3445 FREE(d->bd_hbuf, M_DEVBUF);
3446 d->bd_hbuf = NULL;
3447 }
3448 if (d->bd_fbuf != NULL) {
3449 FREE(d->bd_fbuf, M_DEVBUF);
3450 d->bd_fbuf = NULL;
3451 }
3452
3453 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
3454 if (d->bd_fbuf == NULL) {
3455 return ENOBUFS;
3456 }
3457
3458 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
3459 if (d->bd_sbuf == NULL) {
3460 FREE(d->bd_fbuf, M_DEVBUF);
3461 d->bd_fbuf = NULL;
3462 return ENOBUFS;
3463 }
3464 d->bd_slen = 0;
3465 d->bd_hlen = 0;
3466 d->bd_scnt = 0;
3467 d->bd_hcnt = 0;
3468 return 0;
3469 }
3470
3471 /*
3472 * Free buffers currently in use by a descriptor.
3473 * Called on close.
3474 */
3475 static void
3476 bpf_freed(struct bpf_d *d)
3477 {
3478 /*
3479 * We don't need to lock out interrupts since this descriptor has
3480 * been detached from its interface and it hasn't yet been marked
3481 * free.
3482 */
3483 if (d->bd_hbuf_read != 0) {
3484 panic("bpf buffer freed during read");
3485 }
3486
3487 if (d->bd_sbuf != 0) {
3488 FREE(d->bd_sbuf, M_DEVBUF);
3489 if (d->bd_hbuf != 0) {
3490 FREE(d->bd_hbuf, M_DEVBUF);
3491 }
3492 if (d->bd_fbuf != 0) {
3493 FREE(d->bd_fbuf, M_DEVBUF);
3494 }
3495 }
3496 if (d->bd_filter) {
3497 FREE(d->bd_filter, M_DEVBUF);
3498 }
3499 }
3500
3501 /*
3502 * Attach an interface to bpf. ifp is the interface to tap; dlt is the
3503 * link layer type; hdrlen is the fixed size of the link header
3504 * (variable length headers not yet supported).
3505 */
3506 void
3507 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3508 {
3509 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3510 }
3511
3512 errno_t
3513 bpf_attach(
3514 ifnet_t ifp,
3515 u_int32_t dlt,
3516 u_int32_t hdrlen,
3517 bpf_send_func send,
3518 bpf_tap_func tap)
3519 {
3520 struct bpf_if *bp;
3521 struct bpf_if *bp_new;
3522 struct bpf_if *bp_before_first = NULL;
3523 struct bpf_if *bp_first = NULL;
3524 struct bpf_if *bp_last = NULL;
3525 boolean_t found;
3526
3527 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
3528 M_WAIT | M_ZERO);
3529 if (bp_new == 0) {
3530 panic("bpfattach");
3531 }
3532
3533 lck_mtx_lock(bpf_mlock);
3534
3535 /*
3536 * Check if this interface/dlt is already attached. Remember the
3537 * first and last attachment for this interface, as well as the
3538 * element before the first attachment.
3539 */
3540 found = FALSE;
3541 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3542 if (bp->bif_ifp != ifp) {
3543 if (bp_first != NULL) {
3544 /* no more elements for this interface */
3545 break;
3546 }
3547 bp_before_first = bp;
3548 } else {
3549 if (bp->bif_dlt == dlt) {
3550 found = TRUE;
3551 break;
3552 }
3553 if (bp_first == NULL) {
3554 bp_first = bp;
3555 }
3556 bp_last = bp;
3557 }
3558 }
3559 if (found) {
3560 lck_mtx_unlock(bpf_mlock);
3561 printf("bpfattach - %s with dlt %d is already attached\n",
3562 if_name(ifp), dlt);
3563 FREE(bp_new, M_DEVBUF);
3564 return EEXIST;
3565 }
3566
3567 bp_new->bif_ifp = ifp;
3568 bp_new->bif_dlt = dlt;
3569 bp_new->bif_send = send;
3570 bp_new->bif_tap = tap;
3571
3572 if (bp_first == NULL) {
3573 /* No other entries for this ifp */
3574 bp_new->bif_next = bpf_iflist;
3575 bpf_iflist = bp_new;
3576 } else {
3577 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
3578 /* Make this the first entry for this interface */
3579 if (bp_before_first != NULL) {
3580 /* point the previous to us */
3581 bp_before_first->bif_next = bp_new;
3582 } else {
3583 /* we're the new head */
3584 bpf_iflist = bp_new;
3585 }
3586 bp_new->bif_next = bp_first;
3587 } else {
3588 /* Add this after the last entry for this interface */
3589 bp_new->bif_next = bp_last->bif_next;
3590 bp_last->bif_next = bp_new;
3591 }
3592 }
3593
3594 /*
3595 * Compute the length of the bpf header. This is not necessarily
3596 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
3597 * that the network layer header begins on a longword boundary (for
3598 * performance reasons and to alleviate alignment restrictions).
3599 */
3600 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
3601 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
3602 sizeof(struct bpf_hdr_ext)) - hdrlen;
3603
3604 /* Take a reference on the interface */
3605 ifnet_reference(ifp);
3606
3607 lck_mtx_unlock(bpf_mlock);
3608
3609 #ifndef __APPLE__
3610 if (bootverbose) {
3611 printf("bpf: %s attached\n", if_name(ifp));
3612 }
3613 #endif
3614
3615 return 0;
3616 }
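
/*
 * Illustrative driver-side sketch (assumptions, not taken from this file):
 * an Ethernet-style driver typically publishes itself to bpf from its
 * attach routine; bpfdetach() below undoes this when the interface is
 * detached.
 *
 *	// standard Ethernet DLT with a 14-byte link-layer header
 *	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
 */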
3617
3618 /*
3619 * Detach bpf from an interface. This involves detaching each descriptor
3620 * associated with the interface, and leaving bd_bif NULL. Notify each
3621 * descriptor as it's detached so that any sleepers wake up and get
3622 * ENXIO.
3623 */
3624 void
3625 bpfdetach(struct ifnet *ifp)
3626 {
3627 struct bpf_if *bp, *bp_prev, *bp_next;
3628 struct bpf_d *d;
3629
3630 if (bpf_debug != 0) {
3631 printf("%s: %s\n", __func__, if_name(ifp));
3632 }
3633
3634 lck_mtx_lock(bpf_mlock);
3635
3636 /*
3637 * Detach the descriptors attached to that interface while
3638 * holding the lock, to maintain the integrity of the
3639 * interface list
3640 */
3641 bp_prev = NULL;
3642 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
3643 bp_next = bp->bif_next;
3644
3645 if (ifp != bp->bif_ifp) {
3646 bp_prev = bp;
3647 continue;
3648 }
3649 /* Unlink from the interface list */
3650 if (bp_prev) {
3651 bp_prev->bif_next = bp->bif_next;
3652 } else {
3653 bpf_iflist = bp->bif_next;
3654 }
3655
3656 /* Detach the devices attached to the interface */
3657 while ((d = bp->bif_dlist) != NULL) {
3658 /*
3659 * Take an extra reference to prevent the device
3660 * from being freed when bpf_detachd() releases
3661 * the reference for the interface list
3662 */
3663 bpf_acquire_d(d);
3664 bpf_detachd(d, 0);
3665 bpf_wakeup(d);
3666 bpf_release_d(d);
3667 }
3668 ifnet_release(ifp);
3669 }
3670
3671 lck_mtx_unlock(bpf_mlock);
3672 }
3673
3674 void
3675 bpf_init(__unused void *unused)
3676 {
3677 #ifdef __APPLE__
3678 int i;
3679 int maj;
3680
3681 if (bpf_devsw_installed == 0) {
3682 bpf_devsw_installed = 1;
3683 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
3684 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
3685 bpf_mlock_attr = lck_attr_alloc_init();
3686 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
3687 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3688 if (maj == -1) {
3689 if (bpf_mlock_attr) {
3690 lck_attr_free(bpf_mlock_attr);
3691 }
3692 if (bpf_mlock_grp) {
3693 lck_grp_free(bpf_mlock_grp);
3694 }
3695 if (bpf_mlock_grp_attr) {
3696 lck_grp_attr_free(bpf_mlock_grp_attr);
3697 }
3698
3699 bpf_mlock = NULL;
3700 bpf_mlock_attr = NULL;
3701 bpf_mlock_grp = NULL;
3702 bpf_mlock_grp_attr = NULL;
3703 bpf_devsw_installed = 0;
3704 printf("bpf_init: failed to allocate a major number\n");
3705 return;
3706 }
3707
3708 for (i = 0; i < NBPFILTER; i++) {
3709 bpf_make_dev_t(maj);
3710 }
3711 }
3712 #else
3713 cdevsw_add(&bpf_cdevsw);
3714 #endif
3715 }
3716
3717 #ifndef __APPLE__
3718 SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
3719 #endif
3720
3721 #if CONFIG_MACF_NET
3722 struct label *
3723 mac_bpfdesc_label_get(struct bpf_d *d)
3724 {
3725 return d->bd_label;
3726 }
3727
3728 void
3729 mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
3730 {
3731 d->bd_label = label;
3732 }
3733 #endif
3734
3735 static int
3736 sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
3737 {
3738 #pragma unused(arg1, arg2)
3739 int i, err;
3740
3741 i = bpf_maxbufsize;
3742
3743 err = sysctl_handle_int(oidp, &i, 0, req);
3744 if (err != 0 || req->newptr == USER_ADDR_NULL) {
3745 return err;
3746 }
3747
3748 if (i < 0 || i > BPF_MAXSIZE_CAP) {
3749 i = BPF_MAXSIZE_CAP;
3750 }
3751
3752 bpf_maxbufsize = i;
3753 return err;
3754 }