1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/in_pcb.h>
112 #include <netinet/in_var.h>
113 #include <netinet/ip_var.h>
114 #include <netinet/tcp.h>
115 #include <netinet/tcp_var.h>
116 #include <netinet/udp.h>
117 #include <netinet/udp_var.h>
118 #include <netinet/if_ether.h>
119 #include <sys/kernel.h>
120 #include <sys/sysctl.h>
121 #include <net/firewire.h>
122
123 #include <miscfs/devfs/devfs.h>
124 #include <net/dlil.h>
125 #include <net/pktap.h>
126
127 #include <kern/locks.h>
128 #include <kern/thread_call.h>
129
130 #if CONFIG_MACF_NET
131 #include <security/mac_framework.h>
132 #endif /* CONFIG_MACF_NET */
133
134 extern int tvtohz(struct timeval *);
135
136 #define BPF_BUFSIZE 4096
137 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
138
139
140 #define PRINET 26 /* interruptible */
141
142 /*
143 * The default read buffer size is patchable.
144 */
145 static unsigned int bpf_bufsize = BPF_BUFSIZE;
146 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
147 &bpf_bufsize, 0, "");
148 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
149 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
150 &bpf_maxbufsize, 0, "");
151 static unsigned int bpf_maxdevices = 256;
152 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
153 &bpf_maxdevices, 0, "");
154 /*
155 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
156 * On OS X it is off by default, so a process needs to use the ioctl
157 * BIOCSWANTPKTAP explicitly to be able to use DLT_PKTAP.
158 */
159 static unsigned int bpf_wantpktap = 0;
160 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
161 &bpf_wantpktap, 0, "");
162
163 /*
164 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
165 * bpf_dtab holds pointers to the descriptors, indexed by minor device #
166 */
167 static struct bpf_if *bpf_iflist;
168 #ifdef __APPLE__
169 /*
170 * BSD now stores the bpf_d in the dev_t which is a struct
171 * on their system. Our dev_t is an int, so we still store
172 * the bpf_d in a separate table indexed by minor device #.
173 *
174 * The value stored in bpf_dtab[n] represents one of three states:
175 * 0: device not opened
176 * 1: device opening or closing
177 * other: device <n> opened with pointer to storage
178 */
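/*
 * Illustrative sketch, not part of the original source: the three-state
 * encoding above is what the repeated "d == 0 || d == (void *)1" checks
 * throughout this file test for. A hypothetical helper would look like:
 */
#if 0	/* example only */
static inline int
bpf_dev_busy(struct bpf_d *d)
{
	/* NULL: not opened; (void *)1: opening or closing */
	return (d == NULL || d == (struct bpf_d *)1);
}
#endif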
179 static struct bpf_d **bpf_dtab = NULL;
180 static unsigned int bpf_dtab_size = 0;
181 static unsigned int nbpfilter = 0;
182
183 decl_lck_mtx_data(static, bpf_mlock_data);
184 static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
185 static lck_grp_t *bpf_mlock_grp;
186 static lck_grp_attr_t *bpf_mlock_grp_attr;
187 static lck_attr_t *bpf_mlock_attr;
188
189 static mbuf_tag_id_t bpf_mtag_id;
190 #endif /* __APPLE__ */
191
192 static int bpf_allocbufs(struct bpf_d *);
193 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
194 static void bpf_detachd(struct bpf_d *d);
195 static void bpf_freed(struct bpf_d *);
196 static void bpf_mcopy(const void *, void *, size_t);
197 static int bpf_movein(struct uio *, int,
198 struct mbuf **, struct sockaddr *, int *);
199 static int bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt, dev_t);
200 static void bpf_timed_out(void *, void *);
201 static void bpf_wakeup(struct bpf_d *);
202 static void catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int,
203 u_int, int, void (*)(const void *, void *, size_t));
204 static void reset_d(struct bpf_d *);
205 static int bpf_setf(struct bpf_d *, u_int , user_addr_t , dev_t, u_long);
206 static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
207 static int bpf_setdlt(struct bpf_d *, u_int, dev_t);
208 static int bpf_set_traffic_class(struct bpf_d *, int);
209 static void bpf_set_packet_service_class(struct mbuf *, int);
210
211 /*static void *bpf_devfs_token[MAXBPFILTER];*/
212
213 static int bpf_devsw_installed;
214
215 void bpf_init(void *unused);
216 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
217
218 /*
219 * Darwin differs from BSD here: the following are static
220 * on BSD but not static on Darwin.
221 */
222 d_open_t bpfopen;
223 d_close_t bpfclose;
224 d_read_t bpfread;
225 d_write_t bpfwrite;
226 ioctl_fcn_t bpfioctl;
227 select_fcn_t bpfselect;
228
229
230 /* Darwin's cdevsw struct differs slightly from BSD's */
231 #define CDEV_MAJOR 23
232 static struct cdevsw bpf_cdevsw = {
233 /* open */ bpfopen,
234 /* close */ bpfclose,
235 /* read */ bpfread,
236 /* write */ bpfwrite,
237 /* ioctl */ bpfioctl,
238 /* stop */ eno_stop,
239 /* reset */ eno_reset,
240 /* tty */ NULL,
241 /* select */ bpfselect,
242 /* mmap */ eno_mmap,
243 /* strategy*/ eno_strat,
244 /* getc */ eno_getc,
245 /* putc */ eno_putc,
246 /* type */ 0
247 };
248
249 #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
250
251 static int
252 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
253 {
254 struct mbuf *m;
255 int error;
256 int len;
257 uint8_t sa_family;
258 int hlen;
259
260 switch (linktype) {
261
262 #if SLIP
263 case DLT_SLIP:
264 sa_family = AF_INET;
265 hlen = 0;
266 break;
267 #endif /* SLIP */
268
269 case DLT_EN10MB:
270 sa_family = AF_UNSPEC;
271 /* XXX Would MAXLINKHDR be better? */
272 hlen = sizeof(struct ether_header);
273 break;
274
275 #if FDDI
276 case DLT_FDDI:
277 #if defined(__FreeBSD__) || defined(__bsdi__)
278 sa_family = AF_IMPLINK;
279 hlen = 0;
280 #else
281 sa_family = AF_UNSPEC;
282 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
283 hlen = 24;
284 #endif
285 break;
286 #endif /* FDDI */
287
288 case DLT_RAW:
289 case DLT_NULL:
290 sa_family = AF_UNSPEC;
291 hlen = 0;
292 break;
293
294 #ifdef __FreeBSD__
295 case DLT_ATM_RFC1483:
296 /*
297 * The en ATM driver requires a 4-byte ATM pseudo header.
298 * Though it isn't standard, the VPI:VCI needs to be
299 * specified anyway.
300 */
301 sa_family = AF_UNSPEC;
302 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
303 break;
304 #endif
305
306 case DLT_PPP:
307 sa_family = AF_UNSPEC;
308 hlen = 4; /* This should match PPP_HDRLEN */
309 break;
310
311 case DLT_APPLE_IP_OVER_IEEE1394:
312 sa_family = AF_UNSPEC;
313 hlen = sizeof(struct firewire_header);
314 break;
315
316 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
317 sa_family = AF_IEEE80211;
318 hlen = 0;
319 break;
320
321 case DLT_IEEE802_11_RADIO:
322 sa_family = AF_IEEE80211;
323 hlen = 0;
324 break;
325
326 default:
327 return (EIO);
328 }
329
330 // LP64todo - fix this!
331 len = uio_resid(uio);
332 *datlen = len - hlen;
333 if ((unsigned)len > MCLBYTES)
334 return (EIO);
335
336 if (sockp) {
337 /*
338 * Build a sockaddr based on the data link layer type.
339 * We do this at this level because the ethernet header
340 * is copied directly into the data field of the sockaddr.
341 * In the case of SLIP, there is no header and the packet
342 * is forwarded as is.
343 * Also, we are careful to leave room at the front of the mbuf
344 * for the link level header.
345 */
346 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
347 return (EIO);
348 }
349 sockp->sa_family = sa_family;
350 } else {
351 /*
352 * We're directly sending the packet data supplied by
353 * the user; we don't need to make room for the link
354 * header, and don't need the header length value any
355 * more, so set it to 0.
356 */
357 hlen = 0;
358 }
359
360 MGETHDR(m, M_WAIT, MT_DATA);
361 if (m == 0)
362 return (ENOBUFS);
363 if ((unsigned)len > MHLEN) {
364 MCLGET(m, M_WAIT);
365 if ((m->m_flags & M_EXT) == 0) {
366 error = ENOBUFS;
367 goto bad;
368 }
369 }
370 m->m_pkthdr.len = m->m_len = len;
371 m->m_pkthdr.rcvif = NULL;
372 *mp = m;
373
374 /*
375 * Make room for link header.
376 */
377 if (hlen != 0) {
378 m->m_pkthdr.len -= hlen;
379 m->m_len -= hlen;
380 m->m_data += hlen; /* XXX */
381 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
382 if (error)
383 goto bad;
384 }
385 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
386 if (error)
387 goto bad;
388
389 /* Check for multicast destination */
390 switch (linktype) {
391 case DLT_EN10MB: {
392 struct ether_header *eh = mtod(m, struct ether_header *);
393
394 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
395 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
396 m->m_flags |= M_BCAST;
397 else
398 m->m_flags |= M_MCAST;
399 }
400 break;
401 }
402 }
403
404 return 0;
405 bad:
406 m_freem(m);
407 return (error);
408 }
409
410 #ifdef __APPLE__
411
412 /*
413 * The dynamic addition of a new device node must block all processes that
414 * are opening the last device so that no process will get an unexpected
415 * ENOENT
416 */
417 static void
418 bpf_make_dev_t(int maj)
419 {
420 static int bpf_growing = 0;
421 unsigned int cur_size = nbpfilter, i;
422
423 if (nbpfilter >= bpf_maxdevices)
424 return;
425
426 while (bpf_growing) {
427 /* Wait until new device has been created */
428 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
429 }
430 if (nbpfilter > cur_size) {
431 /* other thread grew it already */
432 return;
433 }
434 bpf_growing = 1;
435
436 /* need to grow bpf_dtab first */
437 if (nbpfilter == bpf_dtab_size) {
438 int new_dtab_size;
439 struct bpf_d **new_dtab = NULL;
440 struct bpf_d **old_dtab = NULL;
441
442 new_dtab_size = bpf_dtab_size + NBPFILTER;
443 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
444 if (new_dtab == 0) {
445 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
446 goto done;
447 }
448 if (bpf_dtab) {
449 bcopy(bpf_dtab, new_dtab,
450 sizeof(struct bpf_d *) * bpf_dtab_size);
451 }
452 bzero(new_dtab + bpf_dtab_size,
453 sizeof(struct bpf_d *) * NBPFILTER);
454 old_dtab = bpf_dtab;
455 bpf_dtab = new_dtab;
456 bpf_dtab_size = new_dtab_size;
457 if (old_dtab != NULL)
458 _FREE(old_dtab, M_DEVBUF);
459 }
460 i = nbpfilter++;
461 (void) devfs_make_node(makedev(maj, i),
462 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
463 "bpf%d", i);
464 done:
465 bpf_growing = 0;
466 wakeup((caddr_t)&bpf_growing);
467 }
468
469 #endif
470
471 /*
472 * Attach file to the bpf interface, i.e. make d listen on bp.
473 */
474 static errno_t
475 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
476 {
477 int first = bp->bif_dlist == NULL;
478 int error = 0;
479
480 /*
481 * Point d at bp, and add d to the interface's list of listeners.
482 * Finally, point the driver's bpf cookie at the interface so
483 * it will divert packets to bpf.
484 */
485 d->bd_bif = bp;
486 d->bd_next = bp->bif_dlist;
487 bp->bif_dlist = d;
488
489 if (first) {
490 /* Find the default bpf entry for this ifp */
491 if (bp->bif_ifp->if_bpf == NULL) {
492 struct bpf_if *tmp, *primary = NULL;
493
494 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
495 if (tmp->bif_ifp != bp->bif_ifp)
496 continue;
497 primary = tmp;
498 /*
499 * Make DLT_PKTAP only if process knows how
500 * to deal with it, otherwise find another one
501 */
502 if (tmp->bif_dlt == DLT_PKTAP &&
503 !(d->bd_flags & BPF_WANT_PKTAP))
504 continue;
505 break;
506 }
507 bp->bif_ifp->if_bpf = primary;
508 }
509
510 /* Only call dlil_set_bpf_tap for primary dlt */
511 if (bp->bif_ifp->if_bpf == bp)
512 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);
513
514 if (bp->bif_tap)
515 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
516 }
517
518 if (bp->bif_ifp->if_bpf != NULL &&
519 bp->bif_ifp->if_bpf->bif_dlt == DLT_PKTAP)
520 d->bd_flags |= BPF_FINALIZE_PKTAP;
521 else
522 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
523
524 return error;
525 }
526
527 /*
528 * Detach a file from its interface.
529 */
530 static void
531 bpf_detachd(struct bpf_d *d)
532 {
533 struct bpf_d **p;
534 struct bpf_if *bp;
535 struct ifnet *ifp;
536
537 ifp = d->bd_bif->bif_ifp;
538 bp = d->bd_bif;
539
540 /* Remove d from the interface's descriptor list. */
541 p = &bp->bif_dlist;
542 while (*p != d) {
543 p = &(*p)->bd_next;
544 if (*p == 0)
545 panic("bpf_detachd: descriptor not in list");
546 }
547 *p = (*p)->bd_next;
548 if (bp->bif_dlist == 0) {
549 /*
550 * Let the driver know that there are no more listeners.
551 */
552 /* Only call dlil_set_bpf_tap for primary dlt */
553 if (bp->bif_ifp->if_bpf == bp)
554 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
555 if (bp->bif_tap)
556 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
557
558 for (bp = bpf_iflist; bp; bp = bp->bif_next)
559 if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
560 break;
561 if (bp == NULL)
562 ifp->if_bpf = NULL;
563 }
564 d->bd_bif = NULL;
565 /*
566 * Check if this descriptor had requested promiscuous mode.
567 * If so, turn it off.
568 */
569 if (d->bd_promisc) {
570 d->bd_promisc = 0;
571 lck_mtx_unlock(bpf_mlock);
572 if (ifnet_set_promiscuous(ifp, 0)) {
573 /*
574 * Something is really wrong if we were able to put
575 * the driver into promiscuous mode, but can't
576 * take it out.
577 * Most likely the network interface is gone.
578 */
579 printf("bpf: ifnet_set_promiscuous failed");
580 }
581 lck_mtx_lock(bpf_mlock);
582 }
583 }
584
585
586 /*
587 * Start asynchronous timer, if necessary.
588 * Must be called with bpf_mlock held.
589 */
590 static void
591 bpf_start_timer(struct bpf_d *d)
592 {
593 uint64_t deadline;
594 struct timeval tv;
595
596 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
597 tv.tv_sec = d->bd_rtout / hz;
598 tv.tv_usec = (d->bd_rtout % hz) * tick;
599
600 clock_interval_to_deadline(
601 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
602 NSEC_PER_USEC, &deadline);
603 /*
604 * The state is BPF_IDLE, so the timer hasn't
605 * been started yet, and hasn't gone off yet;
606 * there is no thread call scheduled, so this
607 * won't change the schedule.
608 *
609 * XXX - what if, by the time it gets entered,
610 * the deadline has already passed?
611 */
612 thread_call_enter_delayed(d->bd_thread_call, deadline);
613 d->bd_state = BPF_WAITING;
614 }
615 }
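/*
 * Illustrative userland sketch (an assumption, not taken from this
 * file): the read timeout that drives this timer is set through
 * BIOCSRTIMEOUT, which maps to the 32/64-bit variants handled in
 * bpfioctl() below:
 *
 *	struct timeval tv = { 1, 0 };
 *	ioctl(fd, BIOCSRTIMEOUT, &tv);
 */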
616
617 /*
618 * Cancel asynchronous timer.
619 * Must be called with bpf_mlock held.
620 */
621 static boolean_t
622 bpf_stop_timer(struct bpf_d *d)
623 {
624 /*
625 * If the timer has already gone off, this does nothing.
626 * Our caller is expected to set d->bd_state to BPF_IDLE,
627 * with the bpf_mlock, after we are called. bpf_timed_out()
628 * also grabs bpf_mlock, so, if the timer has gone off and
629 * bpf_timed_out() hasn't finished, it's waiting for the
630 * lock; when this thread releases the lock, it will
631 * find the state is BPF_IDLE, and just release the
632 * lock and return.
633 */
634 return (thread_call_cancel(d->bd_thread_call));
635 }
636
637
638
639 /*
640 * Open ethernet device. Returns ENXIO for illegal minor device number,
641 * EBUSY if file is open by another process.
642 */
643 /* ARGSUSED */
644 int
645 bpfopen(dev_t dev, int flags, __unused int fmt,
646 __unused struct proc *p)
647 {
648 struct bpf_d *d;
649
650 lck_mtx_lock(bpf_mlock);
651 if ((unsigned int) minor(dev) >= nbpfilter) {
652 lck_mtx_unlock(bpf_mlock);
653 return (ENXIO);
654 }
655 /*
656 * New device nodes are created on demand when opening the last one.
657 * The programming model is for processes to loop on the minor starting at 0
658 * as long as EBUSY is returned. The loop stops when either the open succeeds or
659 * an error other than EBUSY is returned. That means that bpf_make_dev_t() must
660 * block all processes that are opening the last node. If not all
661 * processes are blocked, they could unexpectedly get ENOENT and abort their
662 * opening loop.
663 */
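/*
 * Illustrative userland sketch (an assumption, not taken from this
 * file) of the open loop described above; "fd" and the bound of 256
 * are hypothetical:
 *
 *	int fd = -1;
 *	char name[16];
 *	for (int i = 0; i < 256; i++) {
 *		snprintf(name, sizeof (name), "/dev/bpf%d", i);
 *		fd = open(name, O_RDWR);
 *		if (fd >= 0 || errno != EBUSY)
 *			break;
 *	}
 */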
664 if ((unsigned int) minor(dev) == (nbpfilter - 1))
665 bpf_make_dev_t(major(dev));
666
667 /*
668 * Each minor can be opened by only one process. If the requested
669 * minor is in use, return EBUSY.
670 *
671 * Important: bpfopen() and bpfclose() have to check and set the status of a device
672 * in the same locking context, otherwise the device may be leaked because the vnode use count
673 * will be unexpectedly greater than 1 when close() is called.
674 */
675 if (bpf_dtab[minor(dev)] == 0) {
676 bpf_dtab[minor(dev)] = (void *)1; /* Mark opening */
677 } else {
678 lck_mtx_unlock(bpf_mlock);
679 return (EBUSY);
680 }
681 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, M_WAIT);
682 if (d == NULL) {
683 /* this really is a catastrophic failure */
684 printf("bpfopen: malloc bpf_d failed\n");
685 bpf_dtab[minor(dev)] = NULL;
686 lck_mtx_unlock(bpf_mlock);
687 return ENOMEM;
688 }
689 bzero(d, sizeof(struct bpf_d));
690
691 /*
692 * It is not necessary to take the BPF lock here because no other
693 * thread can access the device until it is marked opened...
694 */
695
696 /* Mark "in use" and do most initialization. */
697 d->bd_bufsize = bpf_bufsize;
698 d->bd_sig = SIGIO;
699 d->bd_seesent = 1;
700 d->bd_oflags = flags;
701 d->bd_state = BPF_IDLE;
702 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
703 d->bd_traffic_class = SO_TC_BE;
704 if (bpf_wantpktap)
705 d->bd_flags |= BPF_WANT_PKTAP;
706 else
707 d->bd_flags &= ~BPF_WANT_PKTAP;
708
709 if (d->bd_thread_call == NULL) {
710 printf("bpfopen: malloc thread call failed\n");
711 bpf_dtab[minor(dev)] = NULL;
712 lck_mtx_unlock(bpf_mlock);
713 _FREE(d, M_DEVBUF);
714 return ENOMEM;
715 }
716 #if CONFIG_MACF_NET
717 mac_bpfdesc_label_init(d);
718 mac_bpfdesc_label_associate(kauth_cred_get(), d);
719 #endif
720 bpf_dtab[minor(dev)] = d; /* Mark opened */
721 lck_mtx_unlock(bpf_mlock);
722
723 return (0);
724 }
725
726 /*
727 * Close the descriptor by detaching it from its interface,
728 * deallocating its buffers, and marking it free.
729 */
730 /* ARGSUSED */
731 int
732 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
733 __unused struct proc *p)
734 {
735 struct bpf_d *d;
736
737 /* Take BPF lock to ensure no other thread is using the device */
738 lck_mtx_lock(bpf_mlock);
739
740 d = bpf_dtab[minor(dev)];
741 if (d == 0 || d == (void *)1) {
742 lck_mtx_unlock(bpf_mlock);
743 return (ENXIO);
744 }
745 bpf_dtab[minor(dev)] = (void *)1; /* Mark closing */
746
747 /*
748 * Deal with any in-progress timeouts.
749 */
750 switch (d->bd_state) {
751 case BPF_IDLE:
752 /*
753 * Not waiting for a timeout, and no timeout happened.
754 */
755 break;
756
757 case BPF_WAITING:
758 /*
759 * Waiting for a timeout.
760 * Cancel any timer that has yet to go off,
761 * and mark the state as "closing".
762 * Then drop the lock to allow any timers that
763 * *have* gone off to run to completion, and wait
764 * for them to finish.
765 */
766 if (!bpf_stop_timer(d)) {
767 /*
768 * There was no pending call, so the call must
769 * have been in progress. Wait for the call to
770 * complete; we have to drop the lock while
771 * waiting, to let the in-progress call complete.
772 */
773 d->bd_state = BPF_DRAINING;
774 while (d->bd_state == BPF_DRAINING)
775 msleep((caddr_t)d, bpf_mlock, PRINET,
776 "bpfdraining", NULL);
777 }
778 d->bd_state = BPF_IDLE;
779 break;
780
781 case BPF_TIMED_OUT:
782 /*
783 * Timer went off, and the timeout routine finished.
784 */
785 d->bd_state = BPF_IDLE;
786 break;
787
788 case BPF_DRAINING:
789 /*
790 * Another thread is blocked on a close waiting for
791 * a timeout to finish.
792 * This "shouldn't happen", as the first thread to enter
793 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
794 * all subsequent threads should see that and fail with
795 * ENXIO.
796 */
797 panic("Two threads blocked in a BPF close");
798 break;
799 }
800
801 if (d->bd_bif)
802 bpf_detachd(d);
803 selthreadclear(&d->bd_sel);
804 #if CONFIG_MACF_NET
805 mac_bpfdesc_label_destroy(d);
806 #endif
807 thread_call_free(d->bd_thread_call);
808
809 while (d->bd_hbuf_read)
810 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
811
812 bpf_freed(d);
813
814 /* Mark free in same context as bpfopen comes to check */
815 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
816 lck_mtx_unlock(bpf_mlock);
817
818 _FREE(d, M_DEVBUF);
819
820 return (0);
821 }
822
823
824 #define BPF_SLEEP bpf_sleep
825
826 static int
827 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
828 {
829 u_int64_t abstime = 0;
830
831 if(timo)
832 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
833
834 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
835 }
836
837 /*
838 * Rotate the packet buffers in descriptor d. Move the store buffer
839 * into the hold slot, and the free buffer into the store slot.
840 * Zero the length of the new store buffer.
841 */
842 #define ROTATE_BUFFERS(d) \
843 if (d->bd_hbuf_read) \
844 panic("rotating bpf buffers during read"); \
845 (d)->bd_hbuf = (d)->bd_sbuf; \
846 (d)->bd_hlen = (d)->bd_slen; \
847 (d)->bd_sbuf = (d)->bd_fbuf; \
848 (d)->bd_slen = 0; \
849 (d)->bd_fbuf = NULL;
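/*
 * Illustrative note, not part of the original source: with hold buffer
 * H, store buffer S and free buffer F, the rotation performs
 *
 *	hbuf <- S	(captured packets, now ready for read)
 *	sbuf <- F	(empty, ready to capture into)
 *	fbuf <- NULL	(replenished when the hold buffer is consumed)
 */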
850 /*
851 * bpfread - read next chunk of packets from buffers
852 */
853 int
854 bpfread(dev_t dev, struct uio *uio, int ioflag)
855 {
856 struct bpf_d *d;
857 caddr_t hbuf;
858 int timed_out, hbuf_len;
859 int error;
860 int flags;
861
862 lck_mtx_lock(bpf_mlock);
863
864 d = bpf_dtab[minor(dev)];
865 if (d == 0 || d == (void *)1) {
866 lck_mtx_unlock(bpf_mlock);
867 return (ENXIO);
868 }
869
870 /*
871 * Restrict application to use a buffer the same size as
872 * the kernel buffers.
873 */
874 if (uio_resid(uio) != d->bd_bufsize) {
875 lck_mtx_unlock(bpf_mlock);
876 return (EINVAL);
877 }
878
879 if (d->bd_state == BPF_WAITING)
880 bpf_stop_timer(d);
881
882 timed_out = (d->bd_state == BPF_TIMED_OUT);
883 d->bd_state = BPF_IDLE;
884
885 while (d->bd_hbuf_read)
886 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
887
888 d = bpf_dtab[minor(dev)];
889 if (d == 0 || d == (void *)1) {
890 lck_mtx_unlock(bpf_mlock);
891 return (ENXIO);
892 }
893 /*
894 * If the hold buffer is empty, then do a timed sleep, which
895 * ends when the timeout expires or when enough packets
896 * have arrived to fill the store buffer.
897 */
898 while (d->bd_hbuf == 0) {
899 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
900 && d->bd_slen != 0) {
901 /*
902 * We're in immediate mode, or are reading
903 * in non-blocking mode, or a timer was
904 * started before the read (e.g., by select()
905 * or poll()) and has expired and a packet(s)
906 * either arrived since the previous
907 * read or arrived while we were asleep.
908 * Rotate the buffers and return what's here.
909 */
910 ROTATE_BUFFERS(d);
911 break;
912 }
913
914 /*
915 * No data is available, check to see if the bpf device
916 * is still pointed at a real interface. If not, return
917 * ENXIO so that the userland process knows to rebind
918 * it before using it again.
919 */
920 if (d->bd_bif == NULL) {
921 lck_mtx_unlock(bpf_mlock);
922 return (ENXIO);
923 }
924 if (ioflag & IO_NDELAY) {
925 lck_mtx_unlock(bpf_mlock);
926 return (EWOULDBLOCK);
927 }
928 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
929 d->bd_rtout);
930 /*
931 * Make sure device is still opened
932 */
933 d = bpf_dtab[minor(dev)];
934 if (d == 0 || d == (void *)1) {
935 lck_mtx_unlock(bpf_mlock);
936 return (ENXIO);
937 }
938
939 while (d->bd_hbuf_read)
940 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
941
942 d = bpf_dtab[minor(dev)];
943 if (d == 0 || d == (void *)1) {
944 lck_mtx_unlock(bpf_mlock);
945 return (ENXIO);
946 }
947
948 if (error == EINTR || error == ERESTART) {
949 if (d->bd_hbuf) {
950 /*
951 * Because we msleep, the hold buffer might
952 * be filled when we wake up. Avoid rotating
953 * in this case.
954 */
955 break;
956 }
957 if (d->bd_slen) {
958 /*
959 * Sometimes we may be interrupted often and
960 * the sleep above will not timeout.
961 * Regardless, we should rotate the buffers
962 * if there's any new data pending and
963 * return it.
964 */
965 ROTATE_BUFFERS(d);
966 break;
967 }
968 lck_mtx_unlock(bpf_mlock);
969 return (error);
970 }
971 if (error == EWOULDBLOCK) {
972 /*
973 * On a timeout, return what's in the buffer,
974 * which may be nothing. If there is something
975 * in the store buffer, we can rotate the buffers.
976 */
977 if (d->bd_hbuf)
978 /*
979 * We filled up the buffer in between
980 * getting the timeout and arriving
981 * here, so we don't need to rotate.
982 */
983 break;
984
985 if (d->bd_slen == 0) {
986 lck_mtx_unlock(bpf_mlock);
987 return (0);
988 }
989 ROTATE_BUFFERS(d);
990 break;
991 }
992 }
993 /*
994 * At this point, we know we have something in the hold slot.
995 */
996
997 /*
998 * Mark the hold buffer as being read, so we do not
999 * rotate the buffers until the read of the hold buffer
1000 * is complete. This also avoids issues resulting
1001 * from page faults during disk sleep (<rdar://problem/13436396>).
1002 */
1003 d->bd_hbuf_read = 1;
1004 hbuf = d->bd_hbuf;
1005 hbuf_len = d->bd_hlen;
1006 flags = d->bd_flags;
1007 lck_mtx_unlock(bpf_mlock);
1008
1009 #ifdef __APPLE__
1010 /*
1011 * Before we move data to userland, we fill out the extended
1012 * header fields.
1013 */
1014 if (flags & BPF_EXTENDED_HDR) {
1015 char *p;
1016
1017 p = hbuf;
1018 while (p < hbuf + hbuf_len) {
1019 struct bpf_hdr_ext *ehp;
1020 uint32_t flowid;
1021 struct so_procinfo soprocinfo;
1022 int found = 0;
1023
1024 ehp = (struct bpf_hdr_ext *)(void *)p;
1025 if ((flowid = ehp->bh_flowid)) {
1026 if (ehp->bh_proto == IPPROTO_TCP)
1027 found = inp_findinpcb_procinfo(&tcbinfo,
1028 flowid, &soprocinfo);
1029 else if (ehp->bh_proto == IPPROTO_UDP)
1030 found = inp_findinpcb_procinfo(&udbinfo,
1031 flowid, &soprocinfo);
1032 if (found == 1) {
1033 ehp->bh_pid = soprocinfo.spi_pid;
1034 proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN);
1035 }
1036 ehp->bh_flowid = 0;
1037 }
1038 if (flags & BPF_FINALIZE_PKTAP) {
1039 struct pktap_header *pktaphdr;
1040
1041 pktaphdr = (struct pktap_header *)(void *)
1042 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1043
1044 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1045 pktap_finalize_proc_info(pktaphdr);
1046
1047 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1048 ehp->bh_tstamp.tv_sec =
1049 pktaphdr->pth_tstamp.tv_sec;
1050 ehp->bh_tstamp.tv_usec =
1051 pktaphdr->pth_tstamp.tv_usec;
1052 }
1053 }
1054 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1055 }
1056 } else if (flags & BPF_FINALIZE_PKTAP) {
1057 char *p;
1058
1059 p = hbuf;
1060 while (p < hbuf + hbuf_len) {
1061 struct bpf_hdr *hp;
1062 struct pktap_header *pktaphdr;
1063
1064 hp = (struct bpf_hdr *)(void *)p;
1065 pktaphdr = (struct pktap_header *)(void *)
1066 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1067
1068 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1069 pktap_finalize_proc_info(pktaphdr);
1070
1071 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1072 hp->bh_tstamp.tv_sec =
1073 pktaphdr->pth_tstamp.tv_sec;
1074 hp->bh_tstamp.tv_usec =
1075 pktaphdr->pth_tstamp.tv_usec;
1076 }
1077
1078 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1079 }
1080 }
1081 #endif
1082
1083 /*
1084 * Move data from hold buffer into user space.
1085 * We know the entire buffer is transferred since
1086 * we checked above that the read buffer is bpf_bufsize bytes.
1087 */
1088 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1089
1090 lck_mtx_lock(bpf_mlock);
1091 /*
1092 * Make sure device is still opened
1093 */
1094 d = bpf_dtab[minor(dev)];
1095 if (d == 0 || d == (void *)1) {
1096 lck_mtx_unlock(bpf_mlock);
1097 return (ENXIO);
1098 }
1099
1100 d->bd_hbuf_read = 0;
1101 d->bd_fbuf = d->bd_hbuf;
1102 d->bd_hbuf = NULL;
1103 d->bd_hlen = 0;
1104 wakeup((caddr_t)d);
1105 lck_mtx_unlock(bpf_mlock);
1106 return (error);
1107
1108 }
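/*
 * Illustrative userland sketch (an assumption, not taken from this
 * file): a buffer returned by read() is parsed by walking the bpf_hdr
 * records with BPF_WORDALIGN, mirroring the kernel loops above;
 * handle_packet is a hypothetical callback:
 *
 *	char *p = buf;
 *	while (p < buf + nread) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *		handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */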
1109
1110
1111 /*
1112 * If there are processes sleeping on this descriptor, wake them up.
1113 */
1114 static void
1115 bpf_wakeup(struct bpf_d *d)
1116 {
1117 if (d->bd_state == BPF_WAITING) {
1118 bpf_stop_timer(d);
1119 d->bd_state = BPF_IDLE;
1120 }
1121 wakeup((caddr_t)d);
1122 if (d->bd_async && d->bd_sig && d->bd_sigio)
1123 pgsigio(d->bd_sigio, d->bd_sig);
1124
1125 selwakeup(&d->bd_sel);
1126 KNOTE(&d->bd_sel.si_note, 1);
1127 #ifndef __APPLE__
1128 /* XXX */
1129 d->bd_sel.si_pid = 0;
1130 #endif
1131 }
1132
1133
1134 static void
1135 bpf_timed_out(void *arg, __unused void *dummy)
1136 {
1137 struct bpf_d *d = (struct bpf_d *)arg;
1138
1139 lck_mtx_lock(bpf_mlock);
1140 if (d->bd_state == BPF_WAITING) {
1141 /*
1142 * There's a select or kqueue waiting for this; if there's
1143 * now stuff to read, wake it up.
1144 */
1145 d->bd_state = BPF_TIMED_OUT;
1146 if (d->bd_slen != 0)
1147 bpf_wakeup(d);
1148 } else if (d->bd_state == BPF_DRAINING) {
1149 /*
1150 * A close is waiting for this to finish.
1151 * Mark it as finished, and wake the close up.
1152 */
1153 d->bd_state = BPF_IDLE;
1154 bpf_wakeup(d);
1155 }
1156 lck_mtx_unlock(bpf_mlock);
1157 }
1158
1159
1160
1161
1162
1163 /* keep in sync with bpf_movein above: */
1164 #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1165
1166 int
1167 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1168 {
1169 struct bpf_d *d;
1170 struct ifnet *ifp;
1171 struct mbuf *m = NULL;
1172 int error;
1173 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1174 int datlen = 0;
1175 int bif_dlt;
1176 int bd_hdrcmplt;
1177
1178 lck_mtx_lock(bpf_mlock);
1179
1180 d = bpf_dtab[minor(dev)];
1181 if (d == 0 || d == (void *)1) {
1182 lck_mtx_unlock(bpf_mlock);
1183 return (ENXIO);
1184 }
1185 if (d->bd_bif == 0) {
1186 lck_mtx_unlock(bpf_mlock);
1187 return (ENXIO);
1188 }
1189
1190 ifp = d->bd_bif->bif_ifp;
1191
1192 if ((ifp->if_flags & IFF_UP) == 0) {
1193 lck_mtx_unlock(bpf_mlock);
1194 return (ENETDOWN);
1195 }
1196 if (uio_resid(uio) == 0) {
1197 lck_mtx_unlock(bpf_mlock);
1198 return (0);
1199 }
1200 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1201
1202 /*
1203 * fix for PR-6849527
1204 * getting variables onto the stack before dropping the lock for bpf_movein()
1205 */
1206 bif_dlt = (int)d->bd_bif->bif_dlt;
1207 bd_hdrcmplt = d->bd_hdrcmplt;
1208
1209 /* bpf_movein() will allocate mbufs; drop the lock */
1210 lck_mtx_unlock(bpf_mlock);
1211
1212 error = bpf_movein(uio, bif_dlt, &m,
1213 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1214 &datlen);
1215
1216 if (error) {
1217 return (error);
1218 }
1219
1220 /* Take the lock again and verify that the device is still open */
1221 lck_mtx_lock(bpf_mlock);
1222 d = bpf_dtab[minor(dev)];
1223 if (d == 0 || d == (void *)1) {
1224 lck_mtx_unlock(bpf_mlock);
1225 m_freem(m);
1226 return (ENXIO);
1227 }
1228
1229 if (d->bd_bif == NULL) {
1230 lck_mtx_unlock(bpf_mlock);
1231 m_free(m);
1232 return (ENXIO);
1233 }
1234
1235 if ((unsigned)datlen > ifp->if_mtu) {
1236 lck_mtx_unlock(bpf_mlock);
1237 m_freem(m);
1238 return (EMSGSIZE);
1239 }
1240
1241
1242 #if CONFIG_MACF_NET
1243 mac_mbuf_label_associate_bpfdesc(d, m);
1244 #endif
1245
1246 bpf_set_packet_service_class(m, d->bd_traffic_class);
1247
1248 lck_mtx_unlock(bpf_mlock);
1249
1250 if (d->bd_hdrcmplt) {
1251 if (d->bd_bif->bif_send)
1252 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1253 else
1254 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1255 } else {
1256 error = dlil_output(ifp, PF_INET, m, NULL,
1257 (struct sockaddr *)dst_buf, 0, NULL);
1258 }
1259
1260 /*
1261 * The driver frees the mbuf.
1262 */
1263 return (error);
1264 }
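/*
 * Illustrative userland sketch (an assumption, not taken from this
 * file): with the "header already complete" flag set, write() injects
 * a fully formed link-layer frame; "frame" is hypothetical:
 *
 *	u_int one = 1;
 *	ioctl(fd, BIOCSHDRCMPLT, &one);
 *	write(fd, frame, frame_len);
 */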
1265
1266 /*
1267 * Reset a descriptor by flushing its packet buffer and clearing the
1268 * receive and drop counts.
1269 */
1270 static void
1271 reset_d(struct bpf_d *d)
1272 {
1273 if (d->bd_hbuf_read)
1274 panic("resetting buffers during read");
1275
1276 if (d->bd_hbuf) {
1277 /* Free the hold buffer. */
1278 d->bd_fbuf = d->bd_hbuf;
1279 d->bd_hbuf = NULL;
1280 }
1281 d->bd_slen = 0;
1282 d->bd_hlen = 0;
1283 d->bd_rcount = 0;
1284 d->bd_dcount = 0;
1285 }
1286
1287 /*
1288 * FIONREAD Check for read packet available.
1289 * SIOCGIFADDR Get interface address - convenient hook to driver.
1290 * BIOCGBLEN Get buffer len [for read()].
1291 * BIOCSETF Set ethernet read filter.
1292 * BIOCFLUSH Flush read packet buffer.
1293 * BIOCPROMISC Put interface into promiscuous mode.
1294 * BIOCGDLT Get link layer type.
1295 * BIOCGETIF Get interface name.
1296 * BIOCSETIF Set interface.
1297 * BIOCSRTIMEOUT Set read timeout.
1298 * BIOCGRTIMEOUT Get read timeout.
1299 * BIOCGSTATS Get packet stats.
1300 * BIOCIMMEDIATE Set immediate mode.
1301 * BIOCVERSION Get filter language version.
1302 * BIOCGHDRCMPLT Get "header already complete" flag
1303 * BIOCSHDRCMPLT Set "header already complete" flag
1304 * BIOCGSEESENT Get "see packets sent" flag
1305 * BIOCSSEESENT Set "see packets sent" flag
1306 * BIOCSETTC Set traffic class.
1307 * BIOCGETTC Get traffic class.
1308 * BIOCSEXTHDR Set "extended header" flag
1309 */
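/*
 * Illustrative userland sketch (an assumption, not taken from this
 * file): a typical capture setup issues a few of these ioctls right
 * after the open loop; "en0" is a hypothetical interface name:
 *
 *	struct ifreq ifr;
 *	u_int bufsize, immediate = 1;
 *	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &immediate);
 *	ioctl(fd, BIOCGBLEN, &bufsize);
 *
 * Note that read() must then be issued with a buffer of exactly
 * "bufsize" bytes, as enforced in bpfread() above.
 */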
1310 /* ARGSUSED */
1311 int
1312 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1313 struct proc *p)
1314 {
1315 struct bpf_d *d;
1316 int error = 0;
1317 u_int int_arg;
1318 struct ifreq ifr;
1319
1320 lck_mtx_lock(bpf_mlock);
1321
1322 d = bpf_dtab[minor(dev)];
1323 if (d == 0 || d == (void *)1) {
1324 lck_mtx_unlock(bpf_mlock);
1325 return (ENXIO);
1326 }
1327
1328 if (d->bd_state == BPF_WAITING)
1329 bpf_stop_timer(d);
1330 d->bd_state = BPF_IDLE;
1331
1332 switch (cmd) {
1333
1334 default:
1335 error = EINVAL;
1336 break;
1337
1338 /*
1339 * Check for read packet available.
1340 */
1341 case FIONREAD: /* int */
1342 {
1343 int n;
1344
1345 n = d->bd_slen;
1346 if (d->bd_hbuf && d->bd_hbuf_read == 0)
1347 n += d->bd_hlen;
1348
1349 bcopy(&n, addr, sizeof (n));
1350 break;
1351 }
1352
1353 case SIOCGIFADDR: /* struct ifreq */
1354 {
1355 struct ifnet *ifp;
1356
1357 if (d->bd_bif == 0)
1358 error = EINVAL;
1359 else {
1360 ifp = d->bd_bif->bif_ifp;
1361 error = ifnet_ioctl(ifp, 0, cmd, addr);
1362 }
1363 break;
1364 }
1365
1366 /*
1367 * Get buffer len [for read()].
1368 */
1369 case BIOCGBLEN: /* u_int */
1370 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1371 break;
1372
1373 /*
1374 * Set buffer length.
1375 */
1376 case BIOCSBLEN: /* u_int */
1377 if (d->bd_bif != 0)
1378 error = EINVAL;
1379 else {
1380 u_int size;
1381
1382 bcopy(addr, &size, sizeof (size));
1383
1384 if (size > bpf_maxbufsize)
1385 size = bpf_maxbufsize;
1386 else if (size < BPF_MINBUFSIZE)
1387 size = BPF_MINBUFSIZE;
1388 bcopy(&size, addr, sizeof (size));
1389 d->bd_bufsize = size;
1390 }
1391 break;
1392
1393 /*
1394 * Set link layer read filter.
1395 */
1396 case BIOCSETF32:
1397 case BIOCSETFNR32: { /* struct bpf_program32 */
1398 struct bpf_program32 prg32;
1399
1400 bcopy(addr, &prg32, sizeof (prg32));
1401 error = bpf_setf(d, prg32.bf_len,
1402 CAST_USER_ADDR_T(prg32.bf_insns), dev, cmd);
1403 break;
1404 }
1405
1406 case BIOCSETF64:
1407 case BIOCSETFNR64: { /* struct bpf_program64 */
1408 struct bpf_program64 prg64;
1409
1410 bcopy(addr, &prg64, sizeof (prg64));
1411 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, dev, cmd);
1412 break;
1413 }
1414
1415 /*
1416 * Flush read packet buffer.
1417 */
1418 case BIOCFLUSH:
1419 while (d->bd_hbuf_read) {
1420 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1421 }
1422
1423 d = bpf_dtab[minor(dev)];
1424 if (d == 0 || d == (void *)1) {
1425 lck_mtx_unlock(bpf_mlock);
1426 return (ENXIO);
1427 }
1426
1427 reset_d(d);
1428 break;
1429
1430 /*
1431 * Put interface into promiscuous mode.
1432 */
1433 case BIOCPROMISC:
1434 if (d->bd_bif == 0) {
1435 /*
1436 * No interface attached yet.
1437 */
1438 error = EINVAL;
1439 break;
1440 }
1441 if (d->bd_promisc == 0) {
1442 lck_mtx_unlock(bpf_mlock);
1443 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1444 lck_mtx_lock(bpf_mlock);
1445 if (error == 0)
1446 d->bd_promisc = 1;
1447 }
1448 break;
1449
1450 /*
1451 * Get device parameters.
1452 */
1453 case BIOCGDLT: /* u_int */
1454 if (d->bd_bif == 0)
1455 error = EINVAL;
1456 else
1457 bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1458 break;
1459
1460 /*
1461 * Get a list of supported data link types.
1462 */
1463 case BIOCGDLTLIST: /* struct bpf_dltlist */
1464 if (d->bd_bif == NULL) {
1465 error = EINVAL;
1466 } else {
1467 error = bpf_getdltlist(d, addr, p);
1468 }
1469 break;
1470
1471 /*
1472 * Set data link type.
1473 */
1474 case BIOCSDLT: /* u_int */
1475 if (d->bd_bif == NULL) {
1476 error = EINVAL;
1477 } else {
1478 u_int dlt;
1479
1480 bcopy(addr, &dlt, sizeof (dlt));
1481 error = bpf_setdlt(d, dlt, dev);
1482 }
1483 break;
1484
1485 /*
1486 * Get interface name.
1487 */
1488 case BIOCGETIF: /* struct ifreq */
1489 if (d->bd_bif == 0)
1490 error = EINVAL;
1491 else {
1492 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1493
1494 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1495 sizeof (ifr.ifr_name), "%s", if_name(ifp));
1496 }
1497 break;
1498
1499 /*
1500 * Set interface.
1501 */
1502 case BIOCSETIF: { /* struct ifreq */
1503 ifnet_t ifp;
1504
1505 bcopy(addr, &ifr, sizeof (ifr));
1506 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1507 ifp = ifunit(ifr.ifr_name);
1508 if (ifp == NULL)
1509 error = ENXIO;
1510 else
1511 error = bpf_setif(d, ifp, 0, dev);
1512 break;
1513 }
1514
1515 /*
1516 * Set read timeout.
1517 */
1518 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
1519 struct user32_timeval _tv;
1520 struct timeval tv;
1521
1522 bcopy(addr, &_tv, sizeof (_tv));
1523 tv.tv_sec = _tv.tv_sec;
1524 tv.tv_usec = _tv.tv_usec;
1525
1526 /*
1527 * Subtract 1 tick from tvtohz() since this isn't
1528 * a one-shot timer.
1529 */
1530 if ((error = itimerfix(&tv)) == 0)
1531 d->bd_rtout = tvtohz(&tv) - 1;
1532 break;
1533 }
1534
1535 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
1536 struct user64_timeval _tv;
1537 struct timeval tv;
1538
1539 bcopy(addr, &_tv, sizeof (_tv));
1540 tv.tv_sec = _tv.tv_sec;
1541 tv.tv_usec = _tv.tv_usec;
1542
1543 /*
1544 * Subtract 1 tick from tvtohz() since this isn't
1545 * a one-shot timer.
1546 */
1547 if ((error = itimerfix(&tv)) == 0)
1548 d->bd_rtout = tvtohz(&tv) - 1;
1549 break;
1550 }
1551
1552 /*
1553 * Get read timeout.
1554 */
1555 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1556 struct user32_timeval tv;
1557
1558 bzero(&tv, sizeof (tv));
1559 tv.tv_sec = d->bd_rtout / hz;
1560 tv.tv_usec = (d->bd_rtout % hz) * tick;
1561 bcopy(&tv, addr, sizeof (tv));
1562 break;
1563 }
1564
1565 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1566 struct user64_timeval tv;
1567
1568 bzero(&tv, sizeof (tv));
1569 tv.tv_sec = d->bd_rtout / hz;
1570 tv.tv_usec = (d->bd_rtout % hz) * tick;
1571 bcopy(&tv, addr, sizeof (tv));
1572 break;
1573 }
1574
1575 /*
1576 * Get packet stats.
1577 */
1578 case BIOCGSTATS: { /* struct bpf_stat */
1579 struct bpf_stat bs;
1580
1581 bzero(&bs, sizeof (bs));
1582 bs.bs_recv = d->bd_rcount;
1583 bs.bs_drop = d->bd_dcount;
1584 bcopy(&bs, addr, sizeof (bs));
1585 break;
1586 }
1587
1588 /*
1589 * Set immediate mode.
1590 */
1591 case BIOCIMMEDIATE: /* u_int */
1592 bcopy(addr, &d->bd_immediate, sizeof (u_int));
1593 break;
1594
1595 case BIOCVERSION: { /* struct bpf_version */
1596 struct bpf_version bv;
1597
1598 bzero(&bv, sizeof (bv));
1599 bv.bv_major = BPF_MAJOR_VERSION;
1600 bv.bv_minor = BPF_MINOR_VERSION;
1601 bcopy(&bv, addr, sizeof (bv));
1602 break;
1603 }
1604
1605 /*
1606 * Get "header already complete" flag
1607 */
1608 case BIOCGHDRCMPLT: /* u_int */
1609 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1610 break;
1611
1612 /*
1613 * Set "header already complete" flag
1614 */
1615 case BIOCSHDRCMPLT: /* u_int */
1616 bcopy(addr, &int_arg, sizeof (int_arg));
1617 d->bd_hdrcmplt = int_arg ? 1 : 0;
1618 break;
1619
1620 /*
1621 * Get "see sent packets" flag
1622 */
1623 case BIOCGSEESENT: /* u_int */
1624 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1625 break;
1626
1627 /*
1628 * Set "see sent packets" flag
1629 */
1630 case BIOCSSEESENT: /* u_int */
1631 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1632 break;
1633
1634 /*
1635 * Set traffic service class
1636 */
1637 case BIOCSETTC: { /* int */
1638 int tc;
1639
1640 bcopy(addr, &tc, sizeof (int));
1641 error = bpf_set_traffic_class(d, tc);
1642 break;
1643 }
1644
1645 /*
1646 * Get traffic service class
1647 */
1648 case BIOCGETTC: /* int */
1649 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1650 break;
1651
1652 case FIONBIO: /* Non-blocking I/O; int */
1653 break;
1654
1655 case FIOASYNC: /* Send signal on receive packets; int */
1656 bcopy(addr, &d->bd_async, sizeof (int));
1657 break;
1658 #ifndef __APPLE__
1659 case FIOSETOWN:
1660 error = fsetown(*(int *)addr, &d->bd_sigio);
1661 break;
1662
1663 case FIOGETOWN:
1664 *(int *)addr = fgetown(d->bd_sigio);
1665 break;
1666
1667 /* This is deprecated, FIOSETOWN should be used instead. */
1668 case TIOCSPGRP:
1669 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1670 break;
1671
1672 /* This is deprecated, FIOGETOWN should be used instead. */
1673 case TIOCGPGRP:
1674 *(int *)addr = -fgetown(d->bd_sigio);
1675 break;
1676 #endif
1677 case BIOCSRSIG: { /* Set receive signal; u_int */
1678 u_int sig;
1679
1680 bcopy(addr, &sig, sizeof (u_int));
1681
1682 if (sig >= NSIG)
1683 error = EINVAL;
1684 else
1685 d->bd_sig = sig;
1686 break;
1687 }
1688 case BIOCGRSIG: /* u_int */
1689 bcopy(&d->bd_sig, addr, sizeof (u_int));
1690 break;
1691 #ifdef __APPLE__
1692 case BIOCSEXTHDR: /* u_int */
1693 bcopy(addr, &int_arg, sizeof (int_arg));
1694 if (int_arg)
1695 d->bd_flags |= BPF_EXTENDED_HDR;
1696 else
1697 d->bd_flags &= ~BPF_EXTENDED_HDR;
1698 break;
1699
1700 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
1701 ifnet_t ifp;
1702 struct bpf_if *bp;
1703
1704 bcopy(addr, &ifr, sizeof (ifr));
1705 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1706 ifp = ifunit(ifr.ifr_name);
1707 if (ifp == NULL) {
1708 error = ENXIO;
1709 break;
1710 }
1711 ifr.ifr_intval = 0;
1712 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1713 struct bpf_d *bpf_d;
1714
1715 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
1716 continue;
1717 for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) {
1718 ifr.ifr_intval += 1;
1719 }
1720 }
1721 bcopy(&ifr, addr, sizeof (ifr));
1722 break;
1723 }
1724 case BIOCGWANTPKTAP: /* u_int */
1725 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
1726 bcopy(&int_arg, addr, sizeof (int_arg));
1727 break;
1728
1729 case BIOCSWANTPKTAP: /* u_int */
1730 bcopy(addr, &int_arg, sizeof (int_arg));
1731 if (int_arg)
1732 d->bd_flags |= BPF_WANT_PKTAP;
1733 else
1734 d->bd_flags &= ~BPF_WANT_PKTAP;
1735 break;
1736 #endif
1737 }
1738
1739 lck_mtx_unlock(bpf_mlock);
1740
1741 return (error);
1742 }
1743
1744 /*
1745 * Set d's packet filter program to fp. If this file already has a filter,
1746 * free it and replace it. Returns EINVAL for bogus requests.
1747 */
1748 static int
1749 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long cmd)
1750 {
1751 struct bpf_insn *fcode, *old;
1752 u_int flen, size;
1753
1754 while (d->bd_hbuf_read)
1755 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1756
1757 d = bpf_dtab[minor(dev)];
1758 if (d == 0 || d == (void *)1)
1759 return (ENXIO);
1760
1761 old = d->bd_filter;
1762 if (bf_insns == USER_ADDR_NULL) {
1763 if (bf_len != 0)
1764 return (EINVAL);
1765 d->bd_filter = NULL;
1766 reset_d(d);
1767 if (old != 0)
1768 FREE((caddr_t)old, M_DEVBUF);
1769 return (0);
1770 }
1771 flen = bf_len;
1772 if (flen > BPF_MAXINSNS)
1773 return (EINVAL);
1774
1775 size = flen * sizeof(struct bpf_insn);
1776 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
1777 #ifdef __APPLE__
1778 if (fcode == NULL)
1779 return (ENOBUFS);
1780 #endif
1781 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1782 bpf_validate(fcode, (int)flen)) {
1783 d->bd_filter = fcode;
1784
1785 if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
1786 reset_d(d);
1787
1788 if (old != 0)
1789 FREE((caddr_t)old, M_DEVBUF);
1790
1791 return (0);
1792 }
1793 FREE((caddr_t)fcode, M_DEVBUF);
1794 return (EINVAL);
1795 }
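/*
 * Illustrative userland sketch (an assumption, not taken from this
 * file): installing a minimal accept-everything filter; the value
 * returned by BPF_RET is the snapshot length:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 */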
1796
1797 /*
1798 * Detach a file from its current interface (if attached at all) and attach
1799 * to the interface indicated by the name stored in ifr.
1800 * Return an errno or 0.
1801 */
1802 static int
1803 bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev)
1804 {
1805 struct bpf_if *bp;
1806 int error;
1807
1808 while (d->bd_hbuf_read)
1809 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1810
1811 d = bpf_dtab[minor(dev)];
1812 if (d == 0 || d == (void *)1)
1813 return (ENXIO);
1814
1815 /*
1816 * Look through attached interfaces for the named one.
1817 */
1818 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1819 struct ifnet *ifp = bp->bif_ifp;
1820
1821 if (ifp == 0 || ifp != theywant || (dlt != 0 && dlt != bp->bif_dlt))
1822 continue;
1823 /*
1824 * If the process knows how to deal with DLT_PKTAP, use it
1825 * by default
1826 */
1827 if (dlt == 0 && bp->bif_dlt == DLT_PKTAP &&
1828 !(d->bd_flags & BPF_WANT_PKTAP))
1829 continue;
1830 /*
1831 * We found the requested interface.
1832 * Allocate the packet buffers if we need to.
1833 * If we're already attached to requested interface,
1834 * just flush the buffer.
1835 */
1836 if (d->bd_sbuf == 0) {
1837 error = bpf_allocbufs(d);
1838 if (error != 0)
1839 return (error);
1840 }
1841 if (bp != d->bd_bif) {
1842 if (d->bd_bif)
1843 /*
1844 * Detach if attached to something else.
1845 */
1846 bpf_detachd(d);
1847
1848 if (bpf_attachd(d, bp) != 0) {
1849 return ENXIO;
1850 }
1851 }
1852 reset_d(d);
1853 return (0);
1854 }
1855 /* Not found. */
1856 return (ENXIO);
1857 }
1858
1859
1860
1861 /*
1862 * Get a list of the available data link types of the interface.
1863 */
1864 static int
1865 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
1866 {
1867 u_int n;
1868 int error;
1869 struct ifnet *ifp;
1870 struct bpf_if *bp;
1871 user_addr_t dlist;
1872 struct bpf_dltlist bfl;
1873
1874 bcopy(addr, &bfl, sizeof (bfl));
1875 if (proc_is64bit(p)) {
1876 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
1877 } else {
1878 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
1879 }
1880
1881 ifp = d->bd_bif->bif_ifp;
1882 n = 0;
1883 error = 0;
1884
1885 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
1886 if (bp->bif_ifp != ifp)
1887 continue;
1888 /*
1889 * Return DLT_PKTAP only to processes that know how to handle it
1890 */
1891 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
1892 continue;
1893 if (dlist != USER_ADDR_NULL) {
1894 if (n >= bfl.bfl_len) {
1895 return (ENOMEM);
1896 }
1897 error = copyout(&bp->bif_dlt, dlist,
1898 sizeof (bp->bif_dlt));
1899 if (error != 0)
1900 break;
1901 dlist += sizeof (bp->bif_dlt);
1902 }
1903 n++;
1904 }
1905 bfl.bfl_len = n;
1906 bcopy(&bfl, addr, sizeof (bfl));
1907
1908 return (error);
1909 }
1910
1911 /*
1912 * Set the data link type of a BPF instance.
1913 */
1914 static int
1915 bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev)
1916 {
1917 int error, opromisc;
1918 struct ifnet *ifp;
1919 struct bpf_if *bp;
1920
1921 if (d->bd_bif->bif_dlt == dlt)
1922 return (0);
1923
1924 while (d->bd_hbuf_read)
1925 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1926
1927 d = bpf_dtab[minor(dev)];
1928 if (d == 0 || d == (void *)1)
1929 return (ENXIO);
1930
1931 ifp = d->bd_bif->bif_ifp;
1932 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
1933 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1934 break;
1935 }
1936 if (bp != NULL) {
1937 opromisc = d->bd_promisc;
1938 bpf_detachd(d);
1939 error = bpf_attachd(d, bp);
1940 if (error) {
1941 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
1942 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
1943 return error;
1944 }
1945 reset_d(d);
1946 if (opromisc) {
1947 lck_mtx_unlock(bpf_mlock);
1948 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
1949 lck_mtx_lock(bpf_mlock);
1950 if (error)
1951 printf("bpf_setdlt: ifpromisc %s%d failed (%d)\n",
1952 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
1953 else
1954 d->bd_promisc = 1;
1955 }
1956 }
1957 return (bp == NULL ? EINVAL : 0);
1958 }
1959
1960 static int
1961 bpf_set_traffic_class(struct bpf_d *d, int tc)
1962 {
1963 int error = 0;
1964
1965 if (!SO_VALID_TC(tc))
1966 error = EINVAL;
1967 else
1968 d->bd_traffic_class = tc;
1969
1970 return (error);
1971 }
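/*
 * Illustrative userland sketch (an assumption, not taken from this
 * file): the traffic class validated above is set with BIOCSETTC:
 *
 *	int tc = SO_TC_VI;
 *	ioctl(fd, BIOCSETTC, &tc);
 */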
1972
1973 static void
1974 bpf_set_packet_service_class(struct mbuf *m, int tc)
1975 {
1976 if (!(m->m_flags & M_PKTHDR))
1977 return;
1978
1979 VERIFY(SO_VALID_TC(tc));
1980 (void) m_set_service_class(m, so_tc2msc(tc));
1981 }
1982
1983 /*
1984 * Support for select()
1985 *
1986 * Return true iff the specific operation will not block indefinitely.
1987 * Otherwise, return false but make a note that a selwakeup() must be done.
1988 */
1989 int
1990 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
1991 {
1992 struct bpf_d *d;
1993 int ret = 0;
1994
1995 lck_mtx_lock(bpf_mlock);
1996
1997 d = bpf_dtab[minor(dev)];
1998 if (d == 0 || d == (void *)1) {
1999 lck_mtx_unlock(bpf_mlock);
2000 return (ENXIO);
2001 }
2002
2003 if (d->bd_bif == NULL) {
2004 lck_mtx_unlock(bpf_mlock);
2005 return (ENXIO);
2006 }
2007
2008 while (d->bd_hbuf_read)
2009 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2010
2011 d = bpf_dtab[minor(dev)];
2012 if (d == 0 || d == (void *)1) {
2013 lck_mtx_unlock(bpf_mlock);
2014 return (ENXIO);
2015 }
2016
2017 switch (which) {
2018 case FREAD:
2019 if (d->bd_hlen != 0 ||
2020 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
2021 d->bd_slen != 0))
2022 ret = 1; /* read has data to return */
2023 else {
2024 /*
2025 * Read has no data to return.
2026 * Make the select wait, and start a timer if
2027 * necessary.
2028 */
2029 selrecord(p, &d->bd_sel, wql);
2030 bpf_start_timer(d);
2031 }
2032 break;
2033
2034 case FWRITE:
2035 ret = 1; /* can't determine whether a write would block */
2036 break;
2037 }
2038
2039 lck_mtx_unlock(bpf_mlock);
2040 return (ret);
2041 }
2042
2043
2044 /*
2045 * Support for kevent() system call. Register EVFILT_READ filters and
2046 * reject all others.
2047 */
2048 int bpfkqfilter(dev_t dev, struct knote *kn);
2049 static void filt_bpfdetach(struct knote *);
2050 static int filt_bpfread(struct knote *, long);
2051
2052 static struct filterops bpfread_filtops = {
2053 .f_isfd = 1,
2054 .f_detach = filt_bpfdetach,
2055 .f_event = filt_bpfread,
2056 };
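/*
 * Illustrative userland sketch (an assumption, not taken from this
 * file): registering for read events on a bpf descriptor, which ends
 * up in bpfkqfilter() below:
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 */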
2057
2058 int
2059 bpfkqfilter(dev_t dev, struct knote *kn)
2060 {
2061 struct bpf_d *d;
2062
2063 /*
2064 * Is this device a bpf?
2065 */
2066 if (major(dev) != CDEV_MAJOR) {
2067 return (EINVAL);
2068 }
2069
2070 if (kn->kn_filter != EVFILT_READ) {
2071 return (EINVAL);
2072 }
2073
2074 lck_mtx_lock(bpf_mlock);
2075
2076 d = bpf_dtab[minor(dev)];
2077 if (d == 0 || d == (void *)1) {
2078 lck_mtx_unlock(bpf_mlock);
2079 return (ENXIO);
2080 }
2081
2082 if (d->bd_bif == NULL) {
2083 lck_mtx_unlock(bpf_mlock);
2084 return (ENXIO);
2085 }
2086
2087 kn->kn_hook = d;
2088 kn->kn_fop = &bpfread_filtops;
2089 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2090 lck_mtx_unlock(bpf_mlock);
2091 return 0;
2092 }
2093
2094 static void
2095 filt_bpfdetach(struct knote *kn)
2096 {
2097 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2098
2099 lck_mtx_lock(bpf_mlock);
2100 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2101 lck_mtx_unlock(bpf_mlock);
2102 }
2103
2104 static int
2105 filt_bpfread(struct knote *kn, long hint)
2106 {
2107 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2108 int ready = 0;
2109
2110 if (hint == 0)
2111 lck_mtx_lock(bpf_mlock);
2112
2113 if (d->bd_immediate) {
2114 /*
2115 * If there's data in the hold buffer, it's the
2116 * amount of data a read will return.
2117 *
2118 * If there's no data in the hold buffer, but
2119 * there's data in the store buffer, a read will
2120 * immediately rotate the store buffer to the
2121 * hold buffer, so the amount of data in the store
2122 * buffer is the amount of data a read will
2123 * return.
2124 *
2125 * If there's no data in either buffer, we're not
2126 * ready to read.
2127 */
2128 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read)
2129 ? d->bd_slen : d->bd_hlen);
2130 int64_t lowwat = 1;
2131 if (kn->kn_sfflags & NOTE_LOWAT)
2132 {
2133 if (kn->kn_sdata > d->bd_bufsize)
2134 lowwat = d->bd_bufsize;
2135 else if (kn->kn_sdata > lowwat)
2136 lowwat = kn->kn_sdata;
2137 }
2138 ready = (kn->kn_data >= lowwat);
2139 } else {
2140 /*
2141 * If there's data in the hold buffer, it's the
2142 * amount of data a read will return.
2143 *
2144 * If there's no data in the hold buffer, but
2145 * there's data in the store buffer, if the
2146 * timer has expired a read will immediately
2147 * rotate the store buffer to the hold buffer,
2148 * so the amount of data in the store buffer is
2149 * the amount of data a read will return.
2150 *
2151 * If there's no data in either buffer, or there's
2152 * no data in the hold buffer and the timer hasn't
2153 * expired, we're not ready to read.
2154 */
2155 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) && d->bd_state == BPF_TIMED_OUT ?
2156 d->bd_slen : d->bd_hlen);
2157 ready = (kn->kn_data > 0);
2158 }
2159 if (!ready)
2160 bpf_start_timer(d);
2161
2162 if (hint == 0)
2163 lck_mtx_unlock(bpf_mlock);
2164 return (ready);
2165 }
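
/*
 * For illustration, how user space typically exercises this filter (a
 * sketch using the public kqueue API; not code from this file).  The
 * NOTE_LOWAT clamp above keeps the requested low-water mark within
 * [1, bd_bufsize].
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *
 *	// fire only once at least 128 captured bytes are buffered
 *	EV_SET(&ev, bpf_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);		// register
 *
 *	struct kevent out;
 *	if (kevent(kq, NULL, 0, &out, 1, NULL) > 0) {
 *		// out.data is the byte count computed in filt_bpfread()
 *		ssize_t n = read(bpf_fd, buf, buflen);
 *	}
 */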

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	struct mbuf *m = (struct mbuf *)(uintptr_t)(src_arg);
	u_int count;
	u_char *dst;

	dst = dst_arg;
	while (len > 0) {
		if (m == 0)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mbuf_data(m), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

static inline void
bpf_tap_imp(
	ifnet_t		ifp,
	u_int32_t	dlt,
	mbuf_t		m,
	void*		hdr,
	size_t		hlen,
	int		outbound)
{
	struct bpf_if *bp;
	struct mbuf *savedm = m;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from the IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine.  Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when an ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	/* Find this interface's attachment matching dlt (0 matches any DLT) */
	for (bp = ifp->if_bpf; bp && bp->bif_ifp == ifp &&
	    (dlt != 0 && bp->bif_dlt != dlt); bp = bp->bif_next)
		;
	if (bp && bp->bif_ifp == ifp && bp->bif_dlist != NULL) {
		struct bpf_d	*d;
		struct m_hdr	hack_hdr;
		u_int	pktlen = 0;
		u_int	slen = 0;
		struct mbuf *m0;

		if (hdr) {
			/*
			 * This is gross. We mock up an mbuf that points to the
			 * header buffer. This means we don't have to copy the
			 * header. A number of interfaces prepended headers just
			 * for bpf by allocating an mbuf on the stack. We want to
			 * give developers an easy way to prepend a header for bpf.
			 * Since a developer allocating an mbuf on the stack is bad,
			 * we do even worse here, allocating only a header to point
			 * to a buffer the developer supplied. This assumes that
			 * bpf_filter and catchpacket will not look at anything
			 * in the mbuf other than the header. This was true at the
			 * time this code was written.
			 */
			hack_hdr.mh_next = m;
			hack_hdr.mh_nextpkt = NULL;
			hack_hdr.mh_len = hlen;
			hack_hdr.mh_data = hdr;
			hack_hdr.mh_type = m->m_type;
			hack_hdr.mh_flags = 0;

			m = (mbuf_t)&hack_hdr;
		}

		for (m0 = m; m0 != 0; m0 = m0->m_next)
			pktlen += m0->m_len;

		for (d = bp->bif_dlist; d; d = d->bd_next) {
			if (outbound && !d->bd_seesent)
				continue;
			++d->bd_rcount;
			slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
			if (slen != 0) {
#if CONFIG_MACF_NET
				if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
					continue;
#endif
				catchpacket(d, (u_char *)m, savedm, pktlen,
				    slen, outbound, bpf_mcopy);
			}
		}
	}
	lck_mtx_unlock(bpf_mlock);
}

void
bpf_tap_out(
	ifnet_t		ifp,
	u_int32_t	dlt,
	mbuf_t		m,
	void*		hdr,
	size_t		hlen)
{
	bpf_tap_imp(ifp, dlt, m, hdr, hlen, 1);
}

void
bpf_tap_in(
	ifnet_t		ifp,
	u_int32_t	dlt,
	mbuf_t		m,
	void*		hdr,
	size_t		hlen)
{
	bpf_tap_imp(ifp, dlt, m, hdr, hlen, 0);
}
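
/*
 * For illustration, how a driver that registered with bpf_attach() would
 * feed its traffic to these taps (hypothetical driver code; not part of
 * this file):
 *
 *	// inbound path, after the receive interface has been set
 *	bpf_tap_in(ifp, DLT_EN10MB, m, NULL, 0);
 *
 *	// outbound path; a link-layer header that lives outside the
 *	// mbuf chain can be passed via hdr/hlen and is prepended through
 *	// the mock mbuf built in bpf_tap_imp()
 *	bpf_tap_out(ifp, DLT_EN10MB, m, &eh, sizeof (eh));
 */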

/* Callback registered with Ethernet driver. */
static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
{
	/* an mbuf without a receive interface was locally generated: treat it as outbound */
	bpf_tap_imp(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);

	return 0;
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wakes up any pending read when appropriate (a
 * buffer rotation, immediate mode, or an expired read timeout);
 * the wakeup is done here via bpf_wakeup() rather than signalled
 * through a return value.  "cpfn" is the routine called to do the
 * actual data transfer: bcopy is passed in to copy contiguous
 * chunks, while bpf_mcopy is passed in to copy mbuf chains.  In
 * the latter case, pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen,
	u_int snaplen, int outbound,
	void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_hdr *hp;
	struct bpf_hdr_ext *ehp;
	int totlen, curlen;
	int hdrlen, caplen;
	int do_wakeup = 0;
	u_char *payload;
	struct timeval tv;
	struct m_tag *mt = NULL;
	struct bpf_mtag *bt = NULL;

	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	    d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wake up any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.
	 */
	microtime(&tv);
	if (d->bd_flags & BPF_EXTENDED_HDR) {
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;
		ehp->bh_datalen = pktlen;
		ehp->bh_hdrlen = hdrlen;
		ehp->bh_caplen = totlen - hdrlen;
		mt = m_tag_locate(m, bpf_mtag_id, 0, NULL);
		if (mt && mt->m_tag_len >= sizeof(*bt)) {
			bt = (struct bpf_mtag *)(mt + 1);
			ehp->bh_pid = bt->bt_pid;
			strlcpy(ehp->bh_comm, bt->bt_comm,
			    sizeof(ehp->bh_comm));
			ehp->bh_svc = so_svc2tc(bt->bt_svc);
			if (bt->bt_direction == BPF_MTAG_DIR_OUT)
				ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
			else
				ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
			m_tag_delete(m, mt);
		} else if (outbound) {
			/* only do lookups on non-raw INPCB */
			if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
			    PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
			    (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
			    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
				ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
				ehp->bh_proto = m->m_pkthdr.pkt_proto;
			}
			ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
		} else
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		payload = (u_char *)ehp + hdrlen;
		caplen = ehp->bh_caplen;
	} else {
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		hp->bh_tstamp.tv_sec = tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = pktlen;
		hp->bh_hdrlen = hdrlen;
		hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
		caplen = hp->bh_caplen;
	}
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, payload, caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}
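
/*
 * For illustration, how a reader walks the records that catchpacket()
 * lays down (the standard bpf(4) consumption pattern; not code from this
 * file): each record is a bpf_hdr, then the captured bytes, then padding
 * so the next record starts on a BPF_WORDALIGN boundary.
 *
 *	ssize_t n = read(fd, buf, buflen);
 *	char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *		u_char *pkt = (u_char *)p + bh->bh_hdrlen;
 *		// bh_caplen bytes captured of a bh_datalen-byte packet
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */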

/*
 * Allocate the store and free buffers for a descriptor
 * and reset its capture byte counts.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{
	d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
	if (d->bd_fbuf == 0)
		return (ENOBUFS);

	d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
	if (d->bd_sbuf == 0) {
		FREE(d->bd_fbuf, M_DEVBUF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}
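
/*
 * Note: by the time bpf_allocbufs() runs, bd_bufsize is already fixed.
 * User space tunes it with BIOCSBLEN, which bpf(4) requires to happen
 * before the device is bound to an interface (illustrative user-space
 * sketch; not code from this file):
 *
 *	u_int blen = 512 * 1024;
 *	ioctl(fd, BIOCSBLEN, &blen);	// kernel may clamp; blen returns the actual size
 *	ioctl(fd, BIOCSETIF, &ifr);	// buffers are allocated on bind
 */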

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it has not yet been marked
	 * free.
	 */
	if (d->bd_hbuf_read)
		panic("bpf buffer freed during read");

	if (d->bd_sbuf != 0) {
		FREE(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != 0)
			FREE(d->bd_hbuf, M_DEVBUF);
		if (d->bd_fbuf != 0)
			FREE(d->bd_fbuf, M_DEVBUF);
	}
	if (d->bd_filter)
		FREE((caddr_t)d->bd_filter, M_DEVBUF);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type;
 * hdrlen is the fixed size of the link header (variable length
 * headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
}

errno_t
bpf_attach(
	ifnet_t			ifp,
	u_int32_t		dlt,
	u_int32_t		hdrlen,
	bpf_send_func		send,
	bpf_tap_func		tap)
{
	struct bpf_if *bp_new;
	struct bpf_if *bp_temp;
	struct bpf_if *bp_first = NULL;

	bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF, M_WAIT);
	if (bp_new == 0)
		panic("bpfattach");

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached, record first
	 * attachment for this interface.
	 */
	for (bp_temp = bpf_iflist; bp_temp && (bp_temp->bif_ifp != ifp ||
	    bp_temp->bif_dlt != dlt); bp_temp = bp_temp->bif_next) {
		if (bp_temp->bif_ifp == ifp && bp_first == NULL)
			bp_first = bp_temp;
	}

	if (bp_temp != NULL) {
		printf("bpfattach - %s with dlt %d is already attached\n",
		    if_name(ifp), dlt);
		FREE(bp_new, M_DEVBUF);
		lck_mtx_unlock(bpf_mlock);
		return EEXIST;
	}

	bzero(bp_new, sizeof(*bp_new));
	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		/* Add this after the first entry for this interface */
		bp_new->bif_next = bp_first->bif_next;
		bp_first->bif_next = bp_new;
	}

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

#ifndef __APPLE__
	if (bootverbose)
		printf("bpf: %s attached\n", if_name(ifp));
#endif

	return 0;
}
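
/*
 * For illustration, an Ethernet-like driver would typically attach as
 * (hypothetical driver code; not part of this file):
 *
 *	bpf_attach(ifp, DLT_EN10MB, sizeof (struct ether_header), NULL, NULL);
 *
 * A worked example of the padding math above, assuming the in-kernel
 * SIZEOF_BPF_HDR of 18: hdrlen = 14, so BPF_WORDALIGN(14 + 18) = 32 and
 * bif_hdrlen = 32 - 14 = 18.  The bpf header plus link-layer header then
 * occupy exactly 32 bytes, leaving the network-layer header longword
 * aligned in the store buffer.
 */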

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_prev, *bp_next;
	struct bpf_if *bp_free_list = NULL;
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);

	/*
	 * Build the list of devices attached to that interface
	 * that we need to free while keeping the lock to maintain
	 * the integrity of the interface list.
	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		bp_next = bp->bif_next;

		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
		/* Unlink from the interface list */
		if (bp_prev)
			bp_prev->bif_next = bp->bif_next;
		else
			bpf_iflist = bp->bif_next;

		/* Add to the list to be freed */
		bp->bif_next = bp_free_list;
		bp_free_list = bp;
	}

	/*
	 * Detach the bpf devices attached to the interface.
	 * At this point we no longer care if bpf_mlock is dropped
	 * inside bpf_detachd.
	 */
	for (bp = bp_free_list; bp != NULL; bp = bp->bif_next) {
		while ((d = bp->bif_dlist) != NULL) {
			bpf_detachd(d);
			bpf_wakeup(d);
		}
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);

	/*
	 * Free the list
	 */
	while ((bp = bp_free_list) != NULL) {
		bp_free_list = bp->bif_next;
		FREE(bp, M_DEVBUF);
	}
}

void
bpf_init(__unused void *unused)
{
#ifdef __APPLE__
	int i;
	int maj;

	if (bpf_devsw_installed == 0) {
		bpf_devsw_installed = 1;
		bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
		bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
		bpf_mlock_attr = lck_attr_alloc_init();
		lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
		if (maj == -1) {
			if (bpf_mlock_attr)
				lck_attr_free(bpf_mlock_attr);
			if (bpf_mlock_grp)
				lck_grp_free(bpf_mlock_grp);
			if (bpf_mlock_grp_attr)
				lck_grp_attr_free(bpf_mlock_grp_attr);

			bpf_mlock = NULL;
			bpf_mlock_attr = NULL;
			bpf_mlock_grp = NULL;
			bpf_mlock_grp_attr = NULL;
			bpf_devsw_installed = 0;
			printf("bpf_init: failed to allocate a major number!\n");
			return;
		}

		for (i = 0; i < NBPFILTER; i++)
			bpf_make_dev_t(maj);

		VERIFY(mbuf_tag_id_find(BPF_CONTROL_NAME, &bpf_mtag_id) == 0);
	}
#else
	cdevsw_add(&bpf_cdevsw);
#endif
}

#ifndef __APPLE__
SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL)
#endif

#if CONFIG_MACF_NET
struct label *
mac_bpfdesc_label_get(struct bpf_d *d)
{

	return (d->bd_label);
}

void
mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
{

	d->bd_label = label;
}
#endif