/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 *
 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include "bpf.h"

#ifndef __GNUC__
#define inline
#else
#define inline __inline
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/filedesc.h>
#include <sys/uio_internal.h>
#include <sys/file_internal.h>
#include <sys/event.h>

#include <sys/poll.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/vnode.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/isakmp.h>
#include <netinet6/esp.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <net/firewire.h>

#include <miscfs/devfs/devfs.h>
#include <net/dlil.h>
#include <net/pktap.h>

#include <kern/locks.h>
#include <kern/thread_call.h>
#include <libkern/section_keywords.h>

#include <os/log.h>

extern int tvtohz(struct timeval *);

#define BPF_BUFSIZE 4096
#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)

#define PRINET 26       /* interruptible */

#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
#define ESP_HDR_SIZE sizeof(struct newesp)

typedef void (*pktcopyfunc_t)(const void *, void *, size_t);

/*
 * The default read buffer size is patchable.
 */
static unsigned int bpf_bufsize = BPF_BUFSIZE;
SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_bufsize, 0, "");

static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
extern const int copysize_limit_panic;
#define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_maxbufsize, 0,
    sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");

static unsigned int bpf_maxdevices = 256;
SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_maxdevices, 0, "");
/*
 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 * On OS X it is off by default, so a process needs to use the ioctl
 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
 */
#if !XNU_TARGET_OS_OSX
static unsigned int bpf_wantpktap = 1;
#else /* XNU_TARGET_OS_OSX */
static unsigned int bpf_wantpktap = 0;
#endif /* XNU_TARGET_OS_OSX */
SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_wantpktap, 0, "");

static int bpf_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_debug, 0, "");

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 * bpf_dtab holds pointers to the descriptors, indexed by minor device #.
 */
static struct bpf_if *bpf_iflist;
#ifdef __APPLE__
/*
 * BSD now stores the bpf_d in the dev_t which is a struct
 * on their system. Our dev_t is an int, so we still store
 * the bpf_d in a separate table indexed by minor device #.
 *
 * The value stored in bpf_dtab[n] represents three states:
 *  NULL: device not opened
 *  BPF_DEV_RESERVED: device opening or closing
 *  other: device <n> opened with pointer to storage
 */
#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
static struct bpf_d **bpf_dtab = NULL;
static unsigned int bpf_dtab_size = 0;
static unsigned int nbpfilter = 0;

decl_lck_mtx_data(static, bpf_mlock_data);
static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
static lck_grp_t *bpf_mlock_grp;
static lck_grp_attr_t *bpf_mlock_grp_attr;
static lck_attr_t *bpf_mlock_attr;

#endif /* __APPLE__ */

static int bpf_allocbufs(struct bpf_d *);
static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static int bpf_detachd(struct bpf_d *d, int);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int,
    struct mbuf **, struct sockaddr *, int *);
static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
static void bpf_timed_out(void *, void *);
static void bpf_wakeup(struct bpf_d *);
static u_int get_pkt_trunc_len(u_char *, u_int);
static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
static void reset_d(struct bpf_d *);
static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
static int bpf_setdlt(struct bpf_d *, u_int);
static int bpf_set_traffic_class(struct bpf_d *, int);
static void bpf_set_packet_service_class(struct mbuf *, int);

static void bpf_acquire_d(struct bpf_d *);
static void bpf_release_d(struct bpf_d *);

static int bpf_devsw_installed;

void bpf_init(void *unused);
static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);

/*
 * Darwin differs from BSD here: the following are static
 * on BSD and not static on Darwin.
 */
d_open_t bpfopen;
d_close_t bpfclose;
d_read_t bpfread;
d_write_t bpfwrite;
ioctl_fcn_t bpfioctl;
select_fcn_t bpfselect;

/* Darwin's cdevsw struct differs slightly from BSD's */
#define CDEV_MAJOR 23
static const struct cdevsw bpf_cdevsw = {
	.d_open = bpfopen,
	.d_close = bpfclose,
	.d_read = bpfread,
	.d_write = bpfwrite,
	.d_ioctl = bpfioctl,
	.d_stop = eno_stop,
	.d_reset = eno_reset,
	.d_ttys = NULL,
	.d_select = bpfselect,
	.d_mmap = eno_mmap,
	.d_strategy = eno_strat,
	.d_reserved_1 = eno_getc,
	.d_reserved_2 = eno_putc,
	.d_type = 0
};

#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)

static int
bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
    struct sockaddr *sockp, int *datlen)
{
	struct mbuf *m;
	int error;
	int len;
	uint8_t sa_family;
	int hlen;

	switch (linktype) {
#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;       /* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return EIO;
	}

	// LP64todo - fix this!
	len = uio_resid(uio);
	*datlen = len - hlen;
	if ((unsigned)len > MCLBYTES) {
		return EIO;
	}

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return EIO;
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	MGETHDR(m, M_WAIT, MT_DATA);
	if (m == 0) {
		return ENOBUFS;
	}
	if ((unsigned)len > MHLEN) {
		MCLGET(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/*
	 * Make room for link header.
	 */
	if (hlen != 0) {
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
		if (error) {
			goto bad;
		}
	}
	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	if (error) {
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB: {
		struct ether_header *eh;

		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (_ether_cmp(etherbroadcastaddr,
			    eh->ether_dhost) == 0) {
				m->m_flags |= M_BCAST;
			} else {
				m->m_flags |= M_MCAST;
			}
		}
		break;
	}
	}

	return 0;
bad:
	m_freem(m);
	return error;
}

#ifdef __APPLE__

/*
 * The dynamic addition of a new device node must block all processes that
 * are opening the last device so that no process will get an unexpected
 * ENOENT.
 */
static void
bpf_make_dev_t(int maj)
{
	static int bpf_growing = 0;
	unsigned int cur_size = nbpfilter, i;

	if (nbpfilter >= bpf_maxdevices) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		int new_dtab_size;
		struct bpf_d **new_dtab = NULL;
		struct bpf_d **old_dtab = NULL;

		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = (struct bpf_d **)_MALLOC(
			sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
		if (new_dtab == 0) {
			printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
			goto done;
		}
		if (bpf_dtab) {
			bcopy(bpf_dtab, new_dtab,
			    sizeof(struct bpf_d *) * bpf_dtab_size);
		}
		bzero(new_dtab + bpf_dtab_size,
		    sizeof(struct bpf_d *) * NBPFILTER);
		old_dtab = bpf_dtab;
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
		if (old_dtab != NULL) {
			_FREE(old_dtab, M_DEVBUF);
		}
	}
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}

#endif

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int first = bp->bif_dlist == NULL;
	int error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if *tmp, *primary = NULL;

			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}

/*
 * Detach a file from its interface.
 *
 * Return 1 if it was closed by some thread, 0 otherwise.
 */
static int
bpf_detachd(struct bpf_d *d, int closing)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet *ifp;

	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	if (bpf_debug != 0) {
		printf("%s: %llx %s%s\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
		    if_name(ifp), closing ? " closing" : "");
	}

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}
		if (bp->bif_tap) {
			bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
		}

		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		lck_mtx_unlock(bpf_mlock);
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			printf("%s: ifnet_set_promiscuous failed\n", __func__);
		}
		lck_mtx_lock(bpf_mlock);
	}

	/*
	 * Wake up other threads that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified */
	bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Note that we've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);

done:
	/*
	 * When closing, make sure no other thread refers to the bpf_d
	 */
	if (bpf_debug != 0) {
		printf("%s: %llx done\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	}
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}

/*
 * Start asynchronous timer, if necessary.
 * Must be called with bpf_mlock held.
 */
static void
bpf_start_timer(struct bpf_d *d)
{
	uint64_t deadline;
	struct timeval tv;

	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;

		clock_interval_to_deadline(
			(uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
			NSEC_PER_USEC, &deadline);
		/*
		 * The state is BPF_IDLE, so the timer hasn't
		 * been started yet, and hasn't gone off yet;
		 * there is no thread call scheduled, so this
		 * won't change the schedule.
		 *
		 * XXX - what if, by the time it gets entered,
		 * the deadline has already passed?
		 */
		thread_call_enter_delayed(d->bd_thread_call, deadline);
		d->bd_state = BPF_WAITING;
	}
}

/*
 * Cancel asynchronous timer.
 * Must be called with bpf_mlock held.
 */
static boolean_t
bpf_stop_timer(struct bpf_d *d)
{
	/*
	 * If the timer has already gone off, this does nothing.
	 * Our caller is expected to set d->bd_state to BPF_IDLE,
	 * with the bpf_mlock, after we are called. bpf_timed_out()
	 * also grabs bpf_mlock, so, if the timer has gone off and
	 * bpf_timed_out() hasn't finished, it's waiting for the
	 * lock; when this thread releases the lock, it will
	 * find the state is BPF_IDLE, and just release the
	 * lock and return.
	 */
	return thread_call_cancel(d->bd_thread_call);
}

void
bpf_acquire_d(struct bpf_d *d)
{
	void *lr_saved = __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	d->bd_refcnt += 1;

	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
}

void
bpf_release_d(struct bpf_d *d)
{
	void *lr_saved = __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	if (d->bd_refcnt <= 0) {
		panic("%s: %p refcnt <= 0", __func__, d);
	}

	d->bd_refcnt -= 1;

	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;

	if (d->bd_refcnt == 0) {
		/* Assert the device is detached */
		if ((d->bd_flags & BPF_DETACHED) == 0) {
			panic("%s: %p BPF_DETACHED not set", __func__, d);
		}

		_FREE(d, M_DEVBUF);
	}
}

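/*
 * A sketch of how the two helpers above are used throughout this file:
 * a descriptor is pinned before bpf_mlock is dropped for blocking work,
 * and released once the lock is re-taken (pattern as in bpfread() and
 * bpfwrite() below):
 *
 *	lck_mtx_lock(bpf_mlock);
 *	bpf_acquire_d(d);		// pin d across the unlocked region
 *	lck_mtx_unlock(bpf_mlock);
 *	...				// e.g. uiomove() or bpf_movein()
 *	lck_mtx_lock(bpf_mlock);
 *	bpf_release_d(d);		// may free d when refcnt hits 0
 *	lck_mtx_unlock(bpf_mlock);
 *
 * The bd_ref_lr/bd_unref_lr arrays record the last BPF_REF_HIST caller
 * return addresses as a debugging aid for refcount imbalances.
 */
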
/*
 * Open ethernet device. Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
    struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
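	/*
	 * An illustrative userland loop for that model (hypothetical
	 * example, not part of this file):
	 *
	 *	int fd = -1;
	 *	char name[16];
	 *	for (int i = 0; fd == -1; i++) {
	 *		snprintf(name, sizeof(name), "/dev/bpf%d", i);
	 *		fd = open(name, O_RDWR);
	 *		if (fd == -1 && errno != EBUSY)
	 *			break;	// stop on any error but EBUSY
	 *	}
	 */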
	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
		bpf_make_dev_t(major(dev));
	}

	/*
	 * Each minor can be opened by only one process. If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context, otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater
	 * than 1 when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == NULL) {
		/* Reserve while opening */
		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	} else {
		lck_mtx_unlock(bpf_mlock);
		return EBUSY;
	}
	d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
	    M_WAIT | M_ZERO);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		printf("bpfopen: malloc bpf_d failed\n");
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}

	/* Mark "in use" and do most initialization. */
	bpf_acquire_d(d);
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_traffic_class = SO_TC_BE;
	d->bd_flags |= BPF_DETACHED;
	if (bpf_wantpktap) {
		d->bd_flags |= BPF_WANT_PKTAP;
	} else {
		d->bd_flags &= ~BPF_WANT_PKTAP;
	}
	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	if (d->bd_thread_call == NULL) {
		printf("bpfopen: malloc thread call failed\n");
		bpf_dtab[minor(dev)] = NULL;
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);

		return ENOMEM;
	}
	d->bd_opened_by = p;
	uuid_generate(d->bd_uuid);

	bpf_dtab[minor(dev)] = d;        /* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	return 0;
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpf_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		printf("%s: %llx\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting, to let the in-progress call complete
			 */
			d->bd_state = BPF_DRAINING;
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
		 * all subsequent threads should see that and fail with
		 * ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	if (d->bd_bif) {
		bpf_detachd(d, 1);
	}
	selthreadclear(&d->bd_sel);
	thread_call_free(d->bd_thread_call);

	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL; /* Mark closed */

	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}

#define BPF_SLEEP bpf_sleep

static int
bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
{
	u_int64_t abstime = 0;

	if (timo != 0) {
		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
	}

	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
}

static void
bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
{
	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
		struct pktap_v2_hdr *pktap_v2_hdr;

		pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;

		if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
			pktap_v2_finalize_proc_info(pktap_v2_hdr);
		}
	} else {
		if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
			pktap_finalize_proc_info(pktaphdr);
		}

		if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
			hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
			hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
		}
	}
}

/*
 * Rotate the packet buffers in descriptor d. Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	if (d->bd_hbuf_read != 0) \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL;
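
/*
 * In outline, the rotation above implements the classic BPF
 * store/hold/free triple-buffer scheme:
 *
 *	before:	sbuf = filling		hbuf = NULL	fbuf = spare
 *	after:	sbuf = old fbuf		hbuf = old sbuf	fbuf = NULL
 *
 * The store buffer keeps capturing while a reader drains the hold
 * buffer; the bd_hbuf_read check panics rather than rotate while
 * bpfread() is still copying the hold buffer out to user space.
 */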
/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int timed_out, hbuf_len;
	int error;
	int flags;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	bpf_acquire_d(d);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio_resid(uio) != d->bd_bufsize) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return EINVAL;
	}

	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
	}

	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;

	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
		    d->bd_slen != 0) {
			/*
			 * We're in immediate mode, or are reading
			 * in non-blocking mode, or a timer was
			 * started before the read (e.g., by select()
			 * or poll()) and has expired and a packet(s)
			 * either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface. If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}
		if (ioflag & IO_NDELAY) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return EWOULDBLOCK;
		}
		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
		/*
		 * Make sure device is still opened
		 */
		if ((d->bd_flags & BPF_CLOSING) != 0) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}

		while (d->bd_hbuf_read != 0) {
			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
			    NULL);
		}

		if ((d->bd_flags & BPF_CLOSING) != 0) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}

		if (error == EINTR || error == ERESTART) {
			if (d->bd_hbuf != NULL) {
				/*
				 * Because we msleep, the hold buffer might
				 * be filled when we wake up. Avoid rotating
				 * in this case.
				 */
				break;
			}
			if (d->bd_slen != 0) {
				/*
				 * Sometimes we may be interrupted often and
				 * the sleep above will not timeout.
				 * Regardless, we should rotate the buffers
				 * if there's any new data pending and
				 * return it.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			if (error == ERESTART) {
				printf("%s: %llx ERESTART to EINTR\n",
				    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
				error = EINTR;
			}
			return error;
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing. If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf) {
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;
			}

			if (d->bd_slen == 0) {
				bpf_release_d(d);
				lck_mtx_unlock(bpf_mlock);
				return 0;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */

	/*
	 * Set the hold buffer read flag, so we do not
	 * rotate the buffers until the hold buffer
	 * read is complete. Also to avoid issues resulting
	 * from page faults during disk sleep (<rdar://problem/13436396>).
	 */
	d->bd_hbuf_read = 1;
	hbuf = d->bd_hbuf;
	hbuf_len = d->bd_hlen;
	flags = d->bd_flags;
	lck_mtx_unlock(bpf_mlock);

#ifdef __APPLE__
	/*
	 * Before we move data to userland, we fill out the extended
	 * header fields.
	 */
	if (flags & BPF_EXTENDED_HDR) {
		char *p;

		p = hbuf;
		while (p < hbuf + hbuf_len) {
			struct bpf_hdr_ext *ehp;
			uint32_t flowid;
			struct so_procinfo soprocinfo;
			int found = 0;

			ehp = (struct bpf_hdr_ext *)(void *)p;
			if ((flowid = ehp->bh_flowid) != 0) {
				if (ehp->bh_proto == IPPROTO_TCP) {
					found = inp_findinpcb_procinfo(&tcbinfo,
					    flowid, &soprocinfo);
				} else if (ehp->bh_proto == IPPROTO_UDP) {
					found = inp_findinpcb_procinfo(&udbinfo,
					    flowid, &soprocinfo);
				}
				if (found == 1) {
					ehp->bh_pid = soprocinfo.spi_pid;
					strlcpy(&ehp->bh_comm[0],
					    &soprocinfo.spi_proc_name[0],
					    sizeof(ehp->bh_comm));
				}
				ehp->bh_flowid = 0;
			}

			if (flags & BPF_FINALIZE_PKTAP) {
				struct pktap_header *pktaphdr;

				pktaphdr = (struct pktap_header *)(void *)
				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));

				bpf_finalize_pktap((struct bpf_hdr *) ehp,
				    pktaphdr);
			}
			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
		}
	} else if (flags & BPF_FINALIZE_PKTAP) {
		char *p;

		p = hbuf;
		while (p < hbuf + hbuf_len) {
			struct bpf_hdr *hp;
			struct pktap_header *pktaphdr;

			hp = (struct bpf_hdr *)(void *)p;
			pktaphdr = (struct pktap_header *)(void *)
			    (p + BPF_WORDALIGN(hp->bh_hdrlen));

			bpf_finalize_pktap(hp, pktaphdr);

			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
		}
	}
#endif

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);

	lck_mtx_lock(bpf_mlock);
	/*
	 * Make sure device is still opened
	 */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	d->bd_hbuf_read = 0;
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_hcnt = 0;
	wakeup((caddr_t)d);

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);
	return error;
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static void
bpf_wakeup(struct bpf_d *d)
{
	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
		d->bd_state = BPF_IDLE;
	}
	wakeup((caddr_t)d);
	if (d->bd_async && d->bd_sig && d->bd_sigio) {
		pgsigio(d->bd_sigio, d->bd_sig);
	}

	selwakeup(&d->bd_sel);
	if ((d->bd_flags & BPF_KNOTE)) {
		KNOTE(&d->bd_sel.si_note, 1);
	}
}

static void
bpf_timed_out(void *arg, __unused void *dummy)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	lck_mtx_lock(bpf_mlock);
	if (d->bd_state == BPF_WAITING) {
		/*
		 * There's a select or kqueue waiting for this; if there's
		 * now stuff to read, wake it up.
		 */
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0) {
			bpf_wakeup(d);
		}
	} else if (d->bd_state == BPF_DRAINING) {
		/*
		 * A close is waiting for this to finish.
		 * Mark it as finished, and wake the close up.
		 */
		d->bd_state = BPF_IDLE;
		bpf_wakeup(d);
	}
	lck_mtx_unlock(bpf_mlock);
}

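/*
 * Summary of the read-timeout states driven by bpf_start_timer(),
 * bpf_stop_timer(), bpf_timed_out() and bpfclose() above:
 *
 *	BPF_IDLE      -> BPF_WAITING	timer armed (select/poll/read)
 *	BPF_WAITING   -> BPF_TIMED_OUT	callout fired, readers woken
 *	BPF_WAITING   -> BPF_DRAINING	close raced an in-flight callout;
 *					bpfclose() sleeps until the callout
 *					sets the state back to BPF_IDLE
 */
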
/* keep in sync with bpf_movein above: */
#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))

int
bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m = NULL;
	int error;
	char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
	int datlen = 0;
	int bif_dlt;
	int bd_hdrcmplt;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	bpf_acquire_d(d);

	if (d->bd_bif == 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENETDOWN;
	}
	if (uio_resid(uio) == 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return 0;
	}
	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);

	/*
	 * fix for PR-6849527
	 * getting variables onto the stack before dropping the lock for
	 * bpf_movein()
	 */
	bif_dlt = (int)d->bd_bif->bif_dlt;
	bd_hdrcmplt = d->bd_hdrcmplt;

	/* bpf_movein allocating mbufs; drop lock */
	lck_mtx_unlock(bpf_mlock);

	error = bpf_movein(uio, bif_dlt, &m,
	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
	    &datlen);

	/* take the lock again */
	lck_mtx_lock(bpf_mlock);
	if (error) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return error;
	}

	/* verify the device is still open */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		m_freem(m);
		return ENXIO;
	}

	if (d->bd_bif == NULL) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		m_free(m);
		return ENXIO;
	}

	if ((unsigned)datlen > ifp->if_mtu) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		m_freem(m);
		return EMSGSIZE;
	}

	bpf_set_packet_service_class(m, d->bd_traffic_class);

	lck_mtx_unlock(bpf_mlock);

	/*
	 * The driver frees the mbuf.
	 */
	if (d->bd_hdrcmplt) {
		if (d->bd_bif->bif_send) {
			error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
		} else {
			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
		}
	} else {
		error = dlil_output(ifp, PF_INET, m, NULL,
		    (struct sockaddr *)dst_buf, 0, NULL);
	}

	lck_mtx_lock(bpf_mlock);
	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return error;
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf_read != 0) {
		panic("resetting buffers during read");
	}

	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_scnt = 0;
	d->bd_hcnt = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)
{
	unsigned int i;

	for (i = 0; i < nbpfilter; i++) {
		struct bpf_d *d = bpf_dtab[i];

		if (d == NULL || d == BPF_DEV_RESERVED ||
		    (d->bd_flags & BPF_CLOSING) != 0) {
			continue;
		}
		if (uuid_compare(uuid, d->bd_uuid) == 0) {
			return d;
		}
	}

	return NULL;
}

/*
 * The BIOCSETUP command "atomically" attaches to the interface and
 * copies the buffers from another bpf descriptor. This minimizes the
 * risk of missing packets because it is done while holding
 * the BPF global lock.
 */
static int
bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
{
	struct bpf_d *d_from;
	int error = 0;

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Sanity checks
	 */
	d_from = bpf_get_device_from_uuid(uuid_from);
	if (d_from == NULL) {
		error = ENOENT;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: uuids not found error %d",
		    __func__, error);
		return error;
	}
	if (d_from->bd_opened_by != d_to->bd_opened_by) {
		error = EACCES;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: processes not matching error %d",
		    __func__, error);
		return error;
	}

	/*
	 * Prevent any read while copying
	 */
	while (d_to->bd_hbuf_read != 0) {
		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	}
	d_to->bd_hbuf_read = 1;

	while (d_from->bd_hbuf_read != 0) {
		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	}
	d_from->bd_hbuf_read = 1;

	/*
	 * Verify the devices have not been closed
	 */
	if (d_to->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_to is closing error %d",
		    __func__, error);
		goto done;
	}
	if (d_from->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_from is closing error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * For now require the same buffer size
	 */
	if (d_from->bd_bufsize != d_to->bd_bufsize) {
		error = EINVAL;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bufsizes not matching error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Attach to the interface
	 */
	error = bpf_setif(d_to, ifp, false, true);
	if (error != 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bpf_setif() failed error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Make sure the buffers are setup as expected by bpf_setif()
	 */
	ASSERT(d_to->bd_hbuf == NULL);
	ASSERT(d_to->bd_sbuf != NULL);
	ASSERT(d_to->bd_fbuf != NULL);

	/*
	 * Copy the buffers and update the pointers and counts
	 */
	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	d_to->bd_slen = d_from->bd_slen;
	d_to->bd_scnt = d_from->bd_scnt;

	if (d_from->bd_hbuf != NULL) {
		d_to->bd_hbuf = d_to->bd_fbuf;
		d_to->bd_fbuf = NULL;
		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	}
	d_to->bd_hlen = d_from->bd_hlen;
	d_to->bd_hcnt = d_from->bd_hcnt;

	if (bpf_debug > 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: done slen %u scnt %u hlen %u hcnt %u",
		    __func__, d_to->bd_slen, d_to->bd_scnt,
		    d_to->bd_hlen, d_to->bd_hcnt);
	}
done:
	d_from->bd_hbuf_read = 0;
	wakeup((caddr_t)d_from);

	d_to->bd_hbuf_read = 0;
	wakeup((caddr_t)d_to);

	return error;
}

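/*
 * A usage sketch for BIOCSETUP (hypothetical userland code; it assumes
 * the struct bpf_setup_args layout and a companion UUID-query ioctl as
 * declared in <net/bpf.h> -- treat the names as illustrative): a process
 * queries the UUID of a bpf device it already owns, then asks a second
 * device it also opened to attach to an interface and inherit the first
 * device's buffered packets in one atomic step:
 *
 *	struct bpf_setup_args bsa;
 *	ioctl(fd_from, BIOCGETUUID, &bsa.bsa_uuid);
 *	strlcpy(bsa.bsa_if_name, "en0", sizeof(bsa.bsa_if_name));
 *	ioctl(fd_to, BIOCSETUP, &bsa);	// attach fd_to and copy buffers
 */
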
/*
 * FIONREAD		Check for read packet available.
 * SIOCGIFADDR		Get interface address - convenient hook to driver.
 * BIOCGBLEN		Get buffer len [for read()].
 * BIOCSETF		Set ethernet read filter.
 * BIOCFLUSH		Flush read packet buffer.
 * BIOCPROMISC		Put interface into promiscuous mode.
 * BIOCGDLT		Get link layer type.
 * BIOCGETIF		Get interface name.
 * BIOCSETIF		Set interface.
 * BIOCSRTIMEOUT	Set read timeout.
 * BIOCGRTIMEOUT	Get read timeout.
 * BIOCGSTATS		Get packet stats.
 * BIOCIMMEDIATE	Set immediate mode.
 * BIOCVERSION		Get filter language version.
 * BIOCGHDRCMPLT	Get "header already complete" flag
 * BIOCSHDRCMPLT	Set "header already complete" flag
 * BIOCGSEESENT		Get "see packets sent" flag
 * BIOCSSEESENT		Set "see packets sent" flag
 * BIOCSETTC		Set traffic class.
 * BIOCGETTC		Get traffic class.
 * BIOCSEXTHDR		Set "extended header" flag
 * BIOCSHEADDROP	Drop head of the buffer if user is not reading
 * BIOCGHEADDROP	Get "head-drop" flag
 */
/* ARGSUSED */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
    struct proc *p)
{
	struct bpf_d *d;
	int error = 0;
	u_int int_arg;
	struct ifreq ifr;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	bpf_acquire_d(d);

	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
	}
	d->bd_state = BPF_IDLE;

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:                  /* int */
	{
		int n;

		n = d->bd_slen;
		if (d->bd_hbuf && d->bd_hbuf_read == 0) {
			n += d->bd_hlen;
		}

		bcopy(&n, addr, sizeof(n));
		break;
	}

	case SIOCGIFADDR:               /* struct ifreq */
	{
		struct ifnet *ifp;

		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			ifp = d->bd_bif->bif_ifp;
			error = ifnet_ioctl(ifp, 0, cmd, addr);
		}
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:                 /* u_int */
		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN: {               /* u_int */
		u_int size;
		unsigned int maxbufsize = bpf_maxbufsize;

		/*
		 * Allow a larger buffer in head-drop mode, with the
		 * assumption the reading process may be low priority but
		 * is interested in the most recent traffic
		 */
		if (d->bd_headdrop != 0) {
			maxbufsize = 2 * bpf_maxbufsize;
		}

		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
			/*
			 * Interface already attached, unable to change buffers
			 */
			error = EINVAL;
			break;
		}
		bcopy(addr, &size, sizeof(size));

		if (size > maxbufsize) {
			d->bd_bufsize = maxbufsize;

			os_log_info(OS_LOG_DEFAULT,
			    "%s bufsize capped to %u from %u",
			    __func__, d->bd_bufsize, size);
		} else if (size < BPF_MINBUFSIZE) {
			d->bd_bufsize = BPF_MINBUFSIZE;

			os_log_info(OS_LOG_DEFAULT,
			    "%s bufsize bumped to %u from %u",
			    __func__, d->bd_bufsize, size);
		} else {
			d->bd_bufsize = size;
		}

		/* It's a read/write ioctl */
		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
		break;
	}
	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF32:
	case BIOCSETFNR32: {            /* struct bpf_program32 */
		struct bpf_program32 prg32;

		bcopy(addr, &prg32, sizeof(prg32));
		error = bpf_setf(d, prg32.bf_len,
		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
		break;
	}

	case BIOCSETF64:
	case BIOCSETFNR64: {            /* struct bpf_program64 */
		struct bpf_program64 prg64;

		bcopy(addr, &prg64, sizeof(prg64));
		error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
		break;
	}

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		while (d->bd_hbuf_read != 0) {
			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
			    NULL);
		}
		if ((d->bd_flags & BPF_CLOSING) != 0) {
			error = ENXIO;
			break;
		}
		reset_d(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error == 0) {
				d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:                  /* u_int */
		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
		}
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:              /* struct bpf_dltlist */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			error = bpf_getdltlist(d, addr, p);
		}
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:                  /* u_int */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			u_int dlt;

			bcopy(addr, &dlt, sizeof(dlt));

			if (dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				dlt = DLT_RAW;
			}
			error = bpf_setdlt(d, dlt);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:                 /* struct ifreq */
		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;

			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF: {               /* struct ifreq */
		ifnet_t ifp;

		bcopy(addr, &ifr, sizeof(ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		ifp = ifunit(ifr.ifr_name);
		if (ifp == NULL) {
			error = ENXIO;
		} else {
			error = bpf_setif(d, ifp, true, false);
		}
		break;
	}

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
		struct user32_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof(_tv));
		tv.tv_sec = _tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0) {
			d->bd_rtout = tvtohz(&tv) - 1;
		}
		break;
	}

	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
		struct user64_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof(_tv));
		tv.tv_sec = _tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0) {
			d->bd_rtout = tvtohz(&tv) - 1;
		}
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
		struct user32_timeval tv;

		bzero(&tv, sizeof(tv));
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;
		bcopy(&tv, addr, sizeof(tv));
		break;
	}

	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
		struct user64_timeval tv;
316670eb 1933 struct user64_timeval tv;
6d2010ae 1934
0a7de745 1935 bzero(&tv, sizeof(tv));
316670eb
A
1936 tv.tv_sec = d->bd_rtout / hz;
1937 tv.tv_usec = (d->bd_rtout % hz) * tick;
0a7de745 1938 bcopy(&tv, addr, sizeof(tv));
316670eb
A
1939 break;
1940 }
1c79356b
A
1941
1942 /*
1943 * Get packet stats.
1944 */
0a7de745 1945 case BIOCGSTATS: { /* struct bpf_stat */
316670eb 1946 struct bpf_stat bs;
1c79356b 1947
0a7de745 1948 bzero(&bs, sizeof(bs));
316670eb
A
1949 bs.bs_recv = d->bd_rcount;
1950 bs.bs_drop = d->bd_dcount;
0a7de745 1951 bcopy(&bs, addr, sizeof(bs));
316670eb
A
1952 break;
1953 }
1c79356b
A
1954
1955 /*
1956 * Set immediate mode.
1957 */
0a7de745 1958 case BIOCIMMEDIATE: /* u_int */
3e170ce0 1959 d->bd_immediate = *(u_int *)(void *)addr;
1c79356b
A
1960 break;
1961
0a7de745 1962 case BIOCVERSION: { /* struct bpf_version */
316670eb 1963 struct bpf_version bv;
1c79356b 1964
0a7de745 1965 bzero(&bv, sizeof(bv));
316670eb
A
1966 bv.bv_major = BPF_MAJOR_VERSION;
1967 bv.bv_minor = BPF_MINOR_VERSION;
0a7de745 1968 bcopy(&bv, addr, sizeof(bv));
316670eb
A
1969 break;
1970 }
1c79356b 1971
9bccf70c
A
1972 /*
1973 * Get "header already complete" flag
1974 */
0a7de745
A
1975 case BIOCGHDRCMPLT: /* u_int */
1976 bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
9bccf70c
A
1977 break;
1978
1979 /*
1980 * Set "header already complete" flag
1981 */
0a7de745
A
1982 case BIOCSHDRCMPLT: /* u_int */
1983 bcopy(addr, &int_arg, sizeof(int_arg));
316670eb 1984 d->bd_hdrcmplt = int_arg ? 1 : 0;
9bccf70c
A
1985 break;
1986
1987 /*
1988 * Get "see sent packets" flag
1989 */
0a7de745
A
1990 case BIOCGSEESENT: /* u_int */
1991 bcopy(&d->bd_seesent, addr, sizeof(u_int));
9bccf70c
A
1992 break;
1993
1994 /*
1995 * Set "see sent packets" flag
1996 */
0a7de745
A
1997 case BIOCSSEESENT: /* u_int */
1998 bcopy(addr, &d->bd_seesent, sizeof(u_int));
316670eb
A
1999 break;
2000
2001 /*
2002 * Set traffic service class
2003 */
0a7de745 2004 case BIOCSETTC: { /* int */
316670eb
A
2005 int tc;
2006
0a7de745 2007 bcopy(addr, &tc, sizeof(int));
316670eb 2008 error = bpf_set_traffic_class(d, tc);
9bccf70c 2009 break;
316670eb 2010 }
9bccf70c 2011
316670eb
A
2012 /*
2013 * Get traffic service class
2014 */
0a7de745
A
2015 case BIOCGETTC: /* int */
2016 bcopy(&d->bd_traffic_class, addr, sizeof(int));
1c79356b
A
2017 break;
2018
0a7de745 2019 case FIONBIO: /* Non-blocking I/O; int */
316670eb
A
2020 break;
2021
0a7de745
A
2022 case FIOASYNC: /* Send signal on receive packets; int */
2023 bcopy(addr, &d->bd_async, sizeof(int));
1c79356b 2024 break;
9bccf70c 2025#ifndef __APPLE__
1c79356b
A
2026 case FIOSETOWN:
2027 error = fsetown(*(int *)addr, &d->bd_sigio);
2028 break;
2029
2030 case FIOGETOWN:
2031 *(int *)addr = fgetown(d->bd_sigio);
2032 break;
2033
2034 /* This is deprecated, FIOSETOWN should be used instead. */
2035 case TIOCSPGRP:
2036 error = fsetown(-(*(int *)addr), &d->bd_sigio);
2037 break;
2038
2039 /* This is deprecated, FIOGETOWN should be used instead. */
2040 case TIOCGPGRP:
2041 *(int *)addr = -fgetown(d->bd_sigio);
2042 break;
2043#endif
0a7de745 2044 case BIOCSRSIG: { /* Set receive signal; u_int */
316670eb 2045 u_int sig;
1c79356b 2046
0a7de745 2047 bcopy(addr, &sig, sizeof(u_int));
1c79356b 2048
0a7de745 2049 if (sig >= NSIG) {
316670eb 2050 error = EINVAL;
0a7de745 2051 } else {
316670eb 2052 d->bd_sig = sig;
0a7de745 2053 }
1c79356b
A
2054 break;
2055 }
0a7de745
A
2056 case BIOCGRSIG: /* u_int */
2057 bcopy(&d->bd_sig, addr, sizeof(u_int));
316670eb 2058 break;
39236c6e 2059#ifdef __APPLE__
0a7de745
A
2060 case BIOCSEXTHDR: /* u_int */
2061 bcopy(addr, &int_arg, sizeof(int_arg));
2062 if (int_arg) {
fe8ab488 2063 d->bd_flags |= BPF_EXTENDED_HDR;
0a7de745 2064 } else {
fe8ab488 2065 d->bd_flags &= ~BPF_EXTENDED_HDR;
0a7de745 2066 }
316670eb 2067 break;
39236c6e 2068
0a7de745
A
2069 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2070 ifnet_t ifp;
39236c6e
A
2071 struct bpf_if *bp;
2072
0a7de745 2073 bcopy(addr, &ifr, sizeof(ifr));
39236c6e
A
2074 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2075 ifp = ifunit(ifr.ifr_name);
2076 if (ifp == NULL) {
2077 error = ENXIO;
2078 break;
2079 }
2080 ifr.ifr_intval = 0;
2081 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2082 struct bpf_d *bpf_d;
d9a64523 2083
0a7de745 2084 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
39236c6e 2085 continue;
0a7de745 2086 }
d9a64523
A
2087 for (bpf_d = bp->bif_dlist; bpf_d;
2088 bpf_d = bpf_d->bd_next) {
39236c6e
A
2089 ifr.ifr_intval += 1;
2090 }
2091 }
0a7de745 2092 bcopy(&ifr, addr, sizeof(ifr));
39236c6e
A
2093 break;
2094 }
0a7de745 2095 case BIOCGWANTPKTAP: /* u_int */
fe8ab488 2096 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
0a7de745 2097 bcopy(&int_arg, addr, sizeof(int_arg));
fe8ab488
A
2098 break;
2099
0a7de745
A
2100 case BIOCSWANTPKTAP: /* u_int */
2101 bcopy(addr, &int_arg, sizeof(int_arg));
2102 if (int_arg) {
d9a64523 2103 d->bd_flags |= BPF_WANT_PKTAP;
0a7de745 2104 } else {
d9a64523 2105 d->bd_flags &= ~BPF_WANT_PKTAP;
0a7de745 2106 }
fe8ab488 2107 break;
39236c6e 2108#endif
3e170ce0
A
2109
2110 case BIOCSHEADDROP:
0a7de745 2111 bcopy(addr, &int_arg, sizeof(int_arg));
3e170ce0
A
2112 d->bd_headdrop = int_arg ? 1 : 0;
2113 break;
2114
2115 case BIOCGHEADDROP:
0a7de745 2116 bcopy(&d->bd_headdrop, addr, sizeof(int));
3e170ce0 2117 break;
d9a64523
A
2118
2119 case BIOCSTRUNCATE:
2120 bcopy(addr, &int_arg, sizeof(int_arg));
0a7de745
A
2121 if (int_arg) {
2122 d->bd_flags |= BPF_TRUNCATE;
2123 } else {
d9a64523 2124 d->bd_flags &= ~BPF_TRUNCATE;
0a7de745 2125 }
d9a64523
A
2126 break;
2127
2128 case BIOCGETUUID:
0a7de745 2129 bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
d9a64523
A
2130 break;
2131
2132 case BIOCSETUP: {
2133 struct bpf_setup_args bsa;
0a7de745 2134 ifnet_t ifp;
d9a64523 2135
0a7de745 2136 bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
d9a64523
A
2137 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2138 ifp = ifunit(bsa.bsa_ifname);
2139 if (ifp == NULL) {
2140 error = ENXIO;
2141 os_log_info(OS_LOG_DEFAULT,
2142 "%s: ifnet not found for %s error %d",
2143 __func__, bsa.bsa_ifname, error);
2144 break;
0a7de745 2145 }
d9a64523
A
2146
2147 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2148 break;
2149 }
2150 case BIOCSPKTHDRV2:
2151 bcopy(addr, &int_arg, sizeof(int_arg));
0a7de745 2152 if (int_arg != 0) {
d9a64523 2153 d->bd_flags |= BPF_PKTHDRV2;
0a7de745 2154 } else {
d9a64523 2155 d->bd_flags &= ~BPF_PKTHDRV2;
0a7de745 2156 }
d9a64523
A
2157 break;
2158
2159 case BIOCGPKTHDRV2:
2160 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
0a7de745 2161 bcopy(&int_arg, addr, sizeof(int));
d9a64523 2162 break;
316670eb
A
2163 }
2164
3e170ce0 2165 bpf_release_d(d);
91447636 2166 lck_mtx_unlock(bpf_mlock);
b0d623f7 2167
0a7de745 2168 return error;
1c79356b
A
2169}
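/*
 * Illustrative userland sketch (not part of this file) of driving the
 * ioctls handled above: probe for a free /dev/bpfN node, size the
 * buffer with BIOCSBLEN before binding an interface with BIOCSETIF
 * (buffers cannot be resized once attached, per the EINVAL case
 * above), then read back the size the kernel actually granted.  The
 * interface name "en0" is assumed for illustration.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/if.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int
open_bpf_on_en0(void)
{
	char dev[16];
	struct ifreq ifr;
	u_int blen = 1 << 20;   /* request 1 MiB; the kernel may cap it */
	int fd = -1;

	/* Probe /dev/bpf0, /dev/bpf1, ... until one opens */
	for (int i = 0; i < 256 && fd < 0; i++) {
		snprintf(dev, sizeof(dev), "/dev/bpf%d", i);
		fd = open(dev, O_RDWR);
	}
	if (fd < 0)
		return -1;

	/* Must precede BIOCSETIF; the ioctl is read/write and returns
	 * the possibly clamped size back in blen */
	(void)ioctl(fd, BIOCSBLEN, &blen);

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) < 0) {
		close(fd);
		return -1;
	}
	(void)ioctl(fd, BIOCGBLEN, &blen);
	printf("granted buffer: %u bytes\n", blen);
	return fd;
}
#endif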
2170
2171/*
2172 * Set d's packet filter program to the one at bf_insns. If this file
2173 * already has a filter, free and replace it. Returns EINVAL for bogus requests.
2174 */
2175static int
3e170ce0
A
2176bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2177 u_long cmd)
1c79356b
A
2178{
2179 struct bpf_insn *fcode, *old;
2180 u_int flen, size;
1c79356b 2181
0a7de745 2182 while (d->bd_hbuf_read != 0) {
39236c6e 2183 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2184 }
39236c6e 2185
0a7de745
A
2186 if ((d->bd_flags & BPF_CLOSING) != 0) {
2187 return ENXIO;
2188 }
d9a64523 2189
1c79356b 2190 old = d->bd_filter;
2d21ac55 2191 if (bf_insns == USER_ADDR_NULL) {
0a7de745
A
2192 if (bf_len != 0) {
2193 return EINVAL;
2194 }
2d21ac55 2195 d->bd_filter = NULL;
1c79356b 2196 reset_d(d);
0a7de745
A
2197 if (old != 0) {
2198 FREE(old, M_DEVBUF);
2199 }
2200 return 0;
1c79356b 2201 }
2d21ac55 2202 flen = bf_len;
0a7de745
A
2203 if (flen > BPF_MAXINSNS) {
2204 return EINVAL;
2205 }
1c79356b 2206
91447636 2207 size = flen * sizeof(struct bpf_insn);
1c79356b 2208 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
9bccf70c 2209#ifdef __APPLE__
0a7de745
A
2210 if (fcode == NULL) {
2211 return ENOBUFS;
2212 }
9bccf70c 2213#endif
2d21ac55 2214 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1c79356b 2215 bpf_validate(fcode, (int)flen)) {
1c79356b 2216 d->bd_filter = fcode;
d9a64523 2217
0a7de745 2218 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
39236c6e 2219 reset_d(d);
0a7de745 2220 }
d9a64523 2221
0a7de745
A
2222 if (old != 0) {
2223 FREE(old, M_DEVBUF);
2224 }
1c79356b 2225
0a7de745 2226 return 0;
1c79356b 2227 }
0a7de745
A
2228 FREE(fcode, M_DEVBUF);
2229 return EINVAL;
1c79356b
A
2230}
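/*
 * For reference, a minimal userland sketch of the other side of
 * bpf_setf(): a hand-assembled accept-all program installed with
 * BIOCSETF.  A program with bf_len == 0 and bf_insns == NULL, as
 * handled above, removes the filter instead.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
install_accept_all(int bpf_fd)
{
	/* BPF_RET|BPF_K returns the number of bytes to capture;
	 * (u_int)-1 means no snap limit, keep the whole packet */
	static struct bpf_insn insns[] = {
		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
	};
	struct bpf_program prog = {
		.bf_len = sizeof(insns) / sizeof(insns[0]),
		.bf_insns = insns,
	};

	/* The kernel copyin()s bf_len instructions and runs
	 * bpf_validate() before swapping the program in */
	return ioctl(bpf_fd, BIOCSETF, &prog);
}
#endif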
2231
2232/*
2233 * Detach a file from its current interface (if attached at all) and attach
2234 * to the interface indicated by theywant.
2235 * Return an errno or 0.
2236 */
2237static int
d9a64523 2238bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
1c79356b
A
2239{
2240 struct bpf_if *bp;
2d21ac55 2241 int error;
39236c6e 2242
0a7de745 2243 while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
39236c6e 2244 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2245 }
39236c6e 2246
0a7de745
A
2247 if ((d->bd_flags & BPF_CLOSING) != 0) {
2248 return ENXIO;
2249 }
39236c6e 2250
1c79356b
A
2251 /*
2252 * Look through attached interfaces for the named one.
2253 */
2254 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2255 struct ifnet *ifp = bp->bif_ifp;
2256
0a7de745 2257 if (ifp == 0 || ifp != theywant) {
1c79356b 2258 continue;
0a7de745 2259 }
fe8ab488 2260 /*
5ba3f43e 2261 * Do not use DLT_PKTAP, unless requested explicitly
fe8ab488 2262 */
0a7de745 2263 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
fe8ab488 2264 continue;
0a7de745 2265 }
5c9f4661
A
2266 /*
2267 * Skip the coprocessor interface
2268 */
0a7de745 2269 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
5c9f4661 2270 continue;
0a7de745 2271 }
1c79356b
A
2272 /*
2273 * We found the requested interface.
813fb2f6
A
2274 * Allocate the packet buffers.
2275 */
2276 error = bpf_allocbufs(d);
0a7de745
A
2277 if (error != 0) {
2278 return error;
2279 }
813fb2f6
A
2280 /*
2281 * Detach if attached to something else.
1c79356b 2282 */
1c79356b 2283 if (bp != d->bd_bif) {
813fb2f6 2284 if (d->bd_bif != NULL) {
0a7de745
A
2285 if (bpf_detachd(d, 0) != 0) {
2286 return ENXIO;
2287 }
2288 }
2289 if (bpf_attachd(d, bp) != 0) {
2290 return ENXIO;
2d21ac55 2291 }
1c79356b 2292 }
d9a64523 2293 if (do_reset) {
0a7de745 2294 reset_d(d);
d9a64523 2295 }
0a7de745 2296 return 0;
1c79356b
A
2297 }
2298 /* Not found. */
0a7de745 2299 return ENXIO;
1c79356b
A
2300}
2301
2d21ac55
A
2302/*
2303 * Get a list of the data link types available on the interface.
2304 */
2305static int
316670eb 2306bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2d21ac55 2307{
0a7de745
A
2308 u_int n;
2309 int error;
2310 struct ifnet *ifp;
2311 struct bpf_if *bp;
2312 user_addr_t dlist;
316670eb 2313 struct bpf_dltlist bfl;
b0d623f7 2314
0a7de745 2315 bcopy(addr, &bfl, sizeof(bfl));
b0d623f7 2316 if (proc_is64bit(p)) {
316670eb 2317 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
b0d623f7 2318 } else {
316670eb 2319 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2d21ac55 2320 }
b0d623f7 2321
2d21ac55
A
2322 ifp = d->bd_bif->bif_ifp;
2323 n = 0;
2324 error = 0;
fe8ab488 2325
2d21ac55 2326 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
0a7de745 2327 if (bp->bif_ifp != ifp) {
2d21ac55 2328 continue;
0a7de745 2329 }
d9a64523 2330 /*
5ba3f43e 2331 * Do not use DLT_PKTAP, unless requested explicitly
fe8ab488 2332 */
0a7de745 2333 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
fe8ab488 2334 continue;
0a7de745 2335 }
b0d623f7 2336 if (dlist != USER_ADDR_NULL) {
316670eb 2337 if (n >= bfl.bfl_len) {
0a7de745 2338 return ENOMEM;
2d21ac55 2339 }
b0d623f7 2340 error = copyout(&bp->bif_dlt, dlist,
0a7de745
A
2341 sizeof(bp->bif_dlt));
2342 if (error != 0) {
316670eb 2343 break;
0a7de745
A
2344 }
2345 dlist += sizeof(bp->bif_dlt);
2d21ac55
A
2346 }
2347 n++;
2348 }
316670eb 2349 bfl.bfl_len = n;
0a7de745 2350 bcopy(&bfl, addr, sizeof(bfl));
316670eb 2351
0a7de745 2352 return error;
2d21ac55
A
2353}
2354
2355/*
2356 * Set the data link type of a BPF instance.
2357 */
2358static int
3e170ce0 2359bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2d21ac55
A
2360{
2361 int error, opromisc;
2362 struct ifnet *ifp;
2363 struct bpf_if *bp;
d9a64523 2364
0a7de745
A
2365 if (d->bd_bif->bif_dlt == dlt) {
2366 return 0;
2367 }
d9a64523 2368
0a7de745 2369 while (d->bd_hbuf_read != 0) {
39236c6e 2370 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2371 }
39236c6e 2372
0a7de745
A
2373 if ((d->bd_flags & BPF_CLOSING) != 0) {
2374 return ENXIO;
2375 }
fe8ab488 2376
2d21ac55
A
2377 ifp = d->bd_bif->bif_ifp;
2378 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
5ba3f43e
A
2379 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2380 /*
2381 * Do not use DLT_PKTAP, unless requested explicitly
2382 */
d9a64523
A
2383 if (bp->bif_dlt == DLT_PKTAP &&
2384 !(d->bd_flags & BPF_WANT_PKTAP)) {
5ba3f43e
A
2385 continue;
2386 }
2d21ac55 2387 break;
5ba3f43e 2388 }
2d21ac55
A
2389 }
2390 if (bp != NULL) {
2391 opromisc = d->bd_promisc;
0a7de745
A
2392 if (bpf_detachd(d, 0) != 0) {
2393 return ENXIO;
2394 }
2d21ac55
A
2395 error = bpf_attachd(d, bp);
2396 if (error) {
2397 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
d9a64523
A
2398 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2399 error);
0a7de745 2400 return error;
2d21ac55
A
2401 }
2402 reset_d(d);
2403 if (opromisc) {
2404 lck_mtx_unlock(bpf_mlock);
2405 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2406 lck_mtx_lock(bpf_mlock);
3e170ce0
A
2407 if (error) {
2408 printf("%s: ifpromisc %s%d failed (%d)\n",
2409 __func__, ifnet_name(bp->bif_ifp),
2410 ifnet_unit(bp->bif_ifp), error);
2411 } else {
2d21ac55 2412 d->bd_promisc = 1;
3e170ce0 2413 }
2d21ac55
A
2414 }
2415 }
0a7de745 2416 return bp == NULL ? EINVAL : 0;
2d21ac55
A
2417}
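/*
 * Sketch of the matching userland calls: BIOCGDLTLIST is typically
 * issued twice, first with a NULL list to learn the count (the loop
 * above counts entries even when no buffer is supplied), then with a
 * buffer, before selecting a type with BIOCSDLT.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdlib.h>
#include <string.h>

static int
select_dlt(int bpf_fd, u_int wanted)
{
	struct bpf_dltlist bfl;
	int ret = -1;

	memset(&bfl, 0, sizeof(bfl));
	/* First call: bfl_list == NULL, kernel fills in bfl_len */
	if (ioctl(bpf_fd, BIOCGDLTLIST, &bfl) < 0)
		return -1;
	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
	if (bfl.bfl_list == NULL)
		return -1;
	/* Second call: kernel copies out up to bfl_len entries */
	if (ioctl(bpf_fd, BIOCGDLTLIST, &bfl) == 0) {
		for (u_int i = 0; i < bfl.bfl_len; i++) {
			if (bfl.bfl_list[i] == wanted) {
				ret = ioctl(bpf_fd, BIOCSDLT, &wanted);
				break;
			}
		}
	}
	free(bfl.bfl_list);
	return ret;
}
#endif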
2418
316670eb
A
2419static int
2420bpf_set_traffic_class(struct bpf_d *d, int tc)
2421{
2422 int error = 0;
2423
0a7de745 2424 if (!SO_VALID_TC(tc)) {
316670eb 2425 error = EINVAL;
0a7de745 2426 } else {
316670eb 2427 d->bd_traffic_class = tc;
0a7de745 2428 }
316670eb 2429
0a7de745 2430 return error;
316670eb
A
2431}
2432
2433static void
2434bpf_set_packet_service_class(struct mbuf *m, int tc)
2435{
0a7de745 2436 if (!(m->m_flags & M_PKTHDR)) {
316670eb 2437 return;
0a7de745 2438 }
316670eb
A
2439
2440 VERIFY(SO_VALID_TC(tc));
2441 (void) m_set_service_class(m, so_tc2msc(tc));
2442}
2443
1c79356b 2444/*
b0d623f7 2445 * Support for select()
1c79356b
A
2446 *
2447 * Return true iff the specific operation will not block indefinitely.
2448 * Otherwise, return false but make a note that a selwakeup() must be done.
2449 */
2450int
6d2010ae 2451bpfselect(dev_t dev, int which, void * wql, struct proc *p)
1c79356b 2452{
2d21ac55 2453 struct bpf_d *d;
6d2010ae 2454 int ret = 0;
1c79356b 2455
2d21ac55
A
2456 lck_mtx_lock(bpf_mlock);
2457
55e303ae 2458 d = bpf_dtab[minor(dev)];
d9a64523
A
2459 if (d == NULL || d == BPF_DEV_RESERVED ||
2460 (d->bd_flags & BPF_CLOSING) != 0) {
2d21ac55 2461 lck_mtx_unlock(bpf_mlock);
0a7de745 2462 return ENXIO;
2d21ac55 2463 }
55e303ae 2464
3e170ce0
A
2465 bpf_acquire_d(d);
2466
9bccf70c 2467 if (d->bd_bif == NULL) {
3e170ce0 2468 bpf_release_d(d);
91447636 2469 lck_mtx_unlock(bpf_mlock);
0a7de745 2470 return ENXIO;
9bccf70c
A
2471 }
2472
0a7de745 2473 while (d->bd_hbuf_read != 0) {
39236c6e 2474 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2475 }
3e170ce0
A
2476
2477 if ((d->bd_flags & BPF_CLOSING) != 0) {
2478 bpf_release_d(d);
39236c6e 2479 lck_mtx_unlock(bpf_mlock);
0a7de745 2480 return ENXIO;
39236c6e
A
2481 }
2482
6d2010ae 2483 switch (which) {
0a7de745
A
2484 case FREAD:
2485 if (d->bd_hlen != 0 ||
2486 ((d->bd_immediate ||
2487 d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2488 ret = 1; /* read has data to return */
2489 } else {
2490 /*
2491 * Read has no data to return.
2492 * Make the select wait, and start a timer if
2493 * necessary.
2494 */
2495 selrecord(p, &d->bd_sel, wql);
2496 bpf_start_timer(d);
2497 }
2498 break;
6d2010ae 2499
0a7de745
A
2500 case FWRITE:
2501 /* can't determine whether a write would block */
2502 ret = 1;
2503 break;
9bccf70c 2504 }
91447636 2505
3e170ce0 2506 bpf_release_d(d);
91447636 2507 lck_mtx_unlock(bpf_mlock);
3e170ce0 2508
0a7de745 2509 return ret;
1c79356b
A
2510}
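/*
 * Userland sketch matching bpfselect() above: select() for
 * readability, typically paired with BIOCSRTIMEOUT so that buffered
 * but incomplete data is reported once the read timeout fires (the
 * one-second timeout is assumed for illustration).
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/select.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
bpf_wait_readable(int bpf_fd)
{
	struct timeval to = { 1, 0 };   /* read timeout: 1 second */
	fd_set rfds;

	(void)ioctl(bpf_fd, BIOCSRTIMEOUT, &to);
	FD_ZERO(&rfds);
	FD_SET(bpf_fd, &rfds);
	/* Returns 1 when a read() would not block indefinitely */
	return select(bpf_fd + 1, &rfds, NULL, NULL, NULL);
}
#endif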
2511
b0d623f7
A
2512/*
2513 * Support for kevent() system call. Register EVFILT_READ filters and
2514 * reject all others.
2515 */
2516int bpfkqfilter(dev_t dev, struct knote *kn);
2517static void filt_bpfdetach(struct knote *);
2518static int filt_bpfread(struct knote *, long);
cb323159
A
2519static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
2520static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
b0d623f7 2521
5ba3f43e 2522SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
d9a64523 2523 .f_isfd = 1,
b0d623f7
A
2524 .f_detach = filt_bpfdetach,
2525 .f_event = filt_bpfread,
39037602
A
2526 .f_touch = filt_bpftouch,
2527 .f_process = filt_bpfprocess,
b0d623f7
A
2528};
2529
b0d623f7 2530static int
cb323159 2531filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
b0d623f7 2532{
b0d623f7 2533 int ready = 0;
cb323159 2534 int64_t data = 0;
b0d623f7 2535
b0d623f7 2536 if (d->bd_immediate) {
6d2010ae 2537 /*
d9a64523 2538 * If there's data in the hold buffer, it's the
6d2010ae
A
2539 * amount of data a read will return.
2540 *
2541 * If there's no data in the hold buffer, but
2542 * there's data in the store buffer, a read will
d9a64523 2543 * immediately rotate the store buffer to the
6d2010ae 2544 * hold buffer, the amount of data in the store
d9a64523 2545 * buffer is the amount of data a read will
6d2010ae
A
2546 * return.
2547 *
d9a64523 2548 * If there's no data in either buffer, we're not
6d2010ae
A
2549 * ready to read.
2550 */
cb323159 2551 data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
d9a64523 2552 d->bd_slen : d->bd_hlen);
cb323159
A
2553 int64_t lowwat = knote_low_watermark(kn);
2554 if (lowwat > d->bd_bufsize) {
2555 lowwat = d->bd_bufsize;
6d2010ae 2556 }
cb323159 2557 ready = (data >= lowwat);
b0d623f7 2558 } else {
6d2010ae 2559 /*
d9a64523 2560 * If there's data in the hold buffer, it's the
6d2010ae
A
2561 * amount of data a read will return.
2562 *
d9a64523
A
2563 * If there's no data in the hold buffer, but
2564 * there's data in the store buffer, if the
6d2010ae
A
2565 * timer has expired a read will immediately
2566 * rotate the store buffer to the hold buffer,
d9a64523 2567 * so the amount of data in the store buffer is
6d2010ae
A
2568 * the amount of data a read will return.
2569 *
d9a64523
A
2570 * If there's no data in either buffer, or there's
2571 * no data in the hold buffer and the timer hasn't
6d2010ae
A
2572 * expired, we're not ready to read.
2573 */
cb323159 2574 data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
d9a64523 2575 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
cb323159 2576 ready = (data > 0);
b0d623f7 2577 }
0a7de745 2578 if (!ready) {
6d2010ae 2579 bpf_start_timer(d);
cb323159
A
2580 } else if (kev) {
2581 knote_fill_kevent(kn, kev, data);
0a7de745 2582 }
b0d623f7 2583
0a7de745 2584 return ready;
b0d623f7
A
2585}
2586
39037602
A
2587int
2588bpfkqfilter(dev_t dev, struct knote *kn)
2589{
2590 struct bpf_d *d;
2591 int res;
2592
2593 /*
2594 * Is this device a bpf?
2595 */
cb323159
A
2596 if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
2597 knote_set_error(kn, EINVAL);
0a7de745 2598 return 0;
39037602
A
2599 }
2600
2601 lck_mtx_lock(bpf_mlock);
2602
2603 d = bpf_dtab[minor(dev)];
2604
d9a64523
A
2605 if (d == NULL || d == BPF_DEV_RESERVED ||
2606 (d->bd_flags & BPF_CLOSING) != 0 ||
2607 d->bd_bif == NULL) {
39037602 2608 lck_mtx_unlock(bpf_mlock);
cb323159 2609 knote_set_error(kn, ENXIO);
0a7de745 2610 return 0;
39037602
A
2611 }
2612
2613 kn->kn_hook = d;
2614 kn->kn_filtid = EVFILTID_BPFREAD;
2615 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2616 d->bd_flags |= BPF_KNOTE;
2617
2618 /* capture the current state */
cb323159 2619 res = filt_bpfread_common(kn, NULL, d);
39037602
A
2620
2621 lck_mtx_unlock(bpf_mlock);
2622
0a7de745 2623 return res;
39037602
A
2624}
2625
2626static void
2627filt_bpfdetach(struct knote *kn)
2628{
2629 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2630
2631 lck_mtx_lock(bpf_mlock);
2632 if (d->bd_flags & BPF_KNOTE) {
2633 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2634 d->bd_flags &= ~BPF_KNOTE;
2635 }
2636 lck_mtx_unlock(bpf_mlock);
2637}
2638
2639static int
2640filt_bpfread(struct knote *kn, long hint)
2641{
2642#pragma unused(hint)
2643 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2644
cb323159 2645 return filt_bpfread_common(kn, NULL, d);
39037602
A
2646}
2647
2648static int
cb323159 2649filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
39037602
A
2650{
2651 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2652 int res;
2653
2654 lck_mtx_lock(bpf_mlock);
2655
2656 /* save off the lowat threshold and flag */
2657 kn->kn_sdata = kev->data;
2658 kn->kn_sfflags = kev->fflags;
39037602
A
2659
2660 /* output data will be re-generated here */
cb323159 2661 res = filt_bpfread_common(kn, NULL, d);
39037602
A
2662
2663 lck_mtx_unlock(bpf_mlock);
2664
0a7de745 2665 return res;
39037602
A
2666}
2667
2668static int
cb323159 2669filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
39037602 2670{
39037602
A
2671 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2672 int res;
2673
2674 lck_mtx_lock(bpf_mlock);
cb323159 2675 res = filt_bpfread_common(kn, kev, d);
39037602
A
2676 lck_mtx_unlock(bpf_mlock);
2677
0a7de745 2678 return res;
39037602
A
2679}
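/*
 * Userland sketch of the kevent() side of the filter above.  NOTE_LOWAT
 * in fflags with the threshold in data feeds the low-watermark check in
 * filt_bpfread_common(), which clamps it to the buffer size; the 4 KiB
 * threshold here is assumed for illustration.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/event.h>
#include <unistd.h>

static long
wait_for_bpf_data(int bpf_fd)
{
	struct kevent kev;
	long nbytes = -1;
	int kq = kqueue();

	if (kq < 0)
		return -1;
	EV_SET(&kev, bpf_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 4096, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == 0 &&
	    kevent(kq, NULL, 0, &kev, 1, NULL) == 1) {
		/* kev.data is the byte count a read() would return */
		nbytes = (long)kev.data;
	}
	close(kq);
	return nbytes;
}
#endif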
2680
1c79356b 2681/*
d9a64523 2682 * Copy data from an mbuf chain into a buffer. This code is derived
5ba3f43e 2683 * from m_copydata in kern/uipc_mbuf.c.
1c79356b
A
2684 */
2685static void
5ba3f43e 2686bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
1c79356b 2687{
91447636 2688 u_int count;
1c79356b
A
2689 u_char *dst;
2690
1c79356b
A
2691 dst = dst_arg;
2692 while (len > 0) {
0a7de745 2693 if (m == 0) {
1c79356b 2694 panic("bpf_mcopy");
0a7de745 2695 }
1c79356b 2696 count = min(m->m_len, len);
2d21ac55 2697 bcopy(mbuf_data(m), dst, count);
1c79356b
A
2698 m = m->m_next;
2699 dst += count;
2700 len -= count;
2701 }
2702}
2703
2d21ac55
A
2704static inline void
2705bpf_tap_imp(
0a7de745
A
2706 ifnet_t ifp,
2707 u_int32_t dlt,
5ba3f43e 2708 struct bpf_packet *bpf_pkt,
0a7de745 2709 int outbound)
1c79356b 2710{
0a7de745 2711 struct bpf_d *d;
5ba3f43e 2712 u_int slen;
91447636 2713 struct bpf_if *bp;
1c79356b 2714
2d21ac55
A
2715 /*
2716 * It's possible that we get here after the bpf descriptor has been
2717 * detached from the interface; in such a case we simply return.
2718 * Lock ordering is important since we can be called asynchronously
5ba3f43e 2719 * (from IOKit) to process an inbound packet; when that happens
2d21ac55
A
2720 * we would have been holding its "gateLock" and will be acquiring
2721 * "bpf_mlock" upon entering this routine. Due to that, we release
2722 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2723 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2724 * when a ifnet_set_promiscuous request simultaneously collides with
2725 * an inbound packet being passed into the tap callback.
2726 */
91447636 2727 lck_mtx_lock(bpf_mlock);
2d21ac55
A
2728 if (ifp->if_bpf == NULL) {
2729 lck_mtx_unlock(bpf_mlock);
2730 return;
2731 }
5ba3f43e
A
2732 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2733 if (bp->bif_ifp != ifp) {
2734 /* wrong interface */
2735 bp = NULL;
2736 break;
2d21ac55 2737 }
5ba3f43e
A
2738 if (dlt == 0 || bp->bif_dlt == dlt) {
2739 /* tapping default DLT or DLT matches */
2740 break;
2741 }
2742 }
2743 if (bp == NULL) {
2744 goto done;
2745 }
2746 for (d = bp->bif_dlist; d; d = d->bd_next) {
d9a64523
A
2747 struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2748 struct bpf_packet bpf_pkt_tmp;
2749 struct pktap_header_buffer bpfp_header_tmp;
2750
0a7de745 2751 if (outbound && !d->bd_seesent) {
5ba3f43e 2752 continue;
0a7de745 2753 }
d9a64523 2754
5ba3f43e
A
2755 ++d->bd_rcount;
2756 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
d9a64523
A
2757 bpf_pkt->bpfp_total_length, 0);
2758 if (bp->bif_ifp->if_type == IFT_PKTAP &&
2759 bp->bif_dlt == DLT_PKTAP) {
2760 /*
2761 * Need to copy the bpf_pkt because the conversion
2762 * to v2 pktap header modifies the content of the
2763 * bpfp_header
2764 */
2765 if ((d->bd_flags & BPF_PKTHDRV2) &&
2766 bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2767 bpf_pkt_tmp = *bpf_pkt;
2768
2769 bpf_pkt = &bpf_pkt_tmp;
2770
2771 memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2772 bpf_pkt->bpfp_header_length);
2773
2774 bpf_pkt->bpfp_header = &bpfp_header_tmp;
2775
2776 convert_to_pktap_header_to_v2(bpf_pkt,
2777 !!(d->bd_flags & BPF_TRUNCATE));
2778 }
2779
0a7de745 2780 if (d->bd_flags & BPF_TRUNCATE) {
d9a64523
A
2781 slen = min(slen,
2782 get_pkt_trunc_len((u_char *)bpf_pkt,
0a7de745
A
2783 bpf_pkt->bpfp_total_length));
2784 }
d9a64523 2785 }
5ba3f43e 2786 if (slen != 0) {
5ba3f43e 2787 catchpacket(d, bpf_pkt, slen, outbound);
91447636 2788 }
d9a64523 2789 bpf_pkt = bpf_pkt_saved;
1c79356b 2790 }
5ba3f43e 2791
d9a64523 2792done:
91447636 2793 lck_mtx_unlock(bpf_mlock);
1c79356b
A
2794}
2795
5ba3f43e
A
2796static inline void
2797bpf_tap_mbuf(
0a7de745
A
2798 ifnet_t ifp,
2799 u_int32_t dlt,
2800 mbuf_t m,
2801 void* hdr,
2802 size_t hlen,
2803 int outbound)
5ba3f43e
A
2804{
2805 struct bpf_packet bpf_pkt;
2806 struct mbuf *m0;
2807
2808 if (ifp->if_bpf == NULL) {
2809 /* quickly check without taking lock */
2810 return;
2811 }
2812 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2813 bpf_pkt.bpfp_mbuf = m;
2814 bpf_pkt.bpfp_total_length = 0;
0a7de745 2815 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
5ba3f43e 2816 bpf_pkt.bpfp_total_length += m0->m_len;
0a7de745 2817 }
5ba3f43e
A
2818 bpf_pkt.bpfp_header = hdr;
2819 if (hdr != NULL) {
2820 bpf_pkt.bpfp_total_length += hlen;
2821 bpf_pkt.bpfp_header_length = hlen;
2822 } else {
2823 bpf_pkt.bpfp_header_length = 0;
2824 }
2825 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2826}
2827
2d21ac55
A
2828void
2829bpf_tap_out(
0a7de745
A
2830 ifnet_t ifp,
2831 u_int32_t dlt,
2832 mbuf_t m,
2833 void* hdr,
2834 size_t hlen)
2d21ac55 2835{
5ba3f43e 2836 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2d21ac55
A
2837}
2838
2839void
2840bpf_tap_in(
0a7de745
A
2841 ifnet_t ifp,
2842 u_int32_t dlt,
2843 mbuf_t m,
2844 void* hdr,
2845 size_t hlen)
2d21ac55 2846{
5ba3f43e 2847 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2d21ac55
A
2848}
2849
2850/* Callback registered with Ethernet driver. */
0a7de745
A
2851static int
2852bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2d21ac55 2853{
5ba3f43e 2854 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
d9a64523 2855
0a7de745 2856 return 0;
2d21ac55
A
2857}
2858
5ba3f43e 2859
d9a64523
A
2860static errno_t
2861bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2862{
2863 errno_t err = 0;
2864 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2865 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2866 } else {
2867 err = EINVAL;
2868 }
2869
0a7de745 2870 return err;
d9a64523
A
2871}
2872
5ba3f43e
A
2873static void
2874copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2875{
2876 /* copy the optional header */
2877 if (pkt->bpfp_header_length != 0) {
0a7de745 2878 size_t count = min(len, pkt->bpfp_header_length);
5ba3f43e
A
2879 bcopy(pkt->bpfp_header, dst, count);
2880 len -= count;
2881 dst += count;
2882 }
2883 if (len == 0) {
2884 /* nothing past the header */
2885 return;
2886 }
2887 /* copy the packet */
2888 switch (pkt->bpfp_type) {
2889 case BPF_PACKET_TYPE_MBUF:
2890 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2891 break;
2892 default:
2893 break;
2894 }
2895}
2896
d9a64523
A
2897static uint16_t
2898get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2899 const uint16_t remaining_caplen)
2900{
2901 /*
2902 * For some reason tcpdump expects one byte beyond the ESP header
2903 */
2904 uint16_t trunc_len = ESP_HDR_SIZE + 1;
2905
0a7de745
A
2906 if (trunc_len > remaining_caplen) {
2907 return remaining_caplen;
2908 }
d9a64523 2909
0a7de745 2910 return trunc_len;
d9a64523
A
2911}
2912
2913static uint16_t
2914get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2915 const uint16_t remaining_caplen)
2916{
2917 /*
2918 * Include the generic payload header
2919 */
2920 uint16_t trunc_len = ISAKMP_HDR_SIZE;
2921
0a7de745
A
2922 if (trunc_len > remaining_caplen) {
2923 return remaining_caplen;
2924 }
d9a64523 2925
0a7de745 2926 return trunc_len;
d9a64523
A
2927}
2928
2929static uint16_t
2930get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
2931 const uint16_t remaining_caplen)
2932{
2933 int err = 0;
2934 uint16_t trunc_len = 0;
2935 char payload[remaining_caplen];
2936
2937 err = bpf_copydata(pkt, off, remaining_caplen, payload);
0a7de745
A
2938 if (err != 0) {
2939 return remaining_caplen;
2940 }
d9a64523
A
2941 /*
2942 * There are three cases:
2943 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
2944 * - keep alive: 1 byte payload
2945 * - otherwise it's ESP
2946 */
2947 if (remaining_caplen >= 4 &&
0a7de745
A
2948 payload[0] == 0 && payload[1] == 0 &&
2949 payload[2] == 0 && payload[3] == 0) {
d9a64523
A
2950 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
2951 } else if (remaining_caplen == 1) {
2952 trunc_len = 1;
2953 } else {
2954 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
2955 }
2956
0a7de745
A
2957 if (trunc_len > remaining_caplen) {
2958 return remaining_caplen;
2959 }
d9a64523 2960
0a7de745 2961 return trunc_len;
d9a64523
A
2962}
2963
2964static uint16_t
2965get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2966{
2967 int err = 0;
2968 uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
2969
0a7de745
A
2970 if (trunc_len >= remaining_caplen) {
2971 return remaining_caplen;
2972 }
d9a64523
A
2973
2974 struct udphdr udphdr;
2975 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
0a7de745
A
2976 if (err != 0) {
2977 return remaining_caplen;
2978 }
d9a64523
A
2979
2980 u_short sport, dport;
2981
2982 sport = EXTRACT_SHORT(&udphdr.uh_sport);
2983 dport = EXTRACT_SHORT(&udphdr.uh_dport);
2984
2985 if (dport == PORT_DNS || sport == PORT_DNS) {
2986 /*
2987 * Full UDP payload for DNS
2988 */
2989 trunc_len = remaining_caplen;
2990 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
0a7de745 2991 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
d9a64523
A
2992 /*
2993 * Full UDP payload for BOOTP and DHCP
2994 */
2995 trunc_len = remaining_caplen;
2996 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
2997 /*
2998 * Return the ISAKMP header
2999 */
3000 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3001 remaining_caplen - sizeof(struct udphdr));
3002 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3003 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3004 remaining_caplen - sizeof(struct udphdr));
3005 }
0a7de745
A
3006 if (trunc_len >= remaining_caplen) {
3007 return remaining_caplen;
3008 }
d9a64523 3009
0a7de745 3010 return trunc_len;
d9a64523
A
3011}
3012
3013static uint16_t
3014get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3015{
3016 int err = 0;
3017 uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
0a7de745
A
3018 if (trunc_len >= remaining_caplen) {
3019 return remaining_caplen;
3020 }
d9a64523
A
3021
3022 struct tcphdr tcphdr;
3023 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
0a7de745
A
3024 if (err != 0) {
3025 return remaining_caplen;
3026 }
d9a64523
A
3027
3028 u_short sport, dport;
3029 sport = EXTRACT_SHORT(&tcphdr.th_sport);
3030 dport = EXTRACT_SHORT(&tcphdr.th_dport);
3031
3032 if (dport == PORT_DNS || sport == PORT_DNS) {
3033 /*
3034 * Full TCP payload for DNS
3035 */
3036 trunc_len = remaining_caplen;
3037 } else {
3038 trunc_len = tcphdr.th_off << 2;
3039 }
0a7de745
A
3040 if (trunc_len >= remaining_caplen) {
3041 return remaining_caplen;
3042 }
d9a64523 3043
0a7de745 3044 return trunc_len;
d9a64523
A
3045}
3046
3047static uint16_t
3048get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3049{
3050 uint16_t trunc_len;
3051
3052 switch (proto) {
3053 case IPPROTO_ICMP: {
3054 /*
3055 * Full ICMP payload
3056 */
3057 trunc_len = remaining_caplen;
3058 break;
3059 }
3060 case IPPROTO_ICMPV6: {
3061 /*
3062 * Full ICMPv6 payload
3063 */
3064 trunc_len = remaining_caplen;
3065 break;
3066 }
3067 case IPPROTO_IGMP: {
3068 /*
3069 * Full IGMP payload
3070 */
3071 trunc_len = remaining_caplen;
3072 break;
3073 }
3074 case IPPROTO_UDP: {
3075 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3076 break;
3077 }
3078 case IPPROTO_TCP: {
3079 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3080 break;
3081 }
3082 case IPPROTO_ESP: {
3083 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3084 break;
3085 }
3086 default: {
3087 /*
3088 * By default we only include the IP header
3089 */
3090 trunc_len = 0;
3091 break;
3092 }
3093 }
0a7de745
A
3094 if (trunc_len >= remaining_caplen) {
3095 return remaining_caplen;
3096 }
d9a64523 3097
0a7de745 3098 return trunc_len;
d9a64523
A
3099}
3100
3101static uint16_t
3102get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3103{
3104 int err = 0;
3105 uint16_t iplen = sizeof(struct ip);
0a7de745
A
3106 if (iplen >= remaining_caplen) {
3107 return remaining_caplen;
3108 }
d9a64523
A
3109
3110 struct ip iphdr;
3111 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
0a7de745
A
3112 if (err != 0) {
3113 return remaining_caplen;
3114 }
d9a64523
A
3115
3116 uint8_t proto = 0;
3117
3118 iplen = iphdr.ip_hl << 2;
0a7de745
A
3119 if (iplen >= remaining_caplen) {
3120 return remaining_caplen;
3121 }
d9a64523
A
3122
3123 proto = iphdr.ip_p;
3124 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3125
0a7de745
A
3126 if (iplen >= remaining_caplen) {
3127 return remaining_caplen;
3128 }
d9a64523 3129
0a7de745 3130 return iplen;
d9a64523
A
3131}
3132
3133static uint16_t
3134get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3135{
3136 int err = 0;
3137 uint16_t iplen = sizeof(struct ip6_hdr);
0a7de745
A
3138 if (iplen >= remaining_caplen) {
3139 return remaining_caplen;
3140 }
d9a64523
A
3141
3142 struct ip6_hdr ip6hdr;
3143 err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
0a7de745
A
3144 if (err != 0) {
3145 return remaining_caplen;
3146 }
d9a64523
A
3147
3148 uint8_t proto = 0;
3149
3150 /*
3151 * TBD: process the extension headers
3152 */
3153 proto = ip6hdr.ip6_nxt;
3154 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3155
0a7de745
A
3156 if (iplen >= remaining_caplen) {
3157 return remaining_caplen;
3158 }
d9a64523 3159
0a7de745 3160 return iplen;
d9a64523
A
3161}
3162
3163static uint16_t
3164get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
3165{
3166 int err = 0;
3167 uint16_t ethlen = sizeof(struct ether_header);
0a7de745
A
3168 if (ethlen >= remaining_caplen) {
3169 return remaining_caplen;
3170 }
d9a64523
A
3171
3172 struct ether_header eh;
3173 u_short type;
3174 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
0a7de745
A
3175 if (err != 0) {
3176 return remaining_caplen;
3177 }
d9a64523
A
3178
3179 type = EXTRACT_SHORT(&eh.ether_type);
3180 /* Include full ARP */
3181 if (type == ETHERTYPE_ARP) {
3182 ethlen = remaining_caplen;
3183 } else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
3184 ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
3185 } else {
3186 if (type == ETHERTYPE_IP) {
3187 ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
3188 remaining_caplen);
3189 } else if (type == ETHERTYPE_IPV6) {
3190 ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
0a7de745 3191 remaining_caplen);
d9a64523
A
3192 }
3193 }
0a7de745 3194 return ethlen;
d9a64523
A
3195}
3196
3197static uint32_t
3198get_pkt_trunc_len(u_char *p, u_int len)
3199{
3200 struct bpf_packet *pkt = (struct bpf_packet *)(void *) p;
3201 struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3202 uint32_t out_pkt_len = 0, tlen = 0;
3203 /*
3204 * pktap->pth_frame_pre_length is L2 header length and accounts
3205 * for both pre and pre_adjust.
3206 * pktap->pth_length is sizeof(pktap_header) (excl the pre/pre_adjust)
3207 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
3208 * pre is the offset to the L3 header after the bpfp_header, or length
3209 * of L2 header after bpfp_header, if present.
0a7de745 3210 */
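	/*
	 * Worked example (values assumed for illustration): an Ethernet
	 * pktap frame with no pre_adjust has pth_frame_pre_length = 14
	 * and bpfp_header_length == pth_length, giving pre = 14 - 0 = 14,
	 * i.e. a 14-byte L2 header sits between the bpf header and the
	 * L3 header, which selects the "pre > 0" branch below.
	 */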
cb323159 3211 int32_t pre = pktap->pth_frame_pre_length -
d9a64523
A
3212 (pkt->bpfp_header_length - pktap->pth_length);
3213
3214 /* Length of the input packet starting from L3 header */
3215 uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
3216 if (pktap->pth_protocol_family == AF_INET ||
3217 pktap->pth_protocol_family == AF_INET6) {
3218 /* Contains L2 header */
3219 if (pre > 0) {
cb323159 3220 if (pre < (int32_t)sizeof(struct ether_header)) {
d9a64523 3221 goto too_short;
0a7de745 3222 }
d9a64523
A
3223
3224 out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
3225 } else if (pre == 0) {
3226 if (pktap->pth_protocol_family == AF_INET) {
3227 out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
3228 } else if (pktap->pth_protocol_family == AF_INET6) {
3229 out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
3230 }
3231 } else {
3232 /* Ideally pre should be >= 0. This is an exception */
3233 out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
3234 }
3235 } else {
3236 if (pktap->pth_iftype == IFT_ETHER) {
3237 if (in_pkt_len < sizeof(struct ether_header)) {
3238 goto too_short;
3239 }
3240 /* At most include the Ethernet header and 16 bytes */
3241 out_pkt_len = MIN(sizeof(struct ether_header) + 16,
3242 in_pkt_len);
3243 } else {
3244 /*
3245 * For unknown protocols include at most 16 bytes
3246 */
3247 out_pkt_len = MIN(16, in_pkt_len);
3248 }
3249 }
3250done:
3251 tlen = pkt->bpfp_header_length + out_pkt_len + pre;
0a7de745 3252 return tlen;
d9a64523
A
3253too_short:
3254 out_pkt_len = in_pkt_len;
3255 goto done;
3256}
3257
1c79356b
A
3258/*
3259 * Move the packet data from the bpf_packet (pkt) into the
3260 * store buffer, rotating the buffers and waking up any pending
5ba3f43e 3261 * reader when the store buffer fills up.
1c79356b
A
3262 */
3263static void
5ba3f43e 3264catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
0a7de745 3265 u_int snaplen, int outbound)
1c79356b 3266{
2d21ac55 3267 struct bpf_hdr *hp;
316670eb 3268 struct bpf_hdr_ext *ehp;
2d21ac55 3269 int totlen, curlen;
316670eb 3270 int hdrlen, caplen;
6d2010ae 3271 int do_wakeup = 0;
316670eb 3272 u_char *payload;
39236c6e 3273 struct timeval tv;
316670eb 3274
fe8ab488 3275 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
316670eb 3276 d->bd_bif->bif_hdrlen;
1c79356b
A
3277 /*
3278 * Figure out how many bytes to move. If the packet is
3279 * greater or equal to the snapshot length, transfer that
3280 * much. Otherwise, transfer the whole packet (unless
3281 * we hit the buffer size limit).
3282 */
5ba3f43e 3283 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
0a7de745 3284 if (totlen > d->bd_bufsize) {
1c79356b 3285 totlen = d->bd_bufsize;
0a7de745 3286 }
1c79356b 3287
0a7de745 3288 if (hdrlen > totlen) {
a39ff7e2 3289 return;
0a7de745 3290 }
a39ff7e2 3291
1c79356b
A
3292 /*
3293 * Round up the end of the previous packet to the next longword.
3294 */
3295 curlen = BPF_WORDALIGN(d->bd_slen);
3296 if (curlen + totlen > d->bd_bufsize) {
3297 /*
3298 * This packet will overflow the storage buffer.
3299 * Rotate the buffers if we can, then wakeup any
3300 * pending reads.
813fb2f6
A
3301 *
3302 * We cannot rotate buffers if a read is in progress
3303 * so drop the packet
1c79356b 3304 */
d9a64523 3305 if (d->bd_hbuf_read != 0) {
813fb2f6
A
3306 ++d->bd_dcount;
3307 return;
3308 }
d9a64523 3309
6d2010ae 3310 if (d->bd_fbuf == NULL) {
3e170ce0
A
3311 if (d->bd_headdrop == 0) {
3312 /*
3313 * We haven't completed the previous read yet,
3314 * so drop the packet.
3315 */
3316 ++d->bd_dcount;
3317 return;
3318 }
1c79356b 3319 /*
3e170ce0 3320 * Drop the hold buffer as it contains older packets
1c79356b 3321 */
3e170ce0
A
3322 d->bd_dcount += d->bd_hcnt;
3323 d->bd_fbuf = d->bd_hbuf;
3324 ROTATE_BUFFERS(d);
3325 } else {
3326 ROTATE_BUFFERS(d);
1c79356b 3327 }
6d2010ae 3328 do_wakeup = 1;
1c79356b 3329 curlen = 0;
0a7de745 3330 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
1c79356b 3331 /*
d9a64523
A
3332 * Immediate mode is set, or the read timeout has
3333 * already expired during a select call. A packet
6d2010ae 3334 * arrived, so the reader should be woken up.
1c79356b 3335 */
6d2010ae 3336 do_wakeup = 1;
0a7de745 3337 }
1c79356b
A
3338
3339 /*
3340 * Append the bpf header.
3341 */
b0d623f7 3342 microtime(&tv);
d9a64523 3343 if (d->bd_flags & BPF_EXTENDED_HDR) {
5ba3f43e
A
3344 struct mbuf *m;
3345
3346 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
0a7de745 3347 ? pkt->bpfp_mbuf : NULL;
d9a64523
A
3348 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3349 memset(ehp, 0, sizeof(*ehp));
3350 ehp->bh_tstamp.tv_sec = tv.tv_sec;
3351 ehp->bh_tstamp.tv_usec = tv.tv_usec;
5ba3f43e
A
3352
3353 ehp->bh_datalen = pkt->bpfp_total_length;
d9a64523 3354 ehp->bh_hdrlen = hdrlen;
5ba3f43e
A
3355 caplen = ehp->bh_caplen = totlen - hdrlen;
3356 if (m == NULL) {
3357 if (outbound) {
39236c6e 3358 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
5ba3f43e 3359 } else {
39236c6e 3360 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
5ba3f43e 3361 }
39236c6e 3362 } else if (outbound) {
5ba3f43e
A
3363 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3364
39236c6e 3365 /* only do lookups on non-raw INPCB */
0a7de745
A
3366 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
3367 PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
3368 (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
39236c6e
A
3369 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3370 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3371 ehp->bh_proto = m->m_pkthdr.pkt_proto;
3372 }
3373 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
0a7de745 3374 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
39037602 3375 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
0a7de745
A
3376 }
3377 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
39037602 3378 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
0a7de745
A
3379 }
3380 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
39037602 3381 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
0a7de745 3382 }
39037602
A
3383 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3384 ehp->bh_unsent_bytes =
3385 m->m_pkthdr.bufstatus_if;
3386 ehp->bh_unsent_snd =
3387 m->m_pkthdr.bufstatus_sndbuf;
3388 }
0a7de745 3389 } else {
316670eb 3390 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
0a7de745 3391 }
d9a64523
A
3392 payload = (u_char *)ehp + hdrlen;
3393 } else {
3394 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3395 hp->bh_tstamp.tv_sec = tv.tv_sec;
3396 hp->bh_tstamp.tv_usec = tv.tv_usec;
5ba3f43e 3397 hp->bh_datalen = pkt->bpfp_total_length;
d9a64523 3398 hp->bh_hdrlen = hdrlen;
5ba3f43e 3399 caplen = hp->bh_caplen = totlen - hdrlen;
d9a64523
A
3400 payload = (u_char *)hp + hdrlen;
3401 }
1c79356b
A
3402 /*
3403 * Copy the packet data into the store buffer and update its length.
3404 */
5ba3f43e 3405 copy_bpf_packet(pkt, payload, caplen);
1c79356b 3406 d->bd_slen = curlen + totlen;
3e170ce0 3407 d->bd_scnt += 1;
6d2010ae 3408
0a7de745 3409 if (do_wakeup) {
6d2010ae 3410 bpf_wakeup(d);
0a7de745 3411 }
1c79356b
A
3412}
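/*
 * The store-buffer layout produced above (bpf_hdr, then caplen bytes of
 * packet, with each record padded to a word boundary) is consumed in
 * userland by walking read() output with BPF_WORDALIGN, sketched here
 * for the plain (non-extended) header.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/types.h>
#include <net/bpf.h>

static void
walk_bpf_buffer(char *buf, ssize_t nread,
    void (*cb)(const struct bpf_hdr *, const u_char *))
{
	char *p = buf;

	while (p < buf + nread) {
		const struct bpf_hdr *hp = (const struct bpf_hdr *)(void *)p;

		/* bh_hdrlen already includes the padding inserted so the
		 * payload starts on a longword boundary */
		cb(hp, (const u_char *)p + hp->bh_hdrlen);
		p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
	}
}
#endif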
3413
3414/*
3415 * Initialize all nonzero fields of a descriptor.
3416 */
3417static int
91447636 3418bpf_allocbufs(struct bpf_d *d)
1c79356b 3419{
813fb2f6
A
3420 if (d->bd_sbuf != NULL) {
3421 FREE(d->bd_sbuf, M_DEVBUF);
3422 d->bd_sbuf = NULL;
3423 }
3424 if (d->bd_hbuf != NULL) {
3425 FREE(d->bd_hbuf, M_DEVBUF);
3426 d->bd_hbuf = NULL;
3427 }
3428 if (d->bd_fbuf != NULL) {
3429 FREE(d->bd_fbuf, M_DEVBUF);
3430 d->bd_fbuf = NULL;
3431 }
3432
1c79356b 3433 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
0a7de745
A
3434 if (d->bd_fbuf == NULL) {
3435 return ENOBUFS;
3436 }
1c79356b
A
3437
3438 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
813fb2f6 3439 if (d->bd_sbuf == NULL) {
1c79356b 3440 FREE(d->bd_fbuf, M_DEVBUF);
813fb2f6 3441 d->bd_fbuf = NULL;
0a7de745 3442 return ENOBUFS;
1c79356b
A
3443 }
3444 d->bd_slen = 0;
3445 d->bd_hlen = 0;
3e170ce0
A
3446 d->bd_scnt = 0;
3447 d->bd_hcnt = 0;
0a7de745 3448 return 0;
1c79356b
A
3449}
3450
3451/*
3452 * Free buffers currently in use by a descriptor.
3453 * Called on close.
3454 */
3455static void
91447636 3456bpf_freed(struct bpf_d *d)
1c79356b
A
3457{
3458 /*
3459 * We don't need to lock out interrupts since this descriptor has
3460 * been detached from its interface and has not yet been marked
3461 * free.
3462 */
0a7de745 3463 if (d->bd_hbuf_read != 0) {
39236c6e 3464 panic("bpf buffer freed during read");
0a7de745 3465 }
39236c6e 3466
1c79356b
A
3467 if (d->bd_sbuf != 0) {
3468 FREE(d->bd_sbuf, M_DEVBUF);
0a7de745 3469 if (d->bd_hbuf != 0) {
1c79356b 3470 FREE(d->bd_hbuf, M_DEVBUF);
0a7de745
A
3471 }
3472 if (d->bd_fbuf != 0) {
1c79356b 3473 FREE(d->bd_fbuf, M_DEVBUF);
0a7de745
A
3474 }
3475 }
3476 if (d->bd_filter) {
3477 FREE(d->bd_filter, M_DEVBUF);
1c79356b 3478 }
1c79356b
A
3479}
3480
3481/*
d9a64523 3482 * Attach an interface to bpf. dlt is the link layer type; hdrlen is
1c79356b
A
3483 * the fixed size of the link header (variable length headers not yet
3484 * supported).
3485 */
3486void
91447636 3487bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1c79356b 3488{
2d21ac55
A
3489 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3490}
3491
3492errno_t
3493bpf_attach(
0a7de745
A
3494 ifnet_t ifp,
3495 u_int32_t dlt,
3496 u_int32_t hdrlen,
3497 bpf_send_func send,
3498 bpf_tap_func tap)
2d21ac55 3499{
5ba3f43e 3500 struct bpf_if *bp;
2d21ac55 3501 struct bpf_if *bp_new;
5ba3f43e 3502 struct bpf_if *bp_before_first = NULL;
2d21ac55 3503 struct bpf_if *bp_first = NULL;
5ba3f43e
A
3504 struct bpf_if *bp_last = NULL;
3505 boolean_t found;
3506
3e170ce0
A
3507 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
3508 M_WAIT | M_ZERO);
0a7de745 3509 if (bp_new == 0) {
1c79356b 3510 panic("bpfattach");
0a7de745 3511 }
1c79356b 3512
91447636
A
3513 lck_mtx_lock(bpf_mlock);
3514
2d21ac55 3515 /*
5ba3f43e
A
3516 * Check if this interface/dlt is already attached. Remember the
3517 * first and last attachment for this interface, as well as the
3518 * element before the first attachment.
2d21ac55 3519 */
5ba3f43e
A
3520 found = FALSE;
3521 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3522 if (bp->bif_ifp != ifp) {
3523 if (bp_first != NULL) {
3524 /* no more elements for this interface */
3525 break;
3526 }
3527 bp_before_first = bp;
3528 } else {
3529 if (bp->bif_dlt == dlt) {
3530 found = TRUE;
3531 break;
3532 }
3533 if (bp_first == NULL) {
3534 bp_first = bp;
3535 }
3536 bp_last = bp;
3537 }
2d21ac55 3538 }
5ba3f43e
A
3539 if (found) {
3540 lck_mtx_unlock(bpf_mlock);
39236c6e 3541 printf("bpfattach - %s with dlt %d is already attached\n",
0a7de745 3542 if_name(ifp), dlt);
2d21ac55 3543 FREE(bp_new, M_DEVBUF);
0a7de745 3544 return EEXIST;
2d21ac55 3545 }
d9a64523 3546
2d21ac55
A
3547 bp_new->bif_ifp = ifp;
3548 bp_new->bif_dlt = dlt;
3549 bp_new->bif_send = send;
3550 bp_new->bif_tap = tap;
d9a64523 3551
2d21ac55
A
3552 if (bp_first == NULL) {
3553 /* No other entries for this ifp */
3554 bp_new->bif_next = bpf_iflist;
3555 bpf_iflist = bp_new;
d9a64523 3556 } else {
5ba3f43e
A
3557 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
3558 /* Make this the first entry for this interface */
3559 if (bp_before_first != NULL) {
3560 /* point the previous to us */
3561 bp_before_first->bif_next = bp_new;
3562 } else {
3563 /* we're the new head */
3564 bpf_iflist = bp_new;
3565 }
3566 bp_new->bif_next = bp_first;
3567 } else {
3568 /* Add this after the last entry for this interface */
3569 bp_new->bif_next = bp_last->bif_next;
3570 bp_last->bif_next = bp_new;
3571 }
2d21ac55 3572 }
d9a64523 3573
1c79356b
A
3574 /*
3575 * Compute the length of the bpf header. This is not necessarily
3576 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
3577 * that the network layer header begins on a longword boundary (for
3578 * performance reasons and to alleviate alignment restrictions).
3579 */
2d21ac55 3580 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
316670eb
A
3581 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
3582 sizeof(struct bpf_hdr_ext)) - hdrlen;
d9a64523 3583
91447636 3584 /* Take a reference on the interface */
2d21ac55 3585 ifnet_reference(ifp);
91447636
A
3586
3587 lck_mtx_unlock(bpf_mlock);
1c79356b 3588
55e303ae 3589#ifndef __APPLE__
0a7de745 3590 if (bootverbose) {
39236c6e 3591 printf("bpf: %s attached\n", if_name(ifp));
0a7de745 3592 }
1c79356b 3593#endif
2d21ac55 3594
0a7de745 3595 return 0;
1c79356b
A
3596}
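/*
 * Sketch of the driver side of this KPI for an Ethernet-like interface;
 * the my_driver_* names are hypothetical.  Attach once at init, then
 * tap each packet on its way through the driver.
 */
#if 0 /* example only; illustrative, not built */
static errno_t
my_driver_init_bpf(ifnet_t ifp)
{
	/* No driver-specific send/tap callbacks */
	return bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
	    NULL, NULL);
}

static void
my_driver_input(ifnet_t ifp, mbuf_t m)
{
	/* The link-layer header already leads the mbuf, so no extra hdr */
	bpf_tap_in(ifp, DLT_EN10MB, m, NULL, 0);
	/* ... then hand m up the stack ... */
}
#endif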
3597
9bccf70c
A
3598/*
3599 * Detach bpf from an interface. This involves detaching each descriptor
3600 * associated with the interface, and leaving bd_bif NULL. Notify each
3601 * descriptor as it's detached so that any sleepers wake up and get
3602 * ENXIO.
3603 */
3604void
91447636 3605bpfdetach(struct ifnet *ifp)
9bccf70c 3606{
0a7de745
A
3607 struct bpf_if *bp, *bp_prev, *bp_next;
3608 struct bpf_d *d;
9bccf70c 3609
0a7de745 3610 if (bpf_debug != 0) {
5ba3f43e 3611 printf("%s: %s\n", __func__, if_name(ifp));
0a7de745 3612 }
3e170ce0 3613
91447636 3614 lck_mtx_lock(bpf_mlock);
9bccf70c 3615
fe8ab488
A
3616 /*
3617 * Build the list of devices attached to that interface
3618 * that we need to free while keeping the lock to maintain
3619 * the integrity of the interface list
3620 */
9bccf70c 3621 bp_prev = NULL;
2d21ac55
A
3622 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
3623 bp_next = bp->bif_next;
fe8ab488 3624
2d21ac55
A
3625 if (ifp != bp->bif_ifp) {
3626 bp_prev = bp;
3627 continue;
3628 }
fe8ab488 3629 /* Unlink from the interface list */
0a7de745 3630 if (bp_prev) {
fe8ab488 3631 bp_prev->bif_next = bp->bif_next;
0a7de745 3632 } else {
fe8ab488 3633 bpf_iflist = bp->bif_next;
0a7de745 3634 }
fe8ab488 3635
3e170ce0 3636 /* Detach the devices attached to the interface */
2d21ac55 3637 while ((d = bp->bif_dlist) != NULL) {
3e170ce0
A
3638 /*
3639 * Take an extra reference to prevent the device
3640 * from being freed when bpf_detachd() releases
3641 * the reference for the interface list
3642 */
3643 bpf_acquire_d(d);
3644 bpf_detachd(d, 0);
2d21ac55 3645 bpf_wakeup(d);
3e170ce0 3646 bpf_release_d(d);
2d21ac55 3647 }
2d21ac55 3648 ifnet_release(ifp);
9bccf70c
A
3649 }
3650
91447636 3651 lck_mtx_unlock(bpf_mlock);
9bccf70c
A
3652}
3653
1c79356b 3654void
91447636 3655bpf_init(__unused void *unused)
1c79356b 3656{
9bccf70c 3657#ifdef __APPLE__
0a7de745
A
3658 int i;
3659 int maj;
1c79356b 3660
91447636 3661 if (bpf_devsw_installed == 0) {
9bccf70c 3662 bpf_devsw_installed = 1;
39236c6e
A
3663 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
3664 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
3665 bpf_mlock_attr = lck_attr_alloc_init();
3666 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
9bccf70c
A
3667 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3668 if (maj == -1) {
0a7de745 3669 if (bpf_mlock_attr) {
91447636 3670 lck_attr_free(bpf_mlock_attr);
0a7de745
A
3671 }
3672 if (bpf_mlock_grp) {
91447636 3673 lck_grp_free(bpf_mlock_grp);
0a7de745
A
3674 }
3675 if (bpf_mlock_grp_attr) {
91447636 3676 lck_grp_attr_free(bpf_mlock_grp_attr);
0a7de745 3677 }
d9a64523 3678
2d21ac55
A
3679 bpf_mlock = NULL;
3680 bpf_mlock_attr = NULL;
3681 bpf_mlock_grp = NULL;
3682 bpf_mlock_grp_attr = NULL;
91447636 3683 bpf_devsw_installed = 0;
d9a64523 3684 printf("bpf_init: failed to allocate a major number\n");
55e303ae 3685 return;
9bccf70c 3686 }
91447636 3687
0a7de745 3688 for (i = 0; i < NBPFILTER; i++) {
55e303ae 3689 bpf_make_dev_t(maj);
0a7de745 3690 }
9bccf70c
A
3691 }
3692#else
3693 cdevsw_add(&bpf_cdevsw);
3694#endif
1c79356b
A
3695}
3696
9bccf70c 3697#ifndef __APPLE__
cb323159 3698SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
1c79356b 3699#endif
9bccf70c 3700
cb323159
A
3701static int
3702sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
3703{
3704#pragma unused(arg1, arg2)
3705 int i, err;
3706
3707 i = bpf_maxbufsize;
3708
3709 err = sysctl_handle_int(oidp, &i, 0, req);
3710 if (err != 0 || req->newptr == USER_ADDR_NULL) {
3711 return err;
3712 }
3713
3714 if (i < 0 || i > BPF_MAXSIZE_CAP) {
3715 i = BPF_MAXSIZE_CAP;
3716 }
3717
3718 bpf_maxbufsize = i;
3719 return err;
3720}