git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2020 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*
	29	* Copyright (c) 1990, 1991, 1993
	30	* The Regents of the University of California. All rights reserved.
	31	*
	32	* This code is derived from the Stanford/CMU enet packet filter,
	33	* (net/enet.c) distributed as part of 4.3BSD, and code contributed
	34	* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
	35	* Berkeley Laboratory.
	36	*
	37	* Redistribution and use in source and binary forms, with or without
	38	* modification, are permitted provided that the following conditions
	39	* are met:
	40	* 1. Redistributions of source code must retain the above copyright
	41	* notice, this list of conditions and the following disclaimer.
	42	* 2. Redistributions in binary form must reproduce the above copyright
	43	* notice, this list of conditions and the following disclaimer in the
	44	* documentation and/or other materials provided with the distribution.
	45	* 3. All advertising materials mentioning features or use of this software
	46	* must display the following acknowledgement:
	47	* This product includes software developed by the University of
	48	* California, Berkeley and its contributors.
	49	* 4. Neither the name of the University nor the names of its contributors
	50	* may be used to endorse or promote products derived from this software
	51	* without specific prior written permission.
	52	*
	53	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	54	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	55	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	56	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	57	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	58	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	59	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	60	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	61	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	62	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	63	* SUCH DAMAGE.
	64	*
	65	* @(#)bpf.c 8.2 (Berkeley) 3/28/94
	66	*
	67	* $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
	68	*/
	69	/*
	70	* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
	71	* support for mandatory and extensible security protections. This notice
	72	* is included in support of clause 2.2 (b) of the Apple Public License,
	73	* Version 2.0.
	74	*/
	75
	76	#include "bpf.h"
	77
	78	#ifndef __GNUC__
	79	#define inline
	80	#else
	81	#define inline __inline
	82	#endif
	83
	84	#include <sys/param.h>
	85	#include <sys/systm.h>
	86	#include <sys/conf.h>
	87	#include <sys/malloc.h>
	88	#include <sys/mbuf.h>
	89	#include <sys/time.h>
	90	#include <sys/proc.h>
	91	#include <sys/signalvar.h>
	92	#include <sys/filio.h>
	93	#include <sys/sockio.h>
	94	#include <sys/ttycom.h>
	95	#include <sys/filedesc.h>
	96	#include <sys/uio_internal.h>
	97	#include <sys/file_internal.h>
	98	#include <sys/event.h>
	99
	100	#include <sys/poll.h>
	101
	102	#include <sys/socket.h>
	103	#include <sys/socketvar.h>
	104	#include <sys/vnode.h>
	105
	106	#include <net/if.h>
	107	#include <net/bpf.h>
	108	#include <net/bpfdesc.h>
	109
	110	#include <netinet/in.h>
	111	#include <netinet/ip.h>
	112	#include <netinet/ip6.h>
	113	#include <netinet/in_pcb.h>
	114	#include <netinet/in_var.h>
	115	#include <netinet/ip_var.h>
	116	#include <netinet/tcp.h>
	117	#include <netinet/tcp_var.h>
	118	#include <netinet/udp.h>
	119	#include <netinet/udp_var.h>
	120	#include <netinet/if_ether.h>
	121	#include <netinet/isakmp.h>
	122	#include <netinet6/esp.h>
	123	#include <sys/kernel.h>
	124	#include <sys/sysctl.h>
	125	#include <net/firewire.h>
	126
	127	#include <miscfs/devfs/devfs.h>
	128	#include <net/dlil.h>
	129	#include <net/pktap.h>
	130
	131	#include <kern/locks.h>
	132	#include <kern/thread_call.h>
	133	#include <libkern/section_keywords.h>
	134
	135	#include <os/log.h>
	136
	137	extern int tvtohz(struct timeval *);
	138
	139	#define BPF_BUFSIZE 4096
	140	#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
	141
	142	#define PRINET 26 /* interruptible */
	143
	144	#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
	145	#define ESP_HDR_SIZE sizeof(struct newesp)
	146
	147	typedef void (pktcopyfunc_t)(const void , void *, size_t);
	148
	149	/*
	150	* The default read buffer size is patchable.
	151	*/
	152	static unsigned int bpf_bufsize = BPF_BUFSIZE;
	153	SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW \| CTLFLAG_LOCKED,
	154	&bpf_bufsize, 0, "");
	155
	156	static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
	157	extern const int copysize_limit_panic;
	158	#define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
	159	__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
	160	SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	161	&bpf_maxbufsize, 0,
	162	sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
	163
	164	static unsigned int bpf_maxdevices = 256;
	165	SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW \| CTLFLAG_LOCKED,
	166	&bpf_maxdevices, 0, "");
	167	/*
	168	* bpf_wantpktap controls the defaul visibility of DLT_PKTAP
	169	* For OS X is off by default so process need to use the ioctl BPF_WANT_PKTAP
	170	* explicitly to be able to use DLT_PKTAP.
	171	*/
	172	#if !XNU_TARGET_OS_OSX
	173	static unsigned int bpf_wantpktap = 1;
	174	#else /* XNU_TARGET_OS_OSX */
	175	static unsigned int bpf_wantpktap = 0;
	176	#endif /* XNU_TARGET_OS_OSX */
	177	SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW \| CTLFLAG_LOCKED,
	178	&bpf_wantpktap, 0, "");
	179
	180	static int bpf_debug = 0;
	181	SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW \| CTLFLAG_LOCKED,
	182	&bpf_debug, 0, "");
	183
	184	/*
	185	* bpf_iflist is the list of interfaces; each corresponds to an ifnet
	186	* bpf_dtab holds pointer to the descriptors, indexed by minor device #
	187	*/
	188	static struct bpf_if *bpf_iflist;
	189	#ifdef __APPLE__
	190	/*
	191	* BSD now stores the bpf_d in the dev_t which is a struct
	192	* on their system. Our dev_t is an int, so we still store
	193	* the bpf_d in a separate table indexed by minor device #.
	194	*
	195	* The value stored in bpf_dtab[n] represent three states:
	196	* NULL: device not opened
	197	* BPF_DEV_RESERVED: device opening or closing
	198	* other: device <n> opened with pointer to storage
	199	*/
	200	#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
	201	static struct bpf_d **bpf_dtab = NULL;
	202	static unsigned int bpf_dtab_size = 0;
	203	static unsigned int nbpfilter = 0;
	204
	205	decl_lck_mtx_data(static, bpf_mlock_data);
	206	static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
	207	static lck_grp_t *bpf_mlock_grp;
	208	static lck_grp_attr_t *bpf_mlock_grp_attr;
	209	static lck_attr_t *bpf_mlock_attr;
	210
	211	#endif /* __APPLE__ */
	212
	213	static int bpf_allocbufs(struct bpf_d *);
	214	static errno_t bpf_attachd(struct bpf_d d, struct bpf_if bp);
	215	static int bpf_detachd(struct bpf_d *d, int);
	216	static void bpf_freed(struct bpf_d *);
	217	static int bpf_movein(struct uio *, int,
	218	struct mbuf *, struct sockaddr , int *);
	219	static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
	220	static void bpf_timed_out(void , void );
	221	static void bpf_wakeup(struct bpf_d *);
	222	static u_int get_pkt_trunc_len(u_char *, u_int);
	223	static void catchpacket(struct bpf_d , struct bpf_packet , u_int, int);
	224	static void reset_d(struct bpf_d *);
	225	static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
	226	static int bpf_getdltlist(struct bpf_d , caddr_t, struct proc );
	227	static int bpf_setdlt(struct bpf_d *, u_int);
	228	static int bpf_set_traffic_class(struct bpf_d *, int);
	229	static void bpf_set_packet_service_class(struct mbuf *, int);
	230
	231	static void bpf_acquire_d(struct bpf_d *);
	232	static void bpf_release_d(struct bpf_d *);
	233
	234	static int bpf_devsw_installed;
	235
	236	void bpf_init(void *unused);
	237	static int bpf_tap_callback(struct ifnet ifp, struct mbuf m);
	238
	239	/*
	240	* Darwin differs from BSD here, the following are static
	241	* on BSD and not static on Darwin.
	242	*/
	243	d_open_t bpfopen;
	244	d_close_t bpfclose;
	245	d_read_t bpfread;
	246	d_write_t bpfwrite;
	247	ioctl_fcn_t bpfioctl;
	248	select_fcn_t bpfselect;
	249
	250	/* Darwin's cdevsw struct differs slightly from BSDs */
	251	#define CDEV_MAJOR 23
	252	static const struct cdevsw bpf_cdevsw = {
	253	.d_open = bpfopen,
	254	.d_close = bpfclose,
	255	.d_read = bpfread,
	256	.d_write = bpfwrite,
	257	.d_ioctl = bpfioctl,
	258	.d_stop = eno_stop,
	259	.d_reset = eno_reset,
	260	.d_ttys = NULL,
	261	.d_select = bpfselect,
	262	.d_mmap = eno_mmap,
	263	.d_strategy = eno_strat,
	264	.d_reserved_1 = eno_getc,
	265	.d_reserved_2 = eno_putc,
	266	.d_type = 0
	267	};
	268
	269	#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
	270
	271	static int
	272	bpf_movein(struct uio uio, int linktype, struct mbuf *mp,
	273	struct sockaddr sockp, int datlen)
	274	{
	275	struct mbuf *m;
	276	int error;
	277	int len;
	278	uint8_t sa_family;
	279	int hlen;
	280
	281	switch (linktype) {
	282	#if SLIP
	283	case DLT_SLIP:
	284	sa_family = AF_INET;
	285	hlen = 0;
	286	break;
	287	#endif /* SLIP */
	288
	289	case DLT_EN10MB:
	290	sa_family = AF_UNSPEC;
	291	/* XXX Would MAXLINKHDR be better? */
	292	hlen = sizeof(struct ether_header);
	293	break;
	294
	295	#if FDDI
	296	case DLT_FDDI:
	297	#if defined(__FreeBSD__) \|\| defined(__bsdi__)
	298	sa_family = AF_IMPLINK;
	299	hlen = 0;
	300	#else
	301	sa_family = AF_UNSPEC;
	302	/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
	303	hlen = 24;
	304	#endif
	305	break;
	306	#endif /* FDDI */
	307
	308	case DLT_RAW:
	309	case DLT_NULL:
	310	sa_family = AF_UNSPEC;
	311	hlen = 0;
	312	break;
	313
	314	#ifdef __FreeBSD__
	315	case DLT_ATM_RFC1483:
	316	/*
	317	* en atm driver requires 4-byte atm pseudo header.
	318	* though it isn't standard, vpi:vci needs to be
	319	* specified anyway.
	320	*/
	321	sa_family = AF_UNSPEC;
	322	hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
	323	break;
	324	#endif
	325
	326	case DLT_PPP:
	327	sa_family = AF_UNSPEC;
	328	hlen = 4; /* This should match PPP_HDRLEN */
	329	break;
	330
	331	case DLT_APPLE_IP_OVER_IEEE1394:
	332	sa_family = AF_UNSPEC;
	333	hlen = sizeof(struct firewire_header);
	334	break;
	335
	336	case DLT_IEEE802_11: /* IEEE 802.11 wireless */
	337	sa_family = AF_IEEE80211;
	338	hlen = 0;
	339	break;
	340
	341	case DLT_IEEE802_11_RADIO:
	342	sa_family = AF_IEEE80211;
	343	hlen = 0;
	344	break;
	345
	346	default:
	347	return EIO;
	348	}
	349
	350	// LP64todo - fix this!
	351	len = uio_resid(uio);
	352	*datlen = len - hlen;
	353	if ((unsigned)len > MCLBYTES) {
	354	return EIO;
	355	}
	356
	357	if (sockp) {
	358	/*
	359	* Build a sockaddr based on the data link layer type.
	360	* We do this at this level because the ethernet header
	361	* is copied directly into the data field of the sockaddr.
	362	* In the case of SLIP, there is no header and the packet
	363	* is forwarded as is.
	364	* Also, we are careful to leave room at the front of the mbuf
	365	* for the link level header.
	366	*/
	367	if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
	368	return EIO;
	369	}
	370	sockp->sa_family = sa_family;
	371	} else {
	372	/*
	373	* We're directly sending the packet data supplied by
	374	* the user; we don't need to make room for the link
	375	* header, and don't need the header length value any
	376	* more, so set it to 0.
	377	*/
	378	hlen = 0;
	379	}
	380
	381	MGETHDR(m, M_WAIT, MT_DATA);
	382	if (m == 0) {
	383	return ENOBUFS;
	384	}
	385	if ((unsigned)len > MHLEN) {
	386	MCLGET(m, M_WAIT);
	387	if ((m->m_flags & M_EXT) == 0) {
	388	error = ENOBUFS;
	389	goto bad;
	390	}
	391	}
	392	m->m_pkthdr.len = m->m_len = len;
	393	m->m_pkthdr.rcvif = NULL;
	394	*mp = m;
	395
	396	/*
	397	* Make room for link header.
	398	*/
	399	if (hlen != 0) {
	400	m->m_pkthdr.len -= hlen;
	401	m->m_len -= hlen;
	402	m->m_data += hlen; /* XXX */
	403	error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
	404	if (error) {
	405	goto bad;
	406	}
	407	}
	408	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	409	if (error) {
	410	goto bad;
	411	}
	412
	413	/* Check for multicast destination */
	414	switch (linktype) {
	415	case DLT_EN10MB: {
	416	struct ether_header *eh;
	417
	418	eh = mtod(m, struct ether_header *);
	419	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
	420	if (_ether_cmp(etherbroadcastaddr,
	421	eh->ether_dhost) == 0) {
	422	m->m_flags \|= M_BCAST;
	423	} else {
	424	m->m_flags \|= M_MCAST;
	425	}
	426	}
	427	break;
	428	}
	429	}
	430
	431	return 0;
	432	bad:
	433	m_freem(m);
	434	return error;
	435	}
	436
	437	#ifdef __APPLE__
	438
	439	/*
	440	* The dynamic addition of a new device node must block all processes that
	441	* are opening the last device so that no process will get an unexpected
	442	* ENOENT
	443	*/
	444	static void
	445	bpf_make_dev_t(int maj)
	446	{
	447	static int bpf_growing = 0;
	448	unsigned int cur_size = nbpfilter, i;
	449
	450	if (nbpfilter >= bpf_maxdevices) {
	451	return;
	452	}
	453
	454	while (bpf_growing) {
	455	/* Wait until new device has been created */
	456	(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	457	}
	458	if (nbpfilter > cur_size) {
	459	/* other thread grew it already */
	460	return;
	461	}
	462	bpf_growing = 1;
	463
	464	/* need to grow bpf_dtab first */
	465	if (nbpfilter == bpf_dtab_size) {
	466	int new_dtab_size;
	467	struct bpf_d **new_dtab = NULL;
	468	struct bpf_d **old_dtab = NULL;
	469
	470	new_dtab_size = bpf_dtab_size + NBPFILTER;
	471	new_dtab = (struct bpf_d **)_MALLOC(
	472	sizeof(struct bpf_d ) new_dtab_size, M_DEVBUF, M_WAIT);
	473	if (new_dtab == 0) {
	474	printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
	475	goto done;
	476	}
	477	if (bpf_dtab) {
	478	bcopy(bpf_dtab, new_dtab,
	479	sizeof(struct bpf_d ) bpf_dtab_size);
	480	}
	481	bzero(new_dtab + bpf_dtab_size,
	482	sizeof(struct bpf_d ) NBPFILTER);
	483	old_dtab = bpf_dtab;
	484	bpf_dtab = new_dtab;
	485	bpf_dtab_size = new_dtab_size;
	486	if (old_dtab != NULL) {
	487	_FREE(old_dtab, M_DEVBUF);
	488	}
	489	}
	490	i = nbpfilter++;
	491	(void) devfs_make_node(makedev(maj, i),
	492	DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	493	"bpf%d", i);
	494	done:
	495	bpf_growing = 0;
	496	wakeup((caddr_t)&bpf_growing);
	497	}
	498
	499	#endif
	500
	501	/*
	502	* Attach file to the bpf interface, i.e. make d listen on bp.
	503	*/
	504	static errno_t
	505	bpf_attachd(struct bpf_d d, struct bpf_if bp)
	506	{
	507	int first = bp->bif_dlist == NULL;
	508	int error = 0;
	509
	510	/*
	511	* Point d at bp, and add d to the interface's list of listeners.
	512	* Finally, point the driver's bpf cookie at the interface so
	513	* it will divert packets to bpf.
	514	*/
	515	d->bd_bif = bp;
	516	d->bd_next = bp->bif_dlist;
	517	bp->bif_dlist = d;
	518
	519	/*
	520	* Take a reference on the device even if an error is returned
	521	* because we keep the device in the interface's list of listeners
	522	*/
	523	bpf_acquire_d(d);
	524
	525	if (first) {
	526	/* Find the default bpf entry for this ifp */
	527	if (bp->bif_ifp->if_bpf == NULL) {
	528	struct bpf_if tmp, primary = NULL;
	529
	530	for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
	531	if (tmp->bif_ifp == bp->bif_ifp) {
	532	primary = tmp;
	533	break;
	534	}
	535	}
	536	bp->bif_ifp->if_bpf = primary;
	537	}
	538	/* Only call dlil_set_bpf_tap for primary dlt */
	539	if (bp->bif_ifp->if_bpf == bp) {
	540	dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
	541	bpf_tap_callback);
	542	}
	543
	544	if (bp->bif_tap != NULL) {
	545	error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
	546	BPF_TAP_INPUT_OUTPUT);
	547	}
	548	}
	549
	550	/*
	551	* Reset the detach flags in case we previously detached an interface
	552	*/
	553	d->bd_flags &= ~(BPF_DETACHING \| BPF_DETACHED);
	554
	555	if (bp->bif_dlt == DLT_PKTAP) {
	556	d->bd_flags \|= BPF_FINALIZE_PKTAP;
	557	} else {
	558	d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	559	}
	560	return error;
	561	}
	562
	563	/*
	564	* Detach a file from its interface.
	565	*
	566	* Return 1 if was closed by some thread, 0 otherwise
	567	*/
	568	static int
	569	bpf_detachd(struct bpf_d *d, int closing)
	570	{
	571	struct bpf_d **p;
	572	struct bpf_if *bp;
	573	struct ifnet *ifp;
	574
	575	int bpf_closed = d->bd_flags & BPF_CLOSING;
	576	/*
	577	* Some other thread already detached
	578	*/
	579	if ((d->bd_flags & (BPF_DETACHED \| BPF_DETACHING)) != 0) {
	580	goto done;
	581	}
	582	/*
	583	* This thread is doing the detach
	584	*/
	585	d->bd_flags \|= BPF_DETACHING;
	586
	587	ifp = d->bd_bif->bif_ifp;
	588	bp = d->bd_bif;
	589
	590	if (bpf_debug != 0) {
	591	printf("%s: %llx %s%s\n",
	592	__func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
	593	if_name(ifp), closing ? " closing" : "");
	594	}
	595
	596	/* Remove d from the interface's descriptor list. */
	597	p = &bp->bif_dlist;
	598	while (*p != d) {
	599	p = &(*p)->bd_next;
	600	if (*p == 0) {
	601	panic("bpf_detachd: descriptor not in list");
	602	}
	603	}
	604	p = (p)->bd_next;
	605	if (bp->bif_dlist == 0) {
	606	/*
	607	* Let the driver know that there are no more listeners.
	608	*/
	609	/* Only call dlil_set_bpf_tap for primary dlt */
	610	if (bp->bif_ifp->if_bpf == bp) {
	611	dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
	612	}
	613	if (bp->bif_tap) {
	614	bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
	615	}
	616
	617	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
	618	if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
	619	break;
	620	}
	621	}
	622	if (bp == NULL) {
	623	ifp->if_bpf = NULL;
	624	}
	625	}
	626	d->bd_bif = NULL;
	627	/*
	628	* Check if this descriptor had requested promiscuous mode.
	629	* If so, turn it off.
	630	*/
	631	if (d->bd_promisc) {
	632	d->bd_promisc = 0;
	633	lck_mtx_unlock(bpf_mlock);
	634	if (ifnet_set_promiscuous(ifp, 0)) {
	635	/*
	636	* Something is really wrong if we were able to put
	637	* the driver into promiscuous mode, but can't
	638	* take it out.
	639	* Most likely the network interface is gone.
	640	*/
	641	printf("%s: ifnet_set_promiscuous failed\n", __func__);
	642	}
	643	lck_mtx_lock(bpf_mlock);
	644	}
	645
	646	/*
	647	* Wake up other thread that are waiting for this thread to finish
	648	* detaching
	649	*/
	650	d->bd_flags &= ~BPF_DETACHING;
	651	d->bd_flags \|= BPF_DETACHED;
	652
	653	/* Refresh the local variable as d could have been modified */
	654	bpf_closed = d->bd_flags & BPF_CLOSING;
	655	/*
	656	* Note that We've kept the reference because we may have dropped
	657	* the lock when turning off promiscuous mode
	658	*/
	659	bpf_release_d(d);
	660
	661	done:
	662	/*
	663	* When closing makes sure no other thread refer to the bpf_d
	664	*/
	665	if (bpf_debug != 0) {
	666	printf("%s: %llx done\n",
	667	__func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	668	}
	669	/*
	670	* Let the caller know the bpf_d is closed
	671	*/
	672	if (bpf_closed) {
	673	return 1;
	674	} else {
	675	return 0;
	676	}
	677	}
	678
	679	/*
	680	* Start asynchronous timer, if necessary.
	681	* Must be called with bpf_mlock held.
	682	*/
	683	static void
	684	bpf_start_timer(struct bpf_d *d)
	685	{
	686	uint64_t deadline;
	687	struct timeval tv;
	688
	689	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
	690	tv.tv_sec = d->bd_rtout / hz;
	691	tv.tv_usec = (d->bd_rtout % hz) * tick;
	692
	693	clock_interval_to_deadline(
	694	(uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
	695	NSEC_PER_USEC, &deadline);
	696	/*
	697	* The state is BPF_IDLE, so the timer hasn't
	698	* been started yet, and hasn't gone off yet;
	699	* there is no thread call scheduled, so this
	700	* won't change the schedule.
	701	*
	702	* XXX - what if, by the time it gets entered,
	703	* the deadline has already passed?
	704	*/
	705	thread_call_enter_delayed(d->bd_thread_call, deadline);
	706	d->bd_state = BPF_WAITING;
	707	}
	708	}
	709
	710	/*
	711	* Cancel asynchronous timer.
	712	* Must be called with bpf_mlock held.
	713	*/
	714	static boolean_t
	715	bpf_stop_timer(struct bpf_d *d)
	716	{
	717	/*
	718	* If the timer has already gone off, this does nothing.
	719	* Our caller is expected to set d->bd_state to BPF_IDLE,
	720	* with the bpf_mlock, after we are called. bpf_timed_out()
	721	* also grabs bpf_mlock, so, if the timer has gone off and
	722	* bpf_timed_out() hasn't finished, it's waiting for the
	723	* lock; when this thread releases the lock, it will
	724	* find the state is BPF_IDLE, and just release the
	725	* lock and return.
	726	*/
	727	return thread_call_cancel(d->bd_thread_call);
	728	}
	729
	730	void
	731	bpf_acquire_d(struct bpf_d *d)
	732	{
	733	void *lr_saved = __builtin_return_address(0);
	734
	735	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
	736
	737	d->bd_refcnt += 1;
	738
	739	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
	740	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
	741	}
	742
	743	void
	744	bpf_release_d(struct bpf_d *d)
	745	{
	746	void *lr_saved = __builtin_return_address(0);
	747
	748	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
	749
	750	if (d->bd_refcnt <= 0) {
	751	panic("%s: %p refcnt <= 0", __func__, d);
	752	}
	753
	754	d->bd_refcnt -= 1;
	755
	756	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
	757	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
	758
	759	if (d->bd_refcnt == 0) {
	760	/* Assert the device is detached */
	761	if ((d->bd_flags & BPF_DETACHED) == 0) {
	762	panic("%s: %p BPF_DETACHED not set", __func__, d);
	763	}
	764
	765	_FREE(d, M_DEVBUF);
	766	}
	767	}
	768
	769	/*
	770	* Open ethernet device. Returns ENXIO for illegal minor device number,
	771	* EBUSY if file is open by another process.
	772	*/
	773	/* ARGSUSED */
	774	int
	775	bpfopen(dev_t dev, int flags, __unused int fmt,
	776	struct proc *p)
	777	{
	778	struct bpf_d *d;
	779
	780	lck_mtx_lock(bpf_mlock);
	781	if ((unsigned int) minor(dev) >= nbpfilter) {
	782	lck_mtx_unlock(bpf_mlock);
	783	return ENXIO;
	784	}
	785	/*
	786	* New device nodes are created on demand when opening the last one.
	787	* The programming model is for processes to loop on the minor starting
	788	* at 0 as long as EBUSY is returned. The loop stops when either the
	789	* open succeeds or an error other that EBUSY is returned. That means
	790	* that bpf_make_dev_t() must block all processes that are opening the
	791	* last node. If not all processes are blocked, they could unexpectedly
	792	* get ENOENT and abort their opening loop.
	793	*/
	794	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
	795	bpf_make_dev_t(major(dev));
	796	}
	797
	798	/*
	799	* Each minor can be opened by only one process. If the requested
	800	* minor is in use, return EBUSY.
	801	*
	802	* Important: bpfopen() and bpfclose() have to check and set the status
	803	* of a device in the same lockin context otherwise the device may be
	804	* leaked because the vnode use count will be unpextectly greater than 1
	805	* when close() is called.
	806	*/
	807	if (bpf_dtab[minor(dev)] == NULL) {
	808	/* Reserve while opening */
	809	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	810	} else {
	811	lck_mtx_unlock(bpf_mlock);
	812	return EBUSY;
	813	}
	814	d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
	815	M_WAIT \| M_ZERO);
	816	if (d == NULL) {
	817	/* this really is a catastrophic failure */
	818	printf("bpfopen: malloc bpf_d failed\n");
	819	bpf_dtab[minor(dev)] = NULL;
	820	lck_mtx_unlock(bpf_mlock);
	821	return ENOMEM;
	822	}
	823
	824	/* Mark "in use" and do most initialization. */
	825	bpf_acquire_d(d);
	826	d->bd_bufsize = bpf_bufsize;
	827	d->bd_sig = SIGIO;
	828	d->bd_seesent = 1;
	829	d->bd_oflags = flags;
	830	d->bd_state = BPF_IDLE;
	831	d->bd_traffic_class = SO_TC_BE;
	832	d->bd_flags \|= BPF_DETACHED;
	833	if (bpf_wantpktap) {
	834	d->bd_flags \|= BPF_WANT_PKTAP;
	835	} else {
	836	d->bd_flags &= ~BPF_WANT_PKTAP;
	837	}
	838	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	839	if (d->bd_thread_call == NULL) {
	840	printf("bpfopen: malloc thread call failed\n");
	841	bpf_dtab[minor(dev)] = NULL;
	842	bpf_release_d(d);
	843	lck_mtx_unlock(bpf_mlock);
	844
	845	return ENOMEM;
	846	}
	847	d->bd_opened_by = p;
	848	uuid_generate(d->bd_uuid);
	849
	850	bpf_dtab[minor(dev)] = d; /* Mark opened */
	851	lck_mtx_unlock(bpf_mlock);
	852
	853	return 0;
	854	}
	855
	856	/*
	857	* Close the descriptor by detaching it from its interface,
	858	* deallocating its buffers, and marking it free.
	859	*/
	860	/* ARGSUSED */
	861	int
	862	bpfclose(dev_t dev, __unused int flags, __unused int fmt,
	863	__unused struct proc *p)
	864	{
	865	struct bpf_d *d;
	866
	867	/* Take BPF lock to ensure no other thread is using the device */
	868	lck_mtx_lock(bpf_mlock);
	869
	870	d = bpf_dtab[minor(dev)];
	871	if (d == NULL \|\| d == BPF_DEV_RESERVED) {
	872	lck_mtx_unlock(bpf_mlock);
	873	return ENXIO;
	874	}
	875
	876	/*
	877	* Other threads may call bpd_detachd() if we drop the bpf_mlock
	878	*/
	879	d->bd_flags \|= BPF_CLOSING;
	880
	881	if (bpf_debug != 0) {
	882	printf("%s: %llx\n",
	883	__func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	884	}
	885
	886	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
	887
	888	/*
	889	* Deal with any in-progress timeouts.
	890	*/
	891	switch (d->bd_state) {
	892	case BPF_IDLE:
	893	/*
	894	* Not waiting for a timeout, and no timeout happened.
	895	*/
	896	break;
	897
	898	case BPF_WAITING:
	899	/*
	900	* Waiting for a timeout.
	901	* Cancel any timer that has yet to go off,
	902	* and mark the state as "closing".
	903	* Then drop the lock to allow any timers that
	904	* have gone off to run to completion, and wait
	905	* for them to finish.
	906	*/
	907	if (!bpf_stop_timer(d)) {
	908	/*
	909	* There was no pending call, so the call must
	910	* have been in progress. Wait for the call to
	911	* complete; we have to drop the lock while
	912	* waiting. to let the in-progrss call complete
	913	*/
	914	d->bd_state = BPF_DRAINING;
	915	while (d->bd_state == BPF_DRAINING) {
	916	msleep((caddr_t)d, bpf_mlock, PRINET,
	917	"bpfdraining", NULL);
	918	}
	919	}
	920	d->bd_state = BPF_IDLE;
	921	break;
	922
	923	case BPF_TIMED_OUT:
	924	/*
	925	* Timer went off, and the timeout routine finished.
	926	*/
	927	d->bd_state = BPF_IDLE;
	928	break;
	929
	930	case BPF_DRAINING:
	931	/*
	932	* Another thread is blocked on a close waiting for
	933	* a timeout to finish.
	934	* This "shouldn't happen", as the first thread to enter
	935	* bpfclose() will set bpf_dtab[minor(dev)] to 1, and
	936	* all subsequent threads should see that and fail with
	937	* ENXIO.
	938	*/
	939	panic("Two threads blocked in a BPF close");
	940	break;
	941	}
	942
	943	if (d->bd_bif) {
	944	bpf_detachd(d, 1);
	945	}
	946	selthreadclear(&d->bd_sel);
	947	thread_call_free(d->bd_thread_call);
	948
	949	while (d->bd_hbuf_read != 0) {
	950	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	951	}
	952
	953	bpf_freed(d);
	954
	955	/* Mark free in same context as bpfopen comes to check */
	956	bpf_dtab[minor(dev)] = NULL; /* Mark closed */
	957
	958	bpf_release_d(d);
	959
	960	lck_mtx_unlock(bpf_mlock);
	961
	962	return 0;
	963	}
	964
	965	#define BPF_SLEEP bpf_sleep
	966
	967	static int
	968	bpf_sleep(struct bpf_d d, int pri, const char wmesg, int timo)
	969	{
	970	u_int64_t abstime = 0;
	971
	972	if (timo != 0) {
	973	clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
	974	}
	975
	976	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
	977	}
	978
	979	static void
	980	bpf_finalize_pktap(struct bpf_hdr hp, struct pktap_header pktaphdr)
	981	{
	982	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
	983	struct pktap_v2_hdr *pktap_v2_hdr;
	984
	985	pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
	986
	987	if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
	988	pktap_v2_finalize_proc_info(pktap_v2_hdr);
	989	}
	990	} else {
	991	if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
	992	pktap_finalize_proc_info(pktaphdr);
	993	}
	994
	995	if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
	996	hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
	997	hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
	998	}
	999	}
	1000	}
	1001
	1002	/*
	1003	* Rotate the packet buffers in descriptor d. Move the store buffer
	1004	* into the hold slot, and the free buffer into the store slot.
	1005	* Zero the length of the new store buffer.
	1006	*/
	1007	#define ROTATE_BUFFERS(d) \
	1008	if (d->bd_hbuf_read != 0) \
	1009	panic("rotating bpf buffers during read"); \
	1010	(d)->bd_hbuf = (d)->bd_sbuf; \
	1011	(d)->bd_hlen = (d)->bd_slen; \
	1012	(d)->bd_hcnt = (d)->bd_scnt; \
	1013	(d)->bd_sbuf = (d)->bd_fbuf; \
	1014	(d)->bd_slen = 0; \
	1015	(d)->bd_scnt = 0; \
	1016	(d)->bd_fbuf = NULL;
	1017	/*
	1018	* bpfread - read next chunk of packets from buffers
	1019	*/
	1020	int
	1021	bpfread(dev_t dev, struct uio *uio, int ioflag)
	1022	{
	1023	struct bpf_d *d;
	1024	caddr_t hbuf;
	1025	int timed_out, hbuf_len;
	1026	int error;
	1027	int flags;
	1028
	1029	lck_mtx_lock(bpf_mlock);
	1030
	1031	d = bpf_dtab[minor(dev)];
	1032	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	1033	(d->bd_flags & BPF_CLOSING) != 0) {
	1034	lck_mtx_unlock(bpf_mlock);
	1035	return ENXIO;
	1036	}
	1037
	1038	bpf_acquire_d(d);
	1039
	1040	/*
	1041	* Restrict application to use a buffer the same size as
	1042	* as kernel buffers.
	1043	*/
	1044	if (uio_resid(uio) != d->bd_bufsize) {
	1045	bpf_release_d(d);
	1046	lck_mtx_unlock(bpf_mlock);
	1047	return EINVAL;
	1048	}
	1049
	1050	if (d->bd_state == BPF_WAITING) {
	1051	bpf_stop_timer(d);
	1052	}
	1053
	1054	timed_out = (d->bd_state == BPF_TIMED_OUT);
	1055	d->bd_state = BPF_IDLE;
	1056
	1057	while (d->bd_hbuf_read != 0) {
	1058	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	1059	}
	1060
	1061	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1062	bpf_release_d(d);
	1063	lck_mtx_unlock(bpf_mlock);
	1064	return ENXIO;
	1065	}
	1066	/*
	1067	* If the hold buffer is empty, then do a timed sleep, which
	1068	* ends when the timeout expires or when enough packets
	1069	* have arrived to fill the store buffer.
	1070	*/
	1071	while (d->bd_hbuf == 0) {
	1072	if ((d->bd_immediate \|\| timed_out \|\| (ioflag & IO_NDELAY)) &&
	1073	d->bd_slen != 0) {
	1074	/*
	1075	* We're in immediate mode, or are reading
	1076	* in non-blocking mode, or a timer was
	1077	* started before the read (e.g., by select()
	1078	* or poll()) and has expired and a packet(s)
	1079	* either arrived since the previous
	1080	* read or arrived while we were asleep.
	1081	* Rotate the buffers and return what's here.
	1082	*/
	1083	ROTATE_BUFFERS(d);
	1084	break;
	1085	}
	1086
	1087	/*
	1088	* No data is available, check to see if the bpf device
	1089	* is still pointed at a real interface. If not, return
	1090	* ENXIO so that the userland process knows to rebind
	1091	* it before using it again.
	1092	*/
	1093	if (d->bd_bif == NULL) {
	1094	bpf_release_d(d);
	1095	lck_mtx_unlock(bpf_mlock);
	1096	return ENXIO;
	1097	}
	1098	if (ioflag & IO_NDELAY) {
	1099	bpf_release_d(d);
	1100	lck_mtx_unlock(bpf_mlock);
	1101	return EWOULDBLOCK;
	1102	}
	1103	error = BPF_SLEEP(d, PRINET \| PCATCH, "bpf", d->bd_rtout);
	1104	/*
	1105	* Make sure device is still opened
	1106	*/
	1107	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1108	bpf_release_d(d);
	1109	lck_mtx_unlock(bpf_mlock);
	1110	return ENXIO;
	1111	}
	1112
	1113	while (d->bd_hbuf_read != 0) {
	1114	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
	1115	NULL);
	1116	}
	1117
	1118	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1119	bpf_release_d(d);
	1120	lck_mtx_unlock(bpf_mlock);
	1121	return ENXIO;
	1122	}
	1123
	1124	if (error == EINTR \|\| error == ERESTART) {
	1125	if (d->bd_hbuf != NULL) {
	1126	/*
	1127	* Because we msleep, the hold buffer might
	1128	* be filled when we wake up. Avoid rotating
	1129	* in this case.
	1130	*/
	1131	break;
	1132	}
	1133	if (d->bd_slen != 0) {
	1134	/*
	1135	* Sometimes we may be interrupted often and
	1136	* the sleep above will not timeout.
	1137	* Regardless, we should rotate the buffers
	1138	* if there's any new data pending and
	1139	* return it.
	1140	*/
	1141	ROTATE_BUFFERS(d);
	1142	break;
	1143	}
	1144	bpf_release_d(d);
	1145	lck_mtx_unlock(bpf_mlock);
	1146	if (error == ERESTART) {
	1147	printf("%s: %llx ERESTART to EINTR\n",
	1148	__func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	1149	error = EINTR;
	1150	}
	1151	return error;
	1152	}
	1153	if (error == EWOULDBLOCK) {
	1154	/*
	1155	* On a timeout, return what's in the buffer,
	1156	* which may be nothing. If there is something
	1157	* in the store buffer, we can rotate the buffers.
	1158	*/
	1159	if (d->bd_hbuf) {
	1160	/*
	1161	* We filled up the buffer in between
	1162	* getting the timeout and arriving
	1163	* here, so we don't need to rotate.
	1164	*/
	1165	break;
	1166	}
	1167
	1168	if (d->bd_slen == 0) {
	1169	bpf_release_d(d);
	1170	lck_mtx_unlock(bpf_mlock);
	1171	return 0;
	1172	}
	1173	ROTATE_BUFFERS(d);
	1174	break;
	1175	}
	1176	}
	1177	/*
	1178	* At this point, we know we have something in the hold slot.
	1179	*/
	1180
	1181	/*
	1182	* Set the hold buffer read. So we do not
	1183	* rotate the buffers until the hold buffer
	1184	* read is complete. Also to avoid issues resulting
	1185	* from page faults during disk sleep (<rdar://problem/13436396>).
	1186	*/
	1187	d->bd_hbuf_read = 1;
	1188	hbuf = d->bd_hbuf;
	1189	hbuf_len = d->bd_hlen;
	1190	flags = d->bd_flags;
	1191	lck_mtx_unlock(bpf_mlock);
	1192
	1193	#ifdef __APPLE__
	1194	/*
	1195	* Before we move data to userland, we fill out the extended
	1196	* header fields.
	1197	*/
	1198	if (flags & BPF_EXTENDED_HDR) {
	1199	char *p;
	1200
	1201	p = hbuf;
	1202	while (p < hbuf + hbuf_len) {
	1203	struct bpf_hdr_ext *ehp;
	1204	uint32_t flowid;
	1205	struct so_procinfo soprocinfo;
	1206	int found = 0;
	1207
	1208	ehp = (struct bpf_hdr_ext )(void )p;
	1209	if ((flowid = ehp->bh_flowid) != 0) {
	1210	if (ehp->bh_proto == IPPROTO_TCP) {
	1211	found = inp_findinpcb_procinfo(&tcbinfo,
	1212	flowid, &soprocinfo);
	1213	} else if (ehp->bh_proto == IPPROTO_UDP) {
	1214	found = inp_findinpcb_procinfo(&udbinfo,
	1215	flowid, &soprocinfo);
	1216	}
	1217	if (found == 1) {
	1218	ehp->bh_pid = soprocinfo.spi_pid;
	1219	strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
	1220	}
	1221	ehp->bh_flowid = 0;
	1222	}
	1223
	1224	if (flags & BPF_FINALIZE_PKTAP) {
	1225	struct pktap_header *pktaphdr;
	1226
	1227	pktaphdr = (struct pktap_header )(void )
	1228	(p + BPF_WORDALIGN(ehp->bh_hdrlen));
	1229
	1230	bpf_finalize_pktap((struct bpf_hdr *) ehp,
	1231	pktaphdr);
	1232	}
	1233	p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
	1234	}
	1235	} else if (flags & BPF_FINALIZE_PKTAP) {
	1236	char *p;
	1237
	1238	p = hbuf;
	1239	while (p < hbuf + hbuf_len) {
	1240	struct bpf_hdr *hp;
	1241	struct pktap_header *pktaphdr;
	1242
	1243	hp = (struct bpf_hdr )(void )p;
	1244	pktaphdr = (struct pktap_header )(void )
	1245	(p + BPF_WORDALIGN(hp->bh_hdrlen));
	1246
	1247	bpf_finalize_pktap(hp, pktaphdr);
	1248
	1249	p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
	1250	}
	1251	}
	1252	#endif
	1253
	1254	/*
	1255	* Move data from hold buffer into user space.
	1256	* We know the entire buffer is transferred since
	1257	* we checked above that the read buffer is bpf_bufsize bytes.
	1258	*/
	1259	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
	1260
	1261	lck_mtx_lock(bpf_mlock);
	1262	/*
	1263	* Make sure device is still opened
	1264	*/
	1265	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1266	bpf_release_d(d);
	1267	lck_mtx_unlock(bpf_mlock);
	1268	return ENXIO;
	1269	}
	1270
	1271	d->bd_hbuf_read = 0;
	1272	d->bd_fbuf = d->bd_hbuf;
	1273	d->bd_hbuf = NULL;
	1274	d->bd_hlen = 0;
	1275	d->bd_hcnt = 0;
	1276	wakeup((caddr_t)d);
	1277
	1278	bpf_release_d(d);
	1279	lck_mtx_unlock(bpf_mlock);
	1280	return error;
	1281	}
	1282
	1283	/*
	1284	* If there are processes sleeping on this descriptor, wake them up.
	1285	*/
	1286	static void
	1287	bpf_wakeup(struct bpf_d *d)
	1288	{
	1289	if (d->bd_state == BPF_WAITING) {
	1290	bpf_stop_timer(d);
	1291	d->bd_state = BPF_IDLE;
	1292	}
	1293	wakeup((caddr_t)d);
	1294	if (d->bd_async && d->bd_sig && d->bd_sigio) {
	1295	pgsigio(d->bd_sigio, d->bd_sig);
	1296	}
	1297
	1298	selwakeup(&d->bd_sel);
	1299	if ((d->bd_flags & BPF_KNOTE)) {
	1300	KNOTE(&d->bd_sel.si_note, 1);
	1301	}
	1302	}
	1303
	1304	static void
	1305	bpf_timed_out(void arg, __unused void dummy)
	1306	{
	1307	struct bpf_d d = (struct bpf_d )arg;
	1308
	1309	lck_mtx_lock(bpf_mlock);
	1310	if (d->bd_state == BPF_WAITING) {
	1311	/*
	1312	* There's a select or kqueue waiting for this; if there's
	1313	* now stuff to read, wake it up.
	1314	*/
	1315	d->bd_state = BPF_TIMED_OUT;
	1316	if (d->bd_slen != 0) {
	1317	bpf_wakeup(d);
	1318	}
	1319	} else if (d->bd_state == BPF_DRAINING) {
	1320	/*
	1321	* A close is waiting for this to finish.
	1322	* Mark it as finished, and wake the close up.
	1323	*/
	1324	d->bd_state = BPF_IDLE;
	1325	bpf_wakeup(d);
	1326	}
	1327	lck_mtx_unlock(bpf_mlock);
	1328	}
	1329
	1330	/* keep in sync with bpf_movein above: */
	1331	#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
	1332
	1333	int
	1334	bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
	1335	{
	1336	struct bpf_d *d;
	1337	struct ifnet *ifp;
	1338	struct mbuf *m = NULL;
	1339	int error;
	1340	char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
	1341	int datlen = 0;
	1342	int bif_dlt;
	1343	int bd_hdrcmplt;
	1344
	1345	lck_mtx_lock(bpf_mlock);
	1346
	1347	d = bpf_dtab[minor(dev)];
	1348	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	1349	(d->bd_flags & BPF_CLOSING) != 0) {
	1350	lck_mtx_unlock(bpf_mlock);
	1351	return ENXIO;
	1352	}
	1353
	1354	bpf_acquire_d(d);
	1355
	1356	if (d->bd_bif == 0) {
	1357	bpf_release_d(d);
	1358	lck_mtx_unlock(bpf_mlock);
	1359	return ENXIO;
	1360	}
	1361
	1362	ifp = d->bd_bif->bif_ifp;
	1363
	1364	if ((ifp->if_flags & IFF_UP) == 0) {
	1365	bpf_release_d(d);
	1366	lck_mtx_unlock(bpf_mlock);
	1367	return ENETDOWN;
	1368	}
	1369	if (uio_resid(uio) == 0) {
	1370	bpf_release_d(d);
	1371	lck_mtx_unlock(bpf_mlock);
	1372	return 0;
	1373	}
	1374	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
	1375
	1376	/*
	1377	* fix for PR-6849527
	1378	* geting variables onto stack before dropping lock for bpf_movein()
	1379	*/
	1380	bif_dlt = (int)d->bd_bif->bif_dlt;
	1381	bd_hdrcmplt = d->bd_hdrcmplt;
	1382
	1383	/* bpf_movein allocating mbufs; drop lock */
	1384	lck_mtx_unlock(bpf_mlock);
	1385
	1386	error = bpf_movein(uio, bif_dlt, &m,
	1387	bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
	1388	&datlen);
	1389
	1390	/* take the lock again */
	1391	lck_mtx_lock(bpf_mlock);
	1392	if (error) {
	1393	bpf_release_d(d);
	1394	lck_mtx_unlock(bpf_mlock);
	1395	return error;
	1396	}
	1397
	1398	/* verify the device is still open */
	1399	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1400	bpf_release_d(d);
	1401	lck_mtx_unlock(bpf_mlock);
	1402	m_freem(m);
	1403	return ENXIO;
	1404	}
	1405
	1406	if (d->bd_bif == NULL) {
	1407	bpf_release_d(d);
	1408	lck_mtx_unlock(bpf_mlock);
	1409	m_free(m);
	1410	return ENXIO;
	1411	}
	1412
	1413	if ((unsigned)datlen > ifp->if_mtu) {
	1414	bpf_release_d(d);
	1415	lck_mtx_unlock(bpf_mlock);
	1416	m_freem(m);
	1417	return EMSGSIZE;
	1418	}
	1419
	1420	bpf_set_packet_service_class(m, d->bd_traffic_class);
	1421
	1422	lck_mtx_unlock(bpf_mlock);
	1423
	1424	/*
	1425	* The driver frees the mbuf.
	1426	*/
	1427	if (d->bd_hdrcmplt) {
	1428	if (d->bd_bif->bif_send) {
	1429	error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
	1430	} else {
	1431	error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
	1432	}
	1433	} else {
	1434	error = dlil_output(ifp, PF_INET, m, NULL,
	1435	(struct sockaddr *)dst_buf, 0, NULL);
	1436	}
	1437
	1438	lck_mtx_lock(bpf_mlock);
	1439	bpf_release_d(d);
	1440	lck_mtx_unlock(bpf_mlock);
	1441
	1442	return error;
	1443	}
	1444
	1445	/*
	1446	* Reset a descriptor by flushing its packet buffer and clearing the
	1447	* receive and drop counts.
	1448	*/
	1449	static void
	1450	reset_d(struct bpf_d *d)
	1451	{
	1452	if (d->bd_hbuf_read != 0) {
	1453	panic("resetting buffers during read");
	1454	}
	1455
	1456	if (d->bd_hbuf) {
	1457	/* Free the hold buffer. */
	1458	d->bd_fbuf = d->bd_hbuf;
	1459	d->bd_hbuf = NULL;
	1460	}
	1461	d->bd_slen = 0;
	1462	d->bd_hlen = 0;
	1463	d->bd_scnt = 0;
	1464	d->bd_hcnt = 0;
	1465	d->bd_rcount = 0;
	1466	d->bd_dcount = 0;
	1467	}
	1468
	1469	static struct bpf_d *
	1470	bpf_get_device_from_uuid(uuid_t uuid)
	1471	{
	1472	unsigned int i;
	1473
	1474	for (i = 0; i < nbpfilter; i++) {
	1475	struct bpf_d *d = bpf_dtab[i];
	1476
	1477	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	1478	(d->bd_flags & BPF_CLOSING) != 0) {
	1479	continue;
	1480	}
	1481	if (uuid_compare(uuid, d->bd_uuid) == 0) {
	1482	return d;
	1483	}
	1484	}
	1485
	1486	return NULL;
	1487	}
	1488
	1489	/*
	1490	* The BIOCSETUP command "atomically" attach to the interface and
	1491	* copy the buffer from another interface. This minimizes the risk
	1492	* of missing packet because this is done while holding
	1493	* the BPF global lock
	1494	*/
	1495	static int
	1496	bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
	1497	{
	1498	struct bpf_d *d_from;
	1499	int error = 0;
	1500
	1501	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
	1502
	1503	/*
	1504	* Sanity checks
	1505	*/
	1506	d_from = bpf_get_device_from_uuid(uuid_from);
	1507	if (d_from == NULL) {
	1508	error = ENOENT;
	1509	os_log_info(OS_LOG_DEFAULT,
	1510	"%s: uuids not found error %d",
	1511	__func__, error);
	1512	return error;
	1513	}
	1514	if (d_from->bd_opened_by != d_to->bd_opened_by) {
	1515	error = EACCES;
	1516	os_log_info(OS_LOG_DEFAULT,
	1517	"%s: processes not matching error %d",
	1518	__func__, error);
	1519	return error;
	1520	}
	1521
	1522	/*
	1523	* Prevent any read while copying
	1524	*/
	1525	while (d_to->bd_hbuf_read != 0) {
	1526	msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	1527	}
	1528	d_to->bd_hbuf_read = 1;
	1529
	1530	while (d_from->bd_hbuf_read != 0) {
	1531	msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	1532	}
	1533	d_from->bd_hbuf_read = 1;
	1534
	1535	/*
	1536	* Verify the devices have not been closed
	1537	*/
	1538	if (d_to->bd_flags & BPF_CLOSING) {
	1539	error = ENXIO;
	1540	os_log_info(OS_LOG_DEFAULT,
	1541	"%s: d_to is closing error %d",
	1542	__func__, error);
	1543	goto done;
	1544	}
	1545	if (d_from->bd_flags & BPF_CLOSING) {
	1546	error = ENXIO;
	1547	os_log_info(OS_LOG_DEFAULT,
	1548	"%s: d_from is closing error %d",
	1549	__func__, error);
	1550	goto done;
	1551	}
	1552
	1553	/*
	1554	* For now require the same buffer size
	1555	*/
	1556	if (d_from->bd_bufsize != d_to->bd_bufsize) {
	1557	error = EINVAL;
	1558	os_log_info(OS_LOG_DEFAULT,
	1559	"%s: bufsizes not matching error %d",
	1560	__func__, error);
	1561	goto done;
	1562	}
	1563
	1564	/*
	1565	* Attach to the interface
	1566	*/
	1567	error = bpf_setif(d_to, ifp, false, true);
	1568	if (error != 0) {
	1569	os_log_info(OS_LOG_DEFAULT,
	1570	"%s: bpf_setif() failed error %d",
	1571	__func__, error);
	1572	goto done;
	1573	}
	1574
	1575	/*
	1576	* Make sure the buffers are setup as expected by bpf_setif()
	1577	*/
	1578	ASSERT(d_to->bd_hbuf == NULL);
	1579	ASSERT(d_to->bd_sbuf != NULL);
	1580	ASSERT(d_to->bd_fbuf != NULL);
	1581
	1582	/*
	1583	* Copy the buffers and update the pointers and counts
	1584	*/
	1585	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	1586	d_to->bd_slen = d_from->bd_slen;
	1587	d_to->bd_scnt = d_from->bd_scnt;
	1588
	1589	if (d_from->bd_hbuf != NULL) {
	1590	d_to->bd_hbuf = d_to->bd_fbuf;
	1591	d_to->bd_fbuf = NULL;
	1592	memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	1593	}
	1594	d_to->bd_hlen = d_from->bd_hlen;
	1595	d_to->bd_hcnt = d_from->bd_hcnt;
	1596
	1597	if (bpf_debug > 0) {
	1598	os_log_info(OS_LOG_DEFAULT,
	1599	"%s: done slen %u scnt %u hlen %u hcnt %u",
	1600	__func__, d_to->bd_slen, d_to->bd_scnt,
	1601	d_to->bd_hlen, d_to->bd_hcnt);
	1602	}
	1603	done:
	1604	d_from->bd_hbuf_read = 0;
	1605	wakeup((caddr_t)d_from);
	1606
	1607	d_to->bd_hbuf_read = 0;
	1608	wakeup((caddr_t)d_to);
	1609
	1610	return error;
	1611	}
	1612
	1613	/*
	1614	* FIONREAD Check for read packet available.
	1615	* SIOCGIFADDR Get interface address - convenient hook to driver.
	1616	* BIOCGBLEN Get buffer len [for read()].
	1617	* BIOCSETF Set ethernet read filter.
	1618	* BIOCFLUSH Flush read packet buffer.
	1619	* BIOCPROMISC Put interface into promiscuous mode.
	1620	* BIOCGDLT Get link layer type.
	1621	* BIOCGETIF Get interface name.
	1622	* BIOCSETIF Set interface.
	1623	* BIOCSRTIMEOUT Set read timeout.
	1624	* BIOCGRTIMEOUT Get read timeout.
	1625	* BIOCGSTATS Get packet stats.
	1626	* BIOCIMMEDIATE Set immediate mode.
	1627	* BIOCVERSION Get filter language version.
	1628	* BIOCGHDRCMPLT Get "header already complete" flag
	1629	* BIOCSHDRCMPLT Set "header already complete" flag
	1630	* BIOCGSEESENT Get "see packets sent" flag
	1631	* BIOCSSEESENT Set "see packets sent" flag
	1632	* BIOCSETTC Set traffic class.
	1633	* BIOCGETTC Get traffic class.
	1634	* BIOCSEXTHDR Set "extended header" flag
	1635	* BIOCSHEADDROP Drop head of the buffer if user is not reading
	1636	* BIOCGHEADDROP Get "head-drop" flag
	1637	*/
	1638	/* ARGSUSED */
	1639	int
	1640	bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
	1641	struct proc *p)
	1642	{
	1643	struct bpf_d *d;
	1644	int error = 0;
	1645	u_int int_arg;
	1646	struct ifreq ifr;
	1647
	1648	lck_mtx_lock(bpf_mlock);
	1649
	1650	d = bpf_dtab[minor(dev)];
	1651	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	1652	(d->bd_flags & BPF_CLOSING) != 0) {
	1653	lck_mtx_unlock(bpf_mlock);
	1654	return ENXIO;
	1655	}
	1656
	1657	bpf_acquire_d(d);
	1658
	1659	if (d->bd_state == BPF_WAITING) {
	1660	bpf_stop_timer(d);
	1661	}
	1662	d->bd_state = BPF_IDLE;
	1663
	1664	switch (cmd) {
	1665	default:
	1666	error = EINVAL;
	1667	break;
	1668
	1669	/*
	1670	* Check for read packet available.
	1671	*/
	1672	case FIONREAD: /* int */
	1673	{
	1674	int n;
	1675
	1676	n = d->bd_slen;
	1677	if (d->bd_hbuf && d->bd_hbuf_read == 0) {
	1678	n += d->bd_hlen;
	1679	}
	1680
	1681	bcopy(&n, addr, sizeof(n));
	1682	break;
	1683	}
	1684
	1685	case SIOCGIFADDR: /* struct ifreq */
	1686	{
	1687	struct ifnet *ifp;
	1688
	1689	if (d->bd_bif == 0) {
	1690	error = EINVAL;
	1691	} else {
	1692	ifp = d->bd_bif->bif_ifp;
	1693	error = ifnet_ioctl(ifp, 0, cmd, addr);
	1694	}
	1695	break;
	1696	}
	1697
	1698	/*
	1699	* Get buffer len [for read()].
	1700	*/
	1701	case BIOCGBLEN: /* u_int */
	1702	bcopy(&d->bd_bufsize, addr, sizeof(u_int));
	1703	break;
	1704
	1705	/*
	1706	* Set buffer length.
	1707	*/
	1708	case BIOCSBLEN: { /* u_int */
	1709	u_int size;
	1710	unsigned int maxbufsize = bpf_maxbufsize;
	1711
	1712	/*
	1713	* Allow larger buffer in head drop mode to with the
	1714	* assumption the reading process may be low priority but
	1715	* is interested in the most recent traffic
	1716	*/
	1717	if (d->bd_headdrop != 0) {
	1718	maxbufsize = 2 * bpf_maxbufsize;
	1719	}
	1720
	1721	if (d->bd_bif != 0 \|\| (d->bd_flags & BPF_DETACHING)) {
	1722	/*
	1723	* Interface already attached, unable to change buffers
	1724	*/
	1725	error = EINVAL;
	1726	break;
	1727	}
	1728	bcopy(addr, &size, sizeof(size));
	1729
	1730	if (size > maxbufsize) {
	1731	d->bd_bufsize = maxbufsize;
	1732
	1733	os_log_info(OS_LOG_DEFAULT,
	1734	"%s bufsize capped to %u from %u",
	1735	__func__, d->bd_bufsize, size);
	1736	} else if (size < BPF_MINBUFSIZE) {
	1737	d->bd_bufsize = BPF_MINBUFSIZE;
	1738
	1739	os_log_info(OS_LOG_DEFAULT,
	1740	"%s bufsize bumped to %u from %u",
	1741	__func__, d->bd_bufsize, size);
	1742	} else {
	1743	d->bd_bufsize = size;
	1744	}
	1745
	1746	/* It's a read/write ioctl */
	1747	bcopy(&d->bd_bufsize, addr, sizeof(u_int));
	1748	break;
	1749	}
	1750	/*
	1751	* Set link layer read filter.
	1752	*/
	1753	case BIOCSETF32:
	1754	case BIOCSETFNR32: { /* struct bpf_program32 */
	1755	struct bpf_program32 prg32;
	1756
	1757	bcopy(addr, &prg32, sizeof(prg32));
	1758	error = bpf_setf(d, prg32.bf_len,
	1759	CAST_USER_ADDR_T(prg32.bf_insns), cmd);
	1760	break;
	1761	}
	1762
	1763	case BIOCSETF64:
	1764	case BIOCSETFNR64: { /* struct bpf_program64 */
	1765	struct bpf_program64 prg64;
	1766
	1767	bcopy(addr, &prg64, sizeof(prg64));
	1768	error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
	1769	break;
	1770	}
	1771
	1772	/*
	1773	* Flush read packet buffer.
	1774	*/
	1775	case BIOCFLUSH:
	1776	while (d->bd_hbuf_read != 0) {
	1777	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
	1778	NULL);
	1779	}
	1780	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1781	error = ENXIO;
	1782	break;
	1783	}
	1784	reset_d(d);
	1785	break;
	1786
	1787	/*
	1788	* Put interface into promiscuous mode.
	1789	*/
	1790	case BIOCPROMISC:
	1791	if (d->bd_bif == 0) {
	1792	/*
	1793	* No interface attached yet.
	1794	*/
	1795	error = EINVAL;
	1796	break;
	1797	}
	1798	if (d->bd_promisc == 0) {
	1799	lck_mtx_unlock(bpf_mlock);
	1800	error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
	1801	lck_mtx_lock(bpf_mlock);
	1802	if (error == 0) {
	1803	d->bd_promisc = 1;
	1804	}
	1805	}
	1806	break;
	1807
	1808	/*
	1809	* Get device parameters.
	1810	*/
	1811	case BIOCGDLT: /* u_int */
	1812	if (d->bd_bif == 0) {
	1813	error = EINVAL;
	1814	} else {
	1815	bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
	1816	}
	1817	break;
	1818
	1819	/*
	1820	* Get a list of supported data link types.
	1821	*/
	1822	case BIOCGDLTLIST: /* struct bpf_dltlist */
	1823	if (d->bd_bif == NULL) {
	1824	error = EINVAL;
	1825	} else {
	1826	error = bpf_getdltlist(d, addr, p);
	1827	}
	1828	break;
	1829
	1830	/*
	1831	* Set data link type.
	1832	*/
	1833	case BIOCSDLT: /* u_int */
	1834	if (d->bd_bif == NULL) {
	1835	error = EINVAL;
	1836	} else {
	1837	u_int dlt;
	1838
	1839	bcopy(addr, &dlt, sizeof(dlt));
	1840
	1841	if (dlt == DLT_PKTAP &&
	1842	!(d->bd_flags & BPF_WANT_PKTAP)) {
	1843	dlt = DLT_RAW;
	1844	}
	1845	error = bpf_setdlt(d, dlt);
	1846	}
	1847	break;
	1848
	1849	/*
	1850	* Get interface name.
	1851	*/
	1852	case BIOCGETIF: /* struct ifreq */
	1853	if (d->bd_bif == 0) {
	1854	error = EINVAL;
	1855	} else {
	1856	struct ifnet *const ifp = d->bd_bif->bif_ifp;
	1857
	1858	snprintf(((struct ifreq )(void )addr)->ifr_name,
	1859	sizeof(ifr.ifr_name), "%s", if_name(ifp));
	1860	}
	1861	break;
	1862
	1863	/*
	1864	* Set interface.
	1865	*/
	1866	case BIOCSETIF: { /* struct ifreq */
	1867	ifnet_t ifp;
	1868
	1869	bcopy(addr, &ifr, sizeof(ifr));
	1870	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	1871	ifp = ifunit(ifr.ifr_name);
	1872	if (ifp == NULL) {
	1873	error = ENXIO;
	1874	} else {
	1875	error = bpf_setif(d, ifp, true, false);
	1876	}
	1877	break;
	1878	}
	1879
	1880	/*
	1881	* Set read timeout.
	1882	*/
	1883	case BIOCSRTIMEOUT32: { /* struct user32_timeval */
	1884	struct user32_timeval _tv;
	1885	struct timeval tv;
	1886
	1887	bcopy(addr, &_tv, sizeof(_tv));
	1888	tv.tv_sec = _tv.tv_sec;
	1889	tv.tv_usec = _tv.tv_usec;
	1890
	1891	/*
	1892	* Subtract 1 tick from tvtohz() since this isn't
	1893	* a one-shot timer.
	1894	*/
	1895	if ((error = itimerfix(&tv)) == 0) {
	1896	d->bd_rtout = tvtohz(&tv) - 1;
	1897	}
	1898	break;
	1899	}
	1900
	1901	case BIOCSRTIMEOUT64: { /* struct user64_timeval */
	1902	struct user64_timeval _tv;
	1903	struct timeval tv;
	1904
	1905	bcopy(addr, &_tv, sizeof(_tv));
	1906	tv.tv_sec = _tv.tv_sec;
	1907	tv.tv_usec = _tv.tv_usec;
	1908
	1909	/*
	1910	* Subtract 1 tick from tvtohz() since this isn't
	1911	* a one-shot timer.
	1912	*/
	1913	if ((error = itimerfix(&tv)) == 0) {
	1914	d->bd_rtout = tvtohz(&tv) - 1;
	1915	}
	1916	break;
	1917	}
	1918
	1919	/*
	1920	* Get read timeout.
	1921	*/
	1922	case BIOCGRTIMEOUT32: { /* struct user32_timeval */
	1923	struct user32_timeval tv;
	1924
	1925	bzero(&tv, sizeof(tv));
	1926	tv.tv_sec = d->bd_rtout / hz;
	1927	tv.tv_usec = (d->bd_rtout % hz) * tick;
	1928	bcopy(&tv, addr, sizeof(tv));
	1929	break;
	1930	}
	1931
	1932	case BIOCGRTIMEOUT64: { /* struct user64_timeval */
	1933	struct user64_timeval tv;
	1934
	1935	bzero(&tv, sizeof(tv));
	1936	tv.tv_sec = d->bd_rtout / hz;
	1937	tv.tv_usec = (d->bd_rtout % hz) * tick;
	1938	bcopy(&tv, addr, sizeof(tv));
	1939	break;
	1940	}
	1941
	1942	/*
	1943	* Get packet stats.
	1944	*/
	1945	case BIOCGSTATS: { /* struct bpf_stat */
	1946	struct bpf_stat bs;
	1947
	1948	bzero(&bs, sizeof(bs));
	1949	bs.bs_recv = d->bd_rcount;
	1950	bs.bs_drop = d->bd_dcount;
	1951	bcopy(&bs, addr, sizeof(bs));
	1952	break;
	1953	}
	1954
	1955	/*
	1956	* Set immediate mode.
	1957	*/
	1958	case BIOCIMMEDIATE: /* u_int */
	1959	d->bd_immediate = (u_int )(void *)addr;
	1960	break;
	1961
	1962	case BIOCVERSION: { /* struct bpf_version */
	1963	struct bpf_version bv;
	1964
	1965	bzero(&bv, sizeof(bv));
	1966	bv.bv_major = BPF_MAJOR_VERSION;
	1967	bv.bv_minor = BPF_MINOR_VERSION;
	1968	bcopy(&bv, addr, sizeof(bv));
	1969	break;
	1970	}
	1971
	1972	/*
	1973	* Get "header already complete" flag
	1974	*/
	1975	case BIOCGHDRCMPLT: /* u_int */
	1976	bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
	1977	break;
	1978
	1979	/*
	1980	* Set "header already complete" flag
	1981	*/
	1982	case BIOCSHDRCMPLT: /* u_int */
	1983	bcopy(addr, &int_arg, sizeof(int_arg));
	1984	d->bd_hdrcmplt = int_arg ? 1 : 0;
	1985	break;
	1986
	1987	/*
	1988	* Get "see sent packets" flag
	1989	*/
	1990	case BIOCGSEESENT: /* u_int */
	1991	bcopy(&d->bd_seesent, addr, sizeof(u_int));
	1992	break;
	1993
	1994	/*
	1995	* Set "see sent packets" flag
	1996	*/
	1997	case BIOCSSEESENT: /* u_int */
	1998	bcopy(addr, &d->bd_seesent, sizeof(u_int));
	1999	break;
	2000
	2001	/*
	2002	* Set traffic service class
	2003	*/
	2004	case BIOCSETTC: { /* int */
	2005	int tc;
	2006
	2007	bcopy(addr, &tc, sizeof(int));
	2008	error = bpf_set_traffic_class(d, tc);
	2009	break;
	2010	}
	2011
	2012	/*
	2013	* Get traffic service class
	2014	*/
	2015	case BIOCGETTC: /* int */
	2016	bcopy(&d->bd_traffic_class, addr, sizeof(int));
	2017	break;
	2018
	2019	case FIONBIO: /* Non-blocking I/O; int */
	2020	break;
	2021
	2022	case FIOASYNC: /* Send signal on receive packets; int */
	2023	bcopy(addr, &d->bd_async, sizeof(int));
	2024	break;
	2025	#ifndef __APPLE__
	2026	case FIOSETOWN:
	2027	error = fsetown((int )addr, &d->bd_sigio);
	2028	break;
	2029
	2030	case FIOGETOWN:
	2031	(int )addr = fgetown(d->bd_sigio);
	2032	break;
	2033
	2034	/* This is deprecated, FIOSETOWN should be used instead. */
	2035	case TIOCSPGRP:
	2036	error = fsetown(-((int )addr), &d->bd_sigio);
	2037	break;
	2038
	2039	/* This is deprecated, FIOGETOWN should be used instead. */
	2040	case TIOCGPGRP:
	2041	(int )addr = -fgetown(d->bd_sigio);
	2042	break;
	2043	#endif
	2044	case BIOCSRSIG: { /* Set receive signal; u_int */
	2045	u_int sig;
	2046
	2047	bcopy(addr, &sig, sizeof(u_int));
	2048
	2049	if (sig >= NSIG) {
	2050	error = EINVAL;
	2051	} else {
	2052	d->bd_sig = sig;
	2053	}
	2054	break;
	2055	}
	2056	case BIOCGRSIG: /* u_int */
	2057	bcopy(&d->bd_sig, addr, sizeof(u_int));
	2058	break;
	2059	#ifdef __APPLE__
	2060	case BIOCSEXTHDR: /* u_int */
	2061	bcopy(addr, &int_arg, sizeof(int_arg));
	2062	if (int_arg) {
	2063	d->bd_flags \|= BPF_EXTENDED_HDR;
	2064	} else {
	2065	d->bd_flags &= ~BPF_EXTENDED_HDR;
	2066	}
	2067	break;
	2068
	2069	case BIOCGIFATTACHCOUNT: { /* struct ifreq */
	2070	ifnet_t ifp;
	2071	struct bpf_if *bp;
	2072
	2073	bcopy(addr, &ifr, sizeof(ifr));
	2074	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	2075	ifp = ifunit(ifr.ifr_name);
	2076	if (ifp == NULL) {
	2077	error = ENXIO;
	2078	break;
	2079	}
	2080	ifr.ifr_intval = 0;
	2081	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
	2082	struct bpf_d *bpf_d;
	2083
	2084	if (bp->bif_ifp == NULL \|\| bp->bif_ifp != ifp) {
	2085	continue;
	2086	}
	2087	for (bpf_d = bp->bif_dlist; bpf_d;
	2088	bpf_d = bpf_d->bd_next) {
	2089	ifr.ifr_intval += 1;
	2090	}
	2091	}
	2092	bcopy(&ifr, addr, sizeof(ifr));
	2093	break;
	2094	}
	2095	case BIOCGWANTPKTAP: /* u_int */
	2096	int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
	2097	bcopy(&int_arg, addr, sizeof(int_arg));
	2098	break;
	2099
	2100	case BIOCSWANTPKTAP: /* u_int */
	2101	bcopy(addr, &int_arg, sizeof(int_arg));
	2102	if (int_arg) {
	2103	d->bd_flags \|= BPF_WANT_PKTAP;
	2104	} else {
	2105	d->bd_flags &= ~BPF_WANT_PKTAP;
	2106	}
	2107	break;
	2108	#endif
	2109
	2110	case BIOCSHEADDROP:
	2111	bcopy(addr, &int_arg, sizeof(int_arg));
	2112	d->bd_headdrop = int_arg ? 1 : 0;
	2113	break;
	2114
	2115	case BIOCGHEADDROP:
	2116	bcopy(&d->bd_headdrop, addr, sizeof(int));
	2117	break;
	2118
	2119	case BIOCSTRUNCATE:
	2120	bcopy(addr, &int_arg, sizeof(int_arg));
	2121	if (int_arg) {
	2122	d->bd_flags \|= BPF_TRUNCATE;
	2123	} else {
	2124	d->bd_flags &= ~BPF_TRUNCATE;
	2125	}
	2126	break;
	2127
	2128	case BIOCGETUUID:
	2129	bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
	2130	break;
	2131
	2132	case BIOCSETUP: {
	2133	struct bpf_setup_args bsa;
	2134	ifnet_t ifp;
	2135
	2136	bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
	2137	bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
	2138	ifp = ifunit(bsa.bsa_ifname);
	2139	if (ifp == NULL) {
	2140	error = ENXIO;
	2141	os_log_info(OS_LOG_DEFAULT,
	2142	"%s: ifnet not found for %s error %d",
	2143	__func__, bsa.bsa_ifname, error);
	2144	break;
	2145	}
	2146
	2147	error = bpf_setup(d, bsa.bsa_uuid, ifp);
	2148	break;
	2149	}
	2150	case BIOCSPKTHDRV2:
	2151	bcopy(addr, &int_arg, sizeof(int_arg));
	2152	if (int_arg != 0) {
	2153	d->bd_flags \|= BPF_PKTHDRV2;
	2154	} else {
	2155	d->bd_flags &= ~BPF_PKTHDRV2;
	2156	}
	2157	break;
	2158
	2159	case BIOCGPKTHDRV2:
	2160	int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
	2161	bcopy(&int_arg, addr, sizeof(int));
	2162	break;
	2163	}
	2164
	2165	bpf_release_d(d);
	2166	lck_mtx_unlock(bpf_mlock);
	2167
	2168	return error;
	2169	}
	2170
	2171	/*
	2172	* Set d's packet filter program to fp. If this file already has a filter,
	2173	* free it and replace it. Returns EINVAL for bogus requests.
	2174	*/
	2175	static int
	2176	bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
	2177	u_long cmd)
	2178	{
	2179	struct bpf_insn fcode, old;
	2180	u_int flen, size;
	2181
	2182	while (d->bd_hbuf_read != 0) {
	2183	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	2184	}
	2185
	2186	if ((d->bd_flags & BPF_CLOSING) != 0) {
	2187	return ENXIO;
	2188	}
	2189
	2190	old = d->bd_filter;
	2191	if (bf_insns == USER_ADDR_NULL) {
	2192	if (bf_len != 0) {
	2193	return EINVAL;
	2194	}
	2195	d->bd_filter = NULL;
	2196	reset_d(d);
	2197	if (old != 0) {
	2198	FREE(old, M_DEVBUF);
	2199	}
	2200	return 0;
	2201	}
	2202	flen = bf_len;
	2203	if (flen > BPF_MAXINSNS) {
	2204	return EINVAL;
	2205	}
	2206
	2207	size = flen * sizeof(struct bpf_insn);
	2208	fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
	2209	#ifdef __APPLE__
	2210	if (fcode == NULL) {
	2211	return ENOBUFS;
	2212	}
	2213	#endif
	2214	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
	2215	bpf_validate(fcode, (int)flen)) {
	2216	d->bd_filter = fcode;
	2217
	2218	if (cmd == BIOCSETF32 \|\| cmd == BIOCSETF64) {
	2219	reset_d(d);
	2220	}
	2221
	2222	if (old != 0) {
	2223	FREE(old, M_DEVBUF);
	2224	}
	2225
	2226	return 0;
	2227	}
	2228	FREE(fcode, M_DEVBUF);
	2229	return EINVAL;
	2230	}
	2231
	2232	/*
	2233	* Detach a file from its current interface (if attached at all) and attach
	2234	* to the interface indicated by the name stored in ifr.
	2235	* Return an errno or 0.
	2236	*/
	2237	static int
	2238	bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
	2239	{
	2240	struct bpf_if *bp;
	2241	int error;
	2242
	2243	while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
	2244	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	2245	}
	2246
	2247	if ((d->bd_flags & BPF_CLOSING) != 0) {
	2248	return ENXIO;
	2249	}
	2250
	2251	/*
	2252	* Look through attached interfaces for the named one.
	2253	*/
	2254	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
	2255	struct ifnet *ifp = bp->bif_ifp;
	2256
	2257	if (ifp == 0 \|\| ifp != theywant) {
	2258	continue;
	2259	}
	2260	/*
	2261	* Do not use DLT_PKTAP, unless requested explicitly
	2262	*/
	2263	if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
	2264	continue;
	2265	}
	2266	/*
	2267	* Skip the coprocessor interface
	2268	*/
	2269	if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
	2270	continue;
	2271	}
	2272	/*
	2273	* We found the requested interface.
	2274	* Allocate the packet buffers.
	2275	*/
	2276	error = bpf_allocbufs(d);
	2277	if (error != 0) {
	2278	return error;
	2279	}
	2280	/*
	2281	* Detach if attached to something else.
	2282	*/
	2283	if (bp != d->bd_bif) {
	2284	if (d->bd_bif != NULL) {
	2285	if (bpf_detachd(d, 0) != 0) {
	2286	return ENXIO;
	2287	}
	2288	}
	2289	if (bpf_attachd(d, bp) != 0) {
	2290	return ENXIO;
	2291	}
	2292	}
	2293	if (do_reset) {
	2294	reset_d(d);
	2295	}
	2296	return 0;
	2297	}
	2298	/* Not found. */
	2299	return ENXIO;
	2300	}
	2301
	2302	/*
	2303	* Get a list of available data link type of the interface.
	2304	*/
	2305	static int
	2306	bpf_getdltlist(struct bpf_d d, caddr_t addr, struct proc p)
	2307	{
	2308	u_int n;
	2309	int error;
	2310	struct ifnet *ifp;
	2311	struct bpf_if *bp;
	2312	user_addr_t dlist;
	2313	struct bpf_dltlist bfl;
	2314
	2315	bcopy(addr, &bfl, sizeof(bfl));
	2316	if (proc_is64bit(p)) {
	2317	dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
	2318	} else {
	2319	dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
	2320	}
	2321
	2322	ifp = d->bd_bif->bif_ifp;
	2323	n = 0;
	2324	error = 0;
	2325
	2326	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
	2327	if (bp->bif_ifp != ifp) {
	2328	continue;
	2329	}
	2330	/*
	2331	* Do not use DLT_PKTAP, unless requested explicitly
	2332	*/
	2333	if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
	2334	continue;
	2335	}
	2336	if (dlist != USER_ADDR_NULL) {
	2337	if (n >= bfl.bfl_len) {
	2338	return ENOMEM;
	2339	}
	2340	error = copyout(&bp->bif_dlt, dlist,
	2341	sizeof(bp->bif_dlt));
	2342	if (error != 0) {
	2343	break;
	2344	}
	2345	dlist += sizeof(bp->bif_dlt);
	2346	}
	2347	n++;
	2348	}
	2349	bfl.bfl_len = n;
	2350	bcopy(&bfl, addr, sizeof(bfl));
	2351
	2352	return error;
	2353	}
	2354
	2355	/*
	2356	* Set the data link type of a BPF instance.
	2357	*/
	2358	static int
	2359	bpf_setdlt(struct bpf_d *d, uint32_t dlt)
	2360	{
	2361	int error, opromisc;
	2362	struct ifnet *ifp;
	2363	struct bpf_if *bp;
	2364
	2365	if (d->bd_bif->bif_dlt == dlt) {
	2366	return 0;
	2367	}
	2368
	2369	while (d->bd_hbuf_read != 0) {
	2370	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	2371	}
	2372
	2373	if ((d->bd_flags & BPF_CLOSING) != 0) {
	2374	return ENXIO;
	2375	}
	2376
	2377	ifp = d->bd_bif->bif_ifp;
	2378	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
	2379	if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
	2380	/*
	2381	* Do not use DLT_PKTAP, unless requested explicitly
	2382	*/
	2383	if (bp->bif_dlt == DLT_PKTAP &&
	2384	!(d->bd_flags & BPF_WANT_PKTAP)) {
	2385	continue;
	2386	}
	2387	break;
	2388	}
	2389	}
	2390	if (bp != NULL) {
	2391	opromisc = d->bd_promisc;
	2392	if (bpf_detachd(d, 0) != 0) {
	2393	return ENXIO;
	2394	}
	2395	error = bpf_attachd(d, bp);
	2396	if (error) {
	2397	printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
	2398	ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
	2399	error);
	2400	return error;
	2401	}
	2402	reset_d(d);
	2403	if (opromisc) {
	2404	lck_mtx_unlock(bpf_mlock);
	2405	error = ifnet_set_promiscuous(bp->bif_ifp, 1);
	2406	lck_mtx_lock(bpf_mlock);
	2407	if (error) {
	2408	printf("%s: ifpromisc %s%d failed (%d)\n",
	2409	__func__, ifnet_name(bp->bif_ifp),
	2410	ifnet_unit(bp->bif_ifp), error);
	2411	} else {
	2412	d->bd_promisc = 1;
	2413	}
	2414	}
	2415	}
	2416	return bp == NULL ? EINVAL : 0;
	2417	}
	2418
	2419	static int
	2420	bpf_set_traffic_class(struct bpf_d *d, int tc)
	2421	{
	2422	int error = 0;
	2423
	2424	if (!SO_VALID_TC(tc)) {
	2425	error = EINVAL;
	2426	} else {
	2427	d->bd_traffic_class = tc;
	2428	}
	2429
	2430	return error;
	2431	}
	2432
	2433	static void
	2434	bpf_set_packet_service_class(struct mbuf *m, int tc)
	2435	{
	2436	if (!(m->m_flags & M_PKTHDR)) {
	2437	return;
	2438	}
	2439
	2440	VERIFY(SO_VALID_TC(tc));
	2441	(void) m_set_service_class(m, so_tc2msc(tc));
	2442	}
	2443
	2444	/*
	2445	* Support for select()
	2446	*
	2447	* Return true iff the specific operation will not block indefinitely.
	2448	* Otherwise, return false but make a note that a selwakeup() must be done.
	2449	*/
	2450	int
	2451	bpfselect(dev_t dev, int which, void * wql, struct proc *p)
	2452	{
	2453	struct bpf_d *d;
	2454	int ret = 0;
	2455
	2456	lck_mtx_lock(bpf_mlock);
	2457
	2458	d = bpf_dtab[minor(dev)];
	2459	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	2460	(d->bd_flags & BPF_CLOSING) != 0) {
	2461	lck_mtx_unlock(bpf_mlock);
	2462	return ENXIO;
	2463	}
	2464
	2465	bpf_acquire_d(d);
	2466
	2467	if (d->bd_bif == NULL) {
	2468	bpf_release_d(d);
	2469	lck_mtx_unlock(bpf_mlock);
	2470	return ENXIO;
	2471	}
	2472
	2473	while (d->bd_hbuf_read != 0) {
	2474	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	2475	}
	2476
	2477	if ((d->bd_flags & BPF_CLOSING) != 0) {
	2478	bpf_release_d(d);
	2479	lck_mtx_unlock(bpf_mlock);
	2480	return ENXIO;
	2481	}
	2482
	2483	switch (which) {
	2484	case FREAD:
	2485	if (d->bd_hlen != 0 \|\|
	2486	((d->bd_immediate \|\|
	2487	d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
	2488	ret = 1; /* read has data to return */
	2489	} else {
	2490	/*
	2491	* Read has no data to return.
	2492	* Make the select wait, and start a timer if
	2493	* necessary.
	2494	*/
	2495	selrecord(p, &d->bd_sel, wql);
	2496	bpf_start_timer(d);
	2497	}
	2498	break;
	2499
	2500	case FWRITE:
	2501	/* can't determine whether a write would block */
	2502	ret = 1;
	2503	break;
	2504	}
	2505
	2506	bpf_release_d(d);
	2507	lck_mtx_unlock(bpf_mlock);
	2508
	2509	return ret;
	2510	}
	2511
	2512	/*
	2513	* Support for kevent() system call. Register EVFILT_READ filters and
	2514	* reject all others.
	2515	*/
	2516	int bpfkqfilter(dev_t dev, struct knote *kn);
	2517	static void filt_bpfdetach(struct knote *);
	2518	static int filt_bpfread(struct knote *, long);
	2519	static int filt_bpftouch(struct knote kn, struct kevent_qos_s kev);
	2520	static int filt_bpfprocess(struct knote kn, struct kevent_qos_s kev);
	2521
	2522	SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
	2523	.f_isfd = 1,
	2524	.f_detach = filt_bpfdetach,
	2525	.f_event = filt_bpfread,
	2526	.f_touch = filt_bpftouch,
	2527	.f_process = filt_bpfprocess,
	2528	};
	2529
	2530	static int
	2531	filt_bpfread_common(struct knote kn, struct kevent_qos_s kev, struct bpf_d *d)
	2532	{
	2533	int ready = 0;
	2534	int64_t data = 0;
	2535
	2536	if (d->bd_immediate) {
	2537	/*
	2538	* If there's data in the hold buffer, it's the
	2539	* amount of data a read will return.
	2540	*
	2541	* If there's no data in the hold buffer, but
	2542	* there's data in the store buffer, a read will
	2543	* immediately rotate the store buffer to the
	2544	* hold buffer, the amount of data in the store
	2545	* buffer is the amount of data a read will
	2546	* return.
	2547	*
	2548	* If there's no data in either buffer, we're not
	2549	* ready to read.
	2550	*/
	2551	data = (d->bd_hlen == 0 \|\| d->bd_hbuf_read != 0 ?
	2552	d->bd_slen : d->bd_hlen);
	2553	int64_t lowwat = knote_low_watermark(kn);
	2554	if (lowwat > d->bd_bufsize) {
	2555	lowwat = d->bd_bufsize;
	2556	}
	2557	ready = (data >= lowwat);
	2558	} else {
	2559	/*
	2560	* If there's data in the hold buffer, it's the
	2561	* amount of data a read will return.
	2562	*
	2563	* If there's no data in the hold buffer, but
	2564	* there's data in the store buffer, if the
	2565	* timer has expired a read will immediately
	2566	* rotate the store buffer to the hold buffer,
	2567	* so the amount of data in the store buffer is
	2568	* the amount of data a read will return.
	2569	*
	2570	* If there's no data in either buffer, or there's
	2571	* no data in the hold buffer and the timer hasn't
	2572	* expired, we're not ready to read.
	2573	*/
	2574	data = ((d->bd_hlen == 0 \|\| d->bd_hbuf_read != 0) &&
	2575	d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
	2576	ready = (data > 0);
	2577	}
	2578	if (!ready) {
	2579	bpf_start_timer(d);
	2580	} else if (kev) {
	2581	knote_fill_kevent(kn, kev, data);
	2582	}
	2583
	2584	return ready;
	2585	}
	2586
	2587	int
	2588	bpfkqfilter(dev_t dev, struct knote *kn)
	2589	{
	2590	struct bpf_d *d;
	2591	int res;
	2592
	2593	/*
	2594	* Is this device a bpf?
	2595	*/
	2596	if (major(dev) != CDEV_MAJOR \|\| kn->kn_filter != EVFILT_READ) {
	2597	knote_set_error(kn, EINVAL);
	2598	return 0;
	2599	}
	2600
	2601	lck_mtx_lock(bpf_mlock);
	2602
	2603	d = bpf_dtab[minor(dev)];
	2604
	2605	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	2606	(d->bd_flags & BPF_CLOSING) != 0 \|\|
	2607	d->bd_bif == NULL) {
	2608	lck_mtx_unlock(bpf_mlock);
	2609	knote_set_error(kn, ENXIO);
	2610	return 0;
	2611	}
	2612
	2613	kn->kn_hook = d;
	2614	kn->kn_filtid = EVFILTID_BPFREAD;
	2615	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	2616	d->bd_flags \|= BPF_KNOTE;
	2617
	2618	/* capture the current state */
	2619	res = filt_bpfread_common(kn, NULL, d);
	2620
	2621	lck_mtx_unlock(bpf_mlock);
	2622
	2623	return res;
	2624	}
	2625
	2626	static void
	2627	filt_bpfdetach(struct knote *kn)
	2628	{
	2629	struct bpf_d d = (struct bpf_d )kn->kn_hook;
	2630
	2631	lck_mtx_lock(bpf_mlock);
	2632	if (d->bd_flags & BPF_KNOTE) {
	2633	KNOTE_DETACH(&d->bd_sel.si_note, kn);
	2634	d->bd_flags &= ~BPF_KNOTE;
	2635	}
	2636	lck_mtx_unlock(bpf_mlock);
	2637	}
	2638
	2639	static int
	2640	filt_bpfread(struct knote *kn, long hint)
	2641	{
	2642	#pragma unused(hint)
	2643	struct bpf_d d = (struct bpf_d )kn->kn_hook;
	2644
	2645	return filt_bpfread_common(kn, NULL, d);
	2646	}
	2647
	2648	static int
	2649	filt_bpftouch(struct knote kn, struct kevent_qos_s kev)
	2650	{
	2651	struct bpf_d d = (struct bpf_d )kn->kn_hook;
	2652	int res;
	2653
	2654	lck_mtx_lock(bpf_mlock);
	2655
	2656	/* save off the lowat threshold and flag */
	2657	kn->kn_sdata = kev->data;
	2658	kn->kn_sfflags = kev->fflags;
	2659
	2660	/* output data will be re-generated here */
	2661	res = filt_bpfread_common(kn, NULL, d);
	2662
	2663	lck_mtx_unlock(bpf_mlock);
	2664
	2665	return res;
	2666	}
	2667
	2668	static int
	2669	filt_bpfprocess(struct knote kn, struct kevent_qos_s kev)
	2670	{
	2671	struct bpf_d d = (struct bpf_d )kn->kn_hook;
	2672	int res;
	2673
	2674	lck_mtx_lock(bpf_mlock);
	2675	res = filt_bpfread_common(kn, kev, d);
	2676	lck_mtx_unlock(bpf_mlock);
	2677
	2678	return res;
	2679	}
	2680
	2681	/*
	2682	* Copy data from an mbuf chain into a buffer. This code is derived
	2683	* from m_copydata in kern/uipc_mbuf.c.
	2684	*/
	2685	static void
	2686	bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
	2687	{
	2688	u_int count;
	2689	u_char *dst;
	2690
	2691	dst = dst_arg;
	2692	while (len > 0) {
	2693	if (m == 0) {
	2694	panic("bpf_mcopy");
	2695	}
	2696	count = min(m->m_len, len);
	2697	bcopy(mbuf_data(m), dst, count);
	2698	m = m->m_next;
	2699	dst += count;
	2700	len -= count;
	2701	}
	2702	}
	2703
	2704	static inline void
	2705	bpf_tap_imp(
	2706	ifnet_t ifp,
	2707	u_int32_t dlt,
	2708	struct bpf_packet *bpf_pkt,
	2709	int outbound)
	2710	{
	2711	struct bpf_d *d;
	2712	u_int slen;
	2713	struct bpf_if *bp;
	2714
	2715	/*
	2716	* It's possible that we get here after the bpf descriptor has been
	2717	* detached from the interface; in such a case we simply return.
	2718	* Lock ordering is important since we can be called asynchronously
	2719	* (from IOKit) to process an inbound packet; when that happens
	2720	* we would have been holding its "gateLock" and will be acquiring
	2721	* "bpf_mlock" upon entering this routine. Due to that, we release
	2722	* "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	2723	* acquire "gateLock" in the IOKit), in order to avoid a deadlock
	2724	* when a ifnet_set_promiscuous request simultaneously collides with
	2725	* an inbound packet being passed into the tap callback.
	2726	*/
	2727	lck_mtx_lock(bpf_mlock);
	2728	if (ifp->if_bpf == NULL) {
	2729	lck_mtx_unlock(bpf_mlock);
	2730	return;
	2731	}
	2732	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
	2733	if (bp->bif_ifp != ifp) {
	2734	/* wrong interface */
	2735	bp = NULL;
	2736	break;
	2737	}
	2738	if (dlt == 0 \|\| bp->bif_dlt == dlt) {
	2739	/* tapping default DLT or DLT matches */
	2740	break;
	2741	}
	2742	}
	2743	if (bp == NULL) {
	2744	goto done;
	2745	}
	2746	for (d = bp->bif_dlist; d; d = d->bd_next) {
	2747	struct bpf_packet *bpf_pkt_saved = bpf_pkt;
	2748	struct bpf_packet bpf_pkt_tmp;
	2749	struct pktap_header_buffer bpfp_header_tmp;
	2750
	2751	if (outbound && !d->bd_seesent) {
	2752	continue;
	2753	}
	2754
	2755	++d->bd_rcount;
	2756	slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
	2757	bpf_pkt->bpfp_total_length, 0);
	2758	if (bp->bif_ifp->if_type == IFT_PKTAP &&
	2759	bp->bif_dlt == DLT_PKTAP) {
	2760	/*
	2761	* Need to copy the bpf_pkt because the conversion
	2762	* to v2 pktap header modifies the content of the
	2763	* bpfp_header
	2764	*/
	2765	if ((d->bd_flags & BPF_PKTHDRV2) &&
	2766	bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
	2767	bpf_pkt_tmp = *bpf_pkt;
	2768
	2769	bpf_pkt = &bpf_pkt_tmp;
	2770
	2771	memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
	2772	bpf_pkt->bpfp_header_length);
	2773
	2774	bpf_pkt->bpfp_header = &bpfp_header_tmp;
	2775
	2776	convert_to_pktap_header_to_v2(bpf_pkt,
	2777	!!(d->bd_flags & BPF_TRUNCATE));
	2778	}
	2779
	2780	if (d->bd_flags & BPF_TRUNCATE) {
	2781	slen = min(slen,
	2782	get_pkt_trunc_len((u_char *)bpf_pkt,
	2783	bpf_pkt->bpfp_total_length));
	2784	}
	2785	}
	2786	if (slen != 0) {
	2787	catchpacket(d, bpf_pkt, slen, outbound);
	2788	}
	2789	bpf_pkt = bpf_pkt_saved;
	2790	}
	2791
	2792	done:
	2793	lck_mtx_unlock(bpf_mlock);
	2794	}
	2795
	2796	static inline void
	2797	bpf_tap_mbuf(
	2798	ifnet_t ifp,
	2799	u_int32_t dlt,
	2800	mbuf_t m,
	2801	void* hdr,
	2802	size_t hlen,
	2803	int outbound)
	2804	{
	2805	struct bpf_packet bpf_pkt;
	2806	struct mbuf *m0;
	2807
	2808	if (ifp->if_bpf == NULL) {
	2809	/* quickly check without taking lock */
	2810	return;
	2811	}
	2812	bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
	2813	bpf_pkt.bpfp_mbuf = m;
	2814	bpf_pkt.bpfp_total_length = 0;
	2815	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
	2816	bpf_pkt.bpfp_total_length += m0->m_len;
	2817	}
	2818	bpf_pkt.bpfp_header = hdr;
	2819	if (hdr != NULL) {
	2820	bpf_pkt.bpfp_total_length += hlen;
	2821	bpf_pkt.bpfp_header_length = hlen;
	2822	} else {
	2823	bpf_pkt.bpfp_header_length = 0;
	2824	}
	2825	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
	2826	}
	2827
	2828	void
	2829	bpf_tap_out(
	2830	ifnet_t ifp,
	2831	u_int32_t dlt,
	2832	mbuf_t m,
	2833	void* hdr,
	2834	size_t hlen)
	2835	{
	2836	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
	2837	}
	2838
	2839	void
	2840	bpf_tap_in(
	2841	ifnet_t ifp,
	2842	u_int32_t dlt,
	2843	mbuf_t m,
	2844	void* hdr,
	2845	size_t hlen)
	2846	{
	2847	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
	2848	}
	2849
	2850	/* Callback registered with Ethernet driver. */
	2851	static int
	2852	bpf_tap_callback(struct ifnet ifp, struct mbuf m)
	2853	{
	2854	bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
	2855
	2856	return 0;
	2857	}
	2858
	2859
	2860	static errno_t
	2861	bpf_copydata(struct bpf_packet pkt, size_t off, size_t len, void out_data)
	2862	{
	2863	errno_t err = 0;
	2864	if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
	2865	err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
	2866	} else {
	2867	err = EINVAL;
	2868	}
	2869
	2870	return err;
	2871	}
	2872
	2873	static void
	2874	copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
	2875	{
	2876	/* copy the optional header */
	2877	if (pkt->bpfp_header_length != 0) {
	2878	size_t count = min(len, pkt->bpfp_header_length);
	2879	bcopy(pkt->bpfp_header, dst, count);
	2880	len -= count;
	2881	dst += count;
	2882	}
	2883	if (len == 0) {
	2884	/* nothing past the header */
	2885	return;
	2886	}
	2887	/* copy the packet */
	2888	switch (pkt->bpfp_type) {
	2889	case BPF_PACKET_TYPE_MBUF:
	2890	bpf_mcopy(pkt->bpfp_mbuf, dst, len);
	2891	break;
	2892	default:
	2893	break;
	2894	}
	2895	}
	2896
	2897	static uint16_t
	2898	get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
	2899	const uint16_t remaining_caplen)
	2900	{
	2901	/*
	2902	* For some reason tcpdump expects to have one byte beyond the ESP header
	2903	*/
	2904	uint16_t trunc_len = ESP_HDR_SIZE + 1;
	2905
	2906	if (trunc_len > remaining_caplen) {
	2907	return remaining_caplen;
	2908	}
	2909
	2910	return trunc_len;
	2911	}
	2912
	2913	static uint16_t
	2914	get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
	2915	const uint16_t remaining_caplen)
	2916	{
	2917	/*
	2918	* Include the payload generic header
	2919	*/
	2920	uint16_t trunc_len = ISAKMP_HDR_SIZE;
	2921
	2922	if (trunc_len > remaining_caplen) {
	2923	return remaining_caplen;
	2924	}
	2925
	2926	return trunc_len;
	2927	}
	2928
	2929	static uint16_t
	2930	get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
	2931	const uint16_t remaining_caplen)
	2932	{
	2933	int err = 0;
	2934	uint16_t trunc_len = 0;
	2935	char payload[remaining_caplen];
	2936
	2937	err = bpf_copydata(pkt, off, remaining_caplen, payload);
	2938	if (err != 0) {
	2939	return remaining_caplen;
	2940	}
	2941	/*
	2942	* They are three cases:
	2943	* - IKE: payload start with 4 bytes header set to zero before ISAKMP header
	2944	* - keep alive: 1 byte payload
	2945	* - otherwise it's ESP
	2946	*/
	2947	if (remaining_caplen >= 4 &&
	2948	payload[0] == 0 && payload[1] == 0 &&
	2949	payload[2] == 0 && payload[3] == 0) {
	2950	trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
	2951	} else if (remaining_caplen == 1) {
	2952	trunc_len = 1;
	2953	} else {
	2954	trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
	2955	}
	2956
	2957	if (trunc_len > remaining_caplen) {
	2958	return remaining_caplen;
	2959	}
	2960
	2961	return trunc_len;
	2962	}
	2963
	2964	static uint16_t
	2965	get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	2966	{
	2967	int err = 0;
	2968	uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
	2969
	2970	if (trunc_len >= remaining_caplen) {
	2971	return remaining_caplen;
	2972	}
	2973
	2974	struct udphdr udphdr;
	2975	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
	2976	if (err != 0) {
	2977	return remaining_caplen;
	2978	}
	2979
	2980	u_short sport, dport;
	2981
	2982	sport = EXTRACT_SHORT(&udphdr.uh_sport);
	2983	dport = EXTRACT_SHORT(&udphdr.uh_dport);
	2984
	2985	if (dport == PORT_DNS \|\| sport == PORT_DNS) {
	2986	/*
	2987	* Full UDP payload for DNS
	2988	*/
	2989	trunc_len = remaining_caplen;
	2990	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) \|\|
	2991	(sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
	2992	/*
	2993	* Full UDP payload for BOOTP and DHCP
	2994	*/
	2995	trunc_len = remaining_caplen;
	2996	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
	2997	/*
	2998	* Return the ISAKMP header
	2999	*/
	3000	trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
	3001	remaining_caplen - sizeof(struct udphdr));
	3002	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
	3003	trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
	3004	remaining_caplen - sizeof(struct udphdr));
	3005	}
	3006	if (trunc_len >= remaining_caplen) {
	3007	return remaining_caplen;
	3008	}
	3009
	3010	return trunc_len;
	3011	}
	3012
	3013	static uint16_t
	3014	get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	3015	{
	3016	int err = 0;
	3017	uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
	3018	if (trunc_len >= remaining_caplen) {
	3019	return remaining_caplen;
	3020	}
	3021
	3022	struct tcphdr tcphdr;
	3023	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
	3024	if (err != 0) {
	3025	return remaining_caplen;
	3026	}
	3027
	3028	u_short sport, dport;
	3029	sport = EXTRACT_SHORT(&tcphdr.th_sport);
	3030	dport = EXTRACT_SHORT(&tcphdr.th_dport);
	3031
	3032	if (dport == PORT_DNS \|\| sport == PORT_DNS) {
	3033	/*
	3034	* Full TCP payload for DNS
	3035	*/
	3036	trunc_len = remaining_caplen;
	3037	} else {
	3038	trunc_len = tcphdr.th_off << 2;
	3039	}
	3040	if (trunc_len >= remaining_caplen) {
	3041	return remaining_caplen;
	3042	}
	3043
	3044	return trunc_len;
	3045	}
	3046
	3047	static uint16_t
	3048	get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	3049	{
	3050	uint16_t trunc_len;
	3051
	3052	switch (proto) {
	3053	case IPPROTO_ICMP: {
	3054	/*
	3055	* Full IMCP payload
	3056	*/
	3057	trunc_len = remaining_caplen;
	3058	break;
	3059	}
	3060	case IPPROTO_ICMPV6: {
	3061	/*
	3062	* Full IMCPV6 payload
	3063	*/
	3064	trunc_len = remaining_caplen;
	3065	break;
	3066	}
	3067	case IPPROTO_IGMP: {
	3068	/*
	3069	* Full IGMP payload
	3070	*/
	3071	trunc_len = remaining_caplen;
	3072	break;
	3073	}
	3074	case IPPROTO_UDP: {
	3075	trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
	3076	break;
	3077	}
	3078	case IPPROTO_TCP: {
	3079	trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
	3080	break;
	3081	}
	3082	case IPPROTO_ESP: {
	3083	trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
	3084	break;
	3085	}
	3086	default: {
	3087	/*
	3088	* By default we only include the IP header
	3089	*/
	3090	trunc_len = 0;
	3091	break;
	3092	}
	3093	}
	3094	if (trunc_len >= remaining_caplen) {
	3095	return remaining_caplen;
	3096	}
	3097
	3098	return trunc_len;
	3099	}
	3100
	3101	static uint16_t
	3102	get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	3103	{
	3104	int err = 0;
	3105	uint16_t iplen = sizeof(struct ip);
	3106	if (iplen >= remaining_caplen) {
	3107	return remaining_caplen;
	3108	}
	3109
	3110	struct ip iphdr;
	3111	err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
	3112	if (err != 0) {
	3113	return remaining_caplen;
	3114	}
	3115
	3116	uint8_t proto = 0;
	3117
	3118	iplen = iphdr.ip_hl << 2;
	3119	if (iplen >= remaining_caplen) {
	3120	return remaining_caplen;
	3121	}
	3122
	3123	proto = iphdr.ip_p;
	3124	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
	3125
	3126	if (iplen >= remaining_caplen) {
	3127	return remaining_caplen;
	3128	}
	3129
	3130	return iplen;
	3131	}
	3132
	3133	static uint16_t
	3134	get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	3135	{
	3136	int err = 0;
	3137	uint16_t iplen = sizeof(struct ip6_hdr);
	3138	if (iplen >= remaining_caplen) {
	3139	return remaining_caplen;
	3140	}
	3141
	3142	struct ip6_hdr ip6hdr;
	3143	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
	3144	if (err != 0) {
	3145	return remaining_caplen;
	3146	}
	3147
	3148	uint8_t proto = 0;
	3149
	3150	/*
	3151	* TBD: process the extension headers
	3152	*/
	3153	proto = ip6hdr.ip6_nxt;
	3154	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
	3155
	3156	if (iplen >= remaining_caplen) {
	3157	return remaining_caplen;
	3158	}
	3159
	3160	return iplen;
	3161	}
	3162
	3163	static uint16_t
	3164	get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
	3165	{
	3166	int err = 0;
	3167	uint16_t ethlen = sizeof(struct ether_header);
	3168	if (ethlen >= remaining_caplen) {
	3169	return remaining_caplen;
	3170	}
	3171
	3172	struct ether_header eh;
	3173	u_short type;
	3174	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
	3175	if (err != 0) {
	3176	return remaining_caplen;
	3177	}
	3178
	3179	type = EXTRACT_SHORT(&eh.ether_type);
	3180	/* Include full ARP */
	3181	if (type == ETHERTYPE_ARP) {
	3182	ethlen = remaining_caplen;
	3183	} else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
	3184	ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
	3185	} else {
	3186	if (type == ETHERTYPE_IP) {
	3187	ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
	3188	remaining_caplen);
	3189	} else if (type == ETHERTYPE_IPV6) {
	3190	ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
	3191	remaining_caplen);
	3192	}
	3193	}
	3194	return ethlen;
	3195	}
	3196
	3197	static uint32_t
	3198	get_pkt_trunc_len(u_char *p, u_int len)
	3199	{
	3200	struct bpf_packet pkt = (struct bpf_packet )(void *) p;
	3201	struct pktap_header pktap = (struct pktap_header ) (pkt->bpfp_header);
	3202	uint32_t out_pkt_len = 0, tlen = 0;
	3203	/*
	3204	* pktap->pth_frame_pre_length is L2 header length and accounts
	3205	* for both pre and pre_adjust.
	3206	* pktap->pth_length is sizeof(pktap_header) (excl the pre/pre_adjust)
	3207	* pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	3208	* pre is the offset to the L3 header after the bpfp_header, or length
	3209	* of L2 header after bpfp_header, if present.
	3210	*/
	3211	int32_t pre = pktap->pth_frame_pre_length -
	3212	(pkt->bpfp_header_length - pktap->pth_length);
	3213
	3214	/* Length of the input packet starting from L3 header */
	3215	uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
	3216	if (pktap->pth_protocol_family == AF_INET \|\|
	3217	pktap->pth_protocol_family == AF_INET6) {
	3218	/* Contains L2 header */
	3219	if (pre > 0) {
	3220	if (pre < (int32_t)sizeof(struct ether_header)) {
	3221	goto too_short;
	3222	}
	3223
	3224	out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
	3225	} else if (pre == 0) {
	3226	if (pktap->pth_protocol_family == AF_INET) {
	3227	out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
	3228	} else if (pktap->pth_protocol_family == AF_INET6) {
	3229	out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
	3230	}
	3231	} else {
	3232	/* Ideally pre should be >= 0. This is an exception */
	3233	out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
	3234	}
	3235	} else {
	3236	if (pktap->pth_iftype == IFT_ETHER) {
	3237	if (in_pkt_len < sizeof(struct ether_header)) {
	3238	goto too_short;
	3239	}
	3240	/* At most include the Ethernet header and 16 bytes */
	3241	out_pkt_len = MIN(sizeof(struct ether_header) + 16,
	3242	in_pkt_len);
	3243	} else {
	3244	/*
	3245	* For unknown protocols include at most 16 bytes
	3246	*/
	3247	out_pkt_len = MIN(16, in_pkt_len);
	3248	}
	3249	}
	3250	done:
	3251	tlen = pkt->bpfp_header_length + out_pkt_len + pre;
	3252	return tlen;
	3253	too_short:
	3254	out_pkt_len = in_pkt_len;
	3255	goto done;
	3256	}
	3257
	3258	/*
	3259	* Move the packet data from interface memory (pkt) into the
	3260	* store buffer. Return 1 if it's time to wakeup a listener (buffer full),
	3261	* otherwise 0.
	3262	*/
	3263	static void
	3264	catchpacket(struct bpf_d d, struct bpf_packet pkt,
	3265	u_int snaplen, int outbound)
	3266	{
	3267	struct bpf_hdr *hp;
	3268	struct bpf_hdr_ext *ehp;
	3269	int totlen, curlen;
	3270	int hdrlen, caplen;
	3271	int do_wakeup = 0;
	3272	u_char *payload;
	3273	struct timeval tv;
	3274
	3275	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	3276	d->bd_bif->bif_hdrlen;
	3277	/*
	3278	* Figure out how many bytes to move. If the packet is
	3279	* greater or equal to the snapshot length, transfer that
	3280	* much. Otherwise, transfer the whole packet (unless
	3281	* we hit the buffer size limit).
	3282	*/
	3283	totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
	3284	if (totlen > d->bd_bufsize) {
	3285	totlen = d->bd_bufsize;
	3286	}
	3287
	3288	if (hdrlen > totlen) {
	3289	return;
	3290	}
	3291
	3292	/*
	3293	* Round up the end of the previous packet to the next longword.
	3294	*/
	3295	curlen = BPF_WORDALIGN(d->bd_slen);
	3296	if (curlen + totlen > d->bd_bufsize) {
	3297	/*
	3298	* This packet will overflow the storage buffer.
	3299	* Rotate the buffers if we can, then wakeup any
	3300	* pending reads.
	3301	*
	3302	* We cannot rotate buffers if a read is in progress
	3303	* so drop the packet
	3304	*/
	3305	if (d->bd_hbuf_read != 0) {
	3306	++d->bd_dcount;
	3307	return;
	3308	}
	3309
	3310	if (d->bd_fbuf == NULL) {
	3311	if (d->bd_headdrop == 0) {
	3312	/*
	3313	* We haven't completed the previous read yet,
	3314	* so drop the packet.
	3315	*/
	3316	++d->bd_dcount;
	3317	return;
	3318	}
	3319	/*
	3320	* Drop the hold buffer as it contains older packets
	3321	*/
	3322	d->bd_dcount += d->bd_hcnt;
	3323	d->bd_fbuf = d->bd_hbuf;
	3324	ROTATE_BUFFERS(d);
	3325	} else {
	3326	ROTATE_BUFFERS(d);
	3327	}
	3328	do_wakeup = 1;
	3329	curlen = 0;
	3330	} else if (d->bd_immediate \|\| d->bd_state == BPF_TIMED_OUT) {
	3331	/*
	3332	* Immediate mode is set, or the read timeout has
	3333	* already expired during a select call. A packet
	3334	* arrived, so the reader should be woken up.
	3335	*/
	3336	do_wakeup = 1;
	3337	}
	3338
	3339	/*
	3340	* Append the bpf header.
	3341	*/
	3342	microtime(&tv);
	3343	if (d->bd_flags & BPF_EXTENDED_HDR) {
	3344	struct mbuf *m;
	3345
	3346	m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
	3347	? pkt->bpfp_mbuf : NULL;
	3348	ehp = (struct bpf_hdr_ext )(void )(d->bd_sbuf + curlen);
	3349	memset(ehp, 0, sizeof(*ehp));
	3350	ehp->bh_tstamp.tv_sec = tv.tv_sec;
	3351	ehp->bh_tstamp.tv_usec = tv.tv_usec;
	3352
	3353	ehp->bh_datalen = pkt->bpfp_total_length;
	3354	ehp->bh_hdrlen = hdrlen;
	3355	caplen = ehp->bh_caplen = totlen - hdrlen;
	3356	if (m == NULL) {
	3357	if (outbound) {
	3358	ehp->bh_flags \|= BPF_HDR_EXT_FLAGS_DIR_OUT;
	3359	} else {
	3360	ehp->bh_flags \|= BPF_HDR_EXT_FLAGS_DIR_IN;
	3361	}
	3362	} else if (outbound) {
	3363	ehp->bh_flags \|= BPF_HDR_EXT_FLAGS_DIR_OUT;
	3364
	3365	/* only do lookups on non-raw INPCB */
	3366	if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID \|
	3367	PKTF_FLOW_LOCALSRC \| PKTF_FLOW_RAWSOCK)) ==
	3368	(PKTF_FLOW_ID \| PKTF_FLOW_LOCALSRC) &&
	3369	m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
	3370	ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
	3371	ehp->bh_proto = m->m_pkthdr.pkt_proto;
	3372	}
	3373	ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
	3374	if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
	3375	ehp->bh_pktflags \|= BPF_PKTFLAGS_TCP_REXMT;
	3376	}
	3377	if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
	3378	ehp->bh_pktflags \|= BPF_PKTFLAGS_START_SEQ;
	3379	}
	3380	if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
	3381	ehp->bh_pktflags \|= BPF_PKTFLAGS_LAST_PKT;
	3382	}
	3383	if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
	3384	ehp->bh_unsent_bytes =
	3385	m->m_pkthdr.bufstatus_if;
	3386	ehp->bh_unsent_snd =
	3387	m->m_pkthdr.bufstatus_sndbuf;
	3388	}
	3389	} else {
	3390	ehp->bh_flags \|= BPF_HDR_EXT_FLAGS_DIR_IN;
	3391	}
	3392	payload = (u_char *)ehp + hdrlen;
	3393	} else {
	3394	hp = (struct bpf_hdr )(void )(d->bd_sbuf + curlen);
	3395	hp->bh_tstamp.tv_sec = tv.tv_sec;
	3396	hp->bh_tstamp.tv_usec = tv.tv_usec;
	3397	hp->bh_datalen = pkt->bpfp_total_length;
	3398	hp->bh_hdrlen = hdrlen;
	3399	caplen = hp->bh_caplen = totlen - hdrlen;
	3400	payload = (u_char *)hp + hdrlen;
	3401	}
	3402	/*
	3403	* Copy the packet data into the store buffer and update its length.
	3404	*/
	3405	copy_bpf_packet(pkt, payload, caplen);
	3406	d->bd_slen = curlen + totlen;
	3407	d->bd_scnt += 1;
	3408
	3409	if (do_wakeup) {
	3410	bpf_wakeup(d);
	3411	}
	3412	}
	3413
	3414	/*
	3415	* Initialize all nonzero fields of a descriptor.
	3416	*/
	3417	static int
	3418	bpf_allocbufs(struct bpf_d *d)
	3419	{
	3420	if (d->bd_sbuf != NULL) {
	3421	FREE(d->bd_sbuf, M_DEVBUF);
	3422	d->bd_sbuf = NULL;
	3423	}
	3424	if (d->bd_hbuf != NULL) {
	3425	FREE(d->bd_hbuf, M_DEVBUF);
	3426	d->bd_hbuf = NULL;
	3427	}
	3428	if (d->bd_fbuf != NULL) {
	3429	FREE(d->bd_fbuf, M_DEVBUF);
	3430	d->bd_fbuf = NULL;
	3431	}
	3432
	3433	d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
	3434	if (d->bd_fbuf == NULL) {
	3435	return ENOBUFS;
	3436	}
	3437
	3438	d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
	3439	if (d->bd_sbuf == NULL) {
	3440	FREE(d->bd_fbuf, M_DEVBUF);
	3441	d->bd_fbuf = NULL;
	3442	return ENOBUFS;
	3443	}
	3444	d->bd_slen = 0;
	3445	d->bd_hlen = 0;
	3446	d->bd_scnt = 0;
	3447	d->bd_hcnt = 0;
	3448	return 0;
	3449	}
	3450
	3451	/*
	3452	* Free buffers currently in use by a descriptor.
	3453	* Called on close.
	3454	*/
	3455	static void
	3456	bpf_freed(struct bpf_d *d)
	3457	{
	3458	/*
	3459	* We don't need to lock out interrupts since this descriptor has
	3460	* been detached from its interface and it yet hasn't been marked
	3461	* free.
	3462	*/
	3463	if (d->bd_hbuf_read != 0) {
	3464	panic("bpf buffer freed during read");
	3465	}
	3466
	3467	if (d->bd_sbuf != 0) {
	3468	FREE(d->bd_sbuf, M_DEVBUF);
	3469	if (d->bd_hbuf != 0) {
	3470	FREE(d->bd_hbuf, M_DEVBUF);
	3471	}
	3472	if (d->bd_fbuf != 0) {
	3473	FREE(d->bd_fbuf, M_DEVBUF);
	3474	}
	3475	}
	3476	if (d->bd_filter) {
	3477	FREE(d->bd_filter, M_DEVBUF);
	3478	}
	3479	}
	3480
	3481	/*
	3482	* Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *)
	3483	* in the driver's softc; dlt is the link layer type; hdrlen is the fixed
	3484	* size of the link header (variable length headers not yet supported).
	3485	*/
	3486	void
	3487	bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
	3488	{
	3489	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
	3490	}
	3491
	3492	errno_t
	3493	bpf_attach(
	3494	ifnet_t ifp,
	3495	u_int32_t dlt,
	3496	u_int32_t hdrlen,
	3497	bpf_send_func send,
	3498	bpf_tap_func tap)
	3499	{
	3500	struct bpf_if *bp;
	3501	struct bpf_if *bp_new;
	3502	struct bpf_if *bp_before_first = NULL;
	3503	struct bpf_if *bp_first = NULL;
	3504	struct bpf_if *bp_last = NULL;
	3505	boolean_t found;
	3506
	3507	bp_new = (struct bpf_if ) _MALLOC(sizeof(bp_new), M_DEVBUF,
	3508	M_WAIT \| M_ZERO);
	3509	if (bp_new == 0) {
	3510	panic("bpfattach");
	3511	}
	3512
	3513	lck_mtx_lock(bpf_mlock);
	3514
	3515	/*
	3516	* Check if this interface/dlt is already attached. Remember the
	3517	* first and last attachment for this interface, as well as the
	3518	* element before the first attachment.
	3519	*/
	3520	found = FALSE;
	3521	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
	3522	if (bp->bif_ifp != ifp) {
	3523	if (bp_first != NULL) {
	3524	/* no more elements for this interface */
	3525	break;
	3526	}
	3527	bp_before_first = bp;
	3528	} else {
	3529	if (bp->bif_dlt == dlt) {
	3530	found = TRUE;
	3531	break;
	3532	}
	3533	if (bp_first == NULL) {
	3534	bp_first = bp;
	3535	}
	3536	bp_last = bp;
	3537	}
	3538	}
	3539	if (found) {
	3540	lck_mtx_unlock(bpf_mlock);
	3541	printf("bpfattach - %s with dlt %d is already attached\n",
	3542	if_name(ifp), dlt);
	3543	FREE(bp_new, M_DEVBUF);
	3544	return EEXIST;
	3545	}
	3546
	3547	bp_new->bif_ifp = ifp;
	3548	bp_new->bif_dlt = dlt;
	3549	bp_new->bif_send = send;
	3550	bp_new->bif_tap = tap;
	3551
	3552	if (bp_first == NULL) {
	3553	/* No other entries for this ifp */
	3554	bp_new->bif_next = bpf_iflist;
	3555	bpf_iflist = bp_new;
	3556	} else {
	3557	if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
	3558	/* Make this the first entry for this interface */
	3559	if (bp_before_first != NULL) {
	3560	/* point the previous to us */
	3561	bp_before_first->bif_next = bp_new;
	3562	} else {
	3563	/* we're the new head */
	3564	bpf_iflist = bp_new;
	3565	}
	3566	bp_new->bif_next = bp_first;
	3567	} else {
	3568	/* Add this after the last entry for this interface */
	3569	bp_new->bif_next = bp_last->bif_next;
	3570	bp_last->bif_next = bp_new;
	3571	}
	3572	}
	3573
	3574	/*
	3575	* Compute the length of the bpf header. This is not necessarily
	3576	* equal to SIZEOF_BPF_HDR because we want to insert spacing such
	3577	* that the network layer header begins on a longword boundary (for
	3578	* performance reasons and to alleviate alignment restrictions).
	3579	*/
	3580	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	3581	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	3582	sizeof(struct bpf_hdr_ext)) - hdrlen;
	3583
	3584	/* Take a reference on the interface */
	3585	ifnet_reference(ifp);
	3586
	3587	lck_mtx_unlock(bpf_mlock);
	3588
	3589	#ifndef __APPLE__
	3590	if (bootverbose) {
	3591	printf("bpf: %s attached\n", if_name(ifp));
	3592	}
	3593	#endif
	3594
	3595	return 0;
	3596	}
	3597
	3598	/*
	3599	* Detach bpf from an interface. This involves detaching each descriptor
	3600	* associated with the interface, and leaving bd_bif NULL. Notify each
	3601	* descriptor as it's detached so that any sleepers wake up and get
	3602	* ENXIO.
	3603	*/
	3604	void
	3605	bpfdetach(struct ifnet *ifp)
	3606	{
	3607	struct bpf_if bp, bp_prev, *bp_next;
	3608	struct bpf_d *d;
	3609
	3610	if (bpf_debug != 0) {
	3611	printf("%s: %s\n", __func__, if_name(ifp));
	3612	}
	3613
	3614	lck_mtx_lock(bpf_mlock);
	3615
	3616	/*
	3617	* Build the list of devices attached to that interface
	3618	* that we need to free while keeping the lock to maintain
	3619	* the integrity of the interface list
	3620	*/
	3621	bp_prev = NULL;
	3622	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
	3623	bp_next = bp->bif_next;
	3624
	3625	if (ifp != bp->bif_ifp) {
	3626	bp_prev = bp;
	3627	continue;
	3628	}
	3629	/* Unlink from the interface list */
	3630	if (bp_prev) {
	3631	bp_prev->bif_next = bp->bif_next;
	3632	} else {
	3633	bpf_iflist = bp->bif_next;
	3634	}
	3635
	3636	/* Detach the devices attached to the interface */
	3637	while ((d = bp->bif_dlist) != NULL) {
	3638	/*
	3639	* Take an extra reference to prevent the device
	3640	* from being freed when bpf_detachd() releases
	3641	* the reference for the interface list
	3642	*/
	3643	bpf_acquire_d(d);
	3644	bpf_detachd(d, 0);
	3645	bpf_wakeup(d);
	3646	bpf_release_d(d);
	3647	}
	3648	ifnet_release(ifp);
	3649	}
	3650
	3651	lck_mtx_unlock(bpf_mlock);
	3652	}
	3653
	3654	void
	3655	bpf_init(__unused void *unused)
	3656	{
	3657	#ifdef __APPLE__
	3658	int i;
	3659	int maj;
	3660
	3661	if (bpf_devsw_installed == 0) {
	3662	bpf_devsw_installed = 1;
	3663	bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
	3664	bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
	3665	bpf_mlock_attr = lck_attr_alloc_init();
	3666	lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
	3667	maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
	3668	if (maj == -1) {
	3669	if (bpf_mlock_attr) {
	3670	lck_attr_free(bpf_mlock_attr);
	3671	}
	3672	if (bpf_mlock_grp) {
	3673	lck_grp_free(bpf_mlock_grp);
	3674	}
	3675	if (bpf_mlock_grp_attr) {
	3676	lck_grp_attr_free(bpf_mlock_grp_attr);
	3677	}
	3678
	3679	bpf_mlock = NULL;
	3680	bpf_mlock_attr = NULL;
	3681	bpf_mlock_grp = NULL;
	3682	bpf_mlock_grp_attr = NULL;
	3683	bpf_devsw_installed = 0;
	3684	printf("bpf_init: failed to allocate a major number\n");
	3685	return;
	3686	}
	3687
	3688	for (i = 0; i < NBPFILTER; i++) {
	3689	bpf_make_dev_t(maj);
	3690	}
	3691	}
	3692	#else
	3693	cdevsw_add(&bpf_cdevsw);
	3694	#endif
	3695	}
	3696
	/*
	 * Non-Apple builds only: register an initializer named bpf_drvinit
	 * through SYSINIT.
	 * NOTE(review): no bpf_drvinit is visible in this part of the file
	 * (the init routine above is bpf_init) -- confirm the symbol exists
	 * before enabling this path.
	 */
	3697	#ifndef __APPLE__
	3698	SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
	3699	#endif
	3700
	3701	static int
	3702	sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
	3703	{
	3704	#pragma unused(arg1, arg2)
	3705	int i, err;
	3706
	3707	i = bpf_maxbufsize;
	3708
	3709	err = sysctl_handle_int(oidp, &i, 0, req);
	3710	if (err != 0 \|\| req->newptr == USER_ADDR_NULL) {
	3711	return err;
	3712	}
	3713
	3714	if (i < 0 \|\| i > BPF_MAXSIZE_CAP) {
	3715	i = BPF_MAXSIZE_CAP;
	3716	}
	3717
	3718	bpf_maxbufsize = i;
	3719	return err;
	3720	}