/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 *
 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include "bpf.h"

#ifndef __GNUC__
#define inline
#else
#define inline __inline
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/filedesc.h>
#include <sys/uio_internal.h>
#include <sys/file_internal.h>
#include <sys/event.h>

#include <sys/poll.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/vnode.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/isakmp.h>
#include <netinet6/esp.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <net/firewire.h>

#include <miscfs/devfs/devfs.h>
#include <net/dlil.h>
#include <net/pktap.h>

#include <kern/locks.h>
#include <kern/thread_call.h>
#include <libkern/section_keywords.h>

#include <os/log.h>

extern int tvtohz(struct timeval *);

#define BPF_BUFSIZE 4096
#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)

#define PRINET 26       /* interruptible */

#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
#define ESP_HDR_SIZE sizeof(struct newesp)

typedef void (*pktcopyfunc_t)(const void *, void *, size_t);

/*
 * The default read buffer size is patchable.
 */
static unsigned int bpf_bufsize = BPF_BUFSIZE;
SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_bufsize, 0, "");

static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
extern const int copysize_limit_panic;
#define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_maxbufsize, 0,
    sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");

static unsigned int bpf_maxdevices = 256;
SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_maxdevices, 0, "");
/*
 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 * On OS X it is off by default, so a process needs to use the ioctl
 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
 */
#if !XNU_TARGET_OS_OSX
static unsigned int bpf_wantpktap = 1;
#else /* XNU_TARGET_OS_OSX */
static unsigned int bpf_wantpktap = 0;
#endif /* XNU_TARGET_OS_OSX */
SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_wantpktap, 0, "");

static int bpf_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_debug, 0, "");

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 * bpf_dtab holds pointers to the descriptors, indexed by minor device #.
 */
static struct bpf_if *bpf_iflist;
#ifdef __APPLE__
/*
 * BSD now stores the bpf_d in the dev_t which is a struct
 * on their system. Our dev_t is an int, so we still store
 * the bpf_d in a separate table indexed by minor device #.
 *
 * The value stored in bpf_dtab[n] represents three states:
 *  NULL: device not opened
 *  BPF_DEV_RESERVED: device opening or closing
 *  other: device <n> opened with pointer to storage
 */
#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
static struct bpf_d **bpf_dtab = NULL;
static unsigned int bpf_dtab_size = 0;
static unsigned int nbpfilter = 0;

decl_lck_mtx_data(static, bpf_mlock_data);
static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
static lck_grp_t *bpf_mlock_grp;
static lck_grp_attr_t *bpf_mlock_grp_attr;
static lck_attr_t *bpf_mlock_attr;

#endif /* __APPLE__ */

static int bpf_allocbufs(struct bpf_d *);
static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static int bpf_detachd(struct bpf_d *d, int);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int,
    struct mbuf **, struct sockaddr *, int *);
static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
static void bpf_timed_out(void *, void *);
static void bpf_wakeup(struct bpf_d *);
static u_int get_pkt_trunc_len(u_char *, u_int);
static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
static void reset_d(struct bpf_d *);
static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
static int bpf_setdlt(struct bpf_d *, u_int);
static int bpf_set_traffic_class(struct bpf_d *, int);
static void bpf_set_packet_service_class(struct mbuf *, int);

static void bpf_acquire_d(struct bpf_d *);
static void bpf_release_d(struct bpf_d *);

static int bpf_devsw_installed;

void bpf_init(void *unused);
static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);

/*
 * Darwin differs from BSD here: the following are static
 * on BSD and not static on Darwin.
 */
d_open_t bpfopen;
d_close_t bpfclose;
d_read_t bpfread;
d_write_t bpfwrite;
ioctl_fcn_t bpfioctl;
select_fcn_t bpfselect;

/* Darwin's cdevsw struct differs slightly from BSD's */
#define CDEV_MAJOR 23
static const struct cdevsw bpf_cdevsw = {
	.d_open = bpfopen,
	.d_close = bpfclose,
	.d_read = bpfread,
	.d_write = bpfwrite,
	.d_ioctl = bpfioctl,
	.d_stop = eno_stop,
	.d_reset = eno_reset,
	.d_ttys = NULL,
	.d_select = bpfselect,
	.d_mmap = eno_mmap,
	.d_strategy = eno_strat,
	.d_reserved_1 = eno_getc,
	.d_reserved_2 = eno_putc,
	.d_type = 0
};

#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)

static int
bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
    struct sockaddr *sockp, int *datlen)
{
	struct mbuf *m;
	int error;
	int len;
	uint8_t sa_family;
	int hlen;

	switch (linktype) {
#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;       /* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return EIO;
	}

	// LP64todo - fix this!
	len = uio_resid(uio);
	*datlen = len - hlen;
	if ((unsigned)len > MCLBYTES) {
		return EIO;
	}

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return EIO;
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	MGETHDR(m, M_WAIT, MT_DATA);
	if (m == 0) {
		return ENOBUFS;
	}
	if ((unsigned)len > MHLEN) {
		MCLGET(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/*
	 * Make room for link header.
	 */
	if (hlen != 0) {
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
		if (error) {
			goto bad;
		}
	}
	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	if (error) {
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB: {
		struct ether_header *eh;

		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (_ether_cmp(etherbroadcastaddr,
			    eh->ether_dhost) == 0) {
				m->m_flags |= M_BCAST;
			} else {
				m->m_flags |= M_MCAST;
			}
		}
		break;
	}
	}

	return 0;
bad:
	m_freem(m);
	return error;
}

#ifdef __APPLE__

/*
 * The dynamic addition of a new device node must block all processes that
 * are opening the last device so that no process will get an unexpected
 * ENOENT.
 */
static void
bpf_make_dev_t(int maj)
{
	static int bpf_growing = 0;
	unsigned int cur_size = nbpfilter, i;

	if (nbpfilter >= bpf_maxdevices) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		int new_dtab_size;
		struct bpf_d **new_dtab = NULL;
		struct bpf_d **old_dtab = NULL;

		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = (struct bpf_d **)_MALLOC(
			sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
		if (new_dtab == 0) {
			printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
			goto done;
		}
		if (bpf_dtab) {
			bcopy(bpf_dtab, new_dtab,
			    sizeof(struct bpf_d *) * bpf_dtab_size);
		}
		bzero(new_dtab + bpf_dtab_size,
		    sizeof(struct bpf_d *) * NBPFILTER);
		old_dtab = bpf_dtab;
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
		if (old_dtab != NULL) {
			_FREE(old_dtab, M_DEVBUF);
		}
	}
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}

#endif

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int first = bp->bif_dlist == NULL;
	int error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if *tmp, *primary = NULL;

			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}

/*
 * Detach a file from its interface.
 *
 * Return 1 if it was closed by some thread, 0 otherwise.
 */
static int
bpf_detachd(struct bpf_d *d, int closing)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet *ifp;

	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	if (bpf_debug != 0) {
		printf("%s: %llx %s%s\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
		    if_name(ifp), closing ? " closing" : "");
	}

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}
		if (bp->bif_tap) {
			bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
		}

		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		lck_mtx_unlock(bpf_mlock);
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			printf("%s: ifnet_set_promiscuous failed\n", __func__);
		}
		lck_mtx_lock(bpf_mlock);
	}

	/*
	 * Wake up other threads that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified */
	bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Note that we've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);

done:
	/*
	 * When closing, make sure no other thread refers to the bpf_d
	 */
	if (bpf_debug != 0) {
		printf("%s: %llx done\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	}
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}

/*
 * Start asynchronous timer, if necessary.
 * Must be called with bpf_mlock held.
 */
static void
bpf_start_timer(struct bpf_d *d)
{
	uint64_t deadline;
	struct timeval tv;

	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;

		clock_interval_to_deadline(
			(uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
			NSEC_PER_USEC, &deadline);
		/*
		 * The state is BPF_IDLE, so the timer hasn't
		 * been started yet, and hasn't gone off yet;
		 * there is no thread call scheduled, so this
		 * won't change the schedule.
		 *
		 * XXX - what if, by the time it gets entered,
		 * the deadline has already passed?
		 */
		thread_call_enter_delayed(d->bd_thread_call, deadline);
		d->bd_state = BPF_WAITING;
	}
}

/*
 * Cancel asynchronous timer.
 * Must be called with bpf_mlock held.
 */
static boolean_t
bpf_stop_timer(struct bpf_d *d)
{
	/*
	 * If the timer has already gone off, this does nothing.
	 * Our caller is expected to set d->bd_state to BPF_IDLE,
	 * with the bpf_mlock, after we are called. bpf_timed_out()
	 * also grabs bpf_mlock, so, if the timer has gone off and
	 * bpf_timed_out() hasn't finished, it's waiting for the
	 * lock; when this thread releases the lock, it will
	 * find the state is BPF_IDLE, and just release the
	 * lock and return.
	 */
	return thread_call_cancel(d->bd_thread_call);
}

void
bpf_acquire_d(struct bpf_d *d)
{
	void *lr_saved = __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	d->bd_refcnt += 1;

	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
}

void
bpf_release_d(struct bpf_d *d)
{
	void *lr_saved = __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	if (d->bd_refcnt <= 0) {
		panic("%s: %p refcnt <= 0", __func__, d);
	}

	d->bd_refcnt -= 1;

	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;

	if (d->bd_refcnt == 0) {
		/* Assert the device is detached */
		if ((d->bd_flags & BPF_DETACHED) == 0) {
			panic("%s: %p BPF_DETACHED not set", __func__, d);
		}

		_FREE(d, M_DEVBUF);
	}
}

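/*
 * A sketch of how the two helpers above are used throughout this file:
 * a descriptor is pinned before bpf_mlock is dropped for blocking work,
 * and released once the lock is re-taken (pattern as in bpfread() and
 * bpfwrite() below):
 *
 *	lck_mtx_lock(bpf_mlock);
 *	bpf_acquire_d(d);		// pin d across the unlocked region
 *	lck_mtx_unlock(bpf_mlock);
 *	...				// e.g. uiomove() or bpf_movein()
 *	lck_mtx_lock(bpf_mlock);
 *	bpf_release_d(d);		// may free d when refcnt hits 0
 *	lck_mtx_unlock(bpf_mlock);
 *
 * The bd_ref_lr/bd_unref_lr arrays record the last BPF_REF_HIST caller
 * return addresses as a debugging aid for refcount imbalances.
 */
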
/*
 * Open ethernet device. Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
    struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
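	/*
	 * An illustrative userland loop for that model (hypothetical
	 * example, not part of this file):
	 *
	 *	int fd = -1;
	 *	char name[16];
	 *	for (int i = 0; fd == -1; i++) {
	 *		snprintf(name, sizeof(name), "/dev/bpf%d", i);
	 *		fd = open(name, O_RDWR);
	 *		if (fd == -1 && errno != EBUSY)
	 *			break;	// stop on any error but EBUSY
	 *	}
	 */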
	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
		bpf_make_dev_t(major(dev));
	}

	/*
	 * Each minor can be opened by only one process. If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context, otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater
	 * than 1 when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == NULL) {
		/* Reserve while opening */
		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	} else {
		lck_mtx_unlock(bpf_mlock);
		return EBUSY;
	}
	d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
	    M_WAIT | M_ZERO);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		printf("bpfopen: malloc bpf_d failed\n");
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}

	/* Mark "in use" and do most initialization. */
	bpf_acquire_d(d);
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_traffic_class = SO_TC_BE;
	d->bd_flags |= BPF_DETACHED;
	if (bpf_wantpktap) {
		d->bd_flags |= BPF_WANT_PKTAP;
	} else {
		d->bd_flags &= ~BPF_WANT_PKTAP;
	}
	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	if (d->bd_thread_call == NULL) {
		printf("bpfopen: malloc thread call failed\n");
		bpf_dtab[minor(dev)] = NULL;
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);

		return ENOMEM;
	}
	d->bd_opened_by = p;
	uuid_generate(d->bd_uuid);

	bpf_dtab[minor(dev)] = d;        /* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	return 0;
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpf_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		printf("%s: %llx\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting, to let the in-progress call complete
			 */
			d->bd_state = BPF_DRAINING;
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
		 * all subsequent threads should see that and fail with
		 * ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	if (d->bd_bif) {
		bpf_detachd(d, 1);
	}
	selthreadclear(&d->bd_sel);
	thread_call_free(d->bd_thread_call);

	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL; /* Mark closed */

	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}

#define BPF_SLEEP bpf_sleep

static int
bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
{
	u_int64_t abstime = 0;

	if (timo != 0) {
		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
	}

	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
}

static void
bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
{
	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
		struct pktap_v2_hdr *pktap_v2_hdr;

		pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;

		if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
			pktap_v2_finalize_proc_info(pktap_v2_hdr);
		}
	} else {
		if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
			pktap_finalize_proc_info(pktaphdr);
		}

		if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
			hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
			hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
		}
	}
}

/*
 * Rotate the packet buffers in descriptor d. Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	if (d->bd_hbuf_read != 0) \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL;
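
/*
 * In outline, the rotation above implements the classic BPF
 * store/hold/free triple-buffer scheme:
 *
 *	before:	sbuf = filling		hbuf = NULL	fbuf = spare
 *	after:	sbuf = old fbuf		hbuf = old sbuf	fbuf = NULL
 *
 * The store buffer keeps capturing while a reader drains the hold
 * buffer; the bd_hbuf_read check panics rather than rotate while
 * bpfread() is still copying the hold buffer out to user space.
 */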
/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int timed_out, hbuf_len;
	int error;
	int flags;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	bpf_acquire_d(d);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio_resid(uio) != d->bd_bufsize) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return EINVAL;
	}

	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
	}

	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;

	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
		    d->bd_slen != 0) {
			/*
			 * We're in immediate mode, or are reading
			 * in non-blocking mode, or a timer was
			 * started before the read (e.g., by select()
			 * or poll()) and has expired and a packet(s)
			 * either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface. If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}
		if (ioflag & IO_NDELAY) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return EWOULDBLOCK;
		}
		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
		/*
		 * Make sure device is still opened
		 */
		if ((d->bd_flags & BPF_CLOSING) != 0) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}

		while (d->bd_hbuf_read != 0) {
			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
			    NULL);
		}

		if ((d->bd_flags & BPF_CLOSING) != 0) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}

		if (error == EINTR || error == ERESTART) {
			if (d->bd_hbuf != NULL) {
				/*
				 * Because we msleep, the hold buffer might
				 * be filled when we wake up. Avoid rotating
				 * in this case.
				 */
				break;
			}
			if (d->bd_slen != 0) {
				/*
				 * Sometimes we may be interrupted often and
				 * the sleep above will not timeout.
				 * Regardless, we should rotate the buffers
				 * if there's any new data pending and
				 * return it.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			if (error == ERESTART) {
				printf("%s: %llx ERESTART to EINTR\n",
				    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
				error = EINTR;
			}
			return error;
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing. If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf) {
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;
			}

			if (d->bd_slen == 0) {
				bpf_release_d(d);
				lck_mtx_unlock(bpf_mlock);
				return 0;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */

	/*
	 * Set the hold buffer read flag, so we do not
	 * rotate the buffers until the hold buffer
	 * read is complete. Also to avoid issues resulting
	 * from page faults during disk sleep (<rdar://problem/13436396>).
	 */
	d->bd_hbuf_read = 1;
	hbuf = d->bd_hbuf;
	hbuf_len = d->bd_hlen;
	flags = d->bd_flags;
	lck_mtx_unlock(bpf_mlock);

#ifdef __APPLE__
	/*
	 * Before we move data to userland, we fill out the extended
	 * header fields.
	 */
	if (flags & BPF_EXTENDED_HDR) {
		char *p;

		p = hbuf;
		while (p < hbuf + hbuf_len) {
			struct bpf_hdr_ext *ehp;
			uint32_t flowid;
			struct so_procinfo soprocinfo;
			int found = 0;

			ehp = (struct bpf_hdr_ext *)(void *)p;
			if ((flowid = ehp->bh_flowid) != 0) {
				if (ehp->bh_proto == IPPROTO_TCP) {
					found = inp_findinpcb_procinfo(&tcbinfo,
					    flowid, &soprocinfo);
				} else if (ehp->bh_proto == IPPROTO_UDP) {
					found = inp_findinpcb_procinfo(&udbinfo,
					    flowid, &soprocinfo);
				}
				if (found == 1) {
					ehp->bh_pid = soprocinfo.spi_pid;
					strlcpy(&ehp->bh_comm[0],
					    &soprocinfo.spi_proc_name[0],
					    sizeof(ehp->bh_comm));
				}
				ehp->bh_flowid = 0;
			}

			if (flags & BPF_FINALIZE_PKTAP) {
				struct pktap_header *pktaphdr;

				pktaphdr = (struct pktap_header *)(void *)
				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));

				bpf_finalize_pktap((struct bpf_hdr *) ehp,
				    pktaphdr);
			}
			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
		}
	} else if (flags & BPF_FINALIZE_PKTAP) {
		char *p;

		p = hbuf;
		while (p < hbuf + hbuf_len) {
			struct bpf_hdr *hp;
			struct pktap_header *pktaphdr;

			hp = (struct bpf_hdr *)(void *)p;
			pktaphdr = (struct pktap_header *)(void *)
			    (p + BPF_WORDALIGN(hp->bh_hdrlen));

			bpf_finalize_pktap(hp, pktaphdr);

			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
		}
	}
#endif

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);

	lck_mtx_lock(bpf_mlock);
	/*
	 * Make sure device is still opened
	 */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	d->bd_hbuf_read = 0;
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_hcnt = 0;
	wakeup((caddr_t)d);

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);
	return error;
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static void
bpf_wakeup(struct bpf_d *d)
{
	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
		d->bd_state = BPF_IDLE;
	}
	wakeup((caddr_t)d);
	if (d->bd_async && d->bd_sig && d->bd_sigio) {
		pgsigio(d->bd_sigio, d->bd_sig);
	}

	selwakeup(&d->bd_sel);
	if ((d->bd_flags & BPF_KNOTE)) {
		KNOTE(&d->bd_sel.si_note, 1);
	}
}

static void
bpf_timed_out(void *arg, __unused void *dummy)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	lck_mtx_lock(bpf_mlock);
	if (d->bd_state == BPF_WAITING) {
		/*
		 * There's a select or kqueue waiting for this; if there's
		 * now stuff to read, wake it up.
		 */
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0) {
			bpf_wakeup(d);
		}
	} else if (d->bd_state == BPF_DRAINING) {
		/*
		 * A close is waiting for this to finish.
		 * Mark it as finished, and wake the close up.
		 */
		d->bd_state = BPF_IDLE;
		bpf_wakeup(d);
	}
	lck_mtx_unlock(bpf_mlock);
}

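/*
 * Summary of the read-timeout states driven by bpf_start_timer(),
 * bpf_stop_timer(), bpf_timed_out() and bpfclose() above:
 *
 *	BPF_IDLE      -> BPF_WAITING	timer armed (select/poll/read)
 *	BPF_WAITING   -> BPF_TIMED_OUT	callout fired, readers woken
 *	BPF_WAITING   -> BPF_DRAINING	close raced an in-flight callout;
 *					bpfclose() sleeps until the callout
 *					sets the state back to BPF_IDLE
 */
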
/* keep in sync with bpf_movein above: */
#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))

int
bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m = NULL;
	int error;
	char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
	int datlen = 0;
	int bif_dlt;
	int bd_hdrcmplt;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	bpf_acquire_d(d);

	if (d->bd_bif == 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENETDOWN;
	}
	if (uio_resid(uio) == 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return 0;
	}
	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);

	/*
	 * fix for PR-6849527
	 * getting variables onto the stack before dropping the lock for
	 * bpf_movein()
	 */
	bif_dlt = (int)d->bd_bif->bif_dlt;
	bd_hdrcmplt = d->bd_hdrcmplt;

	/* bpf_movein allocating mbufs; drop lock */
	lck_mtx_unlock(bpf_mlock);

	error = bpf_movein(uio, bif_dlt, &m,
	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
	    &datlen);

	/* take the lock again */
	lck_mtx_lock(bpf_mlock);
	if (error) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return error;
	}

	/* verify the device is still open */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		m_freem(m);
		return ENXIO;
	}

	if (d->bd_bif == NULL) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		m_free(m);
		return ENXIO;
	}

	if ((unsigned)datlen > ifp->if_mtu) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		m_freem(m);
		return EMSGSIZE;
	}

	bpf_set_packet_service_class(m, d->bd_traffic_class);

	lck_mtx_unlock(bpf_mlock);

	/*
	 * The driver frees the mbuf.
	 */
	if (d->bd_hdrcmplt) {
		if (d->bd_bif->bif_send) {
			error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
		} else {
			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
		}
	} else {
		error = dlil_output(ifp, PF_INET, m, NULL,
		    (struct sockaddr *)dst_buf, 0, NULL);
	}

	lck_mtx_lock(bpf_mlock);
	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return error;
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf_read != 0) {
		panic("resetting buffers during read");
	}

	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_scnt = 0;
	d->bd_hcnt = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)
{
	unsigned int i;

	for (i = 0; i < nbpfilter; i++) {
		struct bpf_d *d = bpf_dtab[i];

		if (d == NULL || d == BPF_DEV_RESERVED ||
		    (d->bd_flags & BPF_CLOSING) != 0) {
			continue;
		}
		if (uuid_compare(uuid, d->bd_uuid) == 0) {
			return d;
		}
	}

	return NULL;
}

/*
 * The BIOCSETUP command "atomically" attaches to the interface and
 * copies the buffers from another bpf descriptor. This minimizes the
 * risk of missing packets because it is done while holding
 * the BPF global lock.
 */
static int
bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
{
	struct bpf_d *d_from;
	int error = 0;

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Sanity checks
	 */
	d_from = bpf_get_device_from_uuid(uuid_from);
	if (d_from == NULL) {
		error = ENOENT;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: uuids not found error %d",
		    __func__, error);
		return error;
	}
	if (d_from->bd_opened_by != d_to->bd_opened_by) {
		error = EACCES;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: processes not matching error %d",
		    __func__, error);
		return error;
	}

	/*
	 * Prevent any read while copying
	 */
	while (d_to->bd_hbuf_read != 0) {
		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	}
	d_to->bd_hbuf_read = 1;

	while (d_from->bd_hbuf_read != 0) {
		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	}
	d_from->bd_hbuf_read = 1;

	/*
	 * Verify the devices have not been closed
	 */
	if (d_to->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_to is closing error %d",
		    __func__, error);
		goto done;
	}
	if (d_from->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_from is closing error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * For now require the same buffer size
	 */
	if (d_from->bd_bufsize != d_to->bd_bufsize) {
		error = EINVAL;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bufsizes not matching error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Attach to the interface
	 */
	error = bpf_setif(d_to, ifp, false, true);
	if (error != 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bpf_setif() failed error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Make sure the buffers are setup as expected by bpf_setif()
	 */
	ASSERT(d_to->bd_hbuf == NULL);
	ASSERT(d_to->bd_sbuf != NULL);
	ASSERT(d_to->bd_fbuf != NULL);

	/*
	 * Copy the buffers and update the pointers and counts
	 */
	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	d_to->bd_slen = d_from->bd_slen;
	d_to->bd_scnt = d_from->bd_scnt;

	if (d_from->bd_hbuf != NULL) {
		d_to->bd_hbuf = d_to->bd_fbuf;
		d_to->bd_fbuf = NULL;
		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	}
	d_to->bd_hlen = d_from->bd_hlen;
	d_to->bd_hcnt = d_from->bd_hcnt;

	if (bpf_debug > 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: done slen %u scnt %u hlen %u hcnt %u",
		    __func__, d_to->bd_slen, d_to->bd_scnt,
		    d_to->bd_hlen, d_to->bd_hcnt);
	}
done:
	d_from->bd_hbuf_read = 0;
	wakeup((caddr_t)d_from);

	d_to->bd_hbuf_read = 0;
	wakeup((caddr_t)d_to);

	return error;
}

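/*
 * A usage sketch for BIOCSETUP (hypothetical userland code; it assumes
 * the struct bpf_setup_args layout and a companion UUID-query ioctl as
 * declared in <net/bpf.h> -- treat the names as illustrative): a process
 * queries the UUID of a bpf device it already owns, then asks a second
 * device it also opened to attach to an interface and inherit the first
 * device's buffered packets in one atomic step:
 *
 *	struct bpf_setup_args bsa;
 *	ioctl(fd_from, BIOCGETUUID, &bsa.bsa_uuid);
 *	strlcpy(bsa.bsa_if_name, "en0", sizeof(bsa.bsa_if_name));
 *	ioctl(fd_to, BIOCSETUP, &bsa);	// attach fd_to and copy buffers
 */
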
/*
 * FIONREAD		Check for read packet available.
 * SIOCGIFADDR		Get interface address - convenient hook to driver.
 * BIOCGBLEN		Get buffer len [for read()].
 * BIOCSETF		Set ethernet read filter.
 * BIOCFLUSH		Flush read packet buffer.
 * BIOCPROMISC		Put interface into promiscuous mode.
 * BIOCGDLT		Get link layer type.
 * BIOCGETIF		Get interface name.
 * BIOCSETIF		Set interface.
 * BIOCSRTIMEOUT	Set read timeout.
 * BIOCGRTIMEOUT	Get read timeout.
 * BIOCGSTATS		Get packet stats.
 * BIOCIMMEDIATE	Set immediate mode.
 * BIOCVERSION		Get filter language version.
 * BIOCGHDRCMPLT	Get "header already complete" flag
 * BIOCSHDRCMPLT	Set "header already complete" flag
 * BIOCGSEESENT		Get "see packets sent" flag
 * BIOCSSEESENT		Set "see packets sent" flag
 * BIOCSETTC		Set traffic class.
 * BIOCGETTC		Get traffic class.
 * BIOCSEXTHDR		Set "extended header" flag
 * BIOCSHEADDROP	Drop head of the buffer if user is not reading
 * BIOCGHEADDROP	Get "head-drop" flag
 */
/* ARGSUSED */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
    struct proc *p)
{
	struct bpf_d *d;
	int error = 0;
	u_int int_arg;
	struct ifreq ifr;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	bpf_acquire_d(d);

	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
	}
	d->bd_state = BPF_IDLE;

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:                  /* int */
	{
		int n;

		n = d->bd_slen;
		if (d->bd_hbuf && d->bd_hbuf_read == 0) {
			n += d->bd_hlen;
		}

		bcopy(&n, addr, sizeof(n));
		break;
	}

	case SIOCGIFADDR:               /* struct ifreq */
	{
		struct ifnet *ifp;

		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			ifp = d->bd_bif->bif_ifp;
			error = ifnet_ioctl(ifp, 0, cmd, addr);
		}
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:                 /* u_int */
		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN: {               /* u_int */
		u_int size;
		unsigned int maxbufsize = bpf_maxbufsize;

		/*
		 * Allow a larger buffer in head-drop mode, with the
		 * assumption the reading process may be low priority but
		 * is interested in the most recent traffic
		 */
		if (d->bd_headdrop != 0) {
			maxbufsize = 2 * bpf_maxbufsize;
		}

		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
			/*
			 * Interface already attached, unable to change buffers
			 */
			error = EINVAL;
			break;
		}
		bcopy(addr, &size, sizeof(size));

		if (size > maxbufsize) {
			d->bd_bufsize = maxbufsize;

			os_log_info(OS_LOG_DEFAULT,
			    "%s bufsize capped to %u from %u",
			    __func__, d->bd_bufsize, size);
		} else if (size < BPF_MINBUFSIZE) {
			d->bd_bufsize = BPF_MINBUFSIZE;

			os_log_info(OS_LOG_DEFAULT,
			    "%s bufsize bumped to %u from %u",
			    __func__, d->bd_bufsize, size);
		} else {
			d->bd_bufsize = size;
		}

		/* It's a read/write ioctl */
		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
		break;
	}
	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF32:
	case BIOCSETFNR32: {            /* struct bpf_program32 */
		struct bpf_program32 prg32;

		bcopy(addr, &prg32, sizeof(prg32));
		error = bpf_setf(d, prg32.bf_len,
		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
		break;
	}

	case BIOCSETF64:
	case BIOCSETFNR64: {            /* struct bpf_program64 */
		struct bpf_program64 prg64;

		bcopy(addr, &prg64, sizeof(prg64));
		error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
		break;
	}

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		while (d->bd_hbuf_read != 0) {
			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
			    NULL);
		}
		if ((d->bd_flags & BPF_CLOSING) != 0) {
			error = ENXIO;
			break;
		}
		reset_d(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error == 0) {
				d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:                  /* u_int */
		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
		}
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:              /* struct bpf_dltlist */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			error = bpf_getdltlist(d, addr, p);
		}
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:                  /* u_int */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			u_int dlt;

			bcopy(addr, &dlt, sizeof(dlt));

			if (dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				dlt = DLT_RAW;
			}
			error = bpf_setdlt(d, dlt);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:                 /* struct ifreq */
		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;

			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF: {               /* struct ifreq */
		ifnet_t ifp;

		bcopy(addr, &ifr, sizeof(ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		ifp = ifunit(ifr.ifr_name);
		if (ifp == NULL) {
			error = ENXIO;
		} else {
			error = bpf_setif(d, ifp, true, false);
		}
		break;
	}

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
		struct user32_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof(_tv));
		tv.tv_sec = _tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0) {
			d->bd_rtout = tvtohz(&tv) - 1;
		}
		break;
	}

	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
		struct user64_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof(_tv));
		tv.tv_sec = _tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0) {
			d->bd_rtout = tvtohz(&tv) - 1;
		}
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
		struct user32_timeval tv;

		bzero(&tv, sizeof(tv));
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;
		bcopy(&tv, addr, sizeof(tv));
		break;
	}

	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
		struct user64_timeval tv;
316670eb 1933 struct user64_timeval tv;
6d2010ae 1934
0a7de745 1935 bzero(&tv, sizeof(tv));
316670eb
A
1936 tv.tv_sec = d->bd_rtout / hz;
1937 tv.tv_usec = (d->bd_rtout % hz) * tick;
0a7de745 1938 bcopy(&tv, addr, sizeof(tv));
316670eb
A
1939 break;
1940 }
1c79356b
A
1941
1942 /*
1943 * Get packet stats.
1944 */
0a7de745 1945 case BIOCGSTATS: { /* struct bpf_stat */
316670eb 1946 struct bpf_stat bs;
1c79356b 1947
0a7de745 1948 bzero(&bs, sizeof(bs));
316670eb
A
1949 bs.bs_recv = d->bd_rcount;
1950 bs.bs_drop = d->bd_dcount;
0a7de745 1951 bcopy(&bs, addr, sizeof(bs));
316670eb
A
1952 break;
1953 }
1c79356b
A
1954
1955 /*
1956 * Set immediate mode.
1957 */
0a7de745 1958 case BIOCIMMEDIATE: /* u_int */
3e170ce0 1959 d->bd_immediate = *(u_int *)(void *)addr;
1c79356b
A
1960 break;
1961
0a7de745 1962 case BIOCVERSION: { /* struct bpf_version */
316670eb 1963 struct bpf_version bv;
1c79356b 1964
0a7de745 1965 bzero(&bv, sizeof(bv));
316670eb
A
1966 bv.bv_major = BPF_MAJOR_VERSION;
1967 bv.bv_minor = BPF_MINOR_VERSION;
0a7de745 1968 bcopy(&bv, addr, sizeof(bv));
316670eb
A
1969 break;
1970 }
1c79356b 1971
9bccf70c
A
1972 /*
1973 * Get "header already complete" flag
1974 */
0a7de745
A
1975 case BIOCGHDRCMPLT: /* u_int */
1976 bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
9bccf70c
A
1977 break;
1978
1979 /*
1980 * Set "header already complete" flag
1981 */
0a7de745
A
1982 case BIOCSHDRCMPLT: /* u_int */
1983 bcopy(addr, &int_arg, sizeof(int_arg));
316670eb 1984 d->bd_hdrcmplt = int_arg ? 1 : 0;
9bccf70c
A
1985 break;
1986
1987 /*
1988 * Get "see sent packets" flag
1989 */
0a7de745
A
1990 case BIOCGSEESENT: /* u_int */
1991 bcopy(&d->bd_seesent, addr, sizeof(u_int));
9bccf70c
A
1992 break;
1993
1994 /*
1995 * Set "see sent packets" flag
1996 */
0a7de745
A
1997 case BIOCSSEESENT: /* u_int */
1998 bcopy(addr, &d->bd_seesent, sizeof(u_int));
316670eb
A
1999 break;
2000
2001 /*
2002 * Set traffic service class
2003 */
0a7de745 2004 case BIOCSETTC: { /* int */
316670eb
A
2005 int tc;
2006
0a7de745 2007 bcopy(addr, &tc, sizeof(int));
316670eb 2008 error = bpf_set_traffic_class(d, tc);
9bccf70c 2009 break;
316670eb 2010 }
9bccf70c 2011
316670eb
A
2012 /*
2013 * Get traffic service class
2014 */
0a7de745
A
2015 case BIOCGETTC: /* int */
2016 bcopy(&d->bd_traffic_class, addr, sizeof(int));
1c79356b
A
2017 break;
2018
0a7de745 2019 case FIONBIO: /* Non-blocking I/O; int */
316670eb
A
2020 break;
2021
0a7de745
A
2022 case FIOASYNC: /* Send signal on receive packets; int */
2023 bcopy(addr, &d->bd_async, sizeof(int));
1c79356b 2024 break;
9bccf70c 2025#ifndef __APPLE__
1c79356b
A
2026 case FIOSETOWN:
2027 error = fsetown(*(int *)addr, &d->bd_sigio);
2028 break;
2029
2030 case FIOGETOWN:
2031 *(int *)addr = fgetown(d->bd_sigio);
2032 break;
2033
2034 /* This is deprecated, FIOSETOWN should be used instead. */
2035 case TIOCSPGRP:
2036 error = fsetown(-(*(int *)addr), &d->bd_sigio);
2037 break;
2038
2039 /* This is deprecated, FIOGETOWN should be used instead. */
2040 case TIOCGPGRP:
2041 *(int *)addr = -fgetown(d->bd_sigio);
2042 break;
2043#endif
0a7de745 2044 case BIOCSRSIG: { /* Set receive signal; u_int */
316670eb 2045 u_int sig;
1c79356b 2046
0a7de745 2047 bcopy(addr, &sig, sizeof(u_int));
1c79356b 2048
0a7de745 2049 if (sig >= NSIG) {
316670eb 2050 error = EINVAL;
0a7de745 2051 } else {
316670eb 2052 d->bd_sig = sig;
0a7de745 2053 }
1c79356b
A
2054 break;
2055 }
0a7de745
A
2056 case BIOCGRSIG: /* u_int */
2057 bcopy(&d->bd_sig, addr, sizeof(u_int));
316670eb 2058 break;
39236c6e 2059#ifdef __APPLE__
0a7de745
A
2060 case BIOCSEXTHDR: /* u_int */
2061 bcopy(addr, &int_arg, sizeof(int_arg));
2062 if (int_arg) {
fe8ab488 2063 d->bd_flags |= BPF_EXTENDED_HDR;
0a7de745 2064 } else {
fe8ab488 2065 d->bd_flags &= ~BPF_EXTENDED_HDR;
0a7de745 2066 }
316670eb 2067 break;
39236c6e 2068
0a7de745
A
2069 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2070 ifnet_t ifp;
39236c6e
A
2071 struct bpf_if *bp;
2072
0a7de745 2073 bcopy(addr, &ifr, sizeof(ifr));
39236c6e
A
2074 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2075 ifp = ifunit(ifr.ifr_name);
2076 if (ifp == NULL) {
2077 error = ENXIO;
2078 break;
2079 }
2080 ifr.ifr_intval = 0;
2081 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2082 struct bpf_d *bpf_d;
d9a64523 2083
0a7de745 2084 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
39236c6e 2085 continue;
0a7de745 2086 }
d9a64523
A
2087 for (bpf_d = bp->bif_dlist; bpf_d;
2088 bpf_d = bpf_d->bd_next) {
39236c6e
A
2089 ifr.ifr_intval += 1;
2090 }
2091 }
0a7de745 2092 bcopy(&ifr, addr, sizeof(ifr));
39236c6e
A
2093 break;
2094 }
0a7de745 2095 case BIOCGWANTPKTAP: /* u_int */
fe8ab488 2096 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
0a7de745 2097 bcopy(&int_arg, addr, sizeof(int_arg));
fe8ab488
A
2098 break;
2099
0a7de745
A
2100 case BIOCSWANTPKTAP: /* u_int */
2101 bcopy(addr, &int_arg, sizeof(int_arg));
2102 if (int_arg) {
d9a64523 2103 d->bd_flags |= BPF_WANT_PKTAP;
0a7de745 2104 } else {
d9a64523 2105 d->bd_flags &= ~BPF_WANT_PKTAP;
0a7de745 2106 }
fe8ab488 2107 break;
39236c6e 2108#endif
3e170ce0
A
2109
2110 case BIOCSHEADDROP:
0a7de745 2111 bcopy(addr, &int_arg, sizeof(int_arg));
3e170ce0
A
2112 d->bd_headdrop = int_arg ? 1 : 0;
2113 break;
2114
2115 case BIOCGHEADDROP:
0a7de745 2116 bcopy(&d->bd_headdrop, addr, sizeof(int));
3e170ce0 2117 break;
d9a64523
A
2118
2119 case BIOCSTRUNCATE:
2120 bcopy(addr, &int_arg, sizeof(int_arg));
0a7de745
A
2121 if (int_arg) {
2122 d->bd_flags |= BPF_TRUNCATE;
2123 } else {
d9a64523 2124 d->bd_flags &= ~BPF_TRUNCATE;
0a7de745 2125 }
d9a64523
A
2126 break;
2127
2128 case BIOCGETUUID:
0a7de745 2129 bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
d9a64523
A
2130 break;
2131
2132 case BIOCSETUP: {
2133 struct bpf_setup_args bsa;
0a7de745 2134 ifnet_t ifp;
d9a64523 2135
0a7de745 2136 bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
d9a64523
A
2137 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2138 ifp = ifunit(bsa.bsa_ifname);
2139 if (ifp == NULL) {
2140 error = ENXIO;
2141 os_log_info(OS_LOG_DEFAULT,
2142 "%s: ifnet not found for %s error %d",
2143 __func__, bsa.bsa_ifname, error);
2144 break;
0a7de745 2145 }
d9a64523
A
2146
2147 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2148 break;
2149 }
2150 case BIOCSPKTHDRV2:
2151 bcopy(addr, &int_arg, sizeof(int_arg));
0a7de745 2152 if (int_arg != 0) {
d9a64523 2153 d->bd_flags |= BPF_PKTHDRV2;
0a7de745 2154 } else {
d9a64523 2155 d->bd_flags &= ~BPF_PKTHDRV2;
0a7de745 2156 }
d9a64523
A
2157 break;
2158
2159 case BIOCGPKTHDRV2:
2160 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
0a7de745 2161 bcopy(&int_arg, addr, sizeof(int));
d9a64523 2162 break;
316670eb
A
2163 }
2164
3e170ce0 2165 bpf_release_d(d);
91447636 2166 lck_mtx_unlock(bpf_mlock);
b0d623f7 2167
0a7de745 2168 return error;
1c79356b
A
2169}
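/*
 * Illustrative userland sketch (not part of this file) of driving the
 * ioctls handled above: probe for a free /dev/bpfN node, size the
 * buffer with BIOCSBLEN before binding an interface with BIOCSETIF
 * (buffers cannot be resized once attached, per the EINVAL case
 * above), then read back the size the kernel actually granted.  The
 * interface name "en0" is assumed for illustration.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/if.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int
open_bpf_on_en0(void)
{
	char dev[16];
	struct ifreq ifr;
	u_int blen = 1 << 20;   /* request 1 MiB; the kernel may cap it */
	int fd = -1;

	/* Probe /dev/bpf0, /dev/bpf1, ... until one opens */
	for (int i = 0; i < 256 && fd < 0; i++) {
		snprintf(dev, sizeof(dev), "/dev/bpf%d", i);
		fd = open(dev, O_RDWR);
	}
	if (fd < 0)
		return -1;

	/* Must precede BIOCSETIF; the ioctl is read/write and returns
	 * the possibly clamped size back in blen */
	(void)ioctl(fd, BIOCSBLEN, &blen);

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) < 0) {
		close(fd);
		return -1;
	}
	(void)ioctl(fd, BIOCGBLEN, &blen);
	printf("granted buffer: %u bytes\n", blen);
	return fd;
}
#endif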
2170
2171/*
2172 * Set d's packet filter program to the one at bf_insns. If this file
2173 * already has a filter, free and replace it. Returns EINVAL for bogus requests.
2174 */
2175static int
3e170ce0
A
2176bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2177 u_long cmd)
1c79356b
A
2178{
2179 struct bpf_insn *fcode, *old;
2180 u_int flen, size;
1c79356b 2181
0a7de745 2182 while (d->bd_hbuf_read != 0) {
39236c6e 2183 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2184 }
39236c6e 2185
0a7de745
A
2186 if ((d->bd_flags & BPF_CLOSING) != 0) {
2187 return ENXIO;
2188 }
d9a64523 2189
1c79356b 2190 old = d->bd_filter;
2d21ac55 2191 if (bf_insns == USER_ADDR_NULL) {
0a7de745
A
2192 if (bf_len != 0) {
2193 return EINVAL;
2194 }
2d21ac55 2195 d->bd_filter = NULL;
1c79356b 2196 reset_d(d);
0a7de745
A
2197 if (old != 0) {
2198 FREE(old, M_DEVBUF);
2199 }
2200 return 0;
1c79356b 2201 }
2d21ac55 2202 flen = bf_len;
0a7de745
A
2203 if (flen > BPF_MAXINSNS) {
2204 return EINVAL;
2205 }
1c79356b 2206
91447636 2207 size = flen * sizeof(struct bpf_insn);
1c79356b 2208 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
9bccf70c 2209#ifdef __APPLE__
0a7de745
A
2210 if (fcode == NULL) {
2211 return ENOBUFS;
2212 }
9bccf70c 2213#endif
2d21ac55 2214 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1c79356b 2215 bpf_validate(fcode, (int)flen)) {
1c79356b 2216 d->bd_filter = fcode;
d9a64523 2217
0a7de745 2218 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
39236c6e 2219 reset_d(d);
0a7de745 2220 }
d9a64523 2221
0a7de745
A
2222 if (old != 0) {
2223 FREE(old, M_DEVBUF);
2224 }
1c79356b 2225
0a7de745 2226 return 0;
1c79356b 2227 }
0a7de745
A
2228 FREE(fcode, M_DEVBUF);
2229 return EINVAL;
1c79356b
A
2230}
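/*
 * For reference, a minimal userland sketch of the other side of
 * bpf_setf(): a hand-assembled accept-all program installed with
 * BIOCSETF.  A program with bf_len == 0 and bf_insns == NULL, as
 * handled above, removes the filter instead.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
install_accept_all(int bpf_fd)
{
	/* BPF_RET|BPF_K returns the number of bytes to capture;
	 * (u_int)-1 means no snap limit, keep the whole packet */
	static struct bpf_insn insns[] = {
		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
	};
	struct bpf_program prog = {
		.bf_len = sizeof(insns) / sizeof(insns[0]),
		.bf_insns = insns,
	};

	/* The kernel copyin()s bf_len instructions and runs
	 * bpf_validate() before swapping the program in */
	return ioctl(bpf_fd, BIOCSETF, &prog);
}
#endif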
2231
2232/*
2233 * Detach a file from its current interface (if attached at all) and attach
2234 * to the interface indicated by theywant.
2235 * Return an errno or 0.
2236 */
2237static int
d9a64523 2238bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
1c79356b
A
2239{
2240 struct bpf_if *bp;
2d21ac55 2241 int error;
39236c6e 2242
0a7de745 2243 while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
39236c6e 2244 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2245 }
39236c6e 2246
0a7de745
A
2247 if ((d->bd_flags & BPF_CLOSING) != 0) {
2248 return ENXIO;
2249 }
39236c6e 2250
1c79356b
A
2251 /*
2252 * Look through attached interfaces for the named one.
2253 */
2254 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2255 struct ifnet *ifp = bp->bif_ifp;
2256
0a7de745 2257 if (ifp == 0 || ifp != theywant) {
1c79356b 2258 continue;
0a7de745 2259 }
fe8ab488 2260 /*
5ba3f43e 2261 * Do not use DLT_PKTAP, unless requested explicitly
fe8ab488 2262 */
0a7de745 2263 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
fe8ab488 2264 continue;
0a7de745 2265 }
5c9f4661
A
2266 /*
2267 * Skip the coprocessor interface
2268 */
0a7de745 2269 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
5c9f4661 2270 continue;
0a7de745 2271 }
1c79356b
A
2272 /*
2273 * We found the requested interface.
813fb2f6
A
2274 * Allocate the packet buffers.
2275 */
2276 error = bpf_allocbufs(d);
0a7de745
A
2277 if (error != 0) {
2278 return error;
2279 }
813fb2f6
A
2280 /*
2281 * Detach if attached to something else.
1c79356b 2282 */
1c79356b 2283 if (bp != d->bd_bif) {
813fb2f6 2284 if (d->bd_bif != NULL) {
0a7de745
A
2285 if (bpf_detachd(d, 0) != 0) {
2286 return ENXIO;
2287 }
2288 }
2289 if (bpf_attachd(d, bp) != 0) {
2290 return ENXIO;
2d21ac55 2291 }
1c79356b 2292 }
d9a64523 2293 if (do_reset) {
0a7de745 2294 reset_d(d);
d9a64523 2295 }
0a7de745 2296 return 0;
1c79356b
A
2297 }
2298 /* Not found. */
0a7de745 2299 return ENXIO;
1c79356b
A
2300}
2301
2d21ac55
A
2302/*
2303 * Get a list of the data link types available on the interface.
2304 */
2305static int
316670eb 2306bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2d21ac55 2307{
0a7de745
A
2308 u_int n;
2309 int error;
2310 struct ifnet *ifp;
2311 struct bpf_if *bp;
2312 user_addr_t dlist;
316670eb 2313 struct bpf_dltlist bfl;
b0d623f7 2314
0a7de745 2315 bcopy(addr, &bfl, sizeof(bfl));
b0d623f7 2316 if (proc_is64bit(p)) {
316670eb 2317 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
b0d623f7 2318 } else {
316670eb 2319 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2d21ac55 2320 }
b0d623f7 2321
2d21ac55
A
2322 ifp = d->bd_bif->bif_ifp;
2323 n = 0;
2324 error = 0;
fe8ab488 2325
2d21ac55 2326 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
0a7de745 2327 if (bp->bif_ifp != ifp) {
2d21ac55 2328 continue;
0a7de745 2329 }
d9a64523 2330 /*
5ba3f43e 2331 * Do not use DLT_PKTAP, unless requested explicitly
fe8ab488 2332 */
0a7de745 2333 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
fe8ab488 2334 continue;
0a7de745 2335 }
b0d623f7 2336 if (dlist != USER_ADDR_NULL) {
316670eb 2337 if (n >= bfl.bfl_len) {
0a7de745 2338 return ENOMEM;
2d21ac55 2339 }
b0d623f7 2340 error = copyout(&bp->bif_dlt, dlist,
0a7de745
A
2341 sizeof(bp->bif_dlt));
2342 if (error != 0) {
316670eb 2343 break;
0a7de745
A
2344 }
2345 dlist += sizeof(bp->bif_dlt);
2d21ac55
A
2346 }
2347 n++;
2348 }
316670eb 2349 bfl.bfl_len = n;
0a7de745 2350 bcopy(&bfl, addr, sizeof(bfl));
316670eb 2351
0a7de745 2352 return error;
2d21ac55
A
2353}
2354
2355/*
2356 * Set the data link type of a BPF instance.
2357 */
2358static int
3e170ce0 2359bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2d21ac55
A
2360{
2361 int error, opromisc;
2362 struct ifnet *ifp;
2363 struct bpf_if *bp;
d9a64523 2364
0a7de745
A
2365 if (d->bd_bif->bif_dlt == dlt) {
2366 return 0;
2367 }
d9a64523 2368
0a7de745 2369 while (d->bd_hbuf_read != 0) {
39236c6e 2370 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2371 }
39236c6e 2372
0a7de745
A
2373 if ((d->bd_flags & BPF_CLOSING) != 0) {
2374 return ENXIO;
2375 }
fe8ab488 2376
2d21ac55
A
2377 ifp = d->bd_bif->bif_ifp;
2378 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
5ba3f43e
A
2379 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2380 /*
2381 * Do not use DLT_PKTAP, unless requested explicitly
2382 */
d9a64523
A
2383 if (bp->bif_dlt == DLT_PKTAP &&
2384 !(d->bd_flags & BPF_WANT_PKTAP)) {
5ba3f43e
A
2385 continue;
2386 }
2d21ac55 2387 break;
5ba3f43e 2388 }
2d21ac55
A
2389 }
2390 if (bp != NULL) {
2391 opromisc = d->bd_promisc;
0a7de745
A
2392 if (bpf_detachd(d, 0) != 0) {
2393 return ENXIO;
2394 }
2d21ac55
A
2395 error = bpf_attachd(d, bp);
2396 if (error) {
2397 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
d9a64523
A
2398 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2399 error);
0a7de745 2400 return error;
2d21ac55
A
2401 }
2402 reset_d(d);
2403 if (opromisc) {
2404 lck_mtx_unlock(bpf_mlock);
2405 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2406 lck_mtx_lock(bpf_mlock);
3e170ce0
A
2407 if (error) {
2408 printf("%s: ifpromisc %s%d failed (%d)\n",
2409 __func__, ifnet_name(bp->bif_ifp),
2410 ifnet_unit(bp->bif_ifp), error);
2411 } else {
2d21ac55 2412 d->bd_promisc = 1;
3e170ce0 2413 }
2d21ac55
A
2414 }
2415 }
0a7de745 2416 return bp == NULL ? EINVAL : 0;
2d21ac55
A
2417}
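/*
 * Sketch of the matching userland calls: BIOCGDLTLIST is typically
 * issued twice, first with a NULL list to learn the count (the loop
 * above counts entries even when no buffer is supplied), then with a
 * buffer, before selecting a type with BIOCSDLT.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdlib.h>
#include <string.h>

static int
select_dlt(int bpf_fd, u_int wanted)
{
	struct bpf_dltlist bfl;
	int ret = -1;

	memset(&bfl, 0, sizeof(bfl));
	/* First call: bfl_list == NULL, kernel fills in bfl_len */
	if (ioctl(bpf_fd, BIOCGDLTLIST, &bfl) < 0)
		return -1;
	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
	if (bfl.bfl_list == NULL)
		return -1;
	/* Second call: kernel copies out up to bfl_len entries */
	if (ioctl(bpf_fd, BIOCGDLTLIST, &bfl) == 0) {
		for (u_int i = 0; i < bfl.bfl_len; i++) {
			if (bfl.bfl_list[i] == wanted) {
				ret = ioctl(bpf_fd, BIOCSDLT, &wanted);
				break;
			}
		}
	}
	free(bfl.bfl_list);
	return ret;
}
#endif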
2418
316670eb
A
2419static int
2420bpf_set_traffic_class(struct bpf_d *d, int tc)
2421{
2422 int error = 0;
2423
0a7de745 2424 if (!SO_VALID_TC(tc)) {
316670eb 2425 error = EINVAL;
0a7de745 2426 } else {
316670eb 2427 d->bd_traffic_class = tc;
0a7de745 2428 }
316670eb 2429
0a7de745 2430 return error;
316670eb
A
2431}
2432
2433static void
2434bpf_set_packet_service_class(struct mbuf *m, int tc)
2435{
0a7de745 2436 if (!(m->m_flags & M_PKTHDR)) {
316670eb 2437 return;
0a7de745 2438 }
316670eb
A
2439
2440 VERIFY(SO_VALID_TC(tc));
2441 (void) m_set_service_class(m, so_tc2msc(tc));
2442}
2443
1c79356b 2444/*
b0d623f7 2445 * Support for select()
1c79356b
A
2446 *
2447 * Return true iff the specific operation will not block indefinitely.
2448 * Otherwise, return false but make a note that a selwakeup() must be done.
2449 */
2450int
6d2010ae 2451bpfselect(dev_t dev, int which, void * wql, struct proc *p)
1c79356b 2452{
2d21ac55 2453 struct bpf_d *d;
6d2010ae 2454 int ret = 0;
1c79356b 2455
2d21ac55
A
2456 lck_mtx_lock(bpf_mlock);
2457
55e303ae 2458 d = bpf_dtab[minor(dev)];
d9a64523
A
2459 if (d == NULL || d == BPF_DEV_RESERVED ||
2460 (d->bd_flags & BPF_CLOSING) != 0) {
2d21ac55 2461 lck_mtx_unlock(bpf_mlock);
0a7de745 2462 return ENXIO;
2d21ac55 2463 }
55e303ae 2464
3e170ce0
A
2465 bpf_acquire_d(d);
2466
9bccf70c 2467 if (d->bd_bif == NULL) {
3e170ce0 2468 bpf_release_d(d);
91447636 2469 lck_mtx_unlock(bpf_mlock);
0a7de745 2470 return ENXIO;
9bccf70c
A
2471 }
2472
0a7de745 2473 while (d->bd_hbuf_read != 0) {
39236c6e 2474 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2475 }
3e170ce0
A
2476
2477 if ((d->bd_flags & BPF_CLOSING) != 0) {
2478 bpf_release_d(d);
39236c6e 2479 lck_mtx_unlock(bpf_mlock);
0a7de745 2480 return ENXIO;
39236c6e
A
2481 }
2482
6d2010ae 2483 switch (which) {
0a7de745
A
2484 case FREAD:
2485 if (d->bd_hlen != 0 ||
2486 ((d->bd_immediate ||
2487 d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2488 ret = 1; /* read has data to return */
2489 } else {
2490 /*
2491 * Read has no data to return.
2492 * Make the select wait, and start a timer if
2493 * necessary.
2494 */
2495 selrecord(p, &d->bd_sel, wql);
2496 bpf_start_timer(d);
2497 }
2498 break;
6d2010ae 2499
0a7de745
A
2500 case FWRITE:
2501 /* can't determine whether a write would block */
2502 ret = 1;
2503 break;
9bccf70c 2504 }
91447636 2505
3e170ce0 2506 bpf_release_d(d);
91447636 2507 lck_mtx_unlock(bpf_mlock);
3e170ce0 2508
0a7de745 2509 return ret;
1c79356b
A
2510}
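/*
 * Userland sketch matching bpfselect() above: select() for
 * readability, typically paired with BIOCSRTIMEOUT so that buffered
 * but incomplete data is reported once the read timeout fires (the
 * one-second timeout is assumed for illustration).
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/select.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
bpf_wait_readable(int bpf_fd)
{
	struct timeval to = { 1, 0 };   /* read timeout: 1 second */
	fd_set rfds;

	(void)ioctl(bpf_fd, BIOCSRTIMEOUT, &to);
	FD_ZERO(&rfds);
	FD_SET(bpf_fd, &rfds);
	/* Returns 1 when a read() would not block indefinitely */
	return select(bpf_fd + 1, &rfds, NULL, NULL, NULL);
}
#endif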
2511
b0d623f7
A
2512/*
2513 * Support for kevent() system call. Register EVFILT_READ filters and
2514 * reject all others.
2515 */
2516int bpfkqfilter(dev_t dev, struct knote *kn);
2517static void filt_bpfdetach(struct knote *);
2518static int filt_bpfread(struct knote *, long);
cb323159
A
2519static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
2520static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
b0d623f7 2521
5ba3f43e 2522SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
d9a64523 2523 .f_isfd = 1,
b0d623f7
A
2524 .f_detach = filt_bpfdetach,
2525 .f_event = filt_bpfread,
39037602
A
2526 .f_touch = filt_bpftouch,
2527 .f_process = filt_bpfprocess,
b0d623f7
A
2528};
2529
b0d623f7 2530static int
cb323159 2531filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
b0d623f7 2532{
b0d623f7 2533 int ready = 0;
cb323159 2534 int64_t data = 0;
b0d623f7 2535
b0d623f7 2536 if (d->bd_immediate) {
6d2010ae 2537 /*
d9a64523 2538 * If there's data in the hold buffer, it's the
6d2010ae
A
2539 * amount of data a read will return.
2540 *
2541 * If there's no data in the hold buffer, but
2542 * there's data in the store buffer, a read will
d9a64523 2543 * immediately rotate the store buffer to the
6d2010ae 2544 * hold buffer, the amount of data in the store
d9a64523 2545 * buffer is the amount of data a read will
6d2010ae
A
2546 * return.
2547 *
d9a64523 2548 * If there's no data in either buffer, we're not
6d2010ae
A
2549 * ready to read.
2550 */
cb323159 2551 data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
d9a64523 2552 d->bd_slen : d->bd_hlen);
cb323159
A
2553 int64_t lowwat = knote_low_watermark(kn);
2554 if (lowwat > d->bd_bufsize) {
2555 lowwat = d->bd_bufsize;
6d2010ae 2556 }
cb323159 2557 ready = (data >= lowwat);
b0d623f7 2558 } else {
6d2010ae 2559 /*
d9a64523 2560 * If there's data in the hold buffer, it's the
6d2010ae
A
2561 * amount of data a read will return.
2562 *
d9a64523
A
2563 * If there's no data in the hold buffer, but
2564 * there's data in the store buffer, if the
6d2010ae
A
2565 * timer has expired a read will immediately
2566 * rotate the store buffer to the hold buffer,
d9a64523 2567 * so the amount of data in the store buffer is
6d2010ae
A
2568 * the amount of data a read will return.
2569 *
d9a64523
A
2570 * If there's no data in either buffer, or there's
2571 * no data in the hold buffer and the timer hasn't
6d2010ae
A
2572 * expired, we're not ready to read.
2573 */
cb323159 2574 data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
d9a64523 2575 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
cb323159 2576 ready = (data > 0);
b0d623f7 2577 }
0a7de745 2578 if (!ready) {
6d2010ae 2579 bpf_start_timer(d);
cb323159
A
2580 } else if (kev) {
2581 knote_fill_kevent(kn, kev, data);
0a7de745 2582 }
b0d623f7 2583
0a7de745 2584 return ready;
b0d623f7
A
2585}
2586
39037602
A
2587int
2588bpfkqfilter(dev_t dev, struct knote *kn)
2589{
2590 struct bpf_d *d;
2591 int res;
2592
2593 /*
2594 * Is this device a bpf?
2595 */
cb323159
A
2596 if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
2597 knote_set_error(kn, EINVAL);
0a7de745 2598 return 0;
39037602
A
2599 }
2600
2601 lck_mtx_lock(bpf_mlock);
2602
2603 d = bpf_dtab[minor(dev)];
2604
d9a64523
A
2605 if (d == NULL || d == BPF_DEV_RESERVED ||
2606 (d->bd_flags & BPF_CLOSING) != 0 ||
2607 d->bd_bif == NULL) {
39037602 2608 lck_mtx_unlock(bpf_mlock);
cb323159 2609 knote_set_error(kn, ENXIO);
0a7de745 2610 return 0;
39037602
A
2611 }
2612
2613 kn->kn_hook = d;
2614 kn->kn_filtid = EVFILTID_BPFREAD;
2615 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2616 d->bd_flags |= BPF_KNOTE;
2617
2618 /* capture the current state */
cb323159 2619 res = filt_bpfread_common(kn, NULL, d);
39037602
A
2620
2621 lck_mtx_unlock(bpf_mlock);
2622
0a7de745 2623 return res;
39037602
A
2624}
2625
2626static void
2627filt_bpfdetach(struct knote *kn)
2628{
2629 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2630
2631 lck_mtx_lock(bpf_mlock);
2632 if (d->bd_flags & BPF_KNOTE) {
2633 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2634 d->bd_flags &= ~BPF_KNOTE;
2635 }
2636 lck_mtx_unlock(bpf_mlock);
2637}
2638
2639static int
2640filt_bpfread(struct knote *kn, long hint)
2641{
2642#pragma unused(hint)
2643 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2644
cb323159 2645 return filt_bpfread_common(kn, NULL, d);
39037602
A
2646}
2647
2648static int
cb323159 2649filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
39037602
A
2650{
2651 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2652 int res;
2653
2654 lck_mtx_lock(bpf_mlock);
2655
2656 /* save off the lowat threshold and flag */
2657 kn->kn_sdata = kev->data;
2658 kn->kn_sfflags = kev->fflags;
39037602
A
2659
2660 /* output data will be re-generated here */
cb323159 2661 res = filt_bpfread_common(kn, NULL, d);
39037602
A
2662
2663 lck_mtx_unlock(bpf_mlock);
2664
0a7de745 2665 return res;
39037602
A
2666}
2667
2668static int
cb323159 2669filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
39037602 2670{
39037602
A
2671 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2672 int res;
2673
2674 lck_mtx_lock(bpf_mlock);
cb323159 2675 res = filt_bpfread_common(kn, kev, d);
39037602
A
2676 lck_mtx_unlock(bpf_mlock);
2677
0a7de745 2678 return res;
39037602
A
2679}
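/*
 * Userland sketch of the kevent() side of the filter above.  NOTE_LOWAT
 * in fflags with the threshold in data feeds the low-watermark check in
 * filt_bpfread_common(), which clamps it to the buffer size; the 4 KiB
 * threshold here is assumed for illustration.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/event.h>
#include <unistd.h>

static long
wait_for_bpf_data(int bpf_fd)
{
	struct kevent kev;
	long nbytes = -1;
	int kq = kqueue();

	if (kq < 0)
		return -1;
	EV_SET(&kev, bpf_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 4096, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == 0 &&
	    kevent(kq, NULL, 0, &kev, 1, NULL) == 1) {
		/* kev.data is the byte count a read() would return */
		nbytes = (long)kev.data;
	}
	close(kq);
	return nbytes;
}
#endif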
2680
1c79356b 2681/*
d9a64523 2682 * Copy data from an mbuf chain into a buffer. This code is derived
5ba3f43e 2683 * from m_copydata in kern/uipc_mbuf.c.
1c79356b
A
2684 */
2685static void
5ba3f43e 2686bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
1c79356b 2687{
91447636 2688 u_int count;
1c79356b
A
2689 u_char *dst;
2690
1c79356b
A
2691 dst = dst_arg;
2692 while (len > 0) {
0a7de745 2693 if (m == 0) {
1c79356b 2694 panic("bpf_mcopy");
0a7de745 2695 }
1c79356b 2696 count = min(m->m_len, len);
2d21ac55 2697 bcopy(mbuf_data(m), dst, count);
1c79356b
A
2698 m = m->m_next;
2699 dst += count;
2700 len -= count;
2701 }
2702}
2703
2d21ac55
A
2704static inline void
2705bpf_tap_imp(
0a7de745
A
2706 ifnet_t ifp,
2707 u_int32_t dlt,
5ba3f43e 2708 struct bpf_packet *bpf_pkt,
0a7de745 2709 int outbound)
1c79356b 2710{
0a7de745 2711 struct bpf_d *d;
5ba3f43e 2712 u_int slen;
91447636 2713 struct bpf_if *bp;
1c79356b 2714
2d21ac55
A
2715 /*
2716 * It's possible that we get here after the bpf descriptor has been
2717 * detached from the interface; in such a case we simply return.
2718 * Lock ordering is important since we can be called asynchronously
5ba3f43e 2719 * (from IOKit) to process an inbound packet; when that happens
2d21ac55
A
2720 * we would have been holding its "gateLock" and will be acquiring
2721 * "bpf_mlock" upon entering this routine. Due to that, we release
2722 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2723 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2724 * when a ifnet_set_promiscuous request simultaneously collides with
2725 * an inbound packet being passed into the tap callback.
2726 */
91447636 2727 lck_mtx_lock(bpf_mlock);
2d21ac55
A
2728 if (ifp->if_bpf == NULL) {
2729 lck_mtx_unlock(bpf_mlock);
2730 return;
2731 }
5ba3f43e
A
2732 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2733 if (bp->bif_ifp != ifp) {
2734 /* wrong interface */
2735 bp = NULL;
2736 break;
2d21ac55 2737 }
5ba3f43e
A
2738 if (dlt == 0 || bp->bif_dlt == dlt) {
2739 /* tapping default DLT or DLT matches */
2740 break;
2741 }
2742 }
2743 if (bp == NULL) {
2744 goto done;
2745 }
2746 for (d = bp->bif_dlist; d; d = d->bd_next) {
d9a64523
A
2747 struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2748 struct bpf_packet bpf_pkt_tmp;
2749 struct pktap_header_buffer bpfp_header_tmp;
2750
0a7de745 2751 if (outbound && !d->bd_seesent) {
5ba3f43e 2752 continue;
0a7de745 2753 }
d9a64523 2754
5ba3f43e
A
2755 ++d->bd_rcount;
2756 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
d9a64523
A
2757 bpf_pkt->bpfp_total_length, 0);
2758 if (bp->bif_ifp->if_type == IFT_PKTAP &&
2759 bp->bif_dlt == DLT_PKTAP) {
2760 /*
2761 * Need to copy the bpf_pkt because the conversion
2762 * to v2 pktap header modifies the content of the
2763 * bpfp_header
2764 */
2765 if ((d->bd_flags & BPF_PKTHDRV2) &&
2766 bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2767 bpf_pkt_tmp = *bpf_pkt;
2768
2769 bpf_pkt = &bpf_pkt_tmp;
2770
2771 memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2772 bpf_pkt->bpfp_header_length);
2773
2774 bpf_pkt->bpfp_header = &bpfp_header_tmp;
2775
2776 convert_to_pktap_header_to_v2(bpf_pkt,
2777 !!(d->bd_flags & BPF_TRUNCATE));
2778 }
2779
0a7de745 2780 if (d->bd_flags & BPF_TRUNCATE) {
d9a64523
A
2781 slen = min(slen,
2782 get_pkt_trunc_len((u_char *)bpf_pkt,
0a7de745
A
2783 bpf_pkt->bpfp_total_length));
2784 }
d9a64523 2785 }
5ba3f43e 2786 if (slen != 0) {
5ba3f43e 2787 catchpacket(d, bpf_pkt, slen, outbound);
91447636 2788 }
d9a64523 2789 bpf_pkt = bpf_pkt_saved;
1c79356b 2790 }
5ba3f43e 2791
d9a64523 2792done:
91447636 2793 lck_mtx_unlock(bpf_mlock);
1c79356b
A
2794}
2795
5ba3f43e
A
2796static inline void
2797bpf_tap_mbuf(
0a7de745
A
2798 ifnet_t ifp,
2799 u_int32_t dlt,
2800 mbuf_t m,
2801 void* hdr,
2802 size_t hlen,
2803 int outbound)
5ba3f43e
A
2804{
2805 struct bpf_packet bpf_pkt;
2806 struct mbuf *m0;
2807
2808 if (ifp->if_bpf == NULL) {
2809 /* quickly check without taking lock */
2810 return;
2811 }
2812 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2813 bpf_pkt.bpfp_mbuf = m;
2814 bpf_pkt.bpfp_total_length = 0;
0a7de745 2815 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
5ba3f43e 2816 bpf_pkt.bpfp_total_length += m0->m_len;
0a7de745 2817 }
5ba3f43e
A
2818 bpf_pkt.bpfp_header = hdr;
2819 if (hdr != NULL) {
2820 bpf_pkt.bpfp_total_length += hlen;
2821 bpf_pkt.bpfp_header_length = hlen;
2822 } else {
2823 bpf_pkt.bpfp_header_length = 0;
2824 }
2825 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2826}
2827
2d21ac55
A
2828void
2829bpf_tap_out(
0a7de745
A
2830 ifnet_t ifp,
2831 u_int32_t dlt,
2832 mbuf_t m,
2833 void* hdr,
2834 size_t hlen)
2d21ac55 2835{
5ba3f43e 2836 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2d21ac55
A
2837}
2838
2839void
2840bpf_tap_in(
0a7de745
A
2841 ifnet_t ifp,
2842 u_int32_t dlt,
2843 mbuf_t m,
2844 void* hdr,
2845 size_t hlen)
2d21ac55 2846{
5ba3f43e 2847 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2d21ac55
A
2848}
2849
2850/* Callback registered with Ethernet driver. */
0a7de745
A
2851static int
2852bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2d21ac55 2853{
5ba3f43e 2854 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
d9a64523 2855
0a7de745 2856 return 0;
2d21ac55
A
2857}
2858
5ba3f43e 2859
d9a64523
A
2860static errno_t
2861bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2862{
2863 errno_t err = 0;
2864 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2865 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2866 } else {
2867 err = EINVAL;
2868 }
2869
0a7de745 2870 return err;
d9a64523
A
2871}
2872
5ba3f43e
A
2873static void
2874copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2875{
2876 /* copy the optional header */
2877 if (pkt->bpfp_header_length != 0) {
0a7de745 2878 size_t count = min(len, pkt->bpfp_header_length);
5ba3f43e
A
2879 bcopy(pkt->bpfp_header, dst, count);
2880 len -= count;
2881 dst += count;
2882 }
2883 if (len == 0) {
2884 /* nothing past the header */
2885 return;
2886 }
2887 /* copy the packet */
2888 switch (pkt->bpfp_type) {
2889 case BPF_PACKET_TYPE_MBUF:
2890 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2891 break;
2892 default:
2893 break;
2894 }
2895}
2896
d9a64523
A
2897static uint16_t
2898get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2899 const uint16_t remaining_caplen)
2900{
2901 /*
2902 * For some reason tcpdump expects one byte beyond the ESP header
2903 */
2904 uint16_t trunc_len = ESP_HDR_SIZE + 1;
2905
0a7de745
A
2906 if (trunc_len > remaining_caplen) {
2907 return remaining_caplen;
2908 }
d9a64523 2909
0a7de745 2910 return trunc_len;
d9a64523
A
2911}
2912
2913static uint16_t
2914get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2915 const uint16_t remaining_caplen)
2916{
2917 /*
2918 * Include the generic payload header
2919 */
2920 uint16_t trunc_len = ISAKMP_HDR_SIZE;
2921
0a7de745
A
2922 if (trunc_len > remaining_caplen) {
2923 return remaining_caplen;
2924 }
d9a64523 2925
0a7de745 2926 return trunc_len;
d9a64523
A
2927}
2928
2929static uint16_t
2930get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
2931 const uint16_t remaining_caplen)
2932{
2933 int err = 0;
2934 uint16_t trunc_len = 0;
2935 char payload[remaining_caplen];
2936
2937 err = bpf_copydata(pkt, off, remaining_caplen, payload);
0a7de745
A
2938 if (err != 0) {
2939 return remaining_caplen;
2940 }
d9a64523
A
2941 /*
2942 * There are three cases:
2943 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
2944 * - keep alive: 1 byte payload
2945 * - otherwise it's ESP
2946 */
2947 if (remaining_caplen >= 4 &&
0a7de745
A
2948 payload[0] == 0 && payload[1] == 0 &&
2949 payload[2] == 0 && payload[3] == 0) {
d9a64523
A
2950 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
2951 } else if (remaining_caplen == 1) {
2952 trunc_len = 1;
2953 } else {
2954 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
2955 }
2956
0a7de745
A
2957 if (trunc_len > remaining_caplen) {
2958 return remaining_caplen;
2959 }
d9a64523 2960
0a7de745 2961 return trunc_len;
d9a64523
A
2962}
2963
2964static uint16_t
2965get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2966{
2967 int err = 0;
2968 uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
2969
0a7de745
A
2970 if (trunc_len >= remaining_caplen) {
2971 return remaining_caplen;
2972 }
d9a64523
A
2973
2974 struct udphdr udphdr;
2975 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
0a7de745
A
2976 if (err != 0) {
2977 return remaining_caplen;
2978 }
d9a64523
A
2979
2980 u_short sport, dport;
2981
2982 sport = EXTRACT_SHORT(&udphdr.uh_sport);
2983 dport = EXTRACT_SHORT(&udphdr.uh_dport);
2984
2985 if (dport == PORT_DNS || sport == PORT_DNS) {
2986 /*
2987 * Full UDP payload for DNS
2988 */
2989 trunc_len = remaining_caplen;
2990 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
0a7de745 2991 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
d9a64523
A
2992 /*
2993 * Full UDP payload for BOOTP and DHCP
2994 */
2995 trunc_len = remaining_caplen;
2996 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
2997 /*
2998 * Return the ISAKMP header
2999 */
3000 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3001 remaining_caplen - sizeof(struct udphdr));
3002 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3003 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3004 remaining_caplen - sizeof(struct udphdr));
3005 }
0a7de745
A
3006 if (trunc_len >= remaining_caplen) {
3007 return remaining_caplen;
3008 }
d9a64523 3009
0a7de745 3010 return trunc_len;
d9a64523
A
3011}
3012
3013static uint16_t
3014get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3015{
3016 int err = 0;
3017 uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
0a7de745
A
3018 if (trunc_len >= remaining_caplen) {
3019 return remaining_caplen;
3020 }
d9a64523
A
3021
3022 struct tcphdr tcphdr;
3023 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
0a7de745
A
3024 if (err != 0) {
3025 return remaining_caplen;
3026 }
d9a64523
A
3027
3028 u_short sport, dport;
3029 sport = EXTRACT_SHORT(&tcphdr.th_sport);
3030 dport = EXTRACT_SHORT(&tcphdr.th_dport);
3031
3032 if (dport == PORT_DNS || sport == PORT_DNS) {
3033 /*
3034 * Full TCP payload for DNS
3035 */
3036 trunc_len = remaining_caplen;
3037 } else {
3038 trunc_len = tcphdr.th_off << 2;
3039 }
0a7de745
A
3040 if (trunc_len >= remaining_caplen) {
3041 return remaining_caplen;
3042 }
d9a64523 3043
0a7de745 3044 return trunc_len;
d9a64523
A
3045}
3046
3047static uint16_t
3048get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3049{
3050 uint16_t trunc_len;
3051
3052 switch (proto) {
3053 case IPPROTO_ICMP: {
3054 /*
3055 * Full ICMP payload
3056 */
3057 trunc_len = remaining_caplen;
3058 break;
3059 }
3060 case IPPROTO_ICMPV6: {
3061 /*
3062 * Full ICMPv6 payload
3063 */
3064 trunc_len = remaining_caplen;
3065 break;
3066 }
3067 case IPPROTO_IGMP: {
3068 /*
3069 * Full IGMP payload
3070 */
3071 trunc_len = remaining_caplen;
3072 break;
3073 }
3074 case IPPROTO_UDP: {
3075 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3076 break;
3077 }
3078 case IPPROTO_TCP: {
3079 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3080 break;
3081 }
3082 case IPPROTO_ESP: {
3083 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3084 break;
3085 }
3086 default: {
3087 /*
3088 * By default we only include the IP header
3089 */
3090 trunc_len = 0;
3091 break;
3092 }
3093 }
0a7de745
A
3094 if (trunc_len >= remaining_caplen) {
3095 return remaining_caplen;
3096 }
d9a64523 3097
0a7de745 3098 return trunc_len;
d9a64523
A
3099}
3100
3101static uint16_t
3102get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3103{
3104 int err = 0;
3105 uint16_t iplen = sizeof(struct ip);
0a7de745
A
3106 if (iplen >= remaining_caplen) {
3107 return remaining_caplen;
3108 }
d9a64523
A
3109
3110 struct ip iphdr;
3111 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
0a7de745
A
3112 if (err != 0) {
3113 return remaining_caplen;
3114 }
d9a64523
A
3115
3116 uint8_t proto = 0;
3117
3118 iplen = iphdr.ip_hl << 2;
0a7de745
A
3119 if (iplen >= remaining_caplen) {
3120 return remaining_caplen;
3121 }
d9a64523
A
3122
3123 proto = iphdr.ip_p;
3124 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3125
0a7de745
A
3126 if (iplen >= remaining_caplen) {
3127 return remaining_caplen;
3128 }
d9a64523 3129
0a7de745 3130 return iplen;
d9a64523
A
3131}
3132
3133static uint16_t
3134get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3135{
3136 int err = 0;
3137 uint16_t iplen = sizeof(struct ip6_hdr);
0a7de745
A
3138 if (iplen >= remaining_caplen) {
3139 return remaining_caplen;
3140 }
d9a64523
A
3141
3142 struct ip6_hdr ip6hdr;
3143 err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
0a7de745
A
3144 if (err != 0) {
3145 return remaining_caplen;
3146 }
d9a64523
A
3147
3148 uint8_t proto = 0;
3149
3150 /*
3151 * TBD: process the extension headers
3152 */
3153 proto = ip6hdr.ip6_nxt;
3154 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3155
0a7de745
A
3156 if (iplen >= remaining_caplen) {
3157 return remaining_caplen;
3158 }
d9a64523 3159
0a7de745 3160 return iplen;
d9a64523
A
3161}
3162
3163static uint16_t
3164get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
3165{
3166 int err = 0;
3167 uint16_t ethlen = sizeof(struct ether_header);
0a7de745
A
3168 if (ethlen >= remaining_caplen) {
3169 return remaining_caplen;
3170 }
d9a64523
A
3171
3172 struct ether_header eh;
3173 u_short type;
3174 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
0a7de745
A
3175 if (err != 0) {
3176 return remaining_caplen;
3177 }
d9a64523
A
3178
3179 type = EXTRACT_SHORT(&eh.ether_type);
3180 /* Include full ARP */
3181 if (type == ETHERTYPE_ARP) {
3182 ethlen = remaining_caplen;
3183 } else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
3184 ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
3185 } else {
3186 if (type == ETHERTYPE_IP) {
3187 ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
3188 remaining_caplen);
3189 } else if (type == ETHERTYPE_IPV6) {
3190 ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
0a7de745 3191 remaining_caplen);
d9a64523
A
3192 }
3193 }
0a7de745 3194 return ethlen;
d9a64523
A
3195}
3196
3197static uint32_t
3198get_pkt_trunc_len(u_char *p, u_int len)
3199{
3200 struct bpf_packet *pkt = (struct bpf_packet *)(void *) p;
3201 struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3202 uint32_t out_pkt_len = 0, tlen = 0;
3203 /*
3204 * pktap->pth_frame_pre_length is L2 header length and accounts
3205 * for both pre and pre_adjust.
3206 * pktap->pth_length is sizeof(pktap_header) (excl the pre/pre_adjust)
3207 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
3208 * pre is the offset to the L3 header after the bpfp_header, or length
3209 * of L2 header after bpfp_header, if present.
0a7de745 3210 */
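	/*
	 * Worked example (values assumed for illustration): an Ethernet
	 * pktap frame with no pre_adjust has pth_frame_pre_length = 14
	 * and bpfp_header_length == pth_length, giving pre = 14 - 0 = 14,
	 * i.e. a 14-byte L2 header sits between the bpf header and the
	 * L3 header, which selects the "pre > 0" branch below.
	 */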
cb323159 3211 int32_t pre = pktap->pth_frame_pre_length -
d9a64523
A
3212 (pkt->bpfp_header_length - pktap->pth_length);
3213
3214 /* Length of the input packet starting from L3 header */
3215 uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
3216 if (pktap->pth_protocol_family == AF_INET ||
3217 pktap->pth_protocol_family == AF_INET6) {
3218 /* Contains L2 header */
3219 if (pre > 0) {
cb323159 3220 if (pre < (int32_t)sizeof(struct ether_header)) {
d9a64523 3221 goto too_short;
0a7de745 3222 }
d9a64523
A
3223
3224 out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
3225 } else if (pre == 0) {
3226 if (pktap->pth_protocol_family == AF_INET) {
3227 out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
3228 } else if (pktap->pth_protocol_family == AF_INET6) {
3229 out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
3230 }
3231 } else {
3232 /* Ideally pre should be >= 0. This is an exception */
3233 out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
3234 }
3235 } else {
3236 if (pktap->pth_iftype == IFT_ETHER) {
3237 if (in_pkt_len < sizeof(struct ether_header)) {
3238 goto too_short;
3239 }
3240 /* At most include the Ethernet header and 16 bytes */
3241 out_pkt_len = MIN(sizeof(struct ether_header) + 16,
3242 in_pkt_len);
3243 } else {
3244 /*
3245 * For unknown protocols include at most 16 bytes
3246 */
3247 out_pkt_len = MIN(16, in_pkt_len);
3248 }
3249 }
3250done:
3251 tlen = pkt->bpfp_header_length + out_pkt_len + pre;
0a7de745 3252 return tlen;
d9a64523
A
3253too_short:
3254 out_pkt_len = in_pkt_len;
3255 goto done;
3256}
3257
1c79356b
A
3258/*
3259 * Move the packet data from the bpf_packet (pkt) into the
3260 * store buffer, rotating the buffers and waking up any pending
5ba3f43e 3261 * reader when the store buffer fills up.
1c79356b
A
3262 */
3263static void
5ba3f43e 3264catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
0a7de745 3265 u_int snaplen, int outbound)
1c79356b 3266{
2d21ac55 3267 struct bpf_hdr *hp;
316670eb 3268 struct bpf_hdr_ext *ehp;
2d21ac55 3269 int totlen, curlen;
316670eb 3270 int hdrlen, caplen;
6d2010ae 3271 int do_wakeup = 0;
316670eb 3272 u_char *payload;
39236c6e 3273 struct timeval tv;
316670eb 3274
fe8ab488 3275 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
316670eb 3276 d->bd_bif->bif_hdrlen;
1c79356b
A
3277 /*
3278 * Figure out how many bytes to move. If the packet is
3279 * greater or equal to the snapshot length, transfer that
3280 * much. Otherwise, transfer the whole packet (unless
3281 * we hit the buffer size limit).
3282 */
5ba3f43e 3283 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
0a7de745 3284 if (totlen > d->bd_bufsize) {
1c79356b 3285 totlen = d->bd_bufsize;
0a7de745 3286 }
1c79356b 3287
0a7de745 3288 if (hdrlen > totlen) {
a39ff7e2 3289 return;
0a7de745 3290 }
a39ff7e2 3291
1c79356b
A
3292 /*
3293 * Round up the end of the previous packet to the next longword.
3294 */
3295 curlen = BPF_WORDALIGN(d->bd_slen);
3296 if (curlen + totlen > d->bd_bufsize) {
3297 /*
3298 * This packet will overflow the storage buffer.
3299 * Rotate the buffers if we can, then wakeup any
3300 * pending reads.
813fb2f6
A
3301 *
3302 * We cannot rotate buffers if a read is in progress
3303 * so drop the packet
1c79356b 3304 */
d9a64523 3305 if (d->bd_hbuf_read != 0) {
813fb2f6
A
3306 ++d->bd_dcount;
3307 return;
3308 }
d9a64523 3309
6d2010ae 3310 if (d->bd_fbuf == NULL) {
3e170ce0
A
3311 if (d->bd_headdrop == 0) {
3312 /*
3313 * We haven't completed the previous read yet,
3314 * so drop the packet.
3315 */
3316 ++d->bd_dcount;
3317 return;
3318 }
1c79356b 3319 /*
3e170ce0 3320 * Drop the hold buffer as it contains older packets
1c79356b 3321 */
3e170ce0
A
3322 d->bd_dcount += d->bd_hcnt;
3323 d->bd_fbuf = d->bd_hbuf;
3324 ROTATE_BUFFERS(d);
3325 } else {
3326 ROTATE_BUFFERS(d);
1c79356b 3327 }
6d2010ae 3328 do_wakeup = 1;
1c79356b 3329 curlen = 0;
0a7de745 3330 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
1c79356b 3331 /*
d9a64523
A
3332 * Immediate mode is set, or the read timeout has
3333 * already expired during a select call. A packet
6d2010ae 3334 * arrived, so the reader should be woken up.
1c79356b 3335 */
6d2010ae 3336 do_wakeup = 1;
0a7de745 3337 }
1c79356b
A
3338
3339 /*
3340 * Append the bpf header.
3341 */
b0d623f7 3342 microtime(&tv);
d9a64523 3343 if (d->bd_flags & BPF_EXTENDED_HDR) {
5ba3f43e
A
3344 struct mbuf *m;
3345
3346 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
0a7de745 3347 ? pkt->bpfp_mbuf : NULL;
d9a64523
A
3348 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3349 memset(ehp, 0, sizeof(*ehp));
3350 ehp->bh_tstamp.tv_sec = tv.tv_sec;
3351 ehp->bh_tstamp.tv_usec = tv.tv_usec;
5ba3f43e
A
3352
3353 ehp->bh_datalen = pkt->bpfp_total_length;
d9a64523 3354 ehp->bh_hdrlen = hdrlen;
5ba3f43e
A
3355 caplen = ehp->bh_caplen = totlen - hdrlen;
3356 if (m == NULL) {
3357 if (outbound) {
39236c6e 3358 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
5ba3f43e 3359 } else {
39236c6e 3360 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
5ba3f43e 3361 }
39236c6e 3362 } else if (outbound) {
5ba3f43e
A
3363 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3364
39236c6e 3365 /* only do lookups on non-raw INPCB */
0a7de745
A
3366 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
3367 PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
3368 (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
39236c6e
A
3369 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3370 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3371 ehp->bh_proto = m->m_pkthdr.pkt_proto;
3372 }
3373 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
0a7de745 3374 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
39037602 3375 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
0a7de745
A
3376 }
3377 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
39037602 3378 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
0a7de745
A
3379 }
3380 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
39037602 3381 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
0a7de745 3382 }
39037602
A
3383 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3384 ehp->bh_unsent_bytes =
3385 m->m_pkthdr.bufstatus_if;
3386 ehp->bh_unsent_snd =
3387 m->m_pkthdr.bufstatus_sndbuf;
3388 }
0a7de745 3389 } else {
316670eb 3390 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
0a7de745 3391 }
d9a64523
A
3392 payload = (u_char *)ehp + hdrlen;
3393 } else {
3394 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3395 hp->bh_tstamp.tv_sec = tv.tv_sec;
3396 hp->bh_tstamp.tv_usec = tv.tv_usec;
5ba3f43e 3397 hp->bh_datalen = pkt->bpfp_total_length;
d9a64523 3398 hp->bh_hdrlen = hdrlen;
5ba3f43e 3399 caplen = hp->bh_caplen = totlen - hdrlen;
d9a64523
A
3400 payload = (u_char *)hp + hdrlen;
3401 }
1c79356b
A
3402 /*
3403 * Copy the packet data into the store buffer and update its length.
3404 */
5ba3f43e 3405 copy_bpf_packet(pkt, payload, caplen);
1c79356b 3406 d->bd_slen = curlen + totlen;
3e170ce0 3407 d->bd_scnt += 1;
6d2010ae 3408
0a7de745 3409 if (do_wakeup) {
6d2010ae 3410 bpf_wakeup(d);
0a7de745 3411 }
1c79356b
A
3412}
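/*
 * The store-buffer layout produced above (bpf_hdr, then caplen bytes of
 * packet, with each record padded to a word boundary) is consumed in
 * userland by walking read() output with BPF_WORDALIGN, sketched here
 * for the plain (non-extended) header.
 */
#if 0 /* example only; never compiled into the kernel */
#include <sys/types.h>
#include <net/bpf.h>

static void
walk_bpf_buffer(char *buf, ssize_t nread,
    void (*cb)(const struct bpf_hdr *, const u_char *))
{
	char *p = buf;

	while (p < buf + nread) {
		const struct bpf_hdr *hp = (const struct bpf_hdr *)(void *)p;

		/* bh_hdrlen already includes the padding inserted so the
		 * payload starts on a longword boundary */
		cb(hp, (const u_char *)p + hp->bh_hdrlen);
		p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
	}
}
#endif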
3413
3414/*
3415 * Initialize all nonzero fields of a descriptor.
3416 */
3417static int
91447636 3418bpf_allocbufs(struct bpf_d *d)
1c79356b 3419{
813fb2f6
A
3420 if (d->bd_sbuf != NULL) {
3421 FREE(d->bd_sbuf, M_DEVBUF);
3422 d->bd_sbuf = NULL;
3423 }
3424 if (d->bd_hbuf != NULL) {
3425 FREE(d->bd_hbuf, M_DEVBUF);
3426 d->bd_hbuf = NULL;
3427 }
3428 if (d->bd_fbuf != NULL) {
3429 FREE(d->bd_fbuf, M_DEVBUF);
3430 d->bd_fbuf = NULL;
3431 }
3432
1c79356b 3433 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
0a7de745
A
3434 if (d->bd_fbuf == NULL) {
3435 return ENOBUFS;
3436 }
1c79356b
A
3437
3438 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
813fb2f6 3439 if (d->bd_sbuf == NULL) {
1c79356b 3440 FREE(d->bd_fbuf, M_DEVBUF);
813fb2f6 3441 d->bd_fbuf = NULL;
0a7de745 3442 return ENOBUFS;
1c79356b
A
3443 }
3444 d->bd_slen = 0;
3445 d->bd_hlen = 0;
3e170ce0
A
3446 d->bd_scnt = 0;
3447 d->bd_hcnt = 0;
0a7de745 3448 return 0;
1c79356b
A
3449}
3450
3451/*
3452 * Free buffers currently in use by a descriptor.
3453 * Called on close.
3454 */
3455static void
91447636 3456bpf_freed(struct bpf_d *d)
1c79356b
A
3457{
3458 /*
3459 * We don't need to lock out interrupts since this descriptor has
3460 * been detached from its interface and has not yet been marked
3461 * free.
3462 */
0a7de745 3463 if (d->bd_hbuf_read != 0) {
39236c6e 3464 panic("bpf buffer freed during read");
0a7de745 3465 }
39236c6e 3466
1c79356b
A
3467 if (d->bd_sbuf != 0) {
3468 FREE(d->bd_sbuf, M_DEVBUF);
0a7de745 3469 if (d->bd_hbuf != 0) {
1c79356b 3470 FREE(d->bd_hbuf, M_DEVBUF);
0a7de745
A
3471 }
3472 if (d->bd_fbuf != 0) {
1c79356b 3473 FREE(d->bd_fbuf, M_DEVBUF);
0a7de745
A
3474 }
3475 }
3476 if (d->bd_filter) {
3477 FREE(d->bd_filter, M_DEVBUF);
1c79356b 3478 }
1c79356b
A
3479}
3480
3481/*
d9a64523 3482 * Attach an interface to bpf. dlt is the link layer type; hdrlen is
1c79356b
A
3483 * the fixed size of the link header (variable length headers not yet
3484 * supported).
3485 */
3486void
91447636 3487bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1c79356b 3488{
2d21ac55
A
3489 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3490}
3491
3492errno_t
3493bpf_attach(
0a7de745
A
3494 ifnet_t ifp,
3495 u_int32_t dlt,
3496 u_int32_t hdrlen,
3497 bpf_send_func send,
3498 bpf_tap_func tap)
2d21ac55 3499{
5ba3f43e 3500 struct bpf_if *bp;
2d21ac55 3501 struct bpf_if *bp_new;
5ba3f43e 3502 struct bpf_if *bp_before_first = NULL;
2d21ac55 3503 struct bpf_if *bp_first = NULL;
5ba3f43e
A
3504 struct bpf_if *bp_last = NULL;
3505 boolean_t found;
3506
3e170ce0
A
3507 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
3508 M_WAIT | M_ZERO);
0a7de745 3509 if (bp_new == 0) {
1c79356b 3510 panic("bpfattach");
0a7de745 3511 }
1c79356b 3512
91447636
A
3513 lck_mtx_lock(bpf_mlock);
3514
2d21ac55 3515 /*
5ba3f43e
A
3516 * Check if this interface/dlt is already attached. Remember the
3517 * first and last attachment for this interface, as well as the
3518 * element before the first attachment.
2d21ac55 3519 */
5ba3f43e
A
3520 found = FALSE;
3521 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3522 if (bp->bif_ifp != ifp) {
3523 if (bp_first != NULL) {
3524 /* no more elements for this interface */
3525 break;
3526 }
3527 bp_before_first = bp;
3528 } else {
3529 if (bp->bif_dlt == dlt) {
3530 found = TRUE;
3531 break;
3532 }
3533 if (bp_first == NULL) {
3534 bp_first = bp;
3535 }
3536 bp_last = bp;
3537 }
2d21ac55 3538 }
5ba3f43e
A
3539 if (found) {
3540 lck_mtx_unlock(bpf_mlock);
39236c6e 3541 printf("bpfattach - %s with dlt %d is already attached\n",
0a7de745 3542 if_name(ifp), dlt);
2d21ac55 3543 FREE(bp_new, M_DEVBUF);
0a7de745 3544 return EEXIST;
2d21ac55 3545 }
d9a64523 3546
2d21ac55
A
3547 bp_new->bif_ifp = ifp;
3548 bp_new->bif_dlt = dlt;
3549 bp_new->bif_send = send;
3550 bp_new->bif_tap = tap;
d9a64523 3551
2d21ac55
A
3552 if (bp_first == NULL) {
3553 /* No other entries for this ifp */
3554 bp_new->bif_next = bpf_iflist;
3555 bpf_iflist = bp_new;
d9a64523 3556 } else {
5ba3f43e
A
3557 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
3558 /* Make this the first entry for this interface */
3559 if (bp_before_first != NULL) {
3560 /* point the previous to us */
3561 bp_before_first->bif_next = bp_new;
3562 } else {
3563 /* we're the new head */
3564 bpf_iflist = bp_new;
3565 }
3566 bp_new->bif_next = bp_first;
3567 } else {
3568 /* Add this after the last entry for this interface */
3569 bp_new->bif_next = bp_last->bif_next;
3570 bp_last->bif_next = bp_new;
3571 }
2d21ac55 3572 }
d9a64523 3573
1c79356b
A
3574 /*
3575 * Compute the length of the bpf header. This is not necessarily
3576 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
3577 * that the network layer header begins on a longword boundary (for
3578 * performance reasons and to alleviate alignment restrictions).
3579 */
2d21ac55 3580 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
316670eb
A
3581 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
3582 sizeof(struct bpf_hdr_ext)) - hdrlen;
d9a64523 3583
91447636 3584 /* Take a reference on the interface */
2d21ac55 3585 ifnet_reference(ifp);
91447636
A
3586
3587 lck_mtx_unlock(bpf_mlock);
1c79356b 3588
55e303ae 3589#ifndef __APPLE__
0a7de745 3590 if (bootverbose) {
39236c6e 3591 printf("bpf: %s attached\n", if_name(ifp));
0a7de745 3592 }
1c79356b 3593#endif
2d21ac55 3594
0a7de745 3595 return 0;
1c79356b
A
3596}
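/*
 * Sketch of the driver side of this KPI for an Ethernet-like interface;
 * the my_driver_* names are hypothetical.  Attach once at init, then
 * tap each packet on its way through the driver.
 */
#if 0 /* example only; illustrative, not built */
static errno_t
my_driver_init_bpf(ifnet_t ifp)
{
	/* No driver-specific send/tap callbacks */
	return bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
	    NULL, NULL);
}

static void
my_driver_input(ifnet_t ifp, mbuf_t m)
{
	/* The link-layer header already leads the mbuf, so no extra hdr */
	bpf_tap_in(ifp, DLT_EN10MB, m, NULL, 0);
	/* ... then hand m up the stack ... */
}
#endif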
3597
9bccf70c
A
3598/*
3599 * Detach bpf from an interface. This involves detaching each descriptor
3600 * associated with the interface, and leaving bd_bif NULL. Notify each
3601 * descriptor as it's detached so that any sleepers wake up and get
3602 * ENXIO.
3603 */
3604void
91447636 3605bpfdetach(struct ifnet *ifp)
9bccf70c 3606{
0a7de745
A
3607 struct bpf_if *bp, *bp_prev, *bp_next;
3608 struct bpf_d *d;
9bccf70c 3609
0a7de745 3610 if (bpf_debug != 0) {
5ba3f43e 3611 printf("%s: %s\n", __func__, if_name(ifp));
0a7de745 3612 }
3e170ce0 3613
91447636 3614 lck_mtx_lock(bpf_mlock);
9bccf70c 3615
fe8ab488
A
3616 /*
3617 * Build the list of devices attached to that interface
3618 * that we need to free while keeping the lock to maintain
3619 * the integrity of the interface list
3620 */
9bccf70c 3621 bp_prev = NULL;
2d21ac55
A
3622 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
3623 bp_next = bp->bif_next;
fe8ab488 3624
2d21ac55
A
3625 if (ifp != bp->bif_ifp) {
3626 bp_prev = bp;
3627 continue;
3628 }
fe8ab488 3629 /* Unlink from the interface list */
0a7de745 3630 if (bp_prev) {
fe8ab488 3631 bp_prev->bif_next = bp->bif_next;
0a7de745 3632 } else {
fe8ab488 3633 bpf_iflist = bp->bif_next;
0a7de745 3634 }
fe8ab488 3635
3e170ce0 3636 /* Detach the devices attached to the interface */
2d21ac55 3637 while ((d = bp->bif_dlist) != NULL) {
3e170ce0
A
3638 /*
3639 * Take an extra reference to prevent the device
3640 * from being freed when bpf_detachd() releases
3641 * the reference for the interface list
3642 */
3643 bpf_acquire_d(d);
3644 bpf_detachd(d, 0);
2d21ac55 3645 bpf_wakeup(d);
3e170ce0 3646 bpf_release_d(d);
2d21ac55 3647 }
2d21ac55 3648 ifnet_release(ifp);
9bccf70c
A
3649 }
3650
91447636 3651 lck_mtx_unlock(bpf_mlock);
9bccf70c
A
3652}
3653
1c79356b 3654void
91447636 3655bpf_init(__unused void *unused)
1c79356b 3656{
9bccf70c 3657#ifdef __APPLE__
0a7de745
A
3658 int i;
3659 int maj;
1c79356b 3660
91447636 3661 if (bpf_devsw_installed == 0) {
9bccf70c 3662 bpf_devsw_installed = 1;
39236c6e
A
3663 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
3664 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
3665 bpf_mlock_attr = lck_attr_alloc_init();
3666 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
9bccf70c
A
3667 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3668 if (maj == -1) {
0a7de745 3669 if (bpf_mlock_attr) {
91447636 3670 lck_attr_free(bpf_mlock_attr);
0a7de745
A
3671 }
3672 if (bpf_mlock_grp) {
91447636 3673 lck_grp_free(bpf_mlock_grp);
0a7de745
A
3674 }
3675 if (bpf_mlock_grp_attr) {
91447636 3676 lck_grp_attr_free(bpf_mlock_grp_attr);
0a7de745 3677 }
d9a64523 3678
2d21ac55
A
3679 bpf_mlock = NULL;
3680 bpf_mlock_attr = NULL;
3681 bpf_mlock_grp = NULL;
3682 bpf_mlock_grp_attr = NULL;
91447636 3683 bpf_devsw_installed = 0;
d9a64523 3684 printf("bpf_init: failed to allocate a major number\n");
55e303ae 3685 return;
9bccf70c 3686 }
91447636 3687
0a7de745 3688 for (i = 0; i < NBPFILTER; i++) {
55e303ae 3689 bpf_make_dev_t(maj);
0a7de745 3690 }
9bccf70c
A
3691 }
3692#else
3693 cdevsw_add(&bpf_cdevsw);
3694#endif
1c79356b
A
3695}
3696
9bccf70c 3697#ifndef __APPLE__
cb323159 3698SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
1c79356b 3699#endif
9bccf70c 3700
cb323159
A
3701static int
3702sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
3703{
3704#pragma unused(arg1, arg2)
3705 int i, err;
3706
3707 i = bpf_maxbufsize;
3708
3709 err = sysctl_handle_int(oidp, &i, 0, req);
3710 if (err != 0 || req->newptr == USER_ADDR_NULL) {
3711 return err;
3712 }
3713
3714 if (i < 0 || i > BPF_MAXSIZE_CAP) {
3715 i = BPF_MAXSIZE_CAP;
3716 }
3717
3718 bpf_maxbufsize = i;
3719 return err;
3720}