/*
 * Copyright (c) 1999-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <stddef.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>

#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#endif /* INET */

#if INET6
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#endif /* INET6 */

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#include <net/pktsched/pktsched.h>

#if NECP
#include <net/necp.h>
#endif /* NECP */

#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4 /* LONGWORDS */
#define	MAX_LINKADDR		4 /* LONGWORDS */
#define	M_NKE			M_IFADDR

#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif

#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))

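/*
 * Editor's note (worked example, not part of the original source): for a
 * 64-bit counter field such as ifi_ipackets,
 *
 *	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets)
 *
 * expands to a compile-time assertion that
 * offsetof(struct if_data_internal, ifi_ipackets) is a multiple of
 * sizeof (u_int64_t), i.e. that the counter is 8-byte aligned and can
 * safely be updated with 64-bit atomic operations (see the checks at
 * the top of dlil_init() below).
 */
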
enum {
	kProtoKPI_v1 = 1,
	kProtoKPI_v2 = 2
};

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);

#define	DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define	DLIF_DEBUG	0x4	/* has debugging info */

#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

/* For gdb */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define	DLIL_TO_IFP(s)	(&s->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)

struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)	filt_next;
	u_int32_t			filt_skip;
	u_int32_t			filt_flags;
	ifnet_t				filt_ifp;
	const char			*filt_name;
	void				*filt_cookie;
	protocol_family_t		filt_protocol;
	iff_input_func			filt_input;
	iff_output_func			filt_output;
	iff_event_func			filt_event;
	iff_ioctl_func			filt_ioctl;
	iff_detached_func		filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define	DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	/* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	/* zone for tcpstat_local */

#define	DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	/* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	/* zone for udpstat_local */

#define	DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
	(struct dlil_threading_info *)&dlil_main_input_thread_info;

static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode,
    bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);

#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static errno_t ifnet_enqueue_common(struct ifnet *, void *,
    classq_pkt_type_t, boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */

/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
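
/*
 * Editor's note (not part of the original source): RB_PROTOTYPE() and
 * RB_GENERATE() are the <libkern/tree.h> red-black tree macros; they
 * emit the ifnet_fc_tree_RB_* insert/remove/lookup functions for
 * ifnet_fc_tree, ordered by the ifce_cmp() comparator declared above.
 */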

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define	IFNET_FC_ZONE_MAX	32

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };

SYSCTL_DECL(_net_link_generic_system);

#if CONFIG_MACF
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");
#endif

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define	IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define	IF_RCVQ_MINLEN	32
#define	IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define	IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define	IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");

#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define	HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");

static uint32_t threshold_notify = 1;		/* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;		/* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);

uint32_t dlil_rcv_mit_pkts_min = 5;
uint32_t dlil_rcv_mit_pkts_max = 64;
uint32_t dlil_rcv_mit_interval = (500 * 1000);

#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#define	DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

#define	DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
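
/*
 * Editor's note (worked example, not part of the original source): with
 * the default decay of 2 (IF_RXPOLL_DECAY above), the non-zero branch is
 *
 *	_avg = ((avg << 2) - avg + new) >> 2
 *	     = (3 * avg + new) / 4
 *
 * i.e. an exponentially-weighted moving average giving 3/4 weight to
 * history and 1/4 to the new sample; e.g. avg = 40, new = 80 yields 50.
 */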

#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
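
/*
 * Editor's note (how to read the table, not part of the original
 * source): each row pairs a downlink speed with the inbound
 * packet/byte watermarks used for opportunistic polling; e.g. the
 * 100 Mbps row seeds the per-interface parameters with packet
 * watermarks of 10/40 and byte watermarks of 4 KB/64 KB (consumed by
 * dlil_rxpoll_set_params(), called from dlil_create_input_thread()
 * below).  The all-zero row terminates the table.
 */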

int
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}
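
/*
 * Editor's sketch (not part of the original source): a typical caller
 * takes the ifnet lock around the lookup and later drops the reference
 * that find_attached_proto() took on its behalf:
 *
 *	ifnet_lock_shared(ifp);
 *	proto = find_attached_proto(ifp, PF_INET);
 *	ifnet_lock_done(ifp);
 *	if (proto != NULL) {
 *		... use proto ...
 *		if_proto_free(proto);	<- drops the if_proto_ref() above
 *	}
 */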

static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)
		return;

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still
	 * attached (subject to change).
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}

__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass)
		LCK_RW_ASSERT(&ifp->if_lock, type);
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}
#endif

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
#endif

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}

/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
	int i, count = 0;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			count++;
		}
	}
done:
	return (count);
}

__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	/* Don't update interface generation for quality and RRC state changes */
	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
	    event_code != KEV_DL_RRC_STATE_CHANGED));

	dlil_event_internal(ifp, &ev_msg, update_generation);
}

__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;
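
		/*
		 * Editor's note (resulting layout, not part of the
		 * original source): the zone block and the aligned
		 * object relate as follows; the error path at "end:"
		 * recovers the original zone pointer by reading the
		 * word stored just below the aligned base.
		 *
		 *	buf                pbuf        base (8-byte aligned)
		 *	|                  |           |
		 *	v                  v           v
		 *	+------------------+-----------+----------------+
		 *	|     headroom     | saved buf | tcpstat_local  |
		 *	+------------------+-----------+----------------+
		 */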

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return (ret);
}

static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}

#if TEST_INPUT_THREAD_TERMINATION
static int
sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_input_thread_termination_spin;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_input_thread_termination_spin = i;
	return (err);
}
#endif /* TEST_INPUT_THREAD_TERMINATION */

static void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	inp->wtot = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}

static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp = inp->ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
	{ /* do something useless that won't get optimized away */
		uint32_t v = 1;
		for (uint32_t i = 0;
		    i < if_input_thread_termination_spin;
		    i++) {
			v = (i + 1) * v;
		}
		printf("the value is %d\n", v);
	}
#endif /* TEST_INPUT_THREAD_TERMINATION */

	lck_mtx_lock_spin(&inp->input_lck);
	VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
	inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->input_waiting);
	lck_mtx_unlock(&inp->input_lck);

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}

static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}
1411
91447636
A
1412void
1413dlil_init(void)
1414{
6d2010ae
A
1415 thread_t thread = THREAD_NULL;
1416
1417 /*
1418 * The following fields must be 64-bit aligned for atomic operations.
1419 */
1420 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1421 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1422 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1423 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1424 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1425 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1426 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1427 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1428 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1429 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1430 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1431 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1432 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1433 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1434 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1435
1436 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1437 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1438 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1439 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1440 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1441 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1442 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1443 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1444 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1445 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1446 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1447 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1448 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1449 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1450 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1451
1452 /*
1453 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1454 */
1455 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1456 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1457 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1458 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1459 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
39236c6e
A
1460 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1461 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1462 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1463 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
5ba3f43e 1464 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
6d2010ae
A
1465 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1466 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1467 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1468 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1469
39236c6e
A
1470 /*
1471 * ... as well as the mbuf checksum flags counterparts.
1472 */
1473 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1474 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1475 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1476 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1477 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1478 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1479 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1480 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1481 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
5ba3f43e 1482 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
39236c6e
A
1483 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1484
6d2010ae
A
1485 /*
1486 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1487 */
1488 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
316670eb 1489 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
6d2010ae 1490
39236c6e
A
1491 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1492 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1493 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1494 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1495
1496 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1497 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1498 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1499
1500 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1501 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1502 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1503 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1504 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1505 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1506 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1507 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1508 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1509 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1510 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1511 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1512 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1513 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1514 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1515 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1516
1517 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1518 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1519 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1520 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1521 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
fe8ab488 1522 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
39037602 1523 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
39236c6e
A
1524
1525 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1526 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1527
6d2010ae
A
1528 PE_parse_boot_argn("net_affinity", &net_affinity,
1529 sizeof (net_affinity));
b0d623f7 1530
316670eb
A
1531 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1532
d1ecb069 1533 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
6d2010ae
A
1534
1535 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1536
1537 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1538 sizeof (struct dlil_ifnet_dbg);
1539 /* Enforce 64-bit alignment for dlil_ifnet structure */
1540 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1541 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
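	/*
	 * Worked example of the sizing above (numbers illustrative): with
	 * dlif_size == 1000 on LP64, adding sizeof (void *) == 8 and
	 * sizeof (u_int64_t) == 8 gives 1016, and P2ROUNDUP(1016, 8) ==
	 * 1016 since it is already a multiple of 8.  The extra 16 bytes
	 * give the allocator room to shift the structure to the next
	 * 64-bit boundary within the buffer (and, presumably, to stash
	 * the original buffer address alongside it).
	 */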
1542 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1543 0, DLIF_ZONE_NAME);
1544 if (dlif_zone == NULL) {
316670eb
A
1545 panic_plain("%s: failed allocating %s", __func__,
1546 DLIF_ZONE_NAME);
6d2010ae
A
1547 /* NOTREACHED */
1548 }
1549 zone_change(dlif_zone, Z_EXPAND, TRUE);
1550 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1551
1552 dlif_filt_size = sizeof (struct ifnet_filter);
1553 dlif_filt_zone = zinit(dlif_filt_size,
1554 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1555 if (dlif_filt_zone == NULL) {
316670eb 1556 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1557 DLIF_FILT_ZONE_NAME);
1558 /* NOTREACHED */
1559 }
1560 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1561 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1562
6d2010ae
A
1563 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1564 dlif_phash_zone = zinit(dlif_phash_size,
1565 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1566 if (dlif_phash_zone == NULL) {
316670eb 1567 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1568 DLIF_PHASH_ZONE_NAME);
1569 /* NOTREACHED */
1570 }
1571 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1572 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1573
1574 dlif_proto_size = sizeof (struct if_proto);
1575 dlif_proto_zone = zinit(dlif_proto_size,
1576 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1577 if (dlif_proto_zone == NULL) {
316670eb 1578 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1579 DLIF_PROTO_ZONE_NAME);
1580 /* NOTREACHED */
1581 }
1582 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1583 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1584
316670eb
A
1585 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1586 /* Enforce 64-bit alignment for tcpstat_local structure */
1587 dlif_tcpstat_bufsize =
1588 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1589 dlif_tcpstat_bufsize =
1590 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1591 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1592 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1593 DLIF_TCPSTAT_ZONE_NAME);
1594 if (dlif_tcpstat_zone == NULL) {
1595 panic_plain("%s: failed allocating %s", __func__,
1596 DLIF_TCPSTAT_ZONE_NAME);
1597 /* NOTREACHED */
1598 }
1599 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1600 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1601
1602 dlif_udpstat_size = sizeof (struct udpstat_local);
1603 /* Enforce 64-bit alignment for udpstat_local structure */
1604 dlif_udpstat_bufsize =
1605 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1606 dlif_udpstat_bufsize =
1607 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1608 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1609 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1610 DLIF_UDPSTAT_ZONE_NAME);
1611 if (dlif_udpstat_zone == NULL) {
1612 panic_plain("%s: failed allocating %s", __func__,
1613 DLIF_UDPSTAT_ZONE_NAME);
1614 /* NOTREACHED */
1615 }
1616 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1617 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1618
6d2010ae 1619 ifnet_llreach_init();
5ba3f43e 1620 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
d1ecb069 1621
91447636 1622 TAILQ_INIT(&dlil_ifnet_head);
91447636 1623 TAILQ_INIT(&ifnet_head);
6d2010ae 1624 TAILQ_INIT(&ifnet_detaching_head);
39037602 1625 TAILQ_INIT(&ifnet_ordered_head);
6d2010ae 1626
91447636 1627 /* Setup the lock groups we will use */
2d21ac55 1628 dlil_grp_attributes = lck_grp_attr_alloc_init();
91447636 1629
316670eb 1630 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
6d2010ae
A
1631 dlil_grp_attributes);
1632 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1633 dlil_grp_attributes);
1634 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1635 dlil_grp_attributes);
316670eb
A
1636 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1637 dlil_grp_attributes);
1638 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
6d2010ae
A
1639 dlil_grp_attributes);
1640
91447636 1641 /* Setup the lock attributes we will use */
2d21ac55 1642 dlil_lck_attributes = lck_attr_alloc_init();
6d2010ae 1643
91447636 1644 ifnet_lock_attr = lck_attr_alloc_init();
6d2010ae
A
1645
1646 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1647 dlil_lck_attributes);
1648 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1649
39236c6e
A
1650 /* Setup interface flow control related items */
1651 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
316670eb 1652
39236c6e
A
1653 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1654 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1655 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1656 if (ifnet_fc_zone == NULL) {
1657 panic_plain("%s: failed allocating %s", __func__,
1658 IFNET_FC_ZONE_NAME);
1659 /* NOTREACHED */
1660 }
1661 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1662 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
6d2010ae 1663
39236c6e 1664 /* Initialize interface address subsystem */
6d2010ae 1665 ifa_init();
39236c6e
A
1666
1667#if PF
1668 /* Initialize the packet filter */
1669 pfinit();
1670#endif /* PF */
1671
1672 /* Initialize queue algorithms */
1673 classq_init();
1674
1675 /* Initialize packet schedulers */
1676 pktsched_init();
1677
1678 /* Initialize flow advisory subsystem */
1679 flowadv_init();
1680
1681 /* Initialize the pktap virtual interface */
1682 pktap_init();
1683
39037602
A
1684 /* Initialize the service class to dscp map */
1685 net_qos_map_init();
1686
5ba3f43e 1687#if DEBUG || DEVELOPMENT
39236c6e
A
1688 /* Run self-tests */
1689 dlil_verify_sum16();
5ba3f43e
A
1690#endif /* DEBUG || DEVELOPMENT */
1691
1692 /* Initialize link layer table */
1693 lltable_glbl_init();
39236c6e 1694
91447636 1695 /*
316670eb
A
1696 * Create and start up the main DLIL input thread and the interface
1697 * detacher thread once everything is initialized.
91447636 1698 */
316670eb 1699 dlil_create_input_thread(NULL, dlil_main_input_thread);
2d21ac55 1700
316670eb
A
1701 if (kernel_thread_start(ifnet_detacher_thread_func,
1702 NULL, &thread) != KERN_SUCCESS) {
1703 panic_plain("%s: couldn't create detacher thread", __func__);
6d2010ae
A
1704 /* NOTREACHED */
1705 }
b0d623f7 1706 thread_deallocate(thread);
5ba3f43e 1707
91447636 1708}
1c79356b 1709
6d2010ae
A
1710static void
1711if_flt_monitor_busy(struct ifnet *ifp)
1712{
5ba3f43e 1713 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1714
1715 ++ifp->if_flt_busy;
1716 VERIFY(ifp->if_flt_busy != 0);
1717}
1718
1719static void
1720if_flt_monitor_unbusy(struct ifnet *ifp)
1721{
1722 if_flt_monitor_leave(ifp);
1723}
1724
1725static void
1726if_flt_monitor_enter(struct ifnet *ifp)
1727{
5ba3f43e 1728 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1729
1730 while (ifp->if_flt_busy) {
1731 ++ifp->if_flt_waiters;
1732 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1733 (PZERO - 1), "if_flt_monitor", NULL);
1734 }
1735 if_flt_monitor_busy(ifp);
1736}
1737
1738static void
1739if_flt_monitor_leave(struct ifnet *ifp)
1740{
5ba3f43e 1741 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1742
1743 VERIFY(ifp->if_flt_busy != 0);
1744 --ifp->if_flt_busy;
1745
1746 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1747 ifp->if_flt_waiters = 0;
1748 wakeup(&ifp->if_flt_head);
1749 }
1750}
1751
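/*
 * The four helpers above form a small sleep-based monitor over the
 * filter list: a mutator calls if_flt_monitor_enter() to wait out any
 * busy holders and then owns the list until if_flt_monitor_leave(),
 * while transient holders bracket their work with the busy/unbusy
 * pair.  A minimal sketch of the mutator side, mirroring what
 * dlil_attach_filter() does below (all under if_flt_lock):
 *
 *	lck_mtx_lock(&ifp->if_flt_lock);
 *	if_flt_monitor_enter(ifp);	// may msleep until list is idle
 *	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
 *	if_flt_monitor_leave(ifp);	// wakes any pending waiters
 *	lck_mtx_unlock(&ifp->if_flt_lock);
 */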
2d21ac55 1752__private_extern__ int
6d2010ae 1753dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
39236c6e 1754 interface_filter_t *filter_ref, u_int32_t flags)
6d2010ae
A
1755{
1756 int retval = 0;
1757 struct ifnet_filter *filter = NULL;
9bccf70c 1758
6d2010ae
A
1759 ifnet_head_lock_shared();
1760 /* Check that the interface is in the global list */
1761 if (!ifnet_lookup(ifp)) {
1762 retval = ENXIO;
1763 goto done;
1764 }
1765
1766 filter = zalloc(dlif_filt_zone);
1767 if (filter == NULL) {
1768 retval = ENOMEM;
1769 goto done;
1770 }
1771 bzero(filter, dlif_filt_size);
1772
1773 /* refcnt held above during lookup */
39236c6e 1774 filter->filt_flags = flags;
91447636
A
1775 filter->filt_ifp = ifp;
1776 filter->filt_cookie = if_filter->iff_cookie;
1777 filter->filt_name = if_filter->iff_name;
1778 filter->filt_protocol = if_filter->iff_protocol;
743345f9
A
1779 /*
1780 * Do not install filter callbacks for internal coproc interface
1781 */
1782 if (!IFNET_IS_INTCOPROC(ifp)) {
1783 filter->filt_input = if_filter->iff_input;
1784 filter->filt_output = if_filter->iff_output;
1785 filter->filt_event = if_filter->iff_event;
1786 filter->filt_ioctl = if_filter->iff_ioctl;
1787 }
91447636 1788 filter->filt_detached = if_filter->iff_detached;
6d2010ae
A
1789
1790 lck_mtx_lock(&ifp->if_flt_lock);
1791 if_flt_monitor_enter(ifp);
1792
5ba3f43e 1793 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 1794 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
6d2010ae
A
1795
1796 if_flt_monitor_leave(ifp);
1797 lck_mtx_unlock(&ifp->if_flt_lock);
1798
91447636 1799 *filter_ref = filter;
b0d623f7
A
1800
1801 /*
1802 * Bump filter count and route_generation ID to let TCP
1803 * know it shouldn't do TSO on this connection
1804 */
39236c6e
A
1805 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1806 OSAddAtomic(1, &dlil_filter_disable_tso_count);
b0d623f7 1807 routegenid_update();
39236c6e 1808 }
5ba3f43e
A
1809 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1810 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1811 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1812 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1813 }
6d2010ae 1814 if (dlil_verbose) {
39236c6e
A
1815 printf("%s: %s filter attached\n", if_name(ifp),
1816 if_filter->iff_name);
6d2010ae
A
1817 }
1818done:
1819 ifnet_head_done();
1820 if (retval != 0 && ifp != NULL) {
39236c6e
A
1821 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1822 if_name(ifp), if_filter->iff_name, retval);
6d2010ae
A
1823 }
1824 if (retval != 0 && filter != NULL)
1825 zfree(dlif_filt_zone, filter);
1826
1827 return (retval);
1c79356b
A
1828}
1829
91447636 1830static int
6d2010ae 1831dlil_detach_filter_internal(interface_filter_t filter, int detached)
1c79356b 1832{
91447636 1833 int retval = 0;
6d2010ae 1834
3a60a9f5 1835 if (detached == 0) {
6d2010ae
A
1836 ifnet_t ifp = NULL;
1837
3a60a9f5
A
1838 ifnet_head_lock_shared();
1839 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6d2010ae
A
1840 interface_filter_t entry = NULL;
1841
1842 lck_mtx_lock(&ifp->if_flt_lock);
3a60a9f5 1843 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
6d2010ae
A
1844 if (entry != filter || entry->filt_skip)
1845 continue;
1846 /*
1847 * We've found a match; since it's possible
1848 * that the thread gets blocked in the monitor,
1849 * we do the lock dance. The interface
1850 * cannot be detached since we still hold a
1851 * use count taken during filter attach.
1852 */
1853 entry->filt_skip = 1; /* skip input/output */
1854 lck_mtx_unlock(&ifp->if_flt_lock);
1855 ifnet_head_done();
1856
1857 lck_mtx_lock(&ifp->if_flt_lock);
1858 if_flt_monitor_enter(ifp);
5ba3f43e 1859 LCK_MTX_ASSERT(&ifp->if_flt_lock,
6d2010ae
A
1860 LCK_MTX_ASSERT_OWNED);
1861
1862 /* Remove the filter from the list */
1863 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1864 filt_next);
1865
1866 if_flt_monitor_leave(ifp);
1867 lck_mtx_unlock(&ifp->if_flt_lock);
1868 if (dlil_verbose) {
39236c6e
A
1869 printf("%s: %s filter detached\n",
1870 if_name(ifp), filter->filt_name);
6d2010ae
A
1871 }
1872 goto destroy;
3a60a9f5 1873 }
6d2010ae 1874 lck_mtx_unlock(&ifp->if_flt_lock);
3a60a9f5
A
1875 }
1876 ifnet_head_done();
6d2010ae
A
1877
1878 /* filter parameter is not a valid filter ref */
1879 retval = EINVAL;
1880 goto done;
3a60a9f5 1881 }
6d2010ae
A
1882
1883 if (dlil_verbose)
1884 printf("%s filter detached\n", filter->filt_name);
1885
1886destroy:
1887
1888 /* Call the detached function if there is one */
91447636
A
1889 if (filter->filt_detached)
1890 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
9bccf70c 1891
b0d623f7
A
1892 /*
1893 * Decrease filter count and bump route_generation ID to let TCP
1894 * know it should reevaluate whether to do TSO or not
1895 */
39236c6e
A
1896 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1897 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
b0d623f7 1898 routegenid_update();
39236c6e 1899 }
39037602 1900
5ba3f43e
A
1901 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1902
39037602
A
1903 /* Free the filter */
1904 zfree(dlif_filt_zone, filter);
1905 filter = NULL;
6d2010ae 1906done:
39037602 1907 if (retval != 0 && filter != NULL) {
6d2010ae
A
1908 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1909 filter->filt_name, retval);
1910 }
39037602 1911
6d2010ae 1912 return (retval);
1c79356b
A
1913}
1914
2d21ac55 1915__private_extern__ void
91447636
A
1916dlil_detach_filter(interface_filter_t filter)
1917{
3a60a9f5
A
1918 if (filter == NULL)
1919 return;
91447636
A
1920 dlil_detach_filter_internal(filter, 0);
1921}
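/*
 * A minimal sketch of how a kext reaches dlil_attach_filter() /
 * dlil_detach_filter() via the public KPI in
 * net/kpi_interfacefilter.h; the callback names and saved reference
 * here are hypothetical:
 *
 *	static errno_t
 *	my_iff_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
 *	    mbuf_t *data, char **frame_ptr)
 *	{
 *		return (0);	// 0 lets the packet continue up the stack
 *	}
 *
 *	static void
 *	my_iff_detached(void *cookie, ifnet_t ifp)
 *	{
 *		// last callback the filter sees; release cookie here
 *	}
 *
 *	static interface_filter_t my_filt_ref;
 *
 *	static errno_t
 *	my_filt_attach(ifnet_t ifp)
 *	{
 *		struct iff_filter flt;
 *
 *		bzero(&flt, sizeof (flt));
 *		flt.iff_name = "com.example.filter";
 *		flt.iff_input = my_iff_input;
 *		flt.iff_detached = my_iff_detached;
 *		return (iflt_attach(ifp, &flt, &my_filt_ref));
 *	}
 *
 * Teardown is iflt_detach(my_filt_ref).
 */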
1c79356b 1922
316670eb
A
1923/*
1924 * Main input thread:
1925 *
1926 * a) handles all inbound packets for lo0
1927 * b) handles all inbound packets for interfaces with no dedicated
1928 * input thread (e.g. anything but Ethernet/PDP or those that support
1929 * opportunistic polling.)
1930 * c) protocol registrations
1931 * d) packet injections
1932 */
39037602 1933__attribute__((noreturn))
91447636 1934static void
316670eb 1935dlil_main_input_thread_func(void *v, wait_result_t w)
91447636 1936{
316670eb
A
1937#pragma unused(w)
1938 struct dlil_main_threading_info *inpm = v;
1939 struct dlil_threading_info *inp = v;
1940
1941 VERIFY(inp == dlil_main_input_thread);
1942 VERIFY(inp->ifp == NULL);
1943 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1944
91447636 1945 while (1) {
2d21ac55 1946 struct mbuf *m = NULL, *m_loop = NULL;
316670eb
A
1947 u_int32_t m_cnt, m_cnt_loop;
1948 boolean_t proto_req;
6d2010ae 1949
316670eb 1950 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 1951
2d21ac55 1952 /* Wait until there is work to be done */
316670eb
A
1953 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1954 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1955 (void) msleep(&inp->input_waiting, &inp->input_lck,
1956 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
1957 }
1958
316670eb
A
1959 inp->input_waiting |= DLIL_INPUT_RUNNING;
1960 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55 1961
316670eb
A
1962 /* Main input thread cannot be terminated */
1963 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2d21ac55 1964
316670eb
A
1965 proto_req = (inp->input_waiting &
1966 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
6d2010ae 1967
316670eb
A
1968 /* Packets for non-dedicated interfaces other than lo0 */
1969 m_cnt = qlen(&inp->rcvq_pkts);
39037602 1970 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
6d2010ae 1971
39236c6e 1972 /* Packets exclusive to lo0 */
316670eb 1973 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
39037602 1974 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
6d2010ae 1975
316670eb 1976 inp->wtot = 0;
6d2010ae 1977
316670eb 1978 lck_mtx_unlock(&inp->input_lck);
6d2010ae 1979
316670eb 1980 /*
39037602
A
1981 * NOTE (warning):
1982 * we should think about putting some thread-starvation
1983 * safeguards in place if we deal with long chains of packets.
1984 */
316670eb
A
1985 if (m_loop != NULL)
1986 dlil_input_packet_list_extended(lo_ifp, m_loop,
1987 m_cnt_loop, inp->mode);
6d2010ae 1988
316670eb
A
1989 if (m != NULL)
1990 dlil_input_packet_list_extended(NULL, m,
1991 m_cnt, inp->mode);
1992
1993 if (proto_req)
1994 proto_input_run();
1995 }
1996
1997 /* NOTREACHED */
1998 VERIFY(0); /* we should never get here */
1999}
2000
2001/*
2002 * Input thread for interfaces with legacy input model.
2003 */
2004static void
2005dlil_input_thread_func(void *v, wait_result_t w)
2006{
2007#pragma unused(w)
39037602 2008 char thread_name[MAXTHREADNAMESIZE];
316670eb
A
2009 struct dlil_threading_info *inp = v;
2010 struct ifnet *ifp = inp->ifp;
2011
39037602
A
2012 /* Construct the name for this thread, and then apply it. */
2013 bzero(thread_name, sizeof(thread_name));
2014 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2015 thread_set_thread_name(inp->input_thr, thread_name);
2016
316670eb
A
2017 VERIFY(inp != dlil_main_input_thread);
2018 VERIFY(ifp != NULL);
2019 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2020 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2d21ac55 2021
316670eb
A
2022 while (1) {
2023 struct mbuf *m = NULL;
2024 u_int32_t m_cnt;
2025
2026 lck_mtx_lock_spin(&inp->input_lck);
2d21ac55 2027
316670eb
A
2028 /* Wait until there is work to be done */
2029 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2030 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2031 (void) msleep(&inp->input_waiting, &inp->input_lck,
2032 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
2033 }
2034
316670eb
A
2035 inp->input_waiting |= DLIL_INPUT_RUNNING;
2036 inp->input_waiting &= ~DLIL_INPUT_WAITING;
6d2010ae 2037
316670eb
A
2038 /*
2039 * Protocol registration and injection must always use
2040 * the main input thread; in theory the latter could use
2041 * the input thread of the interface the packet arrived
2042 * on, but that requires knowing the interface in advance
2043 * (and the benefits might not be worth the trouble.)
2044 */
2045 VERIFY(!(inp->input_waiting &
2046 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
6d2010ae 2047
316670eb
A
2048 /* Packets for this interface */
2049 m_cnt = qlen(&inp->rcvq_pkts);
39037602 2050 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
6d2010ae 2051
316670eb
A
2052 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2053 lck_mtx_unlock(&inp->input_lck);
2054
2055 /* Free up pending packets */
2056 if (m != NULL)
2057 mbuf_freem_list(m);
2058
2059 dlil_terminate_input_thread(inp);
2060 /* NOTREACHED */
2061 return;
2d21ac55
A
2062 }
2063
316670eb
A
2064 inp->wtot = 0;
2065
5ba3f43e 2066 dlil_input_stats_sync(ifp, inp);
316670eb
A
2067
2068 lck_mtx_unlock(&inp->input_lck);
2d21ac55 2069
91447636 2070 /*
39037602
A
2071 * NOTE (warning):
2072 * we should think about putting some thread-starvation
2073 * safeguards in place if we deal with long chains of packets.
2074 */
6d2010ae 2075 if (m != NULL)
316670eb
A
2076 dlil_input_packet_list_extended(NULL, m,
2077 m_cnt, inp->mode);
2d21ac55 2078 }
316670eb
A
2079
2080 /* NOTREACHED */
2081 VERIFY(0); /* we should never get here */
2d21ac55
A
2082}
2083
316670eb
A
2084/*
2085 * Input thread for interfaces with opportunistic polling input model.
2086 */
2087static void
2088dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2d21ac55 2089{
316670eb
A
2090#pragma unused(w)
2091 struct dlil_threading_info *inp = v;
2092 struct ifnet *ifp = inp->ifp;
2093 struct timespec ts;
2d21ac55 2094
316670eb
A
2095 VERIFY(inp != dlil_main_input_thread);
2096 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2d21ac55 2097
2d21ac55 2098 while (1) {
316670eb
A
2099 struct mbuf *m = NULL;
2100 u_int32_t m_cnt, m_size, poll_req = 0;
2101 ifnet_model_t mode;
2102 struct timespec now, delta;
39236c6e 2103 u_int64_t ival;
6d2010ae 2104
316670eb 2105 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 2106
39236c6e
A
2107 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
2108 ival = IF_RXPOLL_INTERVALTIME_MIN;
2109
316670eb
A
2110 /* Link parameters changed? */
2111 if (ifp->if_poll_update != 0) {
2112 ifp->if_poll_update = 0;
39236c6e 2113 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
91447636 2114 }
1c79356b 2115
316670eb
A
2116 /* Current operating mode */
2117 mode = inp->mode;
1c79356b 2118
316670eb 2119 /* Wait until there is work to be done */
39236c6e 2120 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
316670eb
A
2121 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2122 (void) msleep(&inp->input_waiting, &inp->input_lck,
2123 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2124 }
2d21ac55 2125
316670eb
A
2126 inp->input_waiting |= DLIL_INPUT_RUNNING;
2127 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55
A
2128
2129 /*
316670eb
A
2130 * Protocol registration and injection must always use
2131 * the main input thread; in theory the latter could use
2132 * the input thread of the interface the packet arrived
2133 * on, but that requires knowing the interface in advance
2134 * (and the benefits might not be worth the trouble.)
2d21ac55 2135 */
316670eb
A
2136 VERIFY(!(inp->input_waiting &
2137 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2d21ac55 2138
316670eb
A
2139 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2140 /* Free up pending packets */
5ba3f43e 2141 lck_mtx_convert_spin(&inp->input_lck);
316670eb 2142 _flushq(&inp->rcvq_pkts);
5ba3f43e
A
2143 if (inp->input_mit_tcall != NULL) {
2144 if (thread_call_isactive(inp->input_mit_tcall))
2145 thread_call_cancel(inp->input_mit_tcall);
2146 }
316670eb 2147 lck_mtx_unlock(&inp->input_lck);
2d21ac55 2148
316670eb
A
2149 dlil_terminate_input_thread(inp);
2150 /* NOTREACHED */
2151 return;
2d21ac55 2152 }
2d21ac55 2153
316670eb
A
2154 /* Total count of all packets */
2155 m_cnt = qlen(&inp->rcvq_pkts);
2156
2157 /* Total bytes of all packets */
2158 m_size = qsize(&inp->rcvq_pkts);
2159
2160 /* Packets for this interface */
39037602 2161 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
316670eb
A
2162 VERIFY(m != NULL || m_cnt == 0);
2163
2164 nanouptime(&now);
2165 if (!net_timerisset(&inp->sample_lasttime))
2166 *(&inp->sample_lasttime) = *(&now);
2167
2168 net_timersub(&now, &inp->sample_lasttime, &delta);
2169 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2170 u_int32_t ptot, btot;
2171
2172 /* Accumulate statistics for current sampling */
2173 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2174
2175 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2176 goto skip;
2177
2178 *(&inp->sample_lasttime) = *(&now);
2179
2180 /* Calculate min/max of inbound bytes */
2181 btot = (u_int32_t)inp->sstats.bytes;
2182 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2183 inp->rxpoll_bmin = btot;
2184 if (btot > inp->rxpoll_bmax)
2185 inp->rxpoll_bmax = btot;
2186
2187 /* Calculate EWMA of inbound bytes */
2188 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
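			/*
			 * Worked example, assuming DLIL_EWMA is the usual
			 * shift-based filter with weight 1/2^decay: with
			 * decay == 2, bavg == 1000 and a new sample btot ==
			 * 2000, the update is ((1000 << 2) - 1000 + 2000)
			 * >> 2 == 1250, i.e. avg += (new - avg) / 4.
			 */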
2189
2190 /* Calculate min/max of inbound packets */
2191 ptot = (u_int32_t)inp->sstats.packets;
2192 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2193 inp->rxpoll_pmin = ptot;
2194 if (ptot > inp->rxpoll_pmax)
2195 inp->rxpoll_pmax = ptot;
2196
2197 /* Calculate EWMA of inbound packets */
2198 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2199
2200 /* Reset sampling statistics */
2201 PKTCNTR_CLEAR(&inp->sstats);
2202
2203 /* Calculate EWMA of wakeup requests */
2204 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2205 inp->wtot = 0;
2206
2207 if (dlil_verbose) {
2208 if (!net_timerisset(&inp->dbg_lasttime))
2209 *(&inp->dbg_lasttime) = *(&now);
2210 net_timersub(&now, &inp->dbg_lasttime, &delta);
2211 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2212 *(&inp->dbg_lasttime) = *(&now);
39236c6e 2213 printf("%s: [%s] pkts avg %d max %d "
316670eb
A
2214 "limits [%d/%d], wreq avg %d "
2215 "limits [%d/%d], bytes avg %d "
39236c6e
A
2216 "limits [%d/%d]\n", if_name(ifp),
2217 (inp->mode ==
316670eb
A
2218 IFNET_MODEL_INPUT_POLL_ON) ?
2219 "ON" : "OFF", inp->rxpoll_pavg,
2220 inp->rxpoll_pmax,
2221 inp->rxpoll_plowat,
2222 inp->rxpoll_phiwat,
2223 inp->rxpoll_wavg,
2224 inp->rxpoll_wlowat,
2225 inp->rxpoll_whiwat,
2226 inp->rxpoll_bavg,
2227 inp->rxpoll_blowat,
2228 inp->rxpoll_bhiwat);
2229 }
2230 }
2d21ac55 2231
316670eb
A
2232 /* Perform mode transition, if necessary */
2233 if (!net_timerisset(&inp->mode_lasttime))
2234 *(&inp->mode_lasttime) = *(&now);
2235
2236 net_timersub(&now, &inp->mode_lasttime, &delta);
2237 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2238 goto skip;
2239
2240 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2241 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
316670eb
A
2242 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2243 mode = IFNET_MODEL_INPUT_POLL_OFF;
2244 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2245 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2246 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2247 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2248 mode = IFNET_MODEL_INPUT_POLL_ON;
2249 }
6d2010ae 2250
316670eb
A
2251 if (mode != inp->mode) {
2252 inp->mode = mode;
2253 *(&inp->mode_lasttime) = *(&now);
2254 poll_req++;
2255 }
2256 }
2257skip:
2258 dlil_input_stats_sync(ifp, inp);
6d2010ae 2259
316670eb 2260 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2261
316670eb
A
2262 /*
2263 * If there's a mode change and the interface is still attached,
2264 * perform a downcall to the driver for the new mode. Also
2265 * hold an IO refcnt on the interface to prevent it from
2266 * being detached (will be released below.)
2267 */
2268 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2269 struct ifnet_model_params p = { mode, { 0 } };
2270 errno_t err;
2271
2272 if (dlil_verbose) {
39236c6e 2273 printf("%s: polling is now %s, "
316670eb
A
2274 "pkts avg %d max %d limits [%d/%d], "
2275 "wreq avg %d limits [%d/%d], "
2276 "bytes avg %d limits [%d/%d]\n",
39236c6e 2277 if_name(ifp),
316670eb
A
2278 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2279 "ON" : "OFF", inp->rxpoll_pavg,
2280 inp->rxpoll_pmax, inp->rxpoll_plowat,
2281 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2282 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2283 inp->rxpoll_bavg, inp->rxpoll_blowat,
2284 inp->rxpoll_bhiwat);
2285 }
2d21ac55 2286
316670eb
A
2287 if ((err = ((*ifp->if_input_ctl)(ifp,
2288 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
39236c6e
A
2289 printf("%s: error setting polling mode "
2290 "to %s (%d)\n", if_name(ifp),
316670eb
A
2291 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2292 "ON" : "OFF", err);
2293 }
1c79356b 2294
316670eb
A
2295 switch (mode) {
2296 case IFNET_MODEL_INPUT_POLL_OFF:
2297 ifnet_set_poll_cycle(ifp, NULL);
2298 inp->rxpoll_offreq++;
2299 if (err != 0)
2300 inp->rxpoll_offerr++;
2301 break;
2d21ac55 2302
316670eb 2303 case IFNET_MODEL_INPUT_POLL_ON:
39236c6e 2304 net_nsectimer(&ival, &ts);
316670eb
A
2305 ifnet_set_poll_cycle(ifp, &ts);
2306 ifnet_poll(ifp);
2307 inp->rxpoll_onreq++;
2308 if (err != 0)
2309 inp->rxpoll_onerr++;
2310 break;
2311
2312 default:
2313 VERIFY(0);
2314 /* NOTREACHED */
2315 }
2316
2317 /* Release the IO refcnt */
2318 ifnet_decr_iorefcnt(ifp);
2319 }
2320
2321 /*
39037602
A
2322 * NOTE (warning):
2323 * we should think about putting some thread-starvation
2324 * safeguards in place if we deal with long chains of packets.
2325 */
316670eb
A
2326 if (m != NULL)
2327 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2328 }
2329
2330 /* NOTREACHED */
2331 VERIFY(0); /* we should never get here */
2332}
2333
39236c6e
A
2334/*
2335 * Must be called on an attached ifnet (caller is expected to check.)
2336 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2337 */
2338errno_t
2339dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2340 boolean_t locked)
316670eb 2341{
39236c6e 2342 struct dlil_threading_info *inp;
316670eb
A
2343 u_int64_t sample_holdtime, inbw;
2344
39236c6e
A
2345 VERIFY(ifp != NULL);
2346 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2347 return (ENXIO);
2348
2349 if (p != NULL) {
2350 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2351 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2352 return (EINVAL);
2353 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2354 p->packets_lowat >= p->packets_hiwat)
2355 return (EINVAL);
2356 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2357 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2358 return (EINVAL);
2359 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2360 p->bytes_lowat >= p->bytes_hiwat)
2361 return (EINVAL);
2362 if (p->interval_time != 0 &&
2363 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2364 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2365 }
2366
2367 if (!locked)
2368 lck_mtx_lock(&inp->input_lck);
2369
5ba3f43e 2370 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
39236c6e
A
2371
2372 /*
2373 * Normally, we'd reset the parameters to the auto-tuned values
2374 * if the input thread detects a change in link rate. If the
2375 * driver provides its own parameters right after the link rate
2376 * changes, but before the input thread gets to run, we want to
2377 * make sure to keep the driver's values. Clearing if_poll_update
2378 * will achieve that.
2379 */
2380 if (p != NULL && !locked && ifp->if_poll_update != 0)
2381 ifp->if_poll_update = 0;
316670eb 2382
39236c6e 2383 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
316670eb
A
2384 sample_holdtime = 0; /* polling is disabled */
2385 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2386 inp->rxpoll_blowat = 0;
2387 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2388 inp->rxpoll_bhiwat = (u_int32_t)-1;
39236c6e
A
2389 inp->rxpoll_plim = 0;
2390 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
316670eb 2391 } else {
39236c6e
A
2392 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2393 u_int64_t ival;
316670eb
A
2394 unsigned int n, i;
2395
39236c6e 2396 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
316670eb
A
2397 if (inbw < rxpoll_tbl[i].speed)
2398 break;
2399 n = i;
2400 }
39236c6e
A
2401 /* auto-tune if caller didn't specify a value */
2402 plowat = ((p == NULL || p->packets_lowat == 0) ?
2403 rxpoll_tbl[n].plowat : p->packets_lowat);
2404 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2405 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2406 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2407 rxpoll_tbl[n].blowat : p->bytes_lowat);
2408 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2409 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2410 plim = ((p == NULL || p->packets_limit == 0) ?
2411 if_rxpoll_max : p->packets_limit);
2412 ival = ((p == NULL || p->interval_time == 0) ?
2413 if_rxpoll_interval_time : p->interval_time);
2414
2415 VERIFY(plowat != 0 && phiwat != 0);
2416 VERIFY(blowat != 0 && bhiwat != 0);
2417 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2418
316670eb
A
2419 sample_holdtime = if_rxpoll_sample_holdtime;
2420 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2421 inp->rxpoll_whiwat = if_rxpoll_whiwat;
39236c6e
A
2422 inp->rxpoll_plowat = plowat;
2423 inp->rxpoll_phiwat = phiwat;
2424 inp->rxpoll_blowat = blowat;
2425 inp->rxpoll_bhiwat = bhiwat;
2426 inp->rxpoll_plim = plim;
2427 inp->rxpoll_ival = ival;
316670eb
A
2428 }
2429
2430 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2431 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2432
2433 if (dlil_verbose) {
39236c6e
A
2434 printf("%s: speed %llu bps, sample per %llu nsec, "
2435 "poll interval %llu nsec, pkts per poll %u, "
2436 "pkt limits [%u/%u], wreq limits [%u/%u], "
2437 "bytes limits [%u/%u]\n", if_name(ifp),
2438 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2439 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2440 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
316670eb 2441 }
39236c6e
A
2442
2443 if (!locked)
2444 lck_mtx_unlock(&inp->input_lck);
2445
2446 return (0);
2447}
2448
2449/*
2450 * Must be called on an attached ifnet (caller is expected to check.)
2451 */
2452errno_t
2453dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2454{
2455 struct dlil_threading_info *inp;
2456
2457 VERIFY(ifp != NULL && p != NULL);
2458 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2459 return (ENXIO);
2460
2461 bzero(p, sizeof (*p));
2462
2463 lck_mtx_lock(&inp->input_lck);
2464 p->packets_limit = inp->rxpoll_plim;
2465 p->packets_lowat = inp->rxpoll_plowat;
2466 p->packets_hiwat = inp->rxpoll_phiwat;
2467 p->bytes_lowat = inp->rxpoll_blowat;
2468 p->bytes_hiwat = inp->rxpoll_bhiwat;
2469 p->interval_time = inp->rxpoll_ival;
2470 lck_mtx_unlock(&inp->input_lck);
2471
2472 return (0);
316670eb
A
2473}
2474
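/*
 * Sketch of a driver-side caller of the pair above (ifp assumed
 * attached and IFEF_RXPOLL capable; values illustrative).  Zeroed
 * fields fall back to the auto-tuned defaults:
 *
 *	struct ifnet_poll_params p;
 *
 *	bzero(&p, sizeof (p));
 *	p.packets_lowat = 8;	// leave polling below this average
 *	p.packets_hiwat = 64;	// enter polling above this average
 *	error = dlil_rxpoll_set_params(ifp, &p, FALSE);
 *
 *	// ... later, read back the effective values
 *	error = dlil_rxpoll_get_params(ifp, &p);
 */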
2475errno_t
2476ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2477 const struct ifnet_stat_increment_param *s)
2478{
2479 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2480}
2481
2482errno_t
2483ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2484 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2485{
2486 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2487}
2488
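/*
 * Hedged sketch of the driver-facing contract for the extended
 * variant (chain and counters hypothetical): packets_in must equal
 * the chain length, as ifnet_input_common() below asserts on it,
 * while bytes_in is treated as an approximation:
 *
 *	struct ifnet_stat_increment_param s;
 *
 *	bzero(&s, sizeof (s));
 *	s.packets_in = pkt_cnt;	// must match the mbuf chain exactly
 *	s.bytes_in = byte_cnt;	// may include link-layer headers
 *	error = ifnet_input_extended(ifp, m_head, m_tail, &s);
 */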
2489static errno_t
2490ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2491 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2492{
5ba3f43e 2493 dlil_input_func input_func;
39037602 2494 struct ifnet_stat_increment_param _s;
316670eb 2495 u_int32_t m_cnt = 0, m_size = 0;
39037602
A
2496 struct mbuf *last;
2497 errno_t err = 0;
316670eb 2498
39236c6e
A
2499 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2500 if (m_head != NULL)
2501 mbuf_freem_list(m_head);
2502 return (EINVAL);
2503 }
2504
2505 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2506 VERIFY(m_tail == NULL || ext);
2507 VERIFY(s != NULL || !ext);
2508
316670eb
A
2509 /*
2510 * Drop the packet(s) if the parameters are invalid, or if the
2511 * interface is no longer attached; else hold an IO refcnt to
2512 * prevent it from being detached (will be released below.)
2513 */
39236c6e 2514 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
316670eb
A
2515 if (m_head != NULL)
2516 mbuf_freem_list(m_head);
2517 return (EINVAL);
2518 }
2519
5ba3f43e
A
2520 input_func = ifp->if_input_dlil;
2521 VERIFY(input_func != NULL);
39037602 2522
316670eb
A
2523 if (m_tail == NULL) {
2524 last = m_head;
39236c6e 2525 while (m_head != NULL) {
316670eb
A
2526#if IFNET_INPUT_SANITY_CHK
2527 if (dlil_input_sanity_check != 0)
2528 DLIL_INPUT_CHECK(last, ifp);
2529#endif /* IFNET_INPUT_SANITY_CHK */
2530 m_cnt++;
2531 m_size += m_length(last);
2532 if (mbuf_nextpkt(last) == NULL)
2533 break;
2534 last = mbuf_nextpkt(last);
2535 }
2536 m_tail = last;
2537 } else {
2538#if IFNET_INPUT_SANITY_CHK
2539 if (dlil_input_sanity_check != 0) {
2540 last = m_head;
2541 while (1) {
2542 DLIL_INPUT_CHECK(last, ifp);
2543 m_cnt++;
2544 m_size += m_length(last);
2545 if (mbuf_nextpkt(last) == NULL)
2546 break;
2547 last = mbuf_nextpkt(last);
2548 }
2549 } else {
2550 m_cnt = s->packets_in;
2551 m_size = s->bytes_in;
2552 last = m_tail;
2553 }
2554#else
2555 m_cnt = s->packets_in;
2556 m_size = s->bytes_in;
2557 last = m_tail;
2558#endif /* IFNET_INPUT_SANITY_CHK */
2559 }
2560
2561 if (last != m_tail) {
39236c6e
A
2562 panic_plain("%s: invalid input packet chain for %s, "
2563 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2564 m_tail, last);
316670eb
A
2565 }
2566
2567 /*
2568 * Assert packet count only for the extended variant, for backwards
2569 * compatibility, since this came directly from the device driver.
2570 * Relax this assertion for input bytes, as the driver may have
2571 * included the link-layer headers in the computation; hence
2572 * m_size is just an approximation.
2573 */
2574 if (ext && s->packets_in != m_cnt) {
39236c6e
A
2575 panic_plain("%s: input packet count mismatch for %s, "
2576 "%d instead of %d\n", __func__, if_name(ifp),
2577 s->packets_in, m_cnt);
316670eb
A
2578 }
2579
39037602
A
2580 if (s == NULL) {
2581 bzero(&_s, sizeof (_s));
2582 s = &_s;
2583 } else {
2584 _s = *s;
2585 }
2586 _s.packets_in = m_cnt;
2587 _s.bytes_in = m_size;
2588
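	/*
	 * Note: when the caller supplied its own stats (s != NULL), the
	 * normalized copy in _s is not what gets passed below; s only
	 * aliases _s in the s == NULL case handled above.
	 */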
5ba3f43e 2589 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
39037602
A
2590
2591 if (ifp != lo_ifp) {
2592 /* Release the IO refcnt */
2593 ifnet_decr_iorefcnt(ifp);
2594 }
2595
2596 return (err);
2597}
2598
39037602
A
2599
2600errno_t
2601dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2602{
2603 return (ifp->if_output(ifp, m));
2604}
2605
2606errno_t
2607dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2608 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2609 boolean_t poll, struct thread *tp)
2610{
2611 struct dlil_threading_info *inp;
2612 u_int32_t m_cnt = s->packets_in;
2613 u_int32_t m_size = s->bytes_in;
2614
316670eb
A
2615 if ((inp = ifp->if_inp) == NULL)
2616 inp = dlil_main_input_thread;
2617
2618 /*
2619 * If there is a matching DLIL input thread associated with an
2620 * affinity set, associate this thread with the same set. We
2621 * will only do this once.
2622 */
2623 lck_mtx_lock_spin(&inp->input_lck);
39037602 2624 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
316670eb
A
2625 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2626 (poll && inp->poll_thr == THREAD_NULL))) {
2627 u_int32_t tag = inp->tag;
2628
2629 if (poll) {
2630 VERIFY(inp->poll_thr == THREAD_NULL);
2631 inp->poll_thr = tp;
2632 } else {
2633 VERIFY(inp->wloop_thr == THREAD_NULL);
2634 inp->wloop_thr = tp;
2635 }
2636 lck_mtx_unlock(&inp->input_lck);
2637
2638 /* Associate the current thread with the new affinity tag */
2639 (void) dlil_affinity_set(tp, tag);
2640
2641 /*
2642 * Take a reference on the current thread; during detach,
5ba3f43e 2643 * we will need to refer to it in order to tear down its
316670eb
A
2644 * affinity.
2645 */
2646 thread_reference(tp);
2647 lck_mtx_lock_spin(&inp->input_lck);
2648 }
2649
39236c6e
A
2650 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2651
39037602 2652 /*
316670eb
A
2653 * Because of loopbacked multicast we cannot stuff the ifp in
2654 * the rcvif of the packet header: loopback (lo0) packets use a
2655 * dedicated list so that we can later associate them with lo_ifp
2656 * on their way up the stack. Packets for other interfaces without
2657 * dedicated input threads go to the regular list.
2658 */
39236c6e
A
2659 if (m_head != NULL) {
2660 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2661 struct dlil_main_threading_info *inpm =
2662 (struct dlil_main_threading_info *)inp;
2663 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2664 m_cnt, m_size);
2665 } else {
2666 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2667 m_cnt, m_size);
2668 }
316670eb
A
2669 }
2670
2671#if IFNET_INPUT_SANITY_CHK
2672 if (dlil_input_sanity_check != 0) {
2673 u_int32_t count;
2674 struct mbuf *m0;
2675
2676 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2677 count++;
2678
2679 if (count != m_cnt) {
39236c6e
A
2680 panic_plain("%s: invalid packet count %d "
2681 "(expected %d)\n", if_name(ifp),
316670eb
A
2682 count, m_cnt);
2683 /* NOTREACHED */
2684 }
2685
2686 inp->input_mbuf_cnt += m_cnt;
2687 }
2688#endif /* IFNET_INPUT_SANITY_CHK */
2689
39037602
A
2690 dlil_input_stats_add(s, inp, poll);
2691 /*
2692 * If we're using the main input thread, synchronize the
2693 * stats now since we have the interface context. All
2694 * other cases involving dedicated input threads will
2695 * have their stats synchronized there.
2696 */
2697 if (inp == dlil_main_input_thread)
2698 dlil_input_stats_sync(ifp, inp);
316670eb 2699
5ba3f43e
A
2700 if (qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
2701 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2702 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2703 ifp->if_type == IFT_CELLULAR)
2704 ) {
2705 if (!thread_call_isactive(inp->input_mit_tcall)) {
2706 uint64_t deadline;
2707 clock_interval_to_deadline(dlil_rcv_mit_interval,
2708 1, &deadline);
2709 (void) thread_call_enter_delayed(
2710 inp->input_mit_tcall, deadline);
2711 }
2712 } else {
2713 inp->input_waiting |= DLIL_INPUT_WAITING;
2714 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2715 inp->wtot++;
2716 wakeup_one((caddr_t)&inp->input_waiting);
2717 }
316670eb
A
2718 }
2719 lck_mtx_unlock(&inp->input_lck);
2720
316670eb
A
2721 return (0);
2722}
2723
5ba3f43e 2724
39236c6e
A
2725static void
2726ifnet_start_common(struct ifnet *ifp, int resetfc)
316670eb 2727{
39236c6e
A
2728 if (!(ifp->if_eflags & IFEF_TXSTART))
2729 return;
316670eb 2730 /*
39236c6e
A
2731 * If the starter thread is inactive, signal it to do work,
2732 * unless the interface is being flow controlled from below,
2733 * e.g. a virtual interface being flow controlled by a real
2734 * network interface beneath it.
316670eb
A
2735 */
2736 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e
A
2737 if (resetfc) {
2738 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2739 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2740 lck_mtx_unlock(&ifp->if_start_lock);
2741 return;
2742 }
316670eb 2743 ifp->if_start_req++;
3e170ce0
A
2744 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2745 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
39037602
A
2746 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2747 ifp->if_start_delayed == 0)) {
5ba3f43e
A
2748 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2749 ifp->if_start_thread);
316670eb
A
2750 }
2751 lck_mtx_unlock(&ifp->if_start_lock);
2752}
2753
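/*
 * ifnet_start() below is the plain "new work" doorbell (resetfc == 0);
 * the flow-control resume path is expected to call
 * ifnet_start_common(ifp, 1) instead, so that IFSF_FLOW_CONTROLLED is
 * cleared first and a previously throttled starter thread gets woken
 * even if nothing new was enqueued.
 */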
39236c6e
A
2754void
2755ifnet_start(struct ifnet *ifp)
2756{
2757 ifnet_start_common(ifp, 0);
2758}
2759
316670eb
A
2760static void
2761ifnet_start_thread_fn(void *v, wait_result_t w)
2762{
2763#pragma unused(w)
2764 struct ifnet *ifp = v;
2765 char ifname[IFNAMSIZ + 1];
39037602 2766 char thread_name[MAXTHREADNAMESIZE];
316670eb
A
2767 struct timespec *ts = NULL;
2768 struct ifclassq *ifq = &ifp->if_snd;
3e170ce0 2769 struct timespec delay_start_ts;
316670eb 2770
39037602
A
2771 /* Construct the name for this thread, and then apply it. */
2772 bzero(thread_name, sizeof(thread_name));
5ba3f43e
A
2773 (void) snprintf(thread_name, sizeof (thread_name),
2774 "ifnet_start_%s", ifp->if_xname);
39037602
A
2775 thread_set_thread_name(ifp->if_start_thread, thread_name);
2776
316670eb
A
2777 /*
2778 * Treat the dedicated starter thread for lo0 as equivalent to
2779 * the driver workloop thread; if net_affinity is enabled for
2780 * the main input thread, associate this starter thread with it
2781 * by binding them to the same affinity tag. This is done
2782 * only once (as we only have one lo_ifp which never goes away.)
2783 */
2784 if (ifp == lo_ifp) {
2785 struct dlil_threading_info *inp = dlil_main_input_thread;
2786 struct thread *tp = current_thread();
2787
2788 lck_mtx_lock(&inp->input_lck);
2789 if (inp->net_affinity) {
2790 u_int32_t tag = inp->tag;
2791
2792 VERIFY(inp->wloop_thr == THREAD_NULL);
2793 VERIFY(inp->poll_thr == THREAD_NULL);
2794 inp->wloop_thr = tp;
2795 lck_mtx_unlock(&inp->input_lck);
2796
2797 /* Associate this thread with the affinity tag */
2798 (void) dlil_affinity_set(tp, tag);
2799 } else {
2800 lck_mtx_unlock(&inp->input_lck);
2801 }
2802 }
2803
5ba3f43e 2804 (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));
316670eb
A
2805
2806 lck_mtx_lock_spin(&ifp->if_start_lock);
2807
2808 for (;;) {
5ba3f43e 2809 if (ifp->if_start_thread != NULL) {
39037602
A
2810 (void) msleep(&ifp->if_start_thread,
2811 &ifp->if_start_lock,
3e170ce0 2812 (PZERO - 1) | PSPIN, ifname, ts);
5ba3f43e 2813 }
316670eb
A
2814 /* interface is detached? */
2815 if (ifp->if_start_thread == THREAD_NULL) {
2816 ifnet_set_start_cycle(ifp, NULL);
2817 lck_mtx_unlock(&ifp->if_start_lock);
2818 ifnet_purge(ifp);
2819
2820 if (dlil_verbose) {
39236c6e
A
2821 printf("%s: starter thread terminated\n",
2822 if_name(ifp));
316670eb
A
2823 }
2824
2825 /* for the extra refcnt from kernel_thread_start() */
2826 thread_deallocate(current_thread());
2827 /* this is the end */
2828 thread_terminate(current_thread());
2829 /* NOTREACHED */
2830 return;
2831 }
2832
2833 ifp->if_start_active = 1;
3e170ce0 2834
316670eb
A
2835 for (;;) {
2836 u_int32_t req = ifp->if_start_req;
3e170ce0
A
2837 if (!IFCQ_IS_EMPTY(ifq) &&
2838 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2839 ifp->if_start_delayed == 0 &&
2840 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2841 (ifp->if_eflags & IFEF_DELAY_START)) {
2842 ifp->if_start_delayed = 1;
2843 ifnet_start_delayed++;
2844 break;
2845 } else {
2846 ifp->if_start_delayed = 0;
2847 }
316670eb 2848 lck_mtx_unlock(&ifp->if_start_lock);
3e170ce0
A
2849
2850 /*
2851 * If no longer attached, don't call start because ifp
2852 * is being destroyed; else hold an IO refcnt to
2853 * prevent the interface from being detached (will be
2854 * released below.)
2855 */
2856 if (!ifnet_is_attached(ifp, 1)) {
2857 lck_mtx_lock_spin(&ifp->if_start_lock);
2858 break;
2859 }
2860
316670eb
A
2861 /* invoke the driver's start routine */
2862 ((*ifp->if_start)(ifp));
3e170ce0
A
2863
2864 /*
2865 * Release the io ref count taken by ifnet_is_attached.
2866 */
2867 ifnet_decr_iorefcnt(ifp);
2868
316670eb
A
2869 lck_mtx_lock_spin(&ifp->if_start_lock);
2870
2871 /* if there's no pending request, we're done */
2872 if (req == ifp->if_start_req)
2873 break;
2874 }
3e170ce0 2875
316670eb
A
2876 ifp->if_start_req = 0;
2877 ifp->if_start_active = 0;
3e170ce0 2878
316670eb
A
2879 /*
2880 * Wake up N ns from now if rate-controlled by TBR, and if
2881 * there are still packets in the send queue which haven't
2882 * been dequeued so far; else sleep indefinitely (ts = NULL)
2883 * until ifnet_start() is called again.
2884 */
2885 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2886 &ifp->if_start_cycle : NULL);
2887
3e170ce0
A
2888 if (ts == NULL && ifp->if_start_delayed == 1) {
2889 delay_start_ts.tv_sec = 0;
2890 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2891 ts = &delay_start_ts;
2892 }
2893
316670eb
A
2894 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2895 ts = NULL;
2896 }
2897
2898 /* NOTREACHED */
316670eb
A
2899}
2900
2901void
2902ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2903{
2904 if (ts == NULL)
2905 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2906 else
2907 *(&ifp->if_start_cycle) = *ts;
2908
2909 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
39236c6e
A
2910 printf("%s: restart interval set to %lu nsec\n",
2911 if_name(ifp), ts->tv_nsec);
316670eb
A
2912}
2913
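/*
 * Illustrative use of ifnet_set_start_cycle(), e.g. pacing the starter
 * thread at 1 ms while a token bucket regulator is active (value
 * hypothetical; NSEC_PER_MSEC from mach/clock_types.h):
 *
 *	struct timespec ts = { 0, 1 * NSEC_PER_MSEC };
 *
 *	ifnet_set_start_cycle(ifp, &ts);
 */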
2914static void
2915ifnet_poll(struct ifnet *ifp)
2916{
2917 /*
2918 * If the poller thread is inactive, signal it to do work.
2919 */
2920 lck_mtx_lock_spin(&ifp->if_poll_lock);
2921 ifp->if_poll_req++;
2922 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2923 wakeup_one((caddr_t)&ifp->if_poll_thread);
2924 }
2925 lck_mtx_unlock(&ifp->if_poll_lock);
2926}
2927
2928static void
2929ifnet_poll_thread_fn(void *v, wait_result_t w)
2930{
2931#pragma unused(w)
2932 struct dlil_threading_info *inp;
2933 struct ifnet *ifp = v;
2934 char ifname[IFNAMSIZ + 1];
2935 struct timespec *ts = NULL;
2936 struct ifnet_stat_increment_param s;
2937
39236c6e
A
2938 snprintf(ifname, sizeof (ifname), "%s_poller",
2939 if_name(ifp));
316670eb
A
2940 bzero(&s, sizeof (s));
2941
2942 lck_mtx_lock_spin(&ifp->if_poll_lock);
2943
2944 inp = ifp->if_inp;
2945 VERIFY(inp != NULL);
2946
2947 for (;;) {
2948 if (ifp->if_poll_thread != THREAD_NULL) {
2949 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2950 (PZERO - 1) | PSPIN, ifname, ts);
2951 }
2952
2953 /* interface is detached (maybe while asleep)? */
2954 if (ifp->if_poll_thread == THREAD_NULL) {
2955 ifnet_set_poll_cycle(ifp, NULL);
2956 lck_mtx_unlock(&ifp->if_poll_lock);
2957
2958 if (dlil_verbose) {
39236c6e
A
2959 printf("%s: poller thread terminated\n",
2960 if_name(ifp));
316670eb
A
2961 }
2962
2963 /* for the extra refcnt from kernel_thread_start() */
2964 thread_deallocate(current_thread());
2965 /* this is the end */
2966 thread_terminate(current_thread());
2967 /* NOTREACHED */
2968 return;
2969 }
2970
2971 ifp->if_poll_active = 1;
2972 for (;;) {
2973 struct mbuf *m_head, *m_tail;
2974 u_int32_t m_lim, m_cnt, m_totlen;
2975 u_int16_t req = ifp->if_poll_req;
2976
2977 lck_mtx_unlock(&ifp->if_poll_lock);
2978
2979 /*
2980 * If no longer attached, there's nothing to do;
2981 * else hold an IO refcnt to prevent the interface
2982 * from being detached (will be released below.)
2983 */
db609669
A
2984 if (!ifnet_is_attached(ifp, 1)) {
2985 lck_mtx_lock_spin(&ifp->if_poll_lock);
316670eb 2986 break;
db609669 2987 }
316670eb 2988
39236c6e 2989 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
316670eb
A
2990 MAX((qlimit(&inp->rcvq_pkts)),
2991 (inp->rxpoll_phiwat << 2));
2992
2993 if (dlil_verbose > 1) {
39236c6e 2994 printf("%s: polling up to %d pkts, "
316670eb
A
2995 "pkts avg %d max %d, wreq avg %d, "
2996 "bytes avg %d\n",
39236c6e 2997 if_name(ifp), m_lim,
316670eb
A
2998 inp->rxpoll_pavg, inp->rxpoll_pmax,
2999 inp->rxpoll_wavg, inp->rxpoll_bavg);
3000 }
3001
3002 /* invoke the driver's input poll routine */
3003 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3004 &m_cnt, &m_totlen));
3005
3006 if (m_head != NULL) {
3007 VERIFY(m_tail != NULL && m_cnt > 0);
3008
3009 if (dlil_verbose > 1) {
39236c6e 3010 printf("%s: polled %d pkts, "
316670eb
A
3011 "pkts avg %d max %d, wreq avg %d, "
3012 "bytes avg %d\n",
39236c6e 3013 if_name(ifp), m_cnt,
316670eb
A
3014 inp->rxpoll_pavg, inp->rxpoll_pmax,
3015 inp->rxpoll_wavg, inp->rxpoll_bavg);
3016 }
3017
3018 /* stats are required for extended variant */
3019 s.packets_in = m_cnt;
3020 s.bytes_in = m_totlen;
3021
3022 (void) ifnet_input_common(ifp, m_head, m_tail,
3023 &s, TRUE, TRUE);
39236c6e
A
3024 } else {
3025 if (dlil_verbose > 1) {
3026 printf("%s: no packets, "
3027 "pkts avg %d max %d, wreq avg %d, "
3028 "bytes avg %d\n",
3029 if_name(ifp), inp->rxpoll_pavg,
3030 inp->rxpoll_pmax, inp->rxpoll_wavg,
3031 inp->rxpoll_bavg);
3032 }
3033
3034 (void) ifnet_input_common(ifp, NULL, NULL,
3035 NULL, FALSE, TRUE);
316670eb
A
3036 }
3037
3038 /* Release the io ref count */
3039 ifnet_decr_iorefcnt(ifp);
3040
3041 lck_mtx_lock_spin(&ifp->if_poll_lock);
3042
3043 /* if there's no pending request, we're done */
3044 if (req == ifp->if_poll_req)
3045 break;
3046 }
3047 ifp->if_poll_req = 0;
3048 ifp->if_poll_active = 0;
3049
3050 /*
3051 * Wake up N ns from now, else sleep indefinitely (ts = NULL)
3052 * until ifnet_poll() is called again.
3053 */
3054 ts = &ifp->if_poll_cycle;
3055 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
3056 ts = NULL;
3057 }
3058
3059 /* NOTREACHED */
316670eb
A
3060}
3061
3062void
3063ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3064{
3065 if (ts == NULL)
3066 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
3067 else
3068 *(&ifp->if_poll_cycle) = *ts;
3069
3070 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
39236c6e
A
3071 printf("%s: poll interval set to %lu nsec\n",
3072 if_name(ifp), ts->tv_nsec);
316670eb
A
3073}
3074
3075void
3076ifnet_purge(struct ifnet *ifp)
3077{
3078 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
3079 if_qflush(ifp, 0);
3080}
3081
3082void
3083ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3084{
3085 IFCQ_LOCK_ASSERT_HELD(ifq);
3086
3087 if (!(IFCQ_IS_READY(ifq)))
3088 return;
3089
3090 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3091 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
3092 ifq->ifcq_tbr.tbr_percent, 0 };
3093 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3094 }
3095
3096 ifclassq_update(ifq, ev);
3097}
3098
3099void
3100ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3101{
3102 switch (ev) {
39236c6e 3103 case CLASSQ_EV_LINK_BANDWIDTH:
316670eb
A
3104 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
3105 ifp->if_poll_update++;
3106 break;
3107
3108 default:
3109 break;
3110 }
3111}
3112
3113errno_t
3114ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3115{
3116 struct ifclassq *ifq;
3117 u_int32_t omodel;
3118 errno_t err;
3119
39037602 3120 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
316670eb
A
3121 return (EINVAL);
3122 else if (!(ifp->if_eflags & IFEF_TXSTART))
3123 return (ENXIO);
3124
3125 ifq = &ifp->if_snd;
3126 IFCQ_LOCK(ifq);
3127 omodel = ifp->if_output_sched_model;
3128 ifp->if_output_sched_model = model;
3129 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
3130 ifp->if_output_sched_model = omodel;
3131 IFCQ_UNLOCK(ifq);
3132
3133 return (err);
3134}
3135
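/*
 * Sketch: a TXSTART-capable, attached interface switching scheduler
 * models (constant from the scheduler KPI headers; error handling
 * elided).  On failure the previous model is restored above:
 *
 *	errno_t err;
 *
 *	err = ifnet_set_output_sched_model(ifp, IFNET_SCHED_MODEL_NORMAL);
 */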
3136errno_t
3137ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3138{
3139 if (ifp == NULL)
3140 return (EINVAL);
3141 else if (!(ifp->if_eflags & IFEF_TXSTART))
3142 return (ENXIO);
3143
3144 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3145
3146 return (0);
3147}
3148
3149errno_t
3150ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3151{
3152 if (ifp == NULL || maxqlen == NULL)
3153 return (EINVAL);
3154 else if (!(ifp->if_eflags & IFEF_TXSTART))
3155 return (ENXIO);
3156
3157 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3158
3159 return (0);
3160}
3161
3162errno_t
39236c6e 3163ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
316670eb 3164{
39236c6e
A
3165 errno_t err;
3166
3167 if (ifp == NULL || pkts == NULL)
3168 err = EINVAL;
316670eb 3169 else if (!(ifp->if_eflags & IFEF_TXSTART))
39236c6e
A
3170 err = ENXIO;
3171 else
3172 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3173 pkts, NULL);
316670eb 3174
39236c6e
A
3175 return (err);
3176}
316670eb 3177
39236c6e
A
3178errno_t
3179ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3180 u_int32_t *pkts, u_int32_t *bytes)
3181{
3182 errno_t err;
3183
3184 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3185 (pkts == NULL && bytes == NULL))
3186 err = EINVAL;
3187 else if (!(ifp->if_eflags & IFEF_TXSTART))
3188 err = ENXIO;
3189 else
3190 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3191
3192 return (err);
3193}
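/*
 * Editorial example (not part of the original source): sampling the
 * send queue depth, in aggregate and for one service class. MBUF_SC_BE
 * comes from the mbuf service-class definitions; "my_ifp" is a
 * hypothetical attached interface.
 */
#if 0
static void
example_sample_sndq_depth(struct ifnet *my_ifp)
{
	u_int32_t pkts = 0, be_pkts = 0, be_bytes = 0;

	if (ifnet_get_sndq_len(my_ifp, &pkts) == 0)
		printf("%s: %u packet(s) queued\n", if_name(my_ifp), pkts);

	if (ifnet_get_service_class_sndq_len(my_ifp, MBUF_SC_BE,
	    &be_pkts, &be_bytes) == 0)
		printf("%s: best effort: %u pkt(s), %u byte(s)\n",
		    if_name(my_ifp), be_pkts, be_bytes);
}
#endif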
3194
3195errno_t
3196ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3197{
3198 struct dlil_threading_info *inp;
3199
3200 if (ifp == NULL)
3201 return (EINVAL);
3202 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3203 return (ENXIO);
3204
3205 if (maxqlen == 0)
3206 maxqlen = if_rcvq_maxlen;
3207 else if (maxqlen < IF_RCVQ_MINLEN)
3208 maxqlen = IF_RCVQ_MINLEN;
3209
3210 inp = ifp->if_inp;
3211 lck_mtx_lock(&inp->input_lck);
3212 qlimit(&inp->rcvq_pkts) = maxqlen;
3213 lck_mtx_unlock(&inp->input_lck);
3214
3215 return (0);
3216}
3217
3218errno_t
3219ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3220{
3221 struct dlil_threading_info *inp;
3222
3223 if (ifp == NULL || maxqlen == NULL)
3224 return (EINVAL);
3225 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3226 return (ENXIO);
3227
3228 inp = ifp->if_inp;
3229 lck_mtx_lock(&inp->input_lck);
3230 *maxqlen = qlimit(&inp->rcvq_pkts);
3231 lck_mtx_unlock(&inp->input_lck);
3232 return (0);
3233}
3234
3235void
3236ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3237 uint16_t delay_timeout)
3238{
3239 if (delay_qlen > 0 && delay_timeout > 0) {
3240 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3241 ifp->if_start_delay_qlen = min(100, delay_qlen);
3242 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3243 /* convert timeout to nanoseconds */
3244 ifp->if_start_delay_timeout *= 1000;
3245 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3246 ifp->if_xname, (uint32_t)delay_qlen,
3247 (uint32_t)delay_timeout);
3248 } else {
3249 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3250 }
3251}
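/*
 * Editorial example (not part of the original source): a driver opting
 * in to start-callback coalescing with a 32-packet threshold and a
 * 10000-usec delay ceiling; note the function above clamps the inputs
 * to 100 packets and 20000 usec, then converts usec to nsec.
 */
#if 0
static void
example_enable_enqueue_multi(struct ifnet *my_ifp)
{
	ifnet_enqueue_multi_setup(my_ifp, 32, 10000);
}
#endif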
3252
3253static inline errno_t
3254ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
3255 boolean_t flush, boolean_t *pdrop)
3256{
3257 volatile uint64_t *fg_ts = NULL;
3258 volatile uint64_t *rt_ts = NULL;
3259 struct mbuf *m = p;
3260 struct timespec now;
3261 u_int64_t now_nsec = 0;
3262 int error = 0;
3263
3264 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3265
3266 /*
3267 * If packet already carries a timestamp, either from dlil_output()
3268 * or from flowswitch, use it here. Otherwise, record timestamp.
3269 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3270 * the timestamp value is used internally there.
3271 */
3272 switch (ptype) {
3273 case QP_MBUF:
3274 ASSERT(m->m_flags & M_PKTHDR);
3275 ASSERT(m->m_nextpkt == NULL);
3276
3277 if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3278 m->m_pkthdr.pkt_timestamp == 0) {
3279 nanouptime(&now);
3280 net_timernsec(&now, &now_nsec);
3281 m->m_pkthdr.pkt_timestamp = now_nsec;
3282 }
3283 m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3284 /*
3285 * If the packet service class is not background,
3286 * update the timestamp to indicate recent activity
3287 * on a foreground socket.
3288 */
3289 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3290 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3291 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
3292 ifp->if_fg_sendts = _net_uptime;
3293 if (fg_ts != NULL)
3294 *fg_ts = _net_uptime;
3295 }
3296 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3297 ifp->if_rt_sendts = _net_uptime;
3298 if (rt_ts != NULL)
3299 *rt_ts = _net_uptime;
3300 }
3301 }
3302 break;
3303
3304
3305 default:
3306 VERIFY(0);
3307 /* NOTREACHED */
3308 }
3309
3310 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3311 if (now_nsec == 0) {
3312 nanouptime(&now);
3313 net_timernsec(&now, &now_nsec);
3314 }
3315 /*
3316 * If the driver chose to delay the start callback for
3317 * coalescing multiple packets, then use the following
3318 * heuristics to make sure that the start callback will
3319 * be delayed only when bulk data transfer is detected.
3320 * 1. The number of packets enqueued in (delay_win * 2) is
3321 * greater than or equal to the delay qlen.
3322 * 2. If delay_start is enabled, it will stay enabled for
3323 * another 10 idle windows. This is to take into account
3324 * variable RTT and burst traffic.
3325 * 3. If the time elapsed since the last enqueue is more
3326 * than 200ms, we disable delaying the start callback. This
3327 * is to take idle time into account.
3328 */
3329 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3330 if (ifp->if_start_delay_swin > 0) {
3331 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3332 ifp->if_start_delay_cnt++;
3333 } else if ((now_nsec - ifp->if_start_delay_swin)
3334 >= (200 * 1000 * 1000)) {
3335 ifp->if_start_delay_swin = now_nsec;
3336 ifp->if_start_delay_cnt = 1;
3337 ifp->if_start_delay_idle = 0;
3338 if (ifp->if_eflags & IFEF_DELAY_START) {
3339 ifp->if_eflags &=
3340 ~(IFEF_DELAY_START);
3341 ifnet_delay_start_disabled++;
3342 }
3343 } else {
3344 if (ifp->if_start_delay_cnt >=
3345 ifp->if_start_delay_qlen) {
3346 ifp->if_eflags |= IFEF_DELAY_START;
3347 ifp->if_start_delay_idle = 0;
3348 } else {
3349 if (ifp->if_start_delay_idle >= 10) {
3350 ifp->if_eflags &= ~(IFEF_DELAY_START);
3351 ifnet_delay_start_disabled++;
3352 } else {
3353 ifp->if_start_delay_idle++;
3354 }
3355 }
3356 ifp->if_start_delay_swin = now_nsec;
3357 ifp->if_start_delay_cnt = 1;
3358 }
3359 } else {
3360 ifp->if_start_delay_swin = now_nsec;
3361 ifp->if_start_delay_cnt = 1;
3362 ifp->if_start_delay_idle = 0;
3363 ifp->if_eflags &= ~(IFEF_DELAY_START);
3364 }
3365 } else {
3366 ifp->if_eflags &= ~(IFEF_DELAY_START);
3367 }
3368
3369 switch (ptype) {
3370 case QP_MBUF:
3371 /* enqueue the packet (caller consumes object) */
3372 error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
3373 m = NULL;
3374 break;
3375
3376
3377 default:
3378 break;
3379 }
3380
3381 /*
3382 * Tell the driver to start dequeueing; do this even when the queue
3383 * for the packet is suspended (EQSUSPENDED), as the driver could still
3384 * be dequeueing from other unsuspended queues.
3385 */
3386 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3387 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
3388 ifnet_start(ifp);
3389
3390 return (error);
3391}
3392
3393errno_t
3394ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3395{
3396 boolean_t pdrop;
3397 return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
3398}
3399
3400errno_t
3401ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3402 boolean_t *pdrop)
3403{
3404 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3405 m->m_nextpkt != NULL) {
3406 if (m != NULL) {
3407 m_freem_list(m);
3408 *pdrop = TRUE;
3409 }
3410 return (EINVAL);
3411 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3412 !IF_FULLY_ATTACHED(ifp)) {
3413 /* flag tested without lock for performance */
3414 m_freem(m);
3415 *pdrop = TRUE;
3416 return (ENXIO);
3417 } else if (!(ifp->if_flags & IFF_UP)) {
3418 m_freem(m);
3419 *pdrop = TRUE;
3420 return (ENETDOWN);
3421 }
3422
3423 return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
3424}
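/*
 * Editorial example (not part of the original source): handing one
 * packet to an IFEF_TXSTART interface. The mbuf is consumed on both
 * success and failure paths, so it must not be touched afterwards;
 * "my_ifp" and "my_m" are hypothetical.
 */
#if 0
static errno_t
example_transmit(struct ifnet *my_ifp, struct mbuf *my_m)
{
	boolean_t dropped = FALSE;
	errno_t err;

	/* TRUE asks DLIL to kick the driver's start thread right away */
	err = ifnet_enqueue_mbuf(my_ifp, my_m, TRUE, &dropped);
	if (err != 0 && dropped && dlil_verbose)
		printf("%s: packet dropped (err=%d)\n",
		    if_name(my_ifp), err);
	return (err);
}
#endif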
3425
3426
3427errno_t
3428ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3429{
3430 errno_t rc;
3431 classq_pkt_type_t ptype;
3432 if (ifp == NULL || mp == NULL)
3433 return (EINVAL);
3434 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3435 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3436 return (ENXIO);
3437 if (!ifnet_is_attached(ifp, 1))
3438 return (ENXIO);
3439
3440 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3441 (void **)mp, NULL, NULL, NULL, &ptype);
3442 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3443 ifnet_decr_iorefcnt(ifp);
3444
3445 return (rc);
3446}
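/*
 * Editorial example (not part of the original source): the core loop
 * of a driver start callback that pulls packets one at a time with
 * ifnet_dequeue(). example_hw_tx() is a hypothetical hardware-submit
 * routine that returns nonzero when the ring is full.
 */
#if 0
static void
example_start(struct ifnet *my_ifp)
{
	struct mbuf *m;

	while (ifnet_dequeue(my_ifp, &m) == 0) {
		if (example_hw_tx(my_ifp, m) != 0) {
			m_freem(m);
			break;
		}
	}
}
#endif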
3447
3448errno_t
3449ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3450 struct mbuf **mp)
3451{
3452 errno_t rc;
3453 classq_pkt_type_t ptype;
3454 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3455 return (EINVAL);
3456 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3457 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3458 return (ENXIO);
3459 if (!ifnet_is_attached(ifp, 1))
3460 return (ENXIO);
3461
3462 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
3463 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
3464 NULL, &ptype);
3465 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3466 ifnet_decr_iorefcnt(ifp);
3467 return (rc);
3468}
3469
3470errno_t
3471ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3472 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3473{
3474 errno_t rc;
3475 classq_pkt_type_t ptype;
3476 if (ifp == NULL || head == NULL || pkt_limit < 1)
3477 return (EINVAL);
3478 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3479 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3480 return (ENXIO);
3481 if (!ifnet_is_attached(ifp, 1))
3482 return (ENXIO);
3483
3484 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3485 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
3486 len, &ptype);
3487 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3488 ifnet_decr_iorefcnt(ifp);
3489 return (rc);
3490}
3491
3492errno_t
3493ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3494 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3495{
3496 errno_t rc;
3497 classq_pkt_type_t ptype;
3498 if (ifp == NULL || head == NULL || byte_limit < 1)
3499 return (EINVAL);
3500 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3501 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3502 return (ENXIO);
3503 if (!ifnet_is_attached(ifp, 1))
3504 return (ENXIO);
3505
3506 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3507 byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
3508 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3509 ifnet_decr_iorefcnt(ifp);
3510 return (rc);
3511}
3512
3513errno_t
3514ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3515 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3516 u_int32_t *len)
3517{
3518 errno_t rc;
3519 classq_pkt_type_t ptype;
3520 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3521 !MBUF_VALID_SC(sc))
3522 return (EINVAL);
3523 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3524 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3525 return (ENXIO);
3526 if (!ifnet_is_attached(ifp, 1))
3527 return (ENXIO);
3528
3529 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
3530 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
3531 (void **)tail, cnt, len, &ptype);
3532 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3533 ifnet_decr_iorefcnt(ifp);
3534 return (rc);
3535}
3536
3537#if !CONFIG_EMBEDDED
3538errno_t
3539ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3540 const struct sockaddr *dest, const char *dest_linkaddr,
3541 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3542{
3543 if (pre != NULL)
3544 *pre = 0;
3545 if (post != NULL)
3546 *post = 0;
3547
3548 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3549}
3550#endif /* !CONFIG_EMBEDDED */
3551
3552static int
3553dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3554 char **frame_header_p, protocol_family_t protocol_family)
3555{
3556 struct ifnet_filter *filter;
3557
3558 /*
3559 * Pass the inbound packet to the interface filters
3560 */
3561 lck_mtx_lock_spin(&ifp->if_flt_lock);
3562 /* prevent filter list from changing in case we drop the lock */
3563 if_flt_monitor_busy(ifp);
3564 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3565 int result;
3566
3567 if (!filter->filt_skip && filter->filt_input != NULL &&
3568 (filter->filt_protocol == 0 ||
3569 filter->filt_protocol == protocol_family)) {
3570 lck_mtx_unlock(&ifp->if_flt_lock);
3571
3572 result = (*filter->filt_input)(filter->filt_cookie,
3573 ifp, protocol_family, m_p, frame_header_p);
3574
3575 lck_mtx_lock_spin(&ifp->if_flt_lock);
3576 if (result != 0) {
3577 /* we're done with the filter list */
3578 if_flt_monitor_unbusy(ifp);
3579 lck_mtx_unlock(&ifp->if_flt_lock);
3580 return (result);
3581 }
3582 }
3583 }
3584 /* we're done with the filter list */
3585 if_flt_monitor_unbusy(ifp);
3586 lck_mtx_unlock(&ifp->if_flt_lock);
3587
3588 /*
3589 * Strip away M_PROTO1 bit prior to sending packet up the stack as
3590 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
3591 */
3592 if (*m_p != NULL)
3593 (*m_p)->m_flags &= ~M_PROTO1;
3594
3595 return (0);
3596}
3597
3598static int
3599dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3600 protocol_family_t protocol_family)
3601{
3602 struct ifnet_filter *filter;
3603
3604 /*
3605 * Pass the outbound packet to the interface filters
3606 */
3607 lck_mtx_lock_spin(&ifp->if_flt_lock);
3608 /* prevent filter list from changing in case we drop the lock */
3609 if_flt_monitor_busy(ifp);
3610 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3611 int result;
3612
3613 if (!filter->filt_skip && filter->filt_output != NULL &&
3614 (filter->filt_protocol == 0 ||
3615 filter->filt_protocol == protocol_family)) {
3616 lck_mtx_unlock(&ifp->if_flt_lock);
3617
3618 result = filter->filt_output(filter->filt_cookie, ifp,
3619 protocol_family, m_p);
3620
3621 lck_mtx_lock_spin(&ifp->if_flt_lock);
3622 if (result != 0) {
3623 /* we're done with the filter list */
3624 if_flt_monitor_unbusy(ifp);
3625 lck_mtx_unlock(&ifp->if_flt_lock);
3626 return (result);
3627 }
3628 }
3629 }
3630 /* we're done with the filter list */
3631 if_flt_monitor_unbusy(ifp);
3632 lck_mtx_unlock(&ifp->if_flt_lock);
3633
3634 return (0);
3635}
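/*
 * Editorial example (not part of the original source): a minimal
 * interface filter of the kind walked by the two loops above, attached
 * through the kpi_interfacefilter KPI. The iff_filter field names and
 * iflt_attach() signature follow net/kpi_interfacefilter.h as
 * understood here; treat them as assumptions, not a definitive recipe.
 */
#if 0
static errno_t
example_filt_output(void *cookie, ifnet_t ifp, protocol_family_t proto,
    mbuf_t *data)
{
#pragma unused(cookie, ifp, proto, data)
	/* returning 0 lets the packet continue down the stack */
	return (0);
}

static errno_t
example_attach_filter(ifnet_t my_ifp, interface_filter_t *ref)
{
	struct iff_filter flt;

	bzero(&flt, sizeof (flt));
	flt.iff_name = "com.example.filter";
	flt.iff_protocol = 0;		/* 0 means all protocols */
	flt.iff_output = example_filt_output;

	return (iflt_attach(my_ifp, &flt, ref));
}
#endif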
3636
3637static void
3638dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3639{
3640 int error;
3641
3642 if (ifproto->proto_kpi == kProtoKPI_v1) {
3643 /* Version 1 protocols get one packet at a time */
3644 while (m != NULL) {
3645 char * frame_header;
3646 mbuf_t next_packet;
3647
3648 next_packet = m->m_nextpkt;
3649 m->m_nextpkt = NULL;
3650 frame_header = m->m_pkthdr.pkt_hdr;
3651 m->m_pkthdr.pkt_hdr = NULL;
3652 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3653 ifproto->protocol_family, m, frame_header);
3654 if (error != 0 && error != EJUSTRETURN)
3655 m_freem(m);
3656 m = next_packet;
3657 }
3658 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3659 /* Version 2 protocols support packet lists */
3660 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3661 ifproto->protocol_family, m);
3662 if (error != 0 && error != EJUSTRETURN)
3663 m_freem_list(m);
3664 }
3665}
3666
3667static void
3668dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3669 struct dlil_threading_info *inp, boolean_t poll)
3670{
3671 struct ifnet_stat_increment_param *d = &inp->stats;
3672
3673 if (s->packets_in != 0)
3674 d->packets_in += s->packets_in;
3675 if (s->bytes_in != 0)
3676 d->bytes_in += s->bytes_in;
3677 if (s->errors_in != 0)
3678 d->errors_in += s->errors_in;
3679
3680 if (s->packets_out != 0)
3681 d->packets_out += s->packets_out;
3682 if (s->bytes_out != 0)
3683 d->bytes_out += s->bytes_out;
3684 if (s->errors_out != 0)
3685 d->errors_out += s->errors_out;
3686
3687 if (s->collisions != 0)
3688 d->collisions += s->collisions;
3689 if (s->dropped != 0)
3690 d->dropped += s->dropped;
3691
3692 if (poll)
3693 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3694}
3695
3696static void
3697dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3698{
3699 struct ifnet_stat_increment_param *s = &inp->stats;
3700
3701 /*
3702 * Use of atomic operations is unavoidable here because
3703 * these stats may also be incremented elsewhere via KPIs.
3704 */
3705 if (s->packets_in != 0) {
3706 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3707 s->packets_in = 0;
3708 }
3709 if (s->bytes_in != 0) {
3710 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3711 s->bytes_in = 0;
3712 }
3713 if (s->errors_in != 0) {
3714 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3715 s->errors_in = 0;
3716 }
3717
3718 if (s->packets_out != 0) {
3719 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3720 s->packets_out = 0;
3721 }
3722 if (s->bytes_out != 0) {
3723 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3724 s->bytes_out = 0;
3725 }
3726 if (s->errors_out != 0) {
3727 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3728 s->errors_out = 0;
3729 }
3730
3731 if (s->collisions != 0) {
3732 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3733 s->collisions = 0;
3734 }
3735 if (s->dropped != 0) {
3736 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3737 s->dropped = 0;
3738 }
3739
3740 if (ifp->if_data_threshold != 0) {
3741 lck_mtx_convert_spin(&inp->input_lck);
3742 ifnet_notify_data_threshold(ifp);
3743 }
3744
3745 /*
3746 * No need for atomic operations as they are modified here
3747 * only from within the DLIL input thread context.
3748 */
3749 if (inp->tstats.packets != 0) {
3750 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3751 inp->tstats.packets = 0;
3752 }
3753 if (inp->tstats.bytes != 0) {
3754 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3755 inp->tstats.bytes = 0;
3756 }
3757}
3758
3759__private_extern__ void
3760dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3761{
3762 return (dlil_input_packet_list_common(ifp, m, 0,
3763 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3764}
3765
3766__private_extern__ void
3767dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3768 u_int32_t cnt, ifnet_model_t mode)
3769{
3770 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3771}
3772
3773static void
3774dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3775 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3776{
3777 int error = 0;
3778 protocol_family_t protocol_family;
3779 mbuf_t next_packet;
3780 ifnet_t ifp = ifp_param;
3781 char * frame_header;
3782 struct if_proto * last_ifproto = NULL;
3783 mbuf_t pkt_first = NULL;
3784 mbuf_t * pkt_next = NULL;
316670eb 3785 u_int32_t poll_thresh = 0, poll_ival = 0;
3786
3787 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3788
3789 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3790 (poll_ival = if_rxpoll_interval_pkts) > 0)
3791 poll_thresh = cnt;
3792
3793 while (m != NULL) {
3794 struct if_proto *ifproto = NULL;
3795 int iorefcnt = 0;
3796 uint32_t pktf_mask; /* pkt flags to preserve */
3797
3798 if (ifp_param == NULL)
3799 ifp = m->m_pkthdr.rcvif;
3800
3801 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3802 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3803 ifnet_poll(ifp);
3804
3805 /* Check if this mbuf looks valid */
3806 MBUF_INPUT_CHECK(m, ifp);
3807
3808 next_packet = m->m_nextpkt;
3809 m->m_nextpkt = NULL;
3810 frame_header = m->m_pkthdr.pkt_hdr;
3811 m->m_pkthdr.pkt_hdr = NULL;
3812
3813 /*
3814 * Get an IO reference count if the interface is not
3815 * loopback (lo0) and it is attached; lo0 never goes
3816 * away, so optimize for that.
3817 */
3818 if (ifp != lo_ifp) {
3819 if (!ifnet_is_attached(ifp, 1)) {
3820 m_freem(m);
3821 goto next;
3822 }
3823 iorefcnt = 1;
3824 /*
3825 * Preserve the time stamp if it was set.
3826 */
3827 pktf_mask = PKTF_TS_VALID;
3828 } else {
3829 /*
3830 * If this arrived on lo0, preserve interface addr
3831 * info to allow for connectivity between loopback
3832 * and local interface addresses.
3833 */
3834 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3835 }
3836
3837 /* make sure packet comes in clean */
3838 m_classifier_init(m, pktf_mask);
3839
3840 ifp_inc_traffic_class_in(ifp, m);
3841
3842 /* find which protocol family this packet is for */
3843 ifnet_lock_shared(ifp);
3844 error = (*ifp->if_demux)(ifp, m, frame_header,
3845 &protocol_family);
3846 ifnet_lock_done(ifp);
2d21ac55 3847 if (error != 0) {
6d2010ae 3848 if (error == EJUSTRETURN)
2d21ac55 3849 goto next;
2d21ac55
A
3850 protocol_family = 0;
3851 }
3852
3853 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3854 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3855 dlil_input_cksum_dbg(ifp, m, frame_header,
3856 protocol_family);
3857
3858 /*
3859 * For partial checksum offload, we expect the driver to
3860 * set the start offset indicating the start of the span
3861 * that is covered by the hardware-computed checksum;
3862 * adjust this start offset accordingly because the data
3863 * pointer has been advanced beyond the link-layer header.
3864 *
3865 * Don't adjust if the interface is a bridge member, as
3866 * the adjustment will occur from the context of the
3867 * bridge interface during input.
3868 */
3869 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3870 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3871 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3872 int adj;
3873
3874 if (frame_header == NULL ||
3875 frame_header < (char *)mbuf_datastart(m) ||
3876 frame_header > (char *)m->m_data ||
3877 (adj = (m->m_data - frame_header)) >
3878 m->m_pkthdr.csum_rx_start) {
3879 m->m_pkthdr.csum_data = 0;
3880 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3881 hwcksum_in_invalidated++;
3882 } else {
3883 m->m_pkthdr.csum_rx_start -= adj;
3884 }
3885 }
3886
3887 pktap_input(ifp, protocol_family, m, frame_header);
3888
3889 if (m->m_flags & (M_BCAST|M_MCAST))
3890 atomic_add_64(&ifp->if_imcasts, 1);
3891
3892 /* run interface filters, exclude VLAN packets PR-3586856 */
3893 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
3894 error = dlil_interface_filters_input(ifp, &m,
3895 &frame_header, protocol_family);
3896 if (error != 0) {
3897 if (error != EJUSTRETURN)
3898 m_freem(m);
3899 goto next;
3900 }
3901 }
3902 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
3903 m_freem(m);
3904 goto next;
3905 }
3906
3907 /* Lookup the protocol attachment to this interface */
3908 if (protocol_family == 0) {
3909 ifproto = NULL;
3910 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3911 (last_ifproto->protocol_family == protocol_family)) {
3912 VERIFY(ifproto == NULL);
3913 ifproto = last_ifproto;
3914 if_proto_ref(last_ifproto);
3915 } else {
3916 VERIFY(ifproto == NULL);
3917 ifnet_lock_shared(ifp);
3918 /* callee holds a proto refcnt upon success */
3919 ifproto = find_attached_proto(ifp, protocol_family);
3920 ifnet_lock_done(ifp);
3921 }
3922 if (ifproto == NULL) {
3923 /* no protocol for this packet, discard */
3924 m_freem(m);
3925 goto next;
3926 }
3927 if (ifproto != last_ifproto) {
3928 if (last_ifproto != NULL) {
3929 /* pass up the list for the previous protocol */
3930 dlil_ifproto_input(last_ifproto, pkt_first);
3931 pkt_first = NULL;
3932 if_proto_free(last_ifproto);
3933 }
3934 last_ifproto = ifproto;
3935 if_proto_ref(ifproto);
3936 }
3937 /* extend the list */
3938 m->m_pkthdr.pkt_hdr = frame_header;
3939 if (pkt_first == NULL) {
3940 pkt_first = m;
3941 } else {
3942 *pkt_next = m;
3943 }
3944 pkt_next = &m->m_nextpkt;
3945
3946next:
3947 if (next_packet == NULL && last_ifproto != NULL) {
3948 /* pass up the last list of packets */
2d21ac55
A
3949 dlil_ifproto_input(last_ifproto, pkt_first);
3950 if_proto_free(last_ifproto);
6d2010ae
A
3951 last_ifproto = NULL;
3952 }
3953 if (ifproto != NULL) {
3954 if_proto_free(ifproto);
3955 ifproto = NULL;
3956 }
3957
3958 m = next_packet;
3959
3960 /* update the driver's multicast filter, if needed */
3961 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3962 ifp->if_updatemcasts = 0;
3963 if (iorefcnt == 1)
3964 ifnet_decr_iorefcnt(ifp);
3965 }
3966
3967 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
3968}
3969
3970errno_t
3971if_mcasts_update(struct ifnet *ifp)
3972{
3973 errno_t err;
3974
3975 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
3976 if (err == EAFNOSUPPORT)
3977 err = 0;
3978 printf("%s: %s %d suspended link-layer multicast membership(s) "
3979 "(err=%d)\n", if_name(ifp),
3980 (err == 0 ? "successfully restored" : "failed to restore"),
3981 ifp->if_updatemcasts, err);
3982
3983 /* just return success */
3984 return (0);
3985}
3986
3987/* If ifp is set, we will increment the generation for the interface */
3988int
3989dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
3990{
3991 if (ifp != NULL) {
3992 ifnet_increment_generation(ifp);
3993 }
3994
3995#if NECP
3996 necp_update_all_clients();
3997#endif /* NECP */
3998
3999 return (kev_post_msg(event));
4000}
4001
4002#define TMP_IF_PROTO_ARR_SIZE 10
4003static int
4004dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4005{
4006 struct ifnet_filter *filter = NULL;
4007 struct if_proto *proto = NULL;
4008 int if_proto_count = 0;
4009 struct if_proto **tmp_ifproto_arr = NULL;
4010 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4011 int tmp_ifproto_arr_idx = 0;
4012 bool tmp_malloc = false;
4013
4014 /*
4015 * Pass the event to the interface filters
4016 */
4017 lck_mtx_lock_spin(&ifp->if_flt_lock);
4018 /* prevent filter list from changing in case we drop the lock */
4019 if_flt_monitor_busy(ifp);
4020 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4021 if (filter->filt_event != NULL) {
4022 lck_mtx_unlock(&ifp->if_flt_lock);
4023
4024 filter->filt_event(filter->filt_cookie, ifp,
4025 filter->filt_protocol, event);
4026
4027 lck_mtx_lock_spin(&ifp->if_flt_lock);
4028 }
4029 }
4030 /* we're done with the filter list */
4031 if_flt_monitor_unbusy(ifp);
4032 lck_mtx_unlock(&ifp->if_flt_lock);
4033
4034 /* Get an io ref count if the interface is attached */
4035 if (!ifnet_is_attached(ifp, 1))
4036 goto done;
4037
4038 /*
4039 * An embedded tmp_list_entry in if_proto may still get
4040 * over-written by another thread after giving up ifnet lock,
4041 * therefore we are avoiding embedded pointers here.
4042 */
4043 ifnet_lock_shared(ifp);
4044 if_proto_count = dlil_ifp_proto_count(ifp);
4045 if (if_proto_count) {
4046 int i;
4047 VERIFY(ifp->if_proto_hash != NULL);
4048 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4049 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4050 } else {
4051 MALLOC(tmp_ifproto_arr, struct if_proto **,
4052 sizeof (*tmp_ifproto_arr) * if_proto_count,
4053 M_TEMP, M_ZERO);
4054 if (tmp_ifproto_arr == NULL) {
4055 ifnet_lock_done(ifp);
4056 goto cleanup;
4057 }
4058 tmp_malloc = true;
4059 }
4060
4061 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4062 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4063 next_hash) {
4064 if_proto_ref(proto);
4065 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4066 tmp_ifproto_arr_idx++;
4067 }
4068 }
4069 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4070 }
4071 ifnet_lock_done(ifp);
4072
4073 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4074 tmp_ifproto_arr_idx++) {
4075 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4076 VERIFY(proto != NULL);
4077 proto_media_event eventp =
4078 (proto->proto_kpi == kProtoKPI_v1 ?
4079 proto->kpi.v1.event :
4080 proto->kpi.v2.event);
4081
4082 if (eventp != NULL) {
4083 eventp(ifp, proto->protocol_family,
4084 event);
4085 }
4086 if_proto_free(proto);
4087 }
4088
4089cleanup:
4090 if (tmp_malloc) {
4091 FREE(tmp_ifproto_arr, M_TEMP);
4092 }
4093
4094 /* Pass the event to the interface */
4095 if (ifp->if_event != NULL)
4096 ifp->if_event(ifp, event);
4097
4098 /* Release the io ref count */
4099 ifnet_decr_iorefcnt(ifp);
4100done:
4101 return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
4102}
4103
4104errno_t
4105ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4106{
4107 struct kev_msg kev_msg;
4108 int result = 0;
4109
4110 if (ifp == NULL || event == NULL)
4111 return (EINVAL);
4112
4113 bzero(&kev_msg, sizeof (kev_msg));
4114 kev_msg.vendor_code = event->vendor_code;
4115 kev_msg.kev_class = event->kev_class;
4116 kev_msg.kev_subclass = event->kev_subclass;
4117 kev_msg.event_code = event->event_code;
4118 kev_msg.dv[0].data_ptr = &event->event_data[0];
4119 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4120 kev_msg.dv[1].data_length = 0;
4121
4122 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4123
4124 return (result);
4125}
4126
4127#if CONFIG_MACF_NET
4128#include <netinet/ip6.h>
4129#include <netinet/ip.h>
4130static int
4131dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4132{
4133 struct mbuf *m;
4134 struct ip *ip;
4135 struct ip6_hdr *ip6;
4136 int type = SOCK_RAW;
4137
4138 if (!raw) {
4139 switch (family) {
4140 case PF_INET:
4141 m = m_pullup(*mp, sizeof(struct ip));
4142 if (m == NULL)
4143 break;
4144 *mp = m;
4145 ip = mtod(m, struct ip *);
4146 if (ip->ip_p == IPPROTO_TCP)
4147 type = SOCK_STREAM;
4148 else if (ip->ip_p == IPPROTO_UDP)
4149 type = SOCK_DGRAM;
4150 break;
4151 case PF_INET6:
4152 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4153 if (m == NULL)
4154 break;
4155 *mp = m;
4156 ip6 = mtod(m, struct ip6_hdr *);
4157 if (ip6->ip6_nxt == IPPROTO_TCP)
4158 type = SOCK_STREAM;
4159 else if (ip6->ip6_nxt == IPPROTO_UDP)
4160 type = SOCK_DGRAM;
4161 break;
4162 }
4163 }
4164
4165 return (type);
4166}
4167#endif
4168
4169static void
4170dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4171{
4172 mbuf_t n = m;
4173 int chainlen = 0;
4174
4175 while (n != NULL) {
4176 chainlen++;
4177 n = n->m_next;
4178 }
4179 switch (chainlen) {
4180 case 0:
4181 break;
4182 case 1:
4183 atomic_add_64(&cls->cls_one, 1);
4184 break;
4185 case 2:
4186 atomic_add_64(&cls->cls_two, 1);
4187 break;
4188 case 3:
4189 atomic_add_64(&cls->cls_three, 1);
4190 break;
4191 case 4:
4192 atomic_add_64(&cls->cls_four, 1);
4193 break;
4194 case 5:
4195 default:
4196 atomic_add_64(&cls->cls_five_or_more, 1);
4197 break;
4198 }
4199}
4200
4201/*
4202 * dlil_output
4203 *
4204 * Caller should have a lock on the protocol domain if the protocol
4205 * doesn't support finer grained locking. In most cases, the lock
4206 * will be held from the socket layer and won't be released until
4207 * we return back to the socket layer.
4208 *
4209 * This does mean that we must take a protocol lock before we take
4210 * an interface lock if we're going to take both. This makes sense
4211 * because a protocol is likely to interact with an ifp while it
4212 * is under the protocol lock.
4213 *
4214 * An advisory code will be returned if adv is not null. This
4215 * can be used to provide feedback about interface queues to the
4216 * application.
4217 */
4218errno_t
4219dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4220 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4221{
4222 char *frame_type = NULL;
4223 char *dst_linkaddr = NULL;
4224 int retval = 0;
4225 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4226 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4227 struct if_proto *proto = NULL;
4228 mbuf_t m;
4229 mbuf_t send_head = NULL;
4230 mbuf_t *send_tail = &send_head;
4231 int iorefcnt = 0;
4232 u_int32_t pre = 0, post = 0;
4233 u_int32_t fpkts = 0, fbytes = 0;
4234 int32_t flen = 0;
4235 struct timespec now;
4236 u_int64_t now_nsec;
4237
4238 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4239
4240 /*
4241 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4242 * from happening while this operation is in progress
4243 */
4244 if (!ifnet_is_attached(ifp, 1)) {
4245 retval = ENXIO;
4246 goto cleanup;
4247 }
4248 iorefcnt = 1;
4249
4250 VERIFY(ifp->if_output_dlil != NULL);
4251
4252 /* update the driver's multicast filter, if needed */
4253 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4254 ifp->if_updatemcasts = 0;
4255
4256 frame_type = frame_type_buffer;
4257 dst_linkaddr = dst_linkaddr_buffer;
4258
4259 if (raw == 0) {
4260 ifnet_lock_shared(ifp);
4261 /* callee holds a proto refcnt upon success */
4262 proto = find_attached_proto(ifp, proto_family);
4263 if (proto == NULL) {
4264 ifnet_lock_done(ifp);
4265 retval = ENXIO;
4266 goto cleanup;
4267 }
4268 ifnet_lock_done(ifp);
4269 }
4270
4271preout_again:
4272 if (packetlist == NULL)
4273 goto cleanup;
4274
4275 m = packetlist;
4276 packetlist = packetlist->m_nextpkt;
4277 m->m_nextpkt = NULL;
4278
4279 if (raw == 0) {
4280 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4281 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4282 retval = 0;
4283 if (preoutp != NULL) {
4284 retval = preoutp(ifp, proto_family, &m, dest, route,
4285 frame_type, dst_linkaddr);
4286
4287 if (retval != 0) {
4288 if (retval == EJUSTRETURN)
4289 goto preout_again;
4290 m_freem(m);
4291 goto cleanup;
4292 }
4293 }
4294 }
4295
4296#if CONFIG_MACF_NET
4297 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4298 dlil_get_socket_type(&m, proto_family, raw));
4299 if (retval != 0) {
4300 m_freem(m);
4301 goto cleanup;
4302 }
4303#endif
4304
4305 do {
4306#if CONFIG_DTRACE
4307 if (!raw && proto_family == PF_INET) {
4308 struct ip *ip = mtod(m, struct ip *);
4309 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4310 struct ip *, ip, struct ifnet *, ifp,
4311 struct ip *, ip, struct ip6_hdr *, NULL);
4312
4313 } else if (!raw && proto_family == PF_INET6) {
4314 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4315 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4316 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4317 struct ip *, NULL, struct ip6_hdr *, ip6);
4318 }
4319#endif /* CONFIG_DTRACE */
4320
4321 if (raw == 0 && ifp->if_framer != NULL) {
4322 int rcvif_set = 0;
4323
4324 /*
4325 * If this is a broadcast packet that needs to be
4326 * looped back into the system, set the inbound ifp
4327 * to that of the outbound ifp. This will allow
4328 * us to determine that it is a legitimate packet
4329 * for the system. Only set the ifp if it's not
4330 * already set, just to be safe.
4331 */
4332 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4333 m->m_pkthdr.rcvif == NULL) {
4334 m->m_pkthdr.rcvif = ifp;
4335 rcvif_set = 1;
4336 }
4337
4338 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4339 frame_type, &pre, &post);
4340 if (retval != 0) {
4341 if (retval != EJUSTRETURN)
4342 m_freem(m);
4343 goto next;
4344 }
4345
4346 /*
4347 * For partial checksum offload, adjust the start
4348 * and stuff offsets based on the prepended header.
4349 */
4350 if ((m->m_pkthdr.csum_flags &
4351 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4352 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4353 m->m_pkthdr.csum_tx_stuff += pre;
4354 m->m_pkthdr.csum_tx_start += pre;
4355 }
4356
4357 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4358 dlil_output_cksum_dbg(ifp, m, pre,
4359 proto_family);
4360
4361 /*
4362 * Clear the ifp if it was set above, and to be
4363 * safe, only if it is still the same as the
4364 * outbound ifp we have in context. If it was
4365 * looped back, then a copy of it was sent to the
4366 * loopback interface with the rcvif set, and we
4367 * are clearing the one that will go down to the
4368 * layer below.
4369 */
4370 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4371 m->m_pkthdr.rcvif = NULL;
4372 }
4373
4374 /*
4375 * Let interface filters (if any) do their thing ...
4376 */
4377 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4378 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4379 retval = dlil_interface_filters_output(ifp,
4380 &m, proto_family);
4381 if (retval != 0) {
4382 if (retval != EJUSTRETURN)
4383 m_freem(m);
4384 goto next;
4385 }
4386 }
4387 /*
4388 * Strip away M_PROTO1 bit prior to sending packet
4389 * to the driver as this field may be used by the driver
4390 */
4391 m->m_flags &= ~M_PROTO1;
4392
4393 /*
4394 * If the underlying interface is not capable of handling a
4395 * packet whose data portion spans across physically disjoint
4396 * pages, we need to "normalize" the packet so that we pass
4397 * down a chain of mbufs where each mbuf points to a span that
4398 * resides in the system page boundary. If the packet does
4399 * not cross page(s), the following is a no-op.
4400 */
4401 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4402 if ((m = m_normalize(m)) == NULL)
4403 goto next;
4404 }
4405
4406 /*
4407 * If this is a TSO packet, make sure the interface still
4408 * advertises TSO capability.
4409 */
4410 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4411 retval = EMSGSIZE;
4412 m_freem(m);
4413 goto cleanup;
4414 }
4415
4416 ifp_inc_traffic_class_out(ifp, m);
4417 pktap_output(ifp, proto_family, m, pre, post);
4418
4419 /*
4420 * Count the number of elements in the mbuf chain
4421 */
4422 if (tx_chain_len_count) {
4423 dlil_count_chain_len(m, &tx_chain_len_stats);
4424 }
4425
4426 /*
4427 * Record timestamp; ifnet_enqueue() will use this info
4428 * rather than redoing the work. An optimization could
4429 * involve doing this just once at the top, if there are
4430 * no interface filters attached, but that's probably
4431 * not a big deal.
4432 */
4433 nanouptime(&now);
4434 net_timernsec(&now, &now_nsec);
4435 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
4436
4437 /*
4438 * Discard partial sum information if this packet originated
4439 * from another interface; the packet would already have the
4440 * final checksum and we shouldn't recompute it.
4441 */
4442 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
4443 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
4444 (CSUM_DATA_VALID|CSUM_PARTIAL)) {
4445 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4446 m->m_pkthdr.csum_data = 0;
4447 }
4448
4449 /*
4450 * Finally, call the driver.
4451 */
4452 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4453 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4454 flen += (m_pktlen(m) - (pre + post));
4455 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4456 }
4457 *send_tail = m;
4458 send_tail = &m->m_nextpkt;
4459 } else {
4460 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4461 flen = (m_pktlen(m) - (pre + post));
4462 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4463 } else {
4464 flen = 0;
4465 }
4466 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4467 0, 0, 0, 0, 0);
4468 retval = (*ifp->if_output_dlil)(ifp, m);
4469 if (retval == EQFULL || retval == EQSUSPENDED) {
4470 if (adv != NULL && adv->code == FADV_SUCCESS) {
4471 adv->code = (retval == EQFULL ?
4472 FADV_FLOW_CONTROLLED :
4473 FADV_SUSPENDED);
4474 }
4475 retval = 0;
4476 }
4477 if (retval == 0 && flen > 0) {
4478 fbytes += flen;
4479 fpkts++;
4480 }
4481 if (retval != 0 && dlil_verbose) {
4482 printf("%s: output error on %s retval = %d\n",
4483 __func__, if_name(ifp),
4484 retval);
4485 }
4486 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4487 0, 0, 0, 0, 0);
4488 }
4489 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4490
4491next:
4492 m = packetlist;
4493 if (m != NULL) {
4494 packetlist = packetlist->m_nextpkt;
4495 m->m_nextpkt = NULL;
4496 }
4497 } while (m != NULL);
4498
4499 if (send_head != NULL) {
4500 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4501 0, 0, 0, 0, 0);
4502 if (ifp->if_eflags & IFEF_SENDLIST) {
4503 retval = (*ifp->if_output_dlil)(ifp, send_head);
4504 if (retval == EQFULL || retval == EQSUSPENDED) {
4505 if (adv != NULL) {
4506 adv->code = (retval == EQFULL ?
4507 FADV_FLOW_CONTROLLED :
4508 FADV_SUSPENDED);
4509 }
4510 retval = 0;
4511 }
4512 if (retval == 0 && flen > 0) {
4513 fbytes += flen;
4514 fpkts++;
4515 }
4516 if (retval != 0 && dlil_verbose) {
4517 printf("%s: output error on %s retval = %d\n",
4518 __func__, if_name(ifp), retval);
4519 }
4520 } else {
4521 struct mbuf *send_m;
4522 int enq_cnt = 0;
4523 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4524 while (send_head != NULL) {
4525 send_m = send_head;
4526 send_head = send_m->m_nextpkt;
4527 send_m->m_nextpkt = NULL;
4528 retval = (*ifp->if_output_dlil)(ifp, send_m);
4529 if (retval == EQFULL || retval == EQSUSPENDED) {
4530 if (adv != NULL) {
4531 adv->code = (retval == EQFULL ?
4532 FADV_FLOW_CONTROLLED :
4533 FADV_SUSPENDED);
4534 }
4535 retval = 0;
4536 }
4537 if (retval == 0) {
4538 enq_cnt++;
4539 if (flen > 0)
4540 fpkts++;
4541 }
4542 if (retval != 0 && dlil_verbose) {
4543 printf("%s: output error on %s "
4544 "retval = %d\n",
4545 __func__, if_name(ifp), retval);
4546 }
4547 }
4548 if (enq_cnt > 0) {
4549 fbytes += flen;
4550 ifnet_start(ifp);
4551 }
4552 }
4553 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4554 }
4555
4556 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4557
4558cleanup:
4559 if (fbytes > 0)
4560 ifp->if_fbytes += fbytes;
4561 if (fpkts > 0)
4562 ifp->if_fpackets += fpkts;
4563 if (proto != NULL)
4564 if_proto_free(proto);
4565 if (packetlist) /* if any packets are left, clean up */
4566 mbuf_freem_list(packetlist);
4567 if (retval == EJUSTRETURN)
4568 retval = 0;
4569 if (iorefcnt == 1)
4570 ifnet_decr_iorefcnt(ifp);
4571
4572 return (retval);
4573}
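/*
 * Editorial example (not part of the original source): consuming the
 * flow advisory that dlil_output() fills in, mapping
 * FADV_FLOW_CONTROLLED / FADV_SUSPENDED to a back-off signal.
 * example_pause_flow() is a hypothetical flow-control hook, and the
 * flowadv layout is assumed from net/if_var.h of this era.
 */
#if 0
static errno_t
example_output_with_advisory(ifnet_t my_ifp, protocol_family_t proto,
    mbuf_t pkt, const struct sockaddr *dest)
{
	struct flowadv adv;
	errno_t err;

	adv.code = FADV_SUCCESS;
	err = dlil_output(my_ifp, proto, pkt, NULL, dest, 0, &adv);
	if (err == 0 && adv.code != FADV_SUCCESS)
		example_pause_flow();	/* queue is full or suspended */
	return (err);
}
#endif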
4574
4575errno_t
4576ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4577 void *ioctl_arg)
4578{
4579 struct ifnet_filter *filter;
4580 int retval = EOPNOTSUPP;
4581 int result = 0;
4582
2d21ac55 4583 if (ifp == NULL || ioctl_code == 0)
4584 return (EINVAL);
4585
4586 /* Get an io ref count if the interface is attached */
4587 if (!ifnet_is_attached(ifp, 1))
4588 return (EOPNOTSUPP);
4589
4590 /*
4591 * Run the interface filters first.
4592 * We want to run all filters before calling the protocol,
4593 * interface family, or interface.
4594 */
4595 lck_mtx_lock_spin(&ifp->if_flt_lock);
4596 /* prevent filter list from changing in case we drop the lock */
4597 if_flt_monitor_busy(ifp);
4598 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4599 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4600 filter->filt_protocol == proto_fam)) {
4601 lck_mtx_unlock(&ifp->if_flt_lock);
4602
4603 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4604 proto_fam, ioctl_code, ioctl_arg);
4605
4606 lck_mtx_lock_spin(&ifp->if_flt_lock);
4607
4608 /* Only update retval if no one has handled the ioctl */
4609 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4610 if (result == ENOTSUP)
4611 result = EOPNOTSUPP;
4612 retval = result;
4613 if (retval != 0 && retval != EOPNOTSUPP) {
4614 /* we're done with the filter list */
4615 if_flt_monitor_unbusy(ifp);
4616 lck_mtx_unlock(&ifp->if_flt_lock);
4617 goto cleanup;
4618 }
4619 }
4620 }
4621 }
4622 /* we're done with the filter list */
4623 if_flt_monitor_unbusy(ifp);
4624 lck_mtx_unlock(&ifp->if_flt_lock);
4625
4626 /* Allow the protocol to handle the ioctl */
4627 if (proto_fam != 0) {
4628 struct if_proto *proto;
4629
4630 /* callee holds a proto refcnt upon success */
4631 ifnet_lock_shared(ifp);
4632 proto = find_attached_proto(ifp, proto_fam);
4633 ifnet_lock_done(ifp);
4634 if (proto != NULL) {
4635 proto_media_ioctl ioctlp =
4636 (proto->proto_kpi == kProtoKPI_v1 ?
4637 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
4638 result = EOPNOTSUPP;
4639 if (ioctlp != NULL)
4640 result = ioctlp(ifp, proto_fam, ioctl_code,
4641 ioctl_arg);
4642 if_proto_free(proto);
4643
4644 /* Only update retval if no one has handled the ioctl */
4645 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4646 if (result == ENOTSUP)
4647 result = EOPNOTSUPP;
4648 retval = result;
4649 if (retval && retval != EOPNOTSUPP)
4650 goto cleanup;
4651 }
4652 }
4653 }
4654
4655 /* retval is either 0 or EOPNOTSUPP */
4656
4657 /*
4658 * Let the interface handle this ioctl.
4659 * If it returns EOPNOTSUPP, ignore that, we may have
4660 * already handled this in the protocol or family.
4661 */
4662 if (ifp->if_ioctl)
4663 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
4664
4665 /* Only update retval if no one has handled the ioctl */
4666 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4667 if (result == ENOTSUP)
4668 result = EOPNOTSUPP;
4669 retval = result;
4670 if (retval && retval != EOPNOTSUPP) {
4671 goto cleanup;
4672 }
4673 }
4674
4675cleanup:
4676 if (retval == EJUSTRETURN)
4677 retval = 0;
4678
4679 ifnet_decr_iorefcnt(ifp);
4680
4681 return (retval);
4682}
4683
4684__private_extern__ errno_t
4685dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4686{
4687 errno_t error = 0;
4688
4689
4690 if (ifp->if_set_bpf_tap) {
4691 /* Get an io reference on the interface if it is attached */
4692 if (!ifnet_is_attached(ifp, 1))
4693 return (ENXIO);
4694 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4695 ifnet_decr_iorefcnt(ifp);
4696 }
4697 return (error);
4698}
4699
4700errno_t
4701dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4702 struct sockaddr *ll_addr, size_t ll_len)
4703{
4704 errno_t result = EOPNOTSUPP;
4705 struct if_proto *proto;
4706 const struct sockaddr *verify;
4707 proto_media_resolve_multi resolvep;
4708
4709 if (!ifnet_is_attached(ifp, 1))
4710 return (result);
4711
4712 bzero(ll_addr, ll_len);
4713
4714 /* Call the protocol first; callee holds a proto refcnt upon success */
4715 ifnet_lock_shared(ifp);
4716 proto = find_attached_proto(ifp, proto_addr->sa_family);
4717 ifnet_lock_done(ifp);
4718 if (proto != NULL) {
4719 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4720 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
4721 if (resolvep != NULL)
4722 result = resolvep(ifp, proto_addr,
4723 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
4724 if_proto_free(proto);
4725 }
4726
4727 /* Let the interface verify the multicast address */
4728 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4729 if (result == 0)
4730 verify = ll_addr;
4731 else
4732 verify = proto_addr;
4733 result = ifp->if_check_multi(ifp, verify);
4734 }
4735
4736 ifnet_decr_iorefcnt(ifp);
4737 return (result);
4738}
4739
4740__private_extern__ errno_t
4741dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4742 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4743 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4744{
4745 struct if_proto *proto;
4746 errno_t result = 0;
4747
4748 /* callee holds a proto refcnt upon success */
4749 ifnet_lock_shared(ifp);
4750 proto = find_attached_proto(ifp, target_proto->sa_family);
4751 ifnet_lock_done(ifp);
4752 if (proto == NULL) {
4753 result = ENOTSUP;
4754 } else {
4755 proto_media_send_arp arpp;
4756 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4757 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4758 if (arpp == NULL) {
4759 result = ENOTSUP;
4760 } else {
4761 switch (arpop) {
4762 case ARPOP_REQUEST:
4763 arpstat.txrequests++;
4764 if (target_hw != NULL)
4765 arpstat.txurequests++;
4766 break;
4767 case ARPOP_REPLY:
4768 arpstat.txreplies++;
4769 break;
4770 }
4771 result = arpp(ifp, arpop, sender_hw, sender_proto,
4772 target_hw, target_proto);
4773 }
4774 if_proto_free(proto);
4775 }
4776
4777 return (result);
4778}
4779
4780struct net_thread_marks { };
4781static const struct net_thread_marks net_thread_marks_base = { };
4782
4783__private_extern__ const net_thread_marks_t net_thread_marks_none =
4784 &net_thread_marks_base;
4785
4786__private_extern__ net_thread_marks_t
4787net_thread_marks_push(u_int32_t push)
4788{
4789 static const char *const base = (const void*)&net_thread_marks_base;
4790 u_int32_t pop = 0;
4791
4792 if (push != 0) {
4793 struct uthread *uth = get_bsdthread_info(current_thread());
4794
4795 pop = push & ~uth->uu_network_marks;
4796 if (pop != 0)
4797 uth->uu_network_marks |= pop;
4798 }
4799
4800 return ((net_thread_marks_t)&base[pop]);
4801}
4802
4803__private_extern__ net_thread_marks_t
4804net_thread_unmarks_push(u_int32_t unpush)
4805{
4806 static const char *const base = (const void*)&net_thread_marks_base;
4807 u_int32_t unpop = 0;
4808
4809 if (unpush != 0) {
4810 struct uthread *uth = get_bsdthread_info(current_thread());
4811
4812 unpop = unpush & uth->uu_network_marks;
4813 if (unpop != 0)
4814 uth->uu_network_marks &= ~unpop;
4815 }
4816
4817 return ((net_thread_marks_t)&base[unpop]);
4818}
4819
4820__private_extern__ void
4821net_thread_marks_pop(net_thread_marks_t popx)
4822{
4823 static const char *const base = (const void*)&net_thread_marks_base;
4824 const ptrdiff_t pop = (const char *)popx - (const char *)base;
4825
4826 if (pop != 0) {
4827 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4828 struct uthread *uth = get_bsdthread_info(current_thread());
4829
4830 VERIFY((pop & ones) == pop);
4831 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4832 uth->uu_network_marks &= ~pop;
4833 }
4834}
4835
4836__private_extern__ void
4837net_thread_unmarks_pop(net_thread_marks_t unpopx)
4838{
4839 static const char *const base = (const void*)&net_thread_marks_base;
4840 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
4841
4842 if (unpop != 0) {
4843 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4844 struct uthread *uth = get_bsdthread_info(current_thread());
4845
4846 VERIFY((unpop & ones) == unpop);
4847 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4848 uth->uu_network_marks |= unpop;
4849 }
4850}
4851
4852__private_extern__ u_int32_t
4853net_thread_is_marked(u_int32_t check)
4854{
4855 if (check != 0) {
4856 struct uthread *uth = get_bsdthread_info(current_thread());
4857 return (uth->uu_network_marks & check);
4858 }
4859 else
4860 return (0);
4861}
4862
4863__private_extern__ u_int32_t
4864net_thread_is_unmarked(u_int32_t check)
4865{
4866 if (check != 0) {
4867 struct uthread *uth = get_bsdthread_info(current_thread());
4868 return (~uth->uu_network_marks & check);
4869 }
4870 else
4871 return (0);
4872}
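/*
 * Editorial example (not part of the original source): the push/pop
 * discipline for thread marks. NET_THREAD_HELD_PF is assumed to be one
 * of the mark bits defined in the networking headers; the pop must use
 * the token returned by the matching push.
 */
#if 0
static void
example_marked_section(void)
{
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
	/* ... code that observes the mark via net_thread_is_marked() ... */
	net_thread_marks_pop(marks);
}
#endif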
4873
4874static __inline__ int
4875_is_announcement(const struct sockaddr_in * sender_sin,
4876 const struct sockaddr_in * target_sin)
4877{
4878 if (sender_sin == NULL) {
4879 return (FALSE);
4880 }
4881 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4882}
4883
4884__private_extern__ errno_t
4885dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
4886 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
4887 const struct sockaddr *target_proto0, u_int32_t rtflags)
4888{
4889 errno_t result = 0;
4890 const struct sockaddr_in * sender_sin;
4891 const struct sockaddr_in * target_sin;
4892 struct sockaddr_inarp target_proto_sinarp;
4893 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
4894
4895 if (target_proto == NULL || (sender_proto != NULL &&
4896 sender_proto->sa_family != target_proto->sa_family))
4897 return (EINVAL);
4898
4899 /*
4900 * If the target is a (default) router, provide that
4901 * information to the send_arp callback routine.
4902 */
4903 if (rtflags & RTF_ROUTER) {
4904 bcopy(target_proto, &target_proto_sinarp,
4905 sizeof (struct sockaddr_in));
4906 target_proto_sinarp.sin_other |= SIN_ROUTER;
4907 target_proto = (struct sockaddr *)&target_proto_sinarp;
4908 }
4909
4910 /*
4911 * If this is an ARP request and the target IP is IPv4LL,
4912 * send the request on all interfaces. The exception is
4913 * an announcement, which must only appear on the specific
4914 * interface.
91447636 4915 */
4916 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4917 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
4918 if (target_proto->sa_family == AF_INET &&
4919 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4920 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4921 !_is_announcement(target_sin, sender_sin)) {
4922 ifnet_t *ifp_list;
4923 u_int32_t count;
4924 u_int32_t ifp_on;
4925
4926 result = ENOTSUP;
4927
4928 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4929 for (ifp_on = 0; ifp_on < count; ifp_on++) {
4930 errno_t new_result;
4931 ifaddr_t source_hw = NULL;
4932 ifaddr_t source_ip = NULL;
4933 struct sockaddr_in source_ip_copy;
4934 struct ifnet *cur_ifp = ifp_list[ifp_on];
4935
4936 /*
4937 * Only arp on interfaces marked for IPv4LL
4938 * ARPing. This may mean that we don't ARP on
4939 * the interface the subnet route points to.
4940 */
4941 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
4942 continue;
4943
4944 /* Find the source IP address */
4945 ifnet_lock_shared(cur_ifp);
4946 source_hw = cur_ifp->if_lladdr;
4947 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
4948 ifa_link) {
4949 IFA_LOCK(source_ip);
4950 if (source_ip->ifa_addr != NULL &&
4951 source_ip->ifa_addr->sa_family ==
4952 AF_INET) {
4953 /* Copy the source IP address */
4954 source_ip_copy =
4955 *(struct sockaddr_in *)
316670eb 4956 (void *)source_ip->ifa_addr;
6d2010ae 4957 IFA_UNLOCK(source_ip);
91447636
A
4958 break;
4959 }
6d2010ae 4960 IFA_UNLOCK(source_ip);
91447636 4961 }
6d2010ae 4962
91447636
A
4963 /* No IP Source, don't arp */
4964 if (source_ip == NULL) {
6d2010ae 4965 ifnet_lock_done(cur_ifp);
91447636
A
4966 continue;
4967 }
6d2010ae
A
4968
4969 IFA_ADDREF(source_hw);
4970 ifnet_lock_done(cur_ifp);
4971
91447636 4972 /* Send the ARP */
6d2010ae 4973 new_result = dlil_send_arp_internal(cur_ifp,
316670eb
A
4974 arpop, (struct sockaddr_dl *)(void *)
4975 source_hw->ifa_addr,
6d2010ae
A
4976 (struct sockaddr *)&source_ip_copy, NULL,
4977 target_proto);
b0d623f7 4978
6d2010ae 4979 IFA_REMREF(source_hw);
91447636
A
4980 if (result == ENOTSUP) {
4981 result = new_result;
4982 }
4983 }
6d2010ae 4984 ifnet_list_free(ifp_list);
91447636 4985 }
6d2010ae
A
4986 } else {
4987 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
4988 sender_proto, target_hw, target_proto);
91447636 4989 }
6d2010ae
A
4990
4991 return (result);
91447636 4992}

/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
        struct ifnet *_ifp;

        LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
        TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
                if (_ifp == ifp)
                        break;
        }
        return (_ifp != NULL);
}

/*
 * Caller has to pass a non-zero refio argument to get an
 * IO reference count.  This will prevent ifnet_detach from
 * being called when there are outstanding IO reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
        int ret;

        lck_mtx_lock_spin(&ifp->if_ref_lock);
        if ((ret = IF_FULLY_ATTACHED(ifp))) {
                if (refio > 0)
                        ifp->if_refio++;
        }
        lck_mtx_unlock(&ifp->if_ref_lock);

        return (ret);
}
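
/*
 * Illustrative sketch of the get/put pattern implied above (not a
 * definitive recipe): a caller that must keep `ifp' from detaching
 * across a blocking operation takes an IO reference and drops it when
 * done:
 *
 *	if (!ifnet_is_attached(ifp, 1))
 *		return (ENXIO);		// refio > 0 takes an IO reference
 *	...				// ifnet_detach_final() now waits
 *	ifnet_decr_iorefcnt(ifp);	// may wake up the detacher thread
 */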

/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
        lck_mtx_lock_spin(&ifp->if_ref_lock);
        VERIFY(IF_FULLY_ATTACHED(ifp));
        VERIFY(ifp->if_refio > 0);
        ifp->if_refio++;
        lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
        lck_mtx_lock_spin(&ifp->if_ref_lock);
        VERIFY(ifp->if_refio > 0);
        VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
        ifp->if_refio--;

        /*
         * If there are no more outstanding IO references, wake up the
         * ifnet_detach thread if the detaching flag is set.
         */
        if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
                wakeup(&(ifp->if_refio));

        lck_mtx_unlock(&ifp->if_ref_lock);
}

static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
        struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
        ctrace_t *tr;
        u_int32_t idx;
        u_int16_t *cnt;

        if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
                panic("%s: dl_if %p has no debug structure", __func__, dl_if);
                /* NOTREACHED */
        }

        if (refhold) {
                cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
                tr = dl_if_dbg->dldbg_if_refhold;
        } else {
                cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
                tr = dl_if_dbg->dldbg_if_refrele;
        }

        idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
        ctrace_record(&tr[idx]);
}

errno_t
dlil_if_ref(struct ifnet *ifp)
{
        struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

        if (dl_if == NULL)
                return (EINVAL);

        lck_mtx_lock_spin(&dl_if->dl_if_lock);
        ++dl_if->dl_if_refcnt;
        if (dl_if->dl_if_refcnt == 0) {
                panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
                /* NOTREACHED */
        }
        if (dl_if->dl_if_trace != NULL)
                (*dl_if->dl_if_trace)(dl_if, TRUE);
        lck_mtx_unlock(&dl_if->dl_if_lock);

        return (0);
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
        struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
        bool need_release = FALSE;

        if (dl_if == NULL)
                return (EINVAL);

        lck_mtx_lock_spin(&dl_if->dl_if_lock);
        switch (dl_if->dl_if_refcnt) {
        case 0:
                panic("%s: negative refcnt for ifp=%p", __func__, ifp);
                /* NOTREACHED */
                break;
        case 1:
                if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
                        need_release = TRUE;
                }
                break;
        default:
                break;
        }
        --dl_if->dl_if_refcnt;
        if (dl_if->dl_if_trace != NULL)
                (*dl_if->dl_if_trace)(dl_if, FALSE);
        lck_mtx_unlock(&dl_if->dl_if_lock);
        if (need_release) {
                dlil_if_release(ifp);
        }
        return (0);
}

static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
        struct kev_dl_proto_data ev_pr_data;
        struct ifnet *ifp = proto->ifp;
        int retval = 0;
        u_int32_t hash_value = proto_hash_value(proto->protocol_family);
        struct if_proto *prev_proto;
        struct if_proto *_proto;

        /* callee holds a proto refcnt upon success */
        ifnet_lock_exclusive(ifp);
        _proto = find_attached_proto(ifp, proto->protocol_family);
        if (_proto != NULL) {
                ifnet_lock_done(ifp);
                if_proto_free(_proto);
                return (EEXIST);
        }

        /*
         * Call family module add_proto routine so it can refine the
         * demux descriptors as it wishes.
         */
        retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
            demux_count);
        if (retval) {
                ifnet_lock_done(ifp);
                return (retval);
        }

        /*
         * Insert the protocol in the hash
         */
        prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
        while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
                prev_proto = SLIST_NEXT(prev_proto, next_hash);
        if (prev_proto)
                SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
        else
                SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
                    proto, next_hash);

        /* hold a proto refcnt for attach */
        if_proto_ref(proto);

        /*
         * The reserved field carries the number of protocols still
         * attached (subject to change)
         */
        ev_pr_data.proto_family = proto->protocol_family;
        ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
        ifnet_lock_done(ifp);

        dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
            (struct net_event_data *)&ev_pr_data,
            sizeof (struct kev_dl_proto_data));
        if (proto_count != NULL) {
                *proto_count = ev_pr_data.proto_remaining_count;
        }
        return (retval);
}

errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
        int retval = 0;
        struct if_proto *ifproto = NULL;
        uint32_t proto_count = 0;

        ifnet_head_lock_shared();
        if (ifp == NULL || protocol == 0 || proto_details == NULL) {
                retval = EINVAL;
                goto end;
        }
        /* Check that the interface is in the global list */
        if (!ifnet_lookup(ifp)) {
                retval = ENXIO;
                goto end;
        }

        ifproto = zalloc(dlif_proto_zone);
        if (ifproto == NULL) {
                retval = ENOMEM;
                goto end;
        }
        bzero(ifproto, dlif_proto_size);

        /* refcnt held above during lookup */
        ifproto->ifp = ifp;
        ifproto->protocol_family = protocol;
        ifproto->proto_kpi = kProtoKPI_v1;
        ifproto->kpi.v1.input = proto_details->input;
        ifproto->kpi.v1.pre_output = proto_details->pre_output;
        ifproto->kpi.v1.event = proto_details->event;
        ifproto->kpi.v1.ioctl = proto_details->ioctl;
        ifproto->kpi.v1.detached = proto_details->detached;
        ifproto->kpi.v1.resolve_multi = proto_details->resolve;
        ifproto->kpi.v1.send_arp = proto_details->send_arp;

        retval = dlil_attach_protocol_internal(ifproto,
            proto_details->demux_list, proto_details->demux_count,
            &proto_count);

end:
        if (retval != 0 && retval != EEXIST && ifp != NULL) {
                DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
                    if_name(ifp), protocol, retval);
        } else if (dlil_verbose && ifp != NULL) {
                printf("%s: attached v1 protocol %d (count = %d)\n",
                    if_name(ifp), protocol, proto_count);
        }
        ifnet_head_done();
        if (retval != 0 && ifproto != NULL)
                zfree(dlif_proto_zone, ifproto);
        return (retval);
}

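/*
 * Usage sketch (illustrative; the "example_*" names and PF_EXAMPLE are
 * hypothetical): a protocol module would typically attach itself from
 * its plumb routine roughly as follows:
 *
 *	struct ifnet_demux_desc desc = {
 *		.type = DLIL_DESC_ETYPE2,
 *		.data = &example_ethertype,
 *		.datalen = sizeof (example_ethertype)
 *	};
 *	struct ifnet_attach_proto_param pr;
 *
 *	bzero(&pr, sizeof (pr));
 *	pr.demux_list = &desc;
 *	pr.demux_count = 1;
 *	pr.input = example_input;		// proto_media_input
 *	pr.detached = example_detached;		// proto_media_detached
 *	err = ifnet_attach_protocol(ifp, PF_EXAMPLE, &pr);
 *
 * On success the protocol lands in ifp's demux hash and a
 * KEV_DL_PROTO_ATTACHED event is posted, as implemented above.
 */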
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
        int retval = 0;
        struct if_proto *ifproto = NULL;
        uint32_t proto_count = 0;

        ifnet_head_lock_shared();
        if (ifp == NULL || protocol == 0 || proto_details == NULL) {
                retval = EINVAL;
                goto end;
        }
        /* Check that the interface is in the global list */
        if (!ifnet_lookup(ifp)) {
                retval = ENXIO;
                goto end;
        }

        ifproto = zalloc(dlif_proto_zone);
        if (ifproto == NULL) {
                retval = ENOMEM;
                goto end;
        }
        bzero(ifproto, sizeof(*ifproto));

        /* refcnt held above during lookup */
        ifproto->ifp = ifp;
        ifproto->protocol_family = protocol;
        ifproto->proto_kpi = kProtoKPI_v2;
        ifproto->kpi.v2.input = proto_details->input;
        ifproto->kpi.v2.pre_output = proto_details->pre_output;
        ifproto->kpi.v2.event = proto_details->event;
        ifproto->kpi.v2.ioctl = proto_details->ioctl;
        ifproto->kpi.v2.detached = proto_details->detached;
        ifproto->kpi.v2.resolve_multi = proto_details->resolve;
        ifproto->kpi.v2.send_arp = proto_details->send_arp;

        retval = dlil_attach_protocol_internal(ifproto,
            proto_details->demux_list, proto_details->demux_count,
            &proto_count);

end:
        if (retval != 0 && retval != EEXIST && ifp != NULL) {
                DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
                    if_name(ifp), protocol, retval);
        } else if (dlil_verbose && ifp != NULL) {
                printf("%s: attached v2 protocol %d (count = %d)\n",
                    if_name(ifp), protocol, proto_count);
        }
        ifnet_head_done();
        if (retval != 0 && ifproto != NULL)
                zfree(dlif_proto_zone, ifproto);
        return (retval);
}
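
/*
 * The v2 variant differs from v1 only in the shape of the callbacks it
 * registers; most notably, a v2 input function receives just the packet,
 * while a v1 input function also receives the frame header pointer
 * (compare ifproto_media_input_v1() and ifproto_media_input_v2() below).
 */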

errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
        struct if_proto *proto = NULL;
        int retval = 0;

        if (ifp == NULL || proto_family == 0) {
                retval = EINVAL;
                goto end;
        }

        ifnet_lock_exclusive(ifp);
        /* callee holds a proto refcnt upon success */
        proto = find_attached_proto(ifp, proto_family);
        if (proto == NULL) {
                retval = ENXIO;
                ifnet_lock_done(ifp);
                goto end;
        }

        /* call family module del_proto */
        if (ifp->if_del_proto)
                ifp->if_del_proto(ifp, proto->protocol_family);

        SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
            proto, if_proto, next_hash);

        if (proto->proto_kpi == kProtoKPI_v1) {
                proto->kpi.v1.input = ifproto_media_input_v1;
                proto->kpi.v1.pre_output = ifproto_media_preout;
                proto->kpi.v1.event = ifproto_media_event;
                proto->kpi.v1.ioctl = ifproto_media_ioctl;
                proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
                proto->kpi.v1.send_arp = ifproto_media_send_arp;
        } else {
                proto->kpi.v2.input = ifproto_media_input_v2;
                proto->kpi.v2.pre_output = ifproto_media_preout;
                proto->kpi.v2.event = ifproto_media_event;
                proto->kpi.v2.ioctl = ifproto_media_ioctl;
                proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
                proto->kpi.v2.send_arp = ifproto_media_send_arp;
        }
        proto->detached = 1;
        ifnet_lock_done(ifp);

        if (dlil_verbose) {
                printf("%s: detached %s protocol %d\n", if_name(ifp),
                    (proto->proto_kpi == kProtoKPI_v1) ?
                    "v1" : "v2", proto_family);
        }

        /* release proto refcnt held during protocol attach */
        if_proto_free(proto);

        /*
         * Release proto refcnt held during lookup; the rest of
         * protocol detach steps will happen when the last proto
         * reference is released.
         */
        if_proto_free(proto);

end:
        return (retval);
}

static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
        return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
        return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
        return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
        return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
        return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
        return (ENXIO);
}

extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
        struct ifnet *tmp_if;
        struct ifaddr *ifa;
        struct if_data_internal if_data_saved;
        struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
        struct dlil_threading_info *dl_inp;
        u_int32_t sflags = 0;
        int err;

        if (ifp == NULL)
                return (EINVAL);

        /*
         * Serialize ifnet attach using dlil_ifnet_lock, in order to
         * prevent the interface from being configured while it is
         * embryonic, as ifnet_head_lock is dropped and reacquired
         * below prior to marking the ifnet with IFRF_ATTACHED.
         */
        dlil_if_lock();
        ifnet_head_lock_exclusive();
        /* Verify we aren't already on the list */
        TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
                if (tmp_if == ifp) {
                        ifnet_head_done();
                        dlil_if_unlock();
                        return (EEXIST);
                }
        }

        lck_mtx_lock_spin(&ifp->if_ref_lock);
        if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
                panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
                    __func__, ifp);
                /* NOTREACHED */
        }
        lck_mtx_unlock(&ifp->if_ref_lock);

        ifnet_lock_exclusive(ifp);

        /* Sanity check */
        VERIFY(ifp->if_detaching_link.tqe_next == NULL);
        VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

        if (ll_addr != NULL) {
                if (ifp->if_addrlen == 0) {
                        ifp->if_addrlen = ll_addr->sdl_alen;
                } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
                        ifnet_lock_done(ifp);
                        ifnet_head_done();
                        dlil_if_unlock();
                        return (EINVAL);
                }
        }

        /*
         * Allow interfaces without protocol families to attach
         * only if they have the necessary fields filled out.
         */
        if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
                DLIL_PRINTF("%s: Attempt to attach interface without "
                    "family module - %d\n", __func__, ifp->if_family);
                ifnet_lock_done(ifp);
                ifnet_head_done();
                dlil_if_unlock();
                return (ENODEV);
        }

        /* Allocate protocol hash table */
        VERIFY(ifp->if_proto_hash == NULL);
        ifp->if_proto_hash = zalloc(dlif_phash_zone);
        if (ifp->if_proto_hash == NULL) {
                ifnet_lock_done(ifp);
                ifnet_head_done();
                dlil_if_unlock();
                return (ENOBUFS);
        }
        bzero(ifp->if_proto_hash, dlif_phash_size);

        lck_mtx_lock_spin(&ifp->if_flt_lock);
        VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
        TAILQ_INIT(&ifp->if_flt_head);
        VERIFY(ifp->if_flt_busy == 0);
        VERIFY(ifp->if_flt_waiters == 0);
        lck_mtx_unlock(&ifp->if_flt_lock);

        if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
                VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
                LIST_INIT(&ifp->if_multiaddrs);
        }

        VERIFY(ifp->if_allhostsinm == NULL);
        VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
        TAILQ_INIT(&ifp->if_addrhead);

        if (ifp->if_index == 0) {
                int idx = if_next_index();

                if (idx == -1) {
                        ifp->if_index = 0;
                        ifnet_lock_done(ifp);
                        ifnet_head_done();
                        dlil_if_unlock();
                        return (ENOBUFS);
                }
                ifp->if_index = idx;
        }
        /* There should not be anything occupying this slot */
        VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

        /* allocate (if needed) and initialize a link address */
        ifa = dlil_alloc_lladdr(ifp, ll_addr);
        if (ifa == NULL) {
                ifnet_lock_done(ifp);
                ifnet_head_done();
                dlil_if_unlock();
                return (ENOBUFS);
        }

        VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
        ifnet_addrs[ifp->if_index - 1] = ifa;

        /* make this address the first on the list */
        IFA_LOCK(ifa);
        /* hold a reference for ifnet_addrs[] */
        IFA_ADDREF_LOCKED(ifa);
        /* if_attach_link_ifa() holds a reference for ifa_link */
        if_attach_link_ifa(ifp, ifa);
        IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
        mac_ifnet_label_associate(ifp);
#endif

        TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
        ifindex2ifnet[ifp->if_index] = ifp;

        /* Hold a reference to the underlying dlil_ifnet */
        ifnet_reference(ifp);

        /* Clear stats (save and restore other fields that we care about) */
        if_data_saved = ifp->if_data;
        bzero(&ifp->if_data, sizeof (ifp->if_data));
        ifp->if_data.ifi_type = if_data_saved.ifi_type;
        ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
        ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
        ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
        ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
        ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
        ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
        ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
        ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
        ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
        ifnet_touch_lastchange(ifp);

        VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
            ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
            ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

        /* By default, use SFB and enable flow advisory */
        sflags = PKTSCHEDF_QALG_SFB;
        if (if_flowadv)
                sflags |= PKTSCHEDF_QALG_FLOWCTL;

        if (if_delaybased_queue)
                sflags |= PKTSCHEDF_QALG_DELAYBASED;

        if (ifp->if_output_sched_model ==
            IFNET_SCHED_MODEL_DRIVER_MANAGED)
                sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;

        /* Initialize transmit queue(s) */
        err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
        if (err != 0) {
                panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
                    "err=%d", __func__, ifp, err);
                /* NOTREACHED */
        }

        /* Sanity checks on the input thread storage */
        dl_inp = &dl_if->dl_if_inpstorage;
        bzero(&dl_inp->stats, sizeof (dl_inp->stats));
        VERIFY(dl_inp->input_waiting == 0);
        VERIFY(dl_inp->wtot == 0);
        VERIFY(dl_inp->ifp == NULL);
        VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
        VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
        VERIFY(!dl_inp->net_affinity);
        VERIFY(ifp->if_inp == NULL);
        VERIFY(dl_inp->input_thr == THREAD_NULL);
        VERIFY(dl_inp->wloop_thr == THREAD_NULL);
        VERIFY(dl_inp->poll_thr == THREAD_NULL);
        VERIFY(dl_inp->tag == 0);
        VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
        bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
        bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
        bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
        VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

        /*
         * A specific DLIL input thread is created per Ethernet/cellular
         * interface or for an interface which supports opportunistic
         * input polling.  Pseudo interfaces or other types of interfaces
         * use the main input thread instead.
         */
        if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
            ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
                ifp->if_inp = dl_inp;
                err = dlil_create_input_thread(ifp, ifp->if_inp);
                if (err != 0) {
                        panic_plain("%s: ifp=%p couldn't get an input thread; "
                            "err=%d", __func__, ifp, err);
                        /* NOTREACHED */
                }
        }

        if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
                ifp->if_inp->input_mit_tcall =
                    thread_call_allocate_with_priority(dlil_mit_tcall_fn,
                        ifp, THREAD_CALL_PRIORITY_KERNEL);
        }

        /*
         * If the driver supports the new transmit model, calculate flow hash
         * and create a workloop starter thread to invoke the if_start callback
         * where the packets may be dequeued and transmitted.
         */
        if (ifp->if_eflags & IFEF_TXSTART) {
                ifp->if_flowhash = ifnet_calc_flowhash(ifp);
                VERIFY(ifp->if_flowhash != 0);
                VERIFY(ifp->if_start_thread == THREAD_NULL);

                ifnet_set_start_cycle(ifp, NULL);
                ifp->if_start_active = 0;
                ifp->if_start_req = 0;
                ifp->if_start_flags = 0;
                VERIFY(ifp->if_start != NULL);
                if ((err = kernel_thread_start(ifnet_start_thread_fn,
                    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
                        panic_plain("%s: "
                            "ifp=%p couldn't get a start thread; "
                            "err=%d", __func__, ifp, err);
                        /* NOTREACHED */
                }
                ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
                    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
        } else {
                ifp->if_flowhash = 0;
        }

        /*
         * If the driver supports the new receive model, create a poller
         * thread to invoke if_input_poll callback where the packets may
         * be dequeued from the driver and processed for reception.
         */
        if (ifp->if_eflags & IFEF_RXPOLL) {
                VERIFY(ifp->if_input_poll != NULL);
                VERIFY(ifp->if_input_ctl != NULL);
                VERIFY(ifp->if_poll_thread == THREAD_NULL);

                ifnet_set_poll_cycle(ifp, NULL);
                ifp->if_poll_update = 0;
                ifp->if_poll_active = 0;
                ifp->if_poll_req = 0;
                if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
                    &ifp->if_poll_thread)) != KERN_SUCCESS) {
                        panic_plain("%s: ifp=%p couldn't get a poll thread; "
                            "err=%d", __func__, ifp, err);
                        /* NOTREACHED */
                }
                ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
                    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
        }

        VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
        VERIFY(ifp->if_desc.ifd_len == 0);
        VERIFY(ifp->if_desc.ifd_desc != NULL);

        /* Record attach PC stacktrace */
        ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

        ifp->if_updatemcasts = 0;
        if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
                struct ifmultiaddr *ifma;
                LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
                        IFMA_LOCK(ifma);
                        if (ifma->ifma_addr->sa_family == AF_LINK ||
                            ifma->ifma_addr->sa_family == AF_UNSPEC)
                                ifp->if_updatemcasts++;
                        IFMA_UNLOCK(ifma);
                }

                printf("%s: attached with %d suspended link-layer multicast "
                    "membership(s)\n", if_name(ifp),
                    ifp->if_updatemcasts);
        }

        /* Clear logging parameters */
        bzero(&ifp->if_log, sizeof (ifp->if_log));

        /* Clear foreground/realtime activity timestamps */
        ifp->if_fg_sendts = 0;
        ifp->if_rt_sendts = 0;

        VERIFY(ifp->if_delegated.ifp == NULL);
        VERIFY(ifp->if_delegated.type == 0);
        VERIFY(ifp->if_delegated.family == 0);
        VERIFY(ifp->if_delegated.subfamily == 0);
        VERIFY(ifp->if_delegated.expensive == 0);

        VERIFY(ifp->if_agentids == NULL);
        VERIFY(ifp->if_agentcount == 0);

        /* Reset interface state */
        bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
        ifp->if_interface_state.valid_bitmask |=
            IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
        ifp->if_interface_state.interface_availability =
            IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

        /* Initialize Link Quality Metric (loopback [lo0] is always good) */
        if (ifp == lo_ifp) {
                ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
                ifp->if_interface_state.valid_bitmask |=
                    IF_INTERFACE_STATE_LQM_STATE_VALID;
        } else {
                ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
        }

        /*
         * Enable ECN capability on this interface depending on the
         * value of the ECN global setting
         */
        if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
                ifp->if_eflags |= IFEF_ECN_ENABLE;
                ifp->if_eflags &= ~IFEF_ECN_DISABLE;
        }

        /*
         * Built-in Cyclops always on policy for WiFi infra
         */
        if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
                errno_t error;

                error = if_set_qosmarking_mode(ifp,
                    IFRTYPE_QOSMARKING_FASTLANE);
                if (error != 0) {
                        printf("%s if_set_qosmarking_mode(%s) error %d\n",
                            __func__, ifp->if_xname, error);
                } else {
                        ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
                        printf("%s fastlane enabled on %s\n",
                            __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
                }
        }

        ifnet_lock_done(ifp);
        ifnet_head_done();

        lck_mtx_lock(&ifp->if_cached_route_lock);
        /* Enable forwarding cached route */
        ifp->if_fwd_cacheok = 1;
        /* Clean up any existing cached routes */
        ROUTE_RELEASE(&ifp->if_fwd_route);
        bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
        ROUTE_RELEASE(&ifp->if_src_route);
        bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
        ROUTE_RELEASE(&ifp->if_src_route6);
        bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
        lck_mtx_unlock(&ifp->if_cached_route_lock);

        ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

        /*
         * Allocate and attach IGMPv3/MLDv2 interface specific variables
         * and trees; do this before the ifnet is marked as attached.
         * The ifnet keeps the reference to the info structures even after
         * the ifnet is detached, since the network-layer records still
         * refer to the info structures even after that.  This also
         * makes it possible for them to still function after the ifnet
         * is recycled or reattached.
         */
#if INET
        if (IGMP_IFINFO(ifp) == NULL) {
                IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
                VERIFY(IGMP_IFINFO(ifp) != NULL);
        } else {
                VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
                igmp_domifreattach(IGMP_IFINFO(ifp));
        }
#endif /* INET */
#if INET6
        if (MLD_IFINFO(ifp) == NULL) {
                MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
                VERIFY(MLD_IFINFO(ifp) != NULL);
        } else {
                VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
                mld_domifreattach(MLD_IFINFO(ifp));
        }
#endif /* INET6 */

        VERIFY(ifp->if_data_threshold == 0);
        VERIFY(ifp->if_dt_tcall != NULL);

        /*
         * Finally, mark this ifnet as attached.
         */
        lck_mtx_lock(rnh_lock);
        ifnet_lock_exclusive(ifp);
        lck_mtx_lock_spin(&ifp->if_ref_lock);
        ifp->if_refflags = IFRF_ATTACHED;	/* clears embryonic */
        lck_mtx_unlock(&ifp->if_ref_lock);
        if (net_rtref) {
                /* boot-args override; enable idle notification */
                (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
                    IFRF_IDLE_NOTIFY);
        } else {
                /* apply previous request(s) to set the idle flags, if any */
                (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
                    ifp->if_idle_new_flags_mask);
        }
        ifnet_lock_done(ifp);
        lck_mtx_unlock(rnh_lock);
        dlil_if_unlock();

#if PF
        /*
         * Attach packet filter to this interface, if enabled.
         */
        pf_ifnet_hook(ifp, 1);
#endif /* PF */

        dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

        if (dlil_verbose) {
                printf("%s: attached%s\n", if_name(ifp),
                    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
        }

        return (0);
}
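
/*
 * Driver-side sketch (illustrative; the "example_*" names are
 * hypothetical): a typical native driver allocates its ifnet and then
 * attaches it with its permanent link-layer address, ending up in
 * ifnet_attach() above:
 *
 *	struct ifnet_init_params init;
 *	ifnet_t ifp;
 *
 *	bzero(&init, sizeof (init));
 *	init.name = "example";
 *	init.unit = 0;
 *	init.family = IFNET_FAMILY_ETHERNET;
 *	init.type = IFT_ETHER;
 *	init.output = example_output;
 *	init.demux = example_demux;
 *	init.add_proto = example_add_proto;	// required by the check above
 *	init.del_proto = example_del_proto;	// likewise
 *	init.softc = example_softc;
 *
 *	if (ifnet_allocate(&init, &ifp) == 0 &&
 *	    ifnet_attach(ifp, example_lladdr) == 0) {
 *		// interface is visible; KEV_DL_IF_ATTACHED has been posted
 *	}
 */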

/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
        struct ifaddr *ifa, *oifa;
        struct sockaddr_dl *asdl, *msdl;
        char workbuf[IFNAMSIZ*2];
        int namelen, masklen, socksize;
        struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

        ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
        VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

        namelen = snprintf(workbuf, sizeof (workbuf), "%s",
            if_name(ifp));
        masklen = offsetof(struct sockaddr_dl, sdl_data[0])
            + ((namelen > 0) ? namelen : 0);
        socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
        if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
                socksize = sizeof(struct sockaddr_dl);
        socksize = ROUNDUP(socksize);
#undef ROUNDUP

        ifa = ifp->if_lladdr;
        if (socksize > DLIL_SDLMAXLEN ||
            (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
                /*
                 * Rare, but in the event that the link address requires
                 * more storage space than DLIL_SDLMAXLEN, allocate the
                 * largest possible storages for address and mask, such
                 * that we can reuse the same space when if_addrlen grows.
                 * This same space will be used when if_addrlen shrinks.
                 */
                if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
                        int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
                        ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
                        if (ifa == NULL)
                                return (NULL);
                        ifa_lock_init(ifa);
                        /* Don't set IFD_ALLOC, as this is permanent */
                        ifa->ifa_debug = IFD_LINK;
                }
                IFA_LOCK(ifa);
                /* address and mask sockaddr_dl locations */
                asdl = (struct sockaddr_dl *)(ifa + 1);
                bzero(asdl, SOCK_MAXADDRLEN);
                msdl = (struct sockaddr_dl *)(void *)
                    ((char *)asdl + SOCK_MAXADDRLEN);
                bzero(msdl, SOCK_MAXADDRLEN);
        } else {
                VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
                /*
                 * Use the storage areas for address and mask within the
                 * dlil_ifnet structure.  This is the most common case.
                 */
                if (ifa == NULL) {
                        ifa = &dl_if->dl_if_lladdr.ifa;
                        ifa_lock_init(ifa);
                        /* Don't set IFD_ALLOC, as this is permanent */
                        ifa->ifa_debug = IFD_LINK;
                }
                IFA_LOCK(ifa);
                /* address and mask sockaddr_dl locations */
                asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
                bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
                msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
                bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
        }

        /* hold a permanent reference for the ifnet itself */
        IFA_ADDREF_LOCKED(ifa);
        oifa = ifp->if_lladdr;
        ifp->if_lladdr = ifa;

        VERIFY(ifa->ifa_debug == IFD_LINK);
        ifa->ifa_ifp = ifp;
        ifa->ifa_rtrequest = link_rtrequest;
        ifa->ifa_addr = (struct sockaddr *)asdl;
        asdl->sdl_len = socksize;
        asdl->sdl_family = AF_LINK;
        if (namelen > 0) {
                bcopy(workbuf, asdl->sdl_data, min(namelen,
                    sizeof (asdl->sdl_data)));
                asdl->sdl_nlen = namelen;
        } else {
                asdl->sdl_nlen = 0;
        }
        asdl->sdl_index = ifp->if_index;
        asdl->sdl_type = ifp->if_type;
        if (ll_addr != NULL) {
                asdl->sdl_alen = ll_addr->sdl_alen;
                bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
        } else {
                asdl->sdl_alen = 0;
        }
        ifa->ifa_netmask = (struct sockaddr *)msdl;
        msdl->sdl_len = masklen;
        while (namelen > 0)
                msdl->sdl_data[--namelen] = 0xff;
        IFA_UNLOCK(ifa);

        if (oifa != NULL)
                IFA_REMREF(oifa);

        return (ifa);
}
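
/*
 * Worked sizing example for a typical Ethernet interface "en0":
 * namelen = 3, masklen = offsetof(struct sockaddr_dl, sdl_data) + 3 = 11,
 * and socksize = 11 + 6 = 17, which is smaller than
 * sizeof (struct sockaddr_dl) (20 bytes) and is therefore bumped to 20;
 * ROUNDUP(20) stays 20, so both sockaddrs fit comfortably within the
 * in-line dl_if_lladdr storage and no separate allocation is needed.
 */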

static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
        in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
        in6_purgeaddrs(ifp);
#endif /* INET6 */
}

errno_t
ifnet_detach(ifnet_t ifp)
{
        struct ifnet *delegated_ifp;
        struct nd_ifinfo *ndi = NULL;

        if (ifp == NULL)
                return (EINVAL);

        ndi = ND_IFINFO(ifp);
        if (NULL != ndi)
                ndi->cga_initialized = FALSE;

        lck_mtx_lock(rnh_lock);
        ifnet_head_lock_exclusive();
        ifnet_lock_exclusive(ifp);

        /*
         * Check to see if this interface has previously triggered
         * aggressive protocol draining; if so, decrement the global
         * refcnt and clear PR_AGGDRAIN on the route domain if
         * there are no more of such an interface around.
         */
        (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

        lck_mtx_lock_spin(&ifp->if_ref_lock);
        if (!(ifp->if_refflags & IFRF_ATTACHED)) {
                lck_mtx_unlock(&ifp->if_ref_lock);
                ifnet_lock_done(ifp);
                ifnet_head_done();
                lck_mtx_unlock(rnh_lock);
                return (EINVAL);
        } else if (ifp->if_refflags & IFRF_DETACHING) {
                /* Interface has already been detached */
                lck_mtx_unlock(&ifp->if_ref_lock);
                ifnet_lock_done(ifp);
                ifnet_head_done();
                lck_mtx_unlock(rnh_lock);
                return (ENXIO);
        }
        VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
        /* Indicate this interface is being detached */
        ifp->if_refflags &= ~IFRF_ATTACHED;
        ifp->if_refflags |= IFRF_DETACHING;
        lck_mtx_unlock(&ifp->if_ref_lock);

        if (dlil_verbose)
                printf("%s: detaching\n", if_name(ifp));

        /* Reset ECN enable/disable flags */
        ifp->if_eflags &= ~IFEF_ECN_DISABLE;
        ifp->if_eflags &= ~IFEF_ECN_ENABLE;

        /*
         * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
         * no longer be visible during lookups from this point.
         */
        VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
        TAILQ_REMOVE(&ifnet_head, ifp, if_link);
        ifp->if_link.tqe_next = NULL;
        ifp->if_link.tqe_prev = NULL;
        if (ifp->if_ordered_link.tqe_next != NULL ||
            ifp->if_ordered_link.tqe_prev != NULL) {
                ifnet_remove_from_ordered_list(ifp);
        }
        ifindex2ifnet[ifp->if_index] = NULL;

        /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
        ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

        /* Record detach PC stacktrace */
        ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

        /* Clear logging parameters */
        bzero(&ifp->if_log, sizeof (ifp->if_log));

        /* Clear delegated interface info (reference released below) */
        delegated_ifp = ifp->if_delegated.ifp;
        bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

        /* Reset interface state */
        bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

        ifnet_lock_done(ifp);
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);

        /* Release reference held on the delegated interface */
        if (delegated_ifp != NULL)
                ifnet_release(delegated_ifp);

        /* Reset Link Quality Metric (unless loopback [lo0]) */
        if (ifp != lo_ifp)
                if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

        /* Reset TCP local statistics */
        if (ifp->if_tcp_stat != NULL)
                bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

        /* Reset UDP local statistics */
        if (ifp->if_udp_stat != NULL)
                bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

        /* Reset ifnet IPv4 stats */
        if (ifp->if_ipv4_stat != NULL)
                bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

        /* Reset ifnet IPv6 stats */
        if (ifp->if_ipv6_stat != NULL)
                bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

        /* Release memory held for interface link status report */
        if (ifp->if_link_status != NULL) {
                FREE(ifp->if_link_status, M_TEMP);
                ifp->if_link_status = NULL;
        }

        /* Clear agent IDs */
        if (ifp->if_agentids != NULL) {
                FREE(ifp->if_agentids, M_NETAGENT);
                ifp->if_agentids = NULL;
        }
        ifp->if_agentcount = 0;

        /* Let BPF know we're detaching */
        bpfdetach(ifp);

        /* Mark the interface as DOWN */
        if_down(ifp);

        /* Disable forwarding cached route */
        lck_mtx_lock(&ifp->if_cached_route_lock);
        ifp->if_fwd_cacheok = 0;
        lck_mtx_unlock(&ifp->if_cached_route_lock);

        /* Disable data threshold and wait for any pending event posting */
        ifp->if_data_threshold = 0;
        VERIFY(ifp->if_dt_tcall != NULL);
        (void) thread_call_cancel_wait(ifp->if_dt_tcall);

        /*
         * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
         * references to the info structures and leave them attached to
         * this ifnet.
         */
#if INET
        igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
        mld_domifdetach(ifp);
#endif /* INET6 */

        dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

        /* Let worker thread take care of the rest, to avoid reentrancy */
        dlil_if_lock();
        ifnet_detaching_enqueue(ifp);
        dlil_if_unlock();

        return (0);
}

static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
        dlil_if_lock_assert();

        ++ifnet_detaching_cnt;
        VERIFY(ifnet_detaching_cnt != 0);
        TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
        wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
        struct ifnet *ifp;

        dlil_if_lock_assert();

        ifp = TAILQ_FIRST(&ifnet_detaching_head);
        VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
        if (ifp != NULL) {
                VERIFY(ifnet_detaching_cnt != 0);
                --ifnet_detaching_cnt;
                TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
                ifp->if_detaching_link.tqe_next = NULL;
                ifp->if_detaching_link.tqe_prev = NULL;
        }
        return (ifp);
}

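/*
 * The detacher below is a continuation-based kernel thread: rather than
 * holding a kernel stack while idle, it parks in msleep0() with
 * ifnet_detacher_thread_cont as the continuation, so every wakeup
 * re-enters that function from the top with dlil_ifnet_lock held.
 * ifnet_detach() queues work via ifnet_detaching_enqueue(), whose
 * wakeup() on &ifnet_delayed_run kicks this thread.
 */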
static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
        struct ifnet *ifp;

        for (;;) {
                dlil_if_lock_assert();
                while (ifnet_detaching_cnt == 0) {
                        (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
                            (PZERO - 1), "ifnet_detacher_cont", 0,
                            ifnet_detacher_thread_cont);
                        /* NOTREACHED */
                }

                VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

                /* Take care of detaching ifnet */
                ifp = ifnet_detaching_dequeue();
                if (ifp != NULL) {
                        dlil_if_unlock();
                        ifnet_detach_final(ifp);
                        dlil_if_lock();
                }
        }
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
        dlil_if_lock();
        (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
            (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
        /*
         * msleep0() shouldn't have returned as PCATCH was not set;
         * therefore assert in this case.
         */
        dlil_if_unlock();
        VERIFY(0);
}

static void
ifnet_detach_final(struct ifnet *ifp)
{
        struct ifnet_filter *filter, *filter_next;
        struct ifnet_filter_head fhead;
        struct dlil_threading_info *inp;
        struct ifaddr *ifa;
        ifnet_detached_func if_free;
        int i;

        lck_mtx_lock(&ifp->if_ref_lock);
        if (!(ifp->if_refflags & IFRF_DETACHING)) {
                panic("%s: flags mismatch (detaching not set) ifp=%p",
                    __func__, ifp);
                /* NOTREACHED */
        }

        /*
         * Wait until the existing IO references get released
         * before we proceed with ifnet_detach.  This is not a
         * common case, so block without using a continuation.
         */
        while (ifp->if_refio > 0) {
                printf("%s: Waiting for IO references on %s interface "
                    "to be released\n", __func__, if_name(ifp));
                (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
                    (PZERO - 1), "ifnet_ioref_wait", NULL);
        }
        lck_mtx_unlock(&ifp->if_ref_lock);

        /* Drain and destroy send queue */
        ifclassq_teardown(ifp);

        /* Detach interface filters */
        lck_mtx_lock(&ifp->if_flt_lock);
        if_flt_monitor_enter(ifp);

        LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
        fhead = ifp->if_flt_head;
        TAILQ_INIT(&ifp->if_flt_head);

        for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
                filter_next = TAILQ_NEXT(filter, filt_next);
                lck_mtx_unlock(&ifp->if_flt_lock);

                dlil_detach_filter_internal(filter, 1);
                lck_mtx_lock(&ifp->if_flt_lock);
        }
        if_flt_monitor_leave(ifp);
        lck_mtx_unlock(&ifp->if_flt_lock);

        /* Tell upper layers to drop their network addresses */
        if_purgeaddrs(ifp);

        ifnet_lock_exclusive(ifp);

        /* Unplumb all protocols */
        for (i = 0; i < PROTO_HASH_SLOTS; i++) {
                struct if_proto *proto;

                proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
                while (proto != NULL) {
                        protocol_family_t family = proto->protocol_family;
                        ifnet_lock_done(ifp);
                        proto_unplumb(family, ifp);
                        ifnet_lock_exclusive(ifp);
                        proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
                }
                /* There should not be any protocols left */
                VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
        }
        zfree(dlif_phash_zone, ifp->if_proto_hash);
        ifp->if_proto_hash = NULL;

        /* Detach (permanent) link address from if_addrhead */
        ifa = TAILQ_FIRST(&ifp->if_addrhead);
        VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
        IFA_LOCK(ifa);
        if_detach_link_ifa(ifp, ifa);
        IFA_UNLOCK(ifa);

        /* Remove (permanent) link address from ifnet_addrs[] */
        IFA_REMREF(ifa);
        ifnet_addrs[ifp->if_index - 1] = NULL;

        /* This interface should not be on {ifnet_head,detaching} */
        VERIFY(ifp->if_link.tqe_next == NULL);
        VERIFY(ifp->if_link.tqe_prev == NULL);
        VERIFY(ifp->if_detaching_link.tqe_next == NULL);
        VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
        VERIFY(ifp->if_ordered_link.tqe_next == NULL);
        VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

        /* The slot should have been emptied */
        VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

        /* There should not be any addresses left */
        VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

        /*
         * Signal the starter thread to terminate itself.
         */
        if (ifp->if_start_thread != THREAD_NULL) {
                lck_mtx_lock_spin(&ifp->if_start_lock);
                ifp->if_start_flags = 0;
                ifp->if_start_thread = THREAD_NULL;
                wakeup_one((caddr_t)&ifp->if_start_thread);
                lck_mtx_unlock(&ifp->if_start_lock);
        }

        /*
         * Signal the poller thread to terminate itself.
         */
        if (ifp->if_poll_thread != THREAD_NULL) {
                lck_mtx_lock_spin(&ifp->if_poll_lock);
                ifp->if_poll_thread = THREAD_NULL;
                wakeup_one((caddr_t)&ifp->if_poll_thread);
                lck_mtx_unlock(&ifp->if_poll_lock);
        }

        /*
         * If thread affinity was set for the workloop thread, we will need
         * to tear down the affinity and release the extra reference count
         * taken at attach time.  Does not apply to lo0 or other interfaces
         * without dedicated input threads.
         */
        if ((inp = ifp->if_inp) != NULL) {
                VERIFY(inp != dlil_main_input_thread);

                if (inp->net_affinity) {
                        struct thread *tp, *wtp, *ptp;

                        lck_mtx_lock_spin(&inp->input_lck);
                        wtp = inp->wloop_thr;
                        inp->wloop_thr = THREAD_NULL;
                        ptp = inp->poll_thr;
                        inp->poll_thr = THREAD_NULL;
                        tp = inp->input_thr;	/* don't nullify now */
                        inp->tag = 0;
                        inp->net_affinity = FALSE;
                        lck_mtx_unlock(&inp->input_lck);

                        /* Tear down poll thread affinity */
                        if (ptp != NULL) {
                                VERIFY(ifp->if_eflags & IFEF_RXPOLL);
                                (void) dlil_affinity_set(ptp,
                                    THREAD_AFFINITY_TAG_NULL);
                                thread_deallocate(ptp);
                        }

                        /* Tear down workloop thread affinity */
                        if (wtp != NULL) {
                                (void) dlil_affinity_set(wtp,
                                    THREAD_AFFINITY_TAG_NULL);
                                thread_deallocate(wtp);
                        }

                        /* Tear down DLIL input thread affinity */
                        (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
                        thread_deallocate(tp);
                }

                /* disassociate ifp DLIL input thread */
                ifp->if_inp = NULL;

                /* tell the input thread to terminate */
                lck_mtx_lock_spin(&inp->input_lck);
                inp->input_waiting |= DLIL_INPUT_TERMINATE;
                if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
                        wakeup_one((caddr_t)&inp->input_waiting);
                }
                lck_mtx_unlock(&inp->input_lck);

                /* wait for the input thread to terminate */
                lck_mtx_lock_spin(&inp->input_lck);
                while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
                    == 0) {
                        (void) msleep(&inp->input_waiting, &inp->input_lck,
                            (PZERO - 1) | PSPIN, inp->input_name, NULL);
                }
                lck_mtx_unlock(&inp->input_lck);

                /* clean-up input thread state */
                dlil_clean_threading_info(inp);
        }

        /* The driver might unload, so point these to ourselves */
        if_free = ifp->if_free;
        ifp->if_output_dlil = ifp_if_output;
        ifp->if_output = ifp_if_output;
        ifp->if_pre_enqueue = ifp_if_output;
        ifp->if_start = ifp_if_start;
        ifp->if_output_ctl = ifp_if_ctl;
        ifp->if_input_dlil = ifp_if_input;
        ifp->if_input_poll = ifp_if_input_poll;
        ifp->if_input_ctl = ifp_if_ctl;
        ifp->if_ioctl = ifp_if_ioctl;
        ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
        ifp->if_free = ifp_if_free;
        ifp->if_demux = ifp_if_demux;
        ifp->if_event = ifp_if_event;
        ifp->if_framer_legacy = ifp_if_framer;
        ifp->if_framer = ifp_if_framer_extended;
        ifp->if_add_proto = ifp_if_add_proto;
        ifp->if_del_proto = ifp_if_del_proto;
        ifp->if_check_multi = ifp_if_check_multi;

        /* wipe out interface description */
        VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
        ifp->if_desc.ifd_len = 0;
        VERIFY(ifp->if_desc.ifd_desc != NULL);
        bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

        /* there shouldn't be any delegation by now */
        VERIFY(ifp->if_delegated.ifp == NULL);
        VERIFY(ifp->if_delegated.type == 0);
        VERIFY(ifp->if_delegated.family == 0);
        VERIFY(ifp->if_delegated.subfamily == 0);
        VERIFY(ifp->if_delegated.expensive == 0);

        /* QoS marking gets cleared */
        ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
        if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

        ifnet_lock_done(ifp);

#if PF
        /*
         * Detach this interface from packet filter, if enabled.
         */
        pf_ifnet_hook(ifp, 0);
#endif /* PF */

        /* Filter list should be empty */
        lck_mtx_lock_spin(&ifp->if_flt_lock);
        VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
        VERIFY(ifp->if_flt_busy == 0);
        VERIFY(ifp->if_flt_waiters == 0);
        lck_mtx_unlock(&ifp->if_flt_lock);

        /* Last chance to drain send queue */
        if_qflush(ifp, 0);

        /* Last chance to cleanup any cached route */
        lck_mtx_lock(&ifp->if_cached_route_lock);
        VERIFY(!ifp->if_fwd_cacheok);
        ROUTE_RELEASE(&ifp->if_fwd_route);
        bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
        ROUTE_RELEASE(&ifp->if_src_route);
        bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
        ROUTE_RELEASE(&ifp->if_src_route6);
        bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
        lck_mtx_unlock(&ifp->if_cached_route_lock);

        VERIFY(ifp->if_data_threshold == 0);
        VERIFY(ifp->if_dt_tcall != NULL);
        VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

        ifnet_llreach_ifdetach(ifp);

        dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

        /*
         * Finally, mark this ifnet as detached.
         */
        lck_mtx_lock_spin(&ifp->if_ref_lock);
        if (!(ifp->if_refflags & IFRF_DETACHING)) {
                panic("%s: flags mismatch (detaching not set) ifp=%p",
                    __func__, ifp);
                /* NOTREACHED */
        }
        ifp->if_refflags &= ~IFRF_DETACHING;
        lck_mtx_unlock(&ifp->if_ref_lock);
        if (if_free != NULL)
                if_free(ifp);

        if (dlil_verbose)
                printf("%s: detached\n", if_name(ifp));

        /* Release reference held during ifnet attach */
        ifnet_release(ifp);
}
9bccf70c 6553
errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return (ENXIO);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
#else
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}

__private_extern__
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len, or no uniqueid specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;
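	/*
	 * Resulting buffer layout (a sketch; widths not to scale):
	 *
	 *   buf                pbuf                base == dlifp1
	 *    |<--- padding --->|<- sizeof(void *) ->|<--- dlif_size --->|
	 *
	 * base is 64-bit aligned, and the pointer-sized word just below
	 * it (*pbuf) remembers the true start of the zalloc'ed block so
	 * the original allocation can be recovered at free time.
	 */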

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}

__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK. Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}

__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}

static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */

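/*
 * The cached source-route lookups below follow a copy-out/copy-in
 * discipline: the caller snapshots the cached route under
 * if_cached_route_lock, validates or refreshes it unlocked, and a
 * fresh lookup is donated back to the cache (reference and all) via
 * the copyin routine, but only while if_fwd_cacheok still allows it.
 */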
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return (src_rt.ro_rt);
}

#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */

void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}

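/*
 * Note: the ifnet_lock_done()/ifnet_lock_exclusive() pairing below
 * implies that if_rrc_state_update() is entered with the ifnet lock
 * held exclusively; the lock is dropped only around the kernel event
 * post and reacquired before returning.
 */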
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}

errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return (ENOTSUP);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);

	return (0);
}

void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}

errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}

/* for uuid.c */
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}

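/*
 * The sysctl handlers below all share one pattern: snapshot the
 * current value, let sysctl_handle_int()/sysctl_handle_quad() perform
 * the copy-out (and, on a set request, the copy-in), bail if this was
 * only a read, then validate or clamp the new value before publishing
 * it.
 */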
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}

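/*
 * Node presence/absence reporting.  The caller may pass either an
 * AF_LINK or an AF_INET6 sockaddr; nd6_alt_node_addr_decompose()
 * derives the companion representation so that both forms can be
 * carried in the kernel event and handed to ND.
 */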
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}

const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		}

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL)
		*sizep = size;
	return (bytes);
}

void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}

errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}

int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}

int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}

errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}

int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}

int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}

int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}

errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, 1);
	return (0);
}

void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}

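/*
 * Interface flow control: a driver whose transmit resources are
 * exhausted calls ifnet_disable_output(), which registers the ifnet's
 * flow hash in ifnet_fc_tree and marks the start path flow
 * controlled.  When the lower layer later issues a flow advisory for
 * that hash, ifnet_flowadv() looks the entry up and re-enables output.
 * A usage sketch (hypothetical driver code):
 *
 *	if (tx_ring_full)
 *		(void) ifnet_disable_output(ifp);
 *	...
 *	ifnet_flowadv(ifp->if_flowhash);	// TX space freed up
 */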
/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}

static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_noblock(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}

static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}

static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}
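
/*
 * A flow hash of zero is treated as "unset" (see the VERIFY in
 * ifnet_fc_add() above), which is why ifnet_calc_flowhash() reseeds
 * and retries rather than ever returning 0.
 */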
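/*
 * Per-address-family "network signature" accessors: a small opaque
 * blob (plus length) stashed in the AF-specific ifnet data; a zero
 * length clears it.
 */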
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}

int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL)
		*flags = 0;

	return (error);
}

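/*
 * NAT64 prefix slots.  Only the prefix lengths defined by RFC 6052
 * (32, 40, 48, 56, 64 or 96 bits) are accepted; a zero length clears
 * the corresponding slot.
 */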
#if INET6
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			/* Allow clearing the prefix */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0)
		necp_update_all_clients();

	return (error);
}

int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL)
		return (EINVAL);

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
			found_one = 1;
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes)
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));

out:
	if_inet6data_lock_done(ifp);

	return (error);
}
#endif /* INET6 */

static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}

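/*
 * Partial-checksum convention used below (a sketch of what the code
 * relies on): csum_rx_val carries the 16-bit one's complement sum of
 * the packet computed from csum_rx_start bytes into the frame; later
 * consumers adjust that sum (cf. m_adj_sum16()) instead of walking
 * the packet again.
 */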
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware
		 * implementations that perform their 16-bit 1's complement
		 * sum beginning at various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}

static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {

	}
	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}


#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t	init;
	uint16_t	len;
	uint16_t	sumr;	/* reference */
	uint16_t	sumrp;	/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))

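/*
 * Self-test for the sum16 routines: for each test length, sweep all
 * eight byte alignments and check m_sum16() (once by moving the data
 * pointer, once by offset) and b_sum16() against a reference sum
 * produced by in_cksum_mbuf_ref().
 */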
8522static void
8523dlil_verify_sum16(void)
8524{
8525 struct mbuf *m;
8526 uint8_t *buf;
8527 int n;
8528
8529 /* Make sure test data plus extra room for alignment fits in cluster */
8530 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
8531
5ba3f43e
A
8532 kprintf("DLIL: running SUM16 self-tests ... ");
8533
39236c6e
A
8534 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
8535 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
8536 buf = mtod(m, uint8_t *); /* base address */
8537
8538 for (n = 0; n < SUMTBL_MAX; n++) {
8539 uint16_t len = sumtbl[n].len;
8540 int i;
8541
8542 /* Verify for all possible alignments */
8543 for (i = 0; i < (int)sizeof (uint64_t); i++) {
5ba3f43e 8544 uint16_t sum, sumr;
39236c6e
A
8545 uint8_t *c;
8546
8547 /* Copy over test data to mbuf */
8548 VERIFY(len <= sizeof (sumdata));
8549 c = buf + i;
8550 bcopy(sumdata, c, len);
8551
8552 /* Zero-offset test (align by data pointer) */
8553 m->m_data = (caddr_t)c;
8554 m->m_len = len;
8555 sum = m_sum16(m, 0, len);
8556
5ba3f43e
A
8557 if (!sumtbl[n].init) {
8558 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
8559 sumtbl[n].sumr = sumr;
8560 sumtbl[n].init = TRUE;
8561 } else {
8562 sumr = sumtbl[n].sumr;
8563 }
8564
39236c6e 8565 /* Something is horribly broken; stop now */
5ba3f43e
A
8566 if (sumr != sumtbl[n].sumrp) {
8567 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
8568 "for len=%d align=%d sum=0x%04x "
8569 "[expected=0x%04x]\n", __func__,
8570 len, i, sum, sumr);
8571 /* NOTREACHED */
8572 } else if (sum != sumr) {
8573 panic_plain("\n%s: broken m_sum16() for len=%d "
8574 "align=%d sum=0x%04x [expected=0x%04x]\n",
8575 __func__, len, i, sum, sumr);
39236c6e
A
8576 /* NOTREACHED */
8577 }
8578
8579 /* Alignment test by offset (fixed data pointer) */
8580 m->m_data = (caddr_t)buf;
8581 m->m_len = i + len;
8582 sum = m_sum16(m, i, len);
8583
8584 /* Something is horribly broken; stop now */
5ba3f43e
A
8585 if (sum != sumr) {
8586 panic_plain("\n%s: broken m_sum16() for len=%d "
8587 "offset=%d sum=0x%04x [expected=0x%04x]\n",
8588 __func__, len, i, sum, sumr);
39236c6e
A
8589 /* NOTREACHED */
8590 }
#if INET
			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */

#define	CASE_STRINGIFY(x)	case x: return #x

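/*
 * CASE_STRINGIFY(KEV_DL_LINK_ON), for example, expands to
 *
 *	case KEV_DL_LINK_ON: return "KEV_DL_LINK_ON";
 *
 * so the switch below maps each recognized event code to its own
 * symbolic name, while unknown codes fall through to "".
 */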
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}

/*
 * The sysctl name arguments mirror those of
 * ifnet_get_local_ports_extended():
 *	name[0]: ifindex
 *	name[1]: protocol
 *	name[2]: flags
 */
static int
sysctl_get_ports_used SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	int namelen = arg2;
	int error = 0;
	int idx;
	protocol_family_t protocol;
	u_int32_t flags;
	ifnet_t ifp = NULL;
	u_int8_t *bitfield = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = ENOENT;
		goto done;
	}

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = bitstr_size(65536);
		goto done;
	}
	if (req->oldlen < bitstr_size(65536)) {
		error = ENOMEM;
		goto done;
	}

	idx = name[0];
	protocol = name[1];
	flags = name[2];

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK | M_ZERO);
	if (bitfield == NULL) {
		error = ENOMEM;
		goto done;
	}
	error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
	if (error != 0) {
		printf("%s: ifnet_get_local_ports_extended() error %d\n",
		    __func__, error);
		goto done;
	}
	error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
done:
	if (bitfield != NULL)
		_FREE(bitfield, M_TEMP);
	return (error);
}
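
/*
 * A sketch of how a consumer might walk the bitfield that this
 * handler copies out, using the <sys/bitstring.h> macros
 * (illustrative only; "ports" stands for the bitstr_size(65536)-byte
 * buffer returned above):
 *
 *	bitstr_t *ports = (bitstr_t *)buffer;
 *	int port;
 *
 *	for (port = 0; port < 65536; port++) {
 *		if (bit_test(ports, port))
 *			printf("local port %d in use\n", port);
 *	}
 */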
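/*
 * Thread call invoked once an interface's data threshold is crossed;
 * it takes an I/O reference on the interface and relays the event to
 * NetworkStatistics via nstat_ifnet_threshold_reached().
 */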
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}

void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
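
/*
 * Worked example of the rate-limiting above, with an assumed
 * threshold_interval of 2 seconds: OSCompareAndSwap64() lets only
 * the first caller that observes the crossing update if_dt_bytes,
 * and clock_deadline_for_periodic_event() places the thread call on
 * the next 2-second boundary, so NetworkStatistics sees at most one
 * notification per interval regardless of how many bytes flow in
 * the meantime.
 */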
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name carries the input parameters of
 * ifnet_get_keepalive_offload_frames():
 *	name[0]: ifp (interface index)
 *	name[1]: frames_array_count
 *	name[2]: frame_data_offset
 * The returned length gives used_frames_count.
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root may look at other processes' TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);
	return (error);
}
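
/*
 * A sketch of the buffer arithmetic a caller performs against this
 * handler (illustrative; "oldlen_out" is a hypothetical name for the
 * length sysctl() reports back):
 *
 *	size_t buflen = frames_array_count *
 *	    sizeof (struct ifnet_keepalive_offload_frame);
 *	...
 *	used_frames_count = oldlen_out /
 *	    sizeof (struct ifnet_keepalive_offload_frame);
 */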
#endif /* DEVELOPMENT || DEBUG */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}

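/*
 * Thread call used for receive mitigation: when it fires, mark the
 * interface's input thread as having work pending and wake it so any
 * packets accumulated in rcvq_pkts are drained.
 */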
static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}