/*
 * Copyright (c) 1999-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <stddef.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>

#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#endif /* INET */

#if INET6
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#endif /* INET6 */

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#include <net/pktsched/pktsched.h>

#if NECP
#include <net/necp.h>
#endif /* NECP */


#define DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE	4 /* LONGWORDS */
#define MAX_LINKADDR		4 /* LONGWORDS */
#define M_NKE			M_IFADDR

#if 1
#define DLIL_PRINTF	printf
#else
#define DLIL_PRINTF	kprintf
#endif

#define IF_DATA_REQUIRE_ALIGNED_64(f) \
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
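
/*
 * Illustration (hypothetical offsets): _CASSERT turns the alignment
 * requirement into a compile-time check.  If ifi_ibytes sat, say, at
 * offset 40 within struct if_data_internal, 40 % sizeof (u_int64_t) == 0
 * and the assertion vanishes; at offset 44 the modulo is non-zero and
 * compilation fails, catching a field that 64-bit atomic updates could
 * otherwise tear at runtime.
 */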

enum {
	kProtoKPI_v1	= 1,
	kProtoKPI_v2	= 2
};

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};
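
/*
 * Sketch of how this union is filled (per the attach paths later in this
 * file): ifnet_attach_protocol() sets proto_kpi = kProtoKPI_v1 and copies
 * the caller's handlers into kpi.v1, while ifnet_attach_protocol_v2()
 * fills kpi.v2; the two layouts differ only in the input callback, which
 * in v2 takes a chain of packets rather than a single mbuf.
 */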

SLIST_HEAD(proto_hash_entry, if_proto);

#define DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG	0x4	/* has debugging info */

#define IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

/* For gdb */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};
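
/*
 * Illustration of the circular history: with IF_REF_TRACE_HIST_SIZE == 8,
 * the Nth recorded hold presumably lands in dldbg_if_refhold[N % 8], so
 * the arrays always retain the 8 most recent ifnet_reference() and
 * ifnet_release() callers while the 16-bit counters keep running totals.
 */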

#define DLIL_TO_IFP(s)	(&s->dl_if)
#define IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)

struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)	filt_next;
	u_int32_t			filt_skip;
	u_int32_t			filt_flags;
	ifnet_t				filt_ifp;
	const char			*filt_name;
	void				*filt_cookie;
	protocol_family_t		filt_protocol;
	iff_input_func			filt_input;
	iff_output_func			filt_output;
	iff_event_func			filt_event;
	iff_ioctl_func			filt_ioctl;
	iff_detached_func		filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	/* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	/* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	/* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	/* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;

static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);

#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static errno_t ifnet_enqueue_common(struct ifnet *, void *,
    classq_pkt_type_t, boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */

/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX	32
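
/*
 * How these pieces presumably fit together (implemented later in this
 * file): ifnet_calc_flowhash() hashes an ifnet_flowhash_key built from
 * the interface's name, unit and flag words, seeded by
 * ifnet_flowhash_seed; ifnet_fc_add() files the interface in
 * ifnet_fc_tree under that 32-bit hash, so a flow-advisory event that
 * carries only the hash can be mapped back to its ifnet via
 * ifnet_fc_get().
 */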

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };

SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN	32
#define IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS	0 /* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");

#define HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;		/* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;		/* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");


unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);

uint32_t dlil_rcv_mit_pkts_min = 5;
uint32_t dlil_rcv_mit_pkts_max = 64;
uint32_t dlil_rcv_mit_interval = (500 * 1000);

#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
#endif /* DEVELOPMENT || DEBUG */


#define DLIL_INPUT_CHECK(m, ifp) { \
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) { \
		panic_plain("%s: invalid mbuf %p\n", __func__, m); \
		/* NOTREACHED */ \
	} \
}
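
/*
 * For example, the check fires on an mbuf that lacks M_PKTHDR, has no
 * receive interface recorded, or (except when processing on behalf of
 * lo_ifp, which preserves the original rcvif) names a receive interface
 * other than the one it was queued to; each indicates a corrupted or
 * misrouted packet, hence the panic rather than a recoverable error.
 */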

#define DLIL_EWMA(old, new, decay) do { \
	u_int32_t _avg; \
	if ((_avg = (old)) > 0) \
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
	else \
		_avg = (new); \
	(old) = _avg; \
} while (0)
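
/*
 * Worked example: DLIL_EWMA(avg, 20, 2) with avg == 100 computes
 * ((100 << 2) - 100 + 20) >> 2 == 320 >> 2 == 80; the old average is
 * weighted (2^decay - 1)/2^decay == 3/4 and the new sample
 * 1/2^decay == 1/4, using only shifts and adds.
 */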

#define MBPS	(1ULL * 1000 * 1000)
#define GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
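
/*
 * Reading the table: the rxpoll parameter-update code (later in this
 * file) presumably scans rows until the interface's downlink rate is
 * covered and adopts that row's watermarks.  A 100 Mbps link, for
 * instance, would get plowat/phiwat of 10/40 packets and blowat/bhiwat
 * of 4 KB/64 KB; the all-zero row terminates the scan.
 */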

int
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}
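
/*
 * The "hash" is really a fixed four-bucket map: PF_INET -> 0,
 * PF_INET6 -> 1, PF_VLAN -> 2, and every other family (including
 * PF_UNSPEC) shares bucket 3.  PROTO_HASH_SLOTS must therefore be at
 * least 4; buckets 0-2 hold at most one if_proto per interface, while
 * bucket 3 chains all remaining attached families.
 */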

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}
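
/*
 * Caller sketch (illustrative): the entry is returned with a reference
 * that must eventually be dropped via if_proto_free(), e.g.:
 *
 *	ifnet_lock_shared(ifp);
 *	proto = find_attached_proto(ifp, PF_INET);
 *	ifnet_lock_done(ifp);
 *	if (proto != NULL) {
 *		... use proto->kpi ...
 *		if_proto_free(proto);
 *	}
 */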

static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)
		return;

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}

__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass)
		LCK_RW_ASSERT(&ifp->if_lock, type);
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}
#endif

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
#endif

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}

/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 *
 * Note:
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
	u_int32_t	count = 0;
	int		i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return (count);
}

__private_extern__ u_int32_t
if_get_protolist(struct ifnet *ifp, u_int32_t *protolist, u_int32_t count)
{
	ifnet_lock_shared(ifp);
	count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return (count);
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	_FREE(list, M_TEMP);
}
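
/*
 * Typical two-pass usage (a sketch; the _MALLOC sizing is illustrative):
 * call once with a NULL list to learn the count, then fetch the list:
 *
 *	u_int32_t n = if_get_protolist(ifp, NULL, 0);
 *	u_int32_t *list = _MALLOC(n * sizeof (*list), M_TEMP, M_WAITOK);
 *	if (list != NULL) {
 *		n = if_get_protolist(ifp, list, n);
 *		... consume up to n families ...
 *		if_free_protolist(list);
 *	}
 *
 * A protocol may attach or detach between the two calls, which is why
 * the second call returns the count again.
 */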

__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;
	/* Don't update interface generation for quality and RRC state changes */
	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
	    event_code != KEV_DL_RRC_STATE_CHANGED));

	dlil_event_internal(ifp, &ev_msg, update_generation);
}
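
/*
 * Example caller (mirroring if_proto_free() above): posting
 * KEV_DL_PROTO_DETACHED with a kev_dl_proto_data payload, whose leading
 * member is the required struct net_event_data:
 *
 *	struct kev_dl_proto_data ev_pr_data;
 *	ev_pr_data.proto_family = proto_family;
 *	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
 *	    (struct net_event_data *)&ev_pr_data, sizeof (ev_pr_data));
 */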

__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return (ret);
}
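
/*
 * Buffer layout used above (illustration, LP64): the zone element is
 * sized with enough headroom that an 8-byte-aligned base and a stashed
 * back-pointer both fit:
 *
 *	buf . . . . . . . pbuf == base - 8 . . base (8-byte aligned)
 *	| ...headroom... | original buf ptr  | tcpstat_local ...    |
 *
 * The error path retrieves the original allocation address from *pbuf
 * before zfree(), which is why base is wound back a pointer size.
 */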

static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}
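
/*
 * Concretely, an interface named "en0" gets a thread named
 * "en0_input_poll" when opportunistic polling (IFEF_RXPOLL) is in
 * effect and "en0_input" otherwise; the main input thread, created
 * once from dlil_init() with a NULL ifp, is named "main_input" and
 * serves interfaces that have no dedicated input thread.
 */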
1332
5ba3f43e
A
1333#if TEST_INPUT_THREAD_TERMINATION
1334static int
1335sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
316670eb 1336{
5ba3f43e
A
1337#pragma unused(arg1, arg2)
1338 uint32_t i;
1339 int err;
316670eb 1340
5ba3f43e 1341 i = if_input_thread_termination_spin;
316670eb 1342
5ba3f43e
A
1343 err = sysctl_handle_int(oidp, &i, 0, req);
1344 if (err != 0 || req->newptr == USER_ADDR_NULL)
1345 return (err);
1346
1347 if (net_rxpoll == 0)
1348 return (ENXIO);
316670eb 1349
5ba3f43e
A
1350 if_input_thread_termination_spin = i;
1351 return (err);
1352}
1353#endif /* TEST_INPUT_THREAD_TERMINATION */
1354
1355static void
1356dlil_clean_threading_info(struct dlil_threading_info *inp)
1357{
316670eb
A
1358 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1359 lck_grp_free(inp->lck_grp);
1360
1361 inp->input_waiting = 0;
1362 inp->wtot = 0;
1363 bzero(inp->input_name, sizeof (inp->input_name));
316670eb
A
1364 inp->ifp = NULL;
1365 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1366 qlimit(&inp->rcvq_pkts) = 0;
1367 bzero(&inp->stats, sizeof (inp->stats));
1368
1369 VERIFY(!inp->net_affinity);
1370 inp->input_thr = THREAD_NULL;
1371 VERIFY(inp->wloop_thr == THREAD_NULL);
1372 VERIFY(inp->poll_thr == THREAD_NULL);
1373 VERIFY(inp->tag == 0);
1374
1375 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1376 bzero(&inp->tstats, sizeof (inp->tstats));
1377 bzero(&inp->pstats, sizeof (inp->pstats));
1378 bzero(&inp->sstats, sizeof (inp->sstats));
1379
1380 net_timerclear(&inp->mode_holdtime);
1381 net_timerclear(&inp->mode_lasttime);
1382 net_timerclear(&inp->sample_holdtime);
1383 net_timerclear(&inp->sample_lasttime);
1384 net_timerclear(&inp->dbg_lasttime);
1385
1386#if IFNET_INPUT_SANITY_CHK
1387 inp->input_mbuf_cnt = 0;
1388#endif /* IFNET_INPUT_SANITY_CHK */
5ba3f43e 1389}
316670eb 1390
5ba3f43e
A
1391static void
1392dlil_terminate_input_thread(struct dlil_threading_info *inp)
1393{
1394 struct ifnet *ifp = inp->ifp;
1395
1396 VERIFY(current_thread() == inp->input_thr);
1397 VERIFY(inp != dlil_main_input_thread);
1398
1399 OSAddAtomic(-1, &cur_dlil_input_threads);
1400
1401#if TEST_INPUT_THREAD_TERMINATION
1402 { /* do something useless that won't get optimized away */
1403 uint32_t v = 1;
1404 for (uint32_t i = 0;
1405 i < if_input_thread_termination_spin;
1406 i++) {
1407 v = (i + 1) * v;
1408 }
1409 printf("the value is %d\n", v);
316670eb 1410 }
5ba3f43e
A
1411#endif /* TEST_INPUT_THREAD_TERMINATION */
1412
1413 lck_mtx_lock_spin(&inp->input_lck);
1414 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1415 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1416 wakeup_one((caddr_t)&inp->input_waiting);
1417 lck_mtx_unlock(&inp->input_lck);
316670eb
A
1418
1419 /* for the extra refcnt from kernel_thread_start() */
1420 thread_deallocate(current_thread());
1421
5ba3f43e
A
1422 if (dlil_verbose) {
1423 printf("%s: input thread terminated\n",
1424 if_name(ifp));
1425 }
1426
316670eb
A
1427 /* this is the end */
1428 thread_terminate(current_thread());
1429 /* NOTREACHED */
1430}
1431
2d21ac55
A
1432static kern_return_t
1433dlil_affinity_set(struct thread *tp, u_int32_t tag)
1434{
1435 thread_affinity_policy_data_t policy;
1436
1437 bzero(&policy, sizeof (policy));
1438 policy.affinity_tag = tag;
1439 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1440 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1441}
1442
91447636
A
1443void
1444dlil_init(void)
1445{
6d2010ae
A
1446 thread_t thread = THREAD_NULL;
1447
1448 /*
1449 * The following fields must be 64-bit aligned for atomic operations.
1450 */
1451 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1452 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1453 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1454 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1455 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1456 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1457 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1458 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1459 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1460 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1461 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1462 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1463 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1464 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1465 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1466
1467 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1468 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1469 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1470 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1471 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1472 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1473 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1474 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1475 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1476 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1477 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1478 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1479 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1480 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1481 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1482
1483 /*
1484 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1485 */
1486 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1487 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1488 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1489 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1490 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
39236c6e
A
1491 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1492 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1493 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1494 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
5ba3f43e 1495 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
6d2010ae
A
1496 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1497 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1498 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1499 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1500
39236c6e
A
1501 /*
1502 * ... as well as the mbuf checksum flags counterparts.
1503 */
1504 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1505 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1506 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1507 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1508 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1509 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1510 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1511 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1512 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
5ba3f43e 1513 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
39236c6e
A
1514 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1515
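	/*
	 * Aside (added for clarity): _CASSERT is a compile-time assertion,
	 * so the equivalences above cost nothing at runtime.  Conceptually
	 * it acts like C11 _Static_assert; a minimal stand-in would be
	 *
	 *	#define MY_CASSERT(x)	_Static_assert((x), #x)
	 *
	 * Because the IF_HWASSIST_*, IFNET_* and CSUM_* namespaces are
	 * proven bit-identical here, later code can copy flags between
	 * them with plain assignments instead of per-bit translation.
	 */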
6d2010ae
A
1516 /*
1517 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1518 */
1519 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
316670eb 1520 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
6d2010ae 1521
39236c6e
A
1522 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1523 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1524 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1525 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1526
1527 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1528 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1529 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1530
1531 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1532 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1533 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1534 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1535 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1536 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1537 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1538 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1539 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1540 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1541 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1542 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1543 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1544 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1545 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1546 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1547
1548 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1549 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1550 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1551 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1552 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
fe8ab488 1553 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
39037602 1554 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
39236c6e
A
1555
1556 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1557 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1558
6d2010ae
A
1559 PE_parse_boot_argn("net_affinity", &net_affinity,
1560 sizeof (net_affinity));
b0d623f7 1561
316670eb
A
1562 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1563
d1ecb069 1564 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
6d2010ae
A
1565
1566 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1567
1568 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1569 sizeof (struct dlil_ifnet_dbg);
1570 /* Enforce 64-bit alignment for dlil_ifnet structure */
1571 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1572 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1573 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1574 0, DLIF_ZONE_NAME);
1575 if (dlif_zone == NULL) {
316670eb
A
1576 panic_plain("%s: failed allocating %s", __func__,
1577 DLIF_ZONE_NAME);
6d2010ae
A
1578 /* NOTREACHED */
1579 }
1580 zone_change(dlif_zone, Z_EXPAND, TRUE);
1581 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1582
1583 dlif_filt_size = sizeof (struct ifnet_filter);
1584 dlif_filt_zone = zinit(dlif_filt_size,
1585 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1586 if (dlif_filt_zone == NULL) {
316670eb 1587 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1588 DLIF_FILT_ZONE_NAME);
1589 /* NOTREACHED */
1590 }
1591 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1592 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1593
6d2010ae
A
1594 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1595 dlif_phash_zone = zinit(dlif_phash_size,
1596 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1597 if (dlif_phash_zone == NULL) {
316670eb 1598 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1599 DLIF_PHASH_ZONE_NAME);
1600 /* NOTREACHED */
1601 }
1602 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1603 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1604
1605 dlif_proto_size = sizeof (struct if_proto);
1606 dlif_proto_zone = zinit(dlif_proto_size,
1607 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1608 if (dlif_proto_zone == NULL) {
316670eb 1609 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1610 DLIF_PROTO_ZONE_NAME);
1611 /* NOTREACHED */
1612 }
1613 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1614 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1615
316670eb
A
1616 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1617 /* Enforce 64-bit alignment for tcpstat_local structure */
1618 dlif_tcpstat_bufsize =
1619 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1620 dlif_tcpstat_bufsize =
1621 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1622 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1623 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1624 DLIF_TCPSTAT_ZONE_NAME);
1625 if (dlif_tcpstat_zone == NULL) {
1626 panic_plain("%s: failed allocating %s", __func__,
1627 DLIF_TCPSTAT_ZONE_NAME);
1628 /* NOTREACHED */
1629 }
1630 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1631 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1632
1633 dlif_udpstat_size = sizeof (struct udpstat_local);
1634 /* Enforce 64-bit alignment for udpstat_local structure */
1635 dlif_udpstat_bufsize =
1636 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1637 dlif_udpstat_bufsize =
1638 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1639 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1640 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1641 DLIF_UDPSTAT_ZONE_NAME);
1642 if (dlif_udpstat_zone == NULL) {
1643 panic_plain("%s: failed allocating %s", __func__,
1644 DLIF_UDPSTAT_ZONE_NAME);
1645 /* NOTREACHED */
1646 }
1647 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1648 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1649
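	/*
	 * Worked example (added for clarity; sizes are hypothetical): with
	 * a 44-byte tcpstat_local on LP64, dlif_tcpstat_bufsize becomes
	 * P2ROUNDUP(44 + 8 + 8, 8) = 64 bytes: room to stash the original
	 * zalloc() pointer, plus slack so the payload can always be slid
	 * up to an 8-byte boundary.  The allocation side of this
	 * convention is roughly:
	 *
	 *	buf = zalloc(zone);
	 *	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (void *),
	 *	    sizeof (u_int64_t));
	 *	((void **)base)[-1] = buf;	<- remembered for zfree()
	 */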
6d2010ae 1650 ifnet_llreach_init();
5ba3f43e 1651 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
d1ecb069 1652
91447636 1653 TAILQ_INIT(&dlil_ifnet_head);
91447636 1654 TAILQ_INIT(&ifnet_head);
6d2010ae 1655 TAILQ_INIT(&ifnet_detaching_head);
39037602 1656 TAILQ_INIT(&ifnet_ordered_head);
6d2010ae 1657
91447636 1658 /* Setup the lock groups we will use */
2d21ac55 1659 dlil_grp_attributes = lck_grp_attr_alloc_init();
91447636 1660
316670eb 1661 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
6d2010ae
A
1662 dlil_grp_attributes);
1663 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1664 dlil_grp_attributes);
1665 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1666 dlil_grp_attributes);
316670eb
A
1667 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1668 dlil_grp_attributes);
1669 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
6d2010ae
A
1670 dlil_grp_attributes);
1671
91447636 1672 /* Setup the lock attributes we will use */
2d21ac55 1673 dlil_lck_attributes = lck_attr_alloc_init();
6d2010ae 1674
91447636 1675 ifnet_lock_attr = lck_attr_alloc_init();
6d2010ae
A
1676
1677 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1678 dlil_lck_attributes);
1679 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1680
39236c6e
A
1681 /* Setup interface flow control related items */
1682 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
316670eb 1683
39236c6e
A
1684 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1685 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1686 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1687 if (ifnet_fc_zone == NULL) {
1688 panic_plain("%s: failed allocating %s", __func__,
1689 IFNET_FC_ZONE_NAME);
1690 /* NOTREACHED */
1691 }
1692 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1693 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
6d2010ae 1694
39236c6e 1695 /* Initialize interface address subsystem */
6d2010ae 1696 ifa_init();
39236c6e
A
1697
1698#if PF
1699 /* Initialize the packet filter */
1700 pfinit();
1701#endif /* PF */
1702
1703 /* Initialize queue algorithms */
1704 classq_init();
1705
1706 /* Initialize packet schedulers */
1707 pktsched_init();
1708
1709 /* Initialize flow advisory subsystem */
1710 flowadv_init();
1711
1712 /* Initialize the pktap virtual interface */
1713 pktap_init();
1714
39037602
A
1715 /* Initialize the service class to dscp map */
1716 net_qos_map_init();
1717
a39ff7e2
A
1718 /* Initialize the interface port list */
1719 if_ports_used_init();
1720
5ba3f43e 1721#if DEBUG || DEVELOPMENT
39236c6e
A
1722 /* Run self-tests */
1723 dlil_verify_sum16();
5ba3f43e
A
1724#endif /* DEBUG || DEVELOPMENT */
1725
1726 /* Initialize link layer table */
1727 lltable_glbl_init();
39236c6e 1728
91447636 1729 /*
316670eb
A
1730 * Create and start up the main DLIL input thread and the interface
1731 * detacher threads once everything is initialized.
91447636 1732 */
316670eb 1733 dlil_create_input_thread(NULL, dlil_main_input_thread);
2d21ac55 1734
316670eb
A
1735 if (kernel_thread_start(ifnet_detacher_thread_func,
1736 NULL, &thread) != KERN_SUCCESS) {
1737 panic_plain("%s: couldn't create detacher thread", __func__);
6d2010ae
A
1738 /* NOTREACHED */
1739 }
b0d623f7 1740 thread_deallocate(thread);
5ba3f43e 1741
91447636 1742}
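/*
 * Aside (added for clarity): the PE_parse_boot_argn() calls above make
 * these knobs settable from the boot-args nvram variable, e.g.
 *
 *	nvram boot-args="net_rxpoll=0 net_affinity=0 ifnet_debug=1"
 *
 * (illustrative; assumes a kernel configuration that honors these
 * tunables).
 */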
1c79356b 1743
6d2010ae
A
1744static void
1745if_flt_monitor_busy(struct ifnet *ifp)
1746{
5ba3f43e 1747 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1748
1749 ++ifp->if_flt_busy;
1750 VERIFY(ifp->if_flt_busy != 0);
1751}
1752
1753static void
1754if_flt_monitor_unbusy(struct ifnet *ifp)
1755{
1756 if_flt_monitor_leave(ifp);
1757}
1758
1759static void
1760if_flt_monitor_enter(struct ifnet *ifp)
1761{
5ba3f43e 1762 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1763
1764 while (ifp->if_flt_busy) {
1765 ++ifp->if_flt_waiters;
1766 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1767 (PZERO - 1), "if_flt_monitor", NULL);
1768 }
1769 if_flt_monitor_busy(ifp);
1770}
1771
1772static void
1773if_flt_monitor_leave(struct ifnet *ifp)
1774{
5ba3f43e 1775 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1776
1777 VERIFY(ifp->if_flt_busy != 0);
1778 --ifp->if_flt_busy;
1779
1780 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1781 ifp->if_flt_waiters = 0;
1782 wakeup(&ifp->if_flt_head);
1783 }
1784}
1785
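/*
 * Usage sketch (illustrative only): the intended choreography around the
 * busy/waiter monitor above.  An updater enters the monitor to shut out
 * concurrent list mutation, may drop if_flt_lock while it works, and
 * finally leaves the monitor to release any waiters.
 */
static void
if_flt_update_sketch(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);	/* may sleep until not busy */

	/* ... mutate ifp->if_flt_head; if_flt_lock may be dropped ... */

	if_flt_monitor_leave(ifp);	/* wakes threads parked in enter */
	lck_mtx_unlock(&ifp->if_flt_lock);
}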
2d21ac55 1786__private_extern__ int
6d2010ae 1787dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
39236c6e 1788 interface_filter_t *filter_ref, u_int32_t flags)
6d2010ae
A
1789{
1790 int retval = 0;
1791 struct ifnet_filter *filter = NULL;
9bccf70c 1792
6d2010ae
A
1793 ifnet_head_lock_shared();
1794 /* Check that the interface is in the global list */
1795 if (!ifnet_lookup(ifp)) {
1796 retval = ENXIO;
1797 goto done;
1798 }
1799
1800 filter = zalloc(dlif_filt_zone);
1801 if (filter == NULL) {
1802 retval = ENOMEM;
1803 goto done;
1804 }
1805 bzero(filter, dlif_filt_size);
1806
1807 /* refcnt held above during lookup */
39236c6e 1808 filter->filt_flags = flags;
91447636
A
1809 filter->filt_ifp = ifp;
1810 filter->filt_cookie = if_filter->iff_cookie;
1811 filter->filt_name = if_filter->iff_name;
1812 filter->filt_protocol = if_filter->iff_protocol;
743345f9
A
1813 /*
1814 * Do not install filter callbacks for internal coproc interface
1815 */
1816 if (!IFNET_IS_INTCOPROC(ifp)) {
1817 filter->filt_input = if_filter->iff_input;
1818 filter->filt_output = if_filter->iff_output;
1819 filter->filt_event = if_filter->iff_event;
1820 filter->filt_ioctl = if_filter->iff_ioctl;
1821 }
91447636 1822 filter->filt_detached = if_filter->iff_detached;
6d2010ae
A
1823
1824 lck_mtx_lock(&ifp->if_flt_lock);
1825 if_flt_monitor_enter(ifp);
1826
5ba3f43e 1827 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 1828 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
6d2010ae
A
1829
1830 if_flt_monitor_leave(ifp);
1831 lck_mtx_unlock(&ifp->if_flt_lock);
1832
91447636 1833 *filter_ref = filter;
b0d623f7
A
1834
1835 /*
1836 * Bump filter count and route_generation ID to let TCP
1837 * know it shouldn't do TSO on this connection
1838 */
39236c6e
A
1839 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1840 OSAddAtomic(1, &dlil_filter_disable_tso_count);
b0d623f7 1841 routegenid_update();
39236c6e 1842 }
5ba3f43e
A
1843 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1844 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1845 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1846 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1847 }
6d2010ae 1848 if (dlil_verbose) {
39236c6e
A
1849 printf("%s: %s filter attached\n", if_name(ifp),
1850 if_filter->iff_name);
6d2010ae
A
1851 }
1852done:
1853 ifnet_head_done();
1854 if (retval != 0 && ifp != NULL) {
39236c6e
A
1855 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1856 if_name(ifp), if_filter->iff_name, retval);
6d2010ae
A
1857 }
1858 if (retval != 0 && filter != NULL)
1859 zfree(dlif_filt_zone, filter);
1860
1861 return (retval);
1c79356b
A
1862}
1863
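/*
 * Usage sketch (illustrative): the public attach path for this code is
 * the interface-filter KPI in net/kpi_interfacefilter.h, whose
 * iflt_attach() lands here.  The filter name and callbacks below are
 * hypothetical; a matching iflt_detach(my_filt_ref) would undo this.
 */
static errno_t
my_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, data, frame_ptr)
	return (0);	/* 0 lets the packet continue up the stack */
}

static interface_filter_t my_filt_ref;

static errno_t
my_filter_attach_sketch(ifnet_t ifp)
{
	struct iff_filter filt;

	bzero(&filt, sizeof (filt));
	filt.iff_name = "com.example.myfilter";	/* hypothetical */
	filt.iff_input = my_iff_input;
	return (iflt_attach(ifp, &filt, &my_filt_ref));
}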
91447636 1864static int
6d2010ae 1865dlil_detach_filter_internal(interface_filter_t filter, int detached)
1c79356b 1866{
91447636 1867 int retval = 0;
6d2010ae 1868
3a60a9f5 1869 if (detached == 0) {
6d2010ae
A
1870 ifnet_t ifp = NULL;
1871
3a60a9f5
A
1872 ifnet_head_lock_shared();
1873 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6d2010ae
A
1874 interface_filter_t entry = NULL;
1875
1876 lck_mtx_lock(&ifp->if_flt_lock);
3a60a9f5 1877 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
6d2010ae
A
1878 if (entry != filter || entry->filt_skip)
1879 continue;
1880 /*
1881 * We've found a match; since it's possible
1882 * that the thread gets blocked in the monitor,
1883 * we do the lock dance. Interface should
1884 * not be detached since we still have a use
1885 * count held during filter attach.
1886 */
1887 entry->filt_skip = 1; /* skip input/output */
1888 lck_mtx_unlock(&ifp->if_flt_lock);
1889 ifnet_head_done();
1890
1891 lck_mtx_lock(&ifp->if_flt_lock);
1892 if_flt_monitor_enter(ifp);
5ba3f43e 1893 LCK_MTX_ASSERT(&ifp->if_flt_lock,
6d2010ae
A
1894 LCK_MTX_ASSERT_OWNED);
1895
1896 /* Remove the filter from the list */
1897 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1898 filt_next);
1899
1900 if_flt_monitor_leave(ifp);
1901 lck_mtx_unlock(&ifp->if_flt_lock);
1902 if (dlil_verbose) {
39236c6e
A
1903 printf("%s: %s filter detached\n",
1904 if_name(ifp), filter->filt_name);
6d2010ae
A
1905 }
1906 goto destroy;
3a60a9f5 1907 }
6d2010ae 1908 lck_mtx_unlock(&ifp->if_flt_lock);
3a60a9f5
A
1909 }
1910 ifnet_head_done();
6d2010ae
A
1911
1912 /* filter parameter is not a valid filter ref */
1913 retval = EINVAL;
1914 goto done;
3a60a9f5 1915 }
6d2010ae
A
1916
1917 if (dlil_verbose)
1918 printf("%s filter detached\n", filter->filt_name);
1919
1920destroy:
1921
1922 /* Call the detached function if there is one */
91447636
A
1923 if (filter->filt_detached)
1924 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
9bccf70c 1925
b0d623f7
A
1926 /*
1927 * Decrease filter count and route_generation ID to let TCP
1928 * know it should reevaluate doing TSO or not
1929 */
39236c6e
A
1930 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1931 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
b0d623f7 1932 routegenid_update();
39236c6e 1933 }
39037602 1934
5ba3f43e
A
1935 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1936
39037602
A
1937 /* Free the filter */
1938 zfree(dlif_filt_zone, filter);
1939 filter = NULL;
6d2010ae 1940done:
39037602 1941 if (retval != 0 && filter != NULL) {
6d2010ae
A
1942 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1943 filter->filt_name, retval);
1944 }
39037602 1945
6d2010ae 1946 return (retval);
1c79356b
A
1947}
1948
2d21ac55 1949__private_extern__ void
91447636
A
1950dlil_detach_filter(interface_filter_t filter)
1951{
3a60a9f5
A
1952 if (filter == NULL)
1953 return;
91447636
A
1954 dlil_detach_filter_internal(filter, 0);
1955}
1c79356b 1956
316670eb
A
1957/*
1958 * Main input thread:
1959 *
1960 * a) handles all inbound packets for lo0
1961 * b) handles all inbound packets for interfaces with no dedicated
1962 * input thread (i.e. anything other than Ethernet/PDP or
1963 * interfaces that support opportunistic polling)
1964 * c) protocol registrations
1965 * d) packet injections
1966 */
39037602 1967__attribute__((noreturn))
91447636 1968static void
316670eb 1969dlil_main_input_thread_func(void *v, wait_result_t w)
91447636 1970{
316670eb
A
1971#pragma unused(w)
1972 struct dlil_main_threading_info *inpm = v;
1973 struct dlil_threading_info *inp = v;
1974
1975 VERIFY(inp == dlil_main_input_thread);
1976 VERIFY(inp->ifp == NULL);
1977 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1978
91447636 1979 while (1) {
2d21ac55 1980 struct mbuf *m = NULL, *m_loop = NULL;
316670eb
A
1981 u_int32_t m_cnt, m_cnt_loop;
1982 boolean_t proto_req;
6d2010ae 1983
316670eb 1984 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 1985
2d21ac55 1986 /* Wait until there is work to be done */
316670eb
A
1987 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1988 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1989 (void) msleep(&inp->input_waiting, &inp->input_lck,
1990 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
1991 }
1992
316670eb
A
1993 inp->input_waiting |= DLIL_INPUT_RUNNING;
1994 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55 1995
316670eb
A
1996 /* Main input thread cannot be terminated */
1997 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2d21ac55 1998
316670eb
A
1999 proto_req = (inp->input_waiting &
2000 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
6d2010ae 2001
316670eb
A
2002 /* Packets for non-dedicated interfaces other than lo0 */
2003 m_cnt = qlen(&inp->rcvq_pkts);
39037602 2004 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
6d2010ae 2005
39236c6e 2006 /* Packets exclusive to lo0 */
316670eb 2007 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
39037602 2008 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
6d2010ae 2009
316670eb 2010 inp->wtot = 0;
6d2010ae 2011
316670eb 2012 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2013
316670eb 2014 /*
39037602
A
2015 * NOTE warning %%% attention !!!!
2016 * We should think about putting some thread starvation
2017 * safeguards if we deal with long chains of packets.
2018 */
316670eb
A
2019 if (m_loop != NULL)
2020 dlil_input_packet_list_extended(lo_ifp, m_loop,
2021 m_cnt_loop, inp->mode);
6d2010ae 2022
316670eb
A
2023 if (m != NULL)
2024 dlil_input_packet_list_extended(NULL, m,
2025 m_cnt, inp->mode);
2026
2027 if (proto_req)
2028 proto_input_run();
2029 }
2030
2031 /* NOTREACHED */
2032 VERIFY(0); /* we should never get here */
2033}
2034
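/*
 * Aside (added for clarity): the wait loop above is the flag-plus-msleep
 * idiom shared by all three input thread variants.  Reduced to its core,
 * the handshake between producer and consumer is:
 *
 *	producer (dlil_input_handler):	consumer (thread funcs):
 *	  lock input_lck		  lock input_lck
 *	  flags |= DLIL_INPUT_WAITING	  while (no work bits set)
 *	  if (!(flags & RUNNING))	      msleep(&flags, ...)
 *	      wakeup_one(&flags)	  flags |= DLIL_INPUT_RUNNING
 *	  unlock input_lck		  unlock, drain queues, repeat
 *
 * The DLIL_INPUT_RUNNING bit suppresses redundant wakeups while the
 * thread is already draining its queue.
 */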
2035/*
2036 * Input thread for interfaces with legacy input model.
2037 */
2038static void
2039dlil_input_thread_func(void *v, wait_result_t w)
2040{
2041#pragma unused(w)
39037602 2042 char thread_name[MAXTHREADNAMESIZE];
316670eb
A
2043 struct dlil_threading_info *inp = v;
2044 struct ifnet *ifp = inp->ifp;
2045
39037602
A
2046 /* Construct the name for this thread, and then apply it. */
2047 bzero(thread_name, sizeof(thread_name));
2048 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2049 thread_set_thread_name(inp->input_thr, thread_name);
2050
316670eb
A
2051 VERIFY(inp != dlil_main_input_thread);
2052 VERIFY(ifp != NULL);
2053 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2054 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2d21ac55 2055
316670eb
A
2056 while (1) {
2057 struct mbuf *m = NULL;
2058 u_int32_t m_cnt;
2059
2060 lck_mtx_lock_spin(&inp->input_lck);
2d21ac55 2061
316670eb
A
2062 /* Wait until there is work to be done */
2063 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2064 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2065 (void) msleep(&inp->input_waiting, &inp->input_lck,
2066 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
2067 }
2068
316670eb
A
2069 inp->input_waiting |= DLIL_INPUT_RUNNING;
2070 inp->input_waiting &= ~DLIL_INPUT_WAITING;
6d2010ae 2071
316670eb
A
2072 /*
2073 * Protocol registration and injection must always use
2074 * the main input thread; in theory the latter could use
2075 * the input thread of the interface the packet arrived on,
2076 * but that would require knowing the interface in advance
2077 * (and the benefits might not be worth the trouble.)
2078 */
2079 VERIFY(!(inp->input_waiting &
2080 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
6d2010ae 2081
316670eb
A
2082 /* Packets for this interface */
2083 m_cnt = qlen(&inp->rcvq_pkts);
39037602 2084 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
6d2010ae 2085
316670eb
A
2086 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2087 lck_mtx_unlock(&inp->input_lck);
2088
2089 /* Free up pending packets */
2090 if (m != NULL)
2091 mbuf_freem_list(m);
2092
2093 dlil_terminate_input_thread(inp);
2094 /* NOTREACHED */
2095 return;
2d21ac55
A
2096 }
2097
316670eb
A
2098 inp->wtot = 0;
2099
5ba3f43e 2100 dlil_input_stats_sync(ifp, inp);
316670eb
A
2101
2102 lck_mtx_unlock(&inp->input_lck);
2d21ac55 2103
91447636 2104 /*
39037602
A
2105 * NOTE warning %%% attention !!!!
2106 * We should think about putting some thread starvation
2107 * safeguards if we deal with long chains of packets.
2108 */
6d2010ae 2109 if (m != NULL)
316670eb
A
2110 dlil_input_packet_list_extended(NULL, m,
2111 m_cnt, inp->mode);
2d21ac55 2112 }
316670eb
A
2113
2114 /* NOTREACHED */
2115 VERIFY(0); /* we should never get here */
2d21ac55
A
2116}
2117
316670eb
A
2118/*
2119 * Input thread for interfaces with opportunistic polling input model.
2120 */
2121static void
2122dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2d21ac55 2123{
316670eb
A
2124#pragma unused(w)
2125 struct dlil_threading_info *inp = v;
2126 struct ifnet *ifp = inp->ifp;
2127 struct timespec ts;
2d21ac55 2128
316670eb
A
2129 VERIFY(inp != dlil_main_input_thread);
2130 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2d21ac55 2131
2d21ac55 2132 while (1) {
316670eb
A
2133 struct mbuf *m = NULL;
2134 u_int32_t m_cnt, m_size, poll_req = 0;
2135 ifnet_model_t mode;
2136 struct timespec now, delta;
39236c6e 2137 u_int64_t ival;
6d2010ae 2138
316670eb 2139 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 2140
39236c6e
A
2141 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
2142 ival = IF_RXPOLL_INTERVALTIME_MIN;
2143
316670eb
A
2144 /* Link parameters changed? */
2145 if (ifp->if_poll_update != 0) {
2146 ifp->if_poll_update = 0;
39236c6e 2147 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
91447636 2148 }
1c79356b 2149
316670eb
A
2150 /* Current operating mode */
2151 mode = inp->mode;
1c79356b 2152
316670eb 2153 /* Wait until there is work to be done */
39236c6e 2154 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
316670eb
A
2155 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2156 (void) msleep(&inp->input_waiting, &inp->input_lck,
2157 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2158 }
2d21ac55 2159
316670eb
A
2160 inp->input_waiting |= DLIL_INPUT_RUNNING;
2161 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55
A
2162
2163 /*
316670eb
A
2164 * Protocol registration and injection must always use
2165 * the main input thread; in theory the latter could use
2166 * the input thread of the interface the packet arrived on,
2167 * but that would require knowing the interface in advance
2168 * (and the benefits might not be worth the trouble.)
2d21ac55 2169 */
316670eb
A
2170 VERIFY(!(inp->input_waiting &
2171 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2d21ac55 2172
316670eb
A
2173 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2174 /* Free up pending packets */
5ba3f43e 2175 lck_mtx_convert_spin(&inp->input_lck);
316670eb 2176 _flushq(&inp->rcvq_pkts);
5ba3f43e
A
2177 if (inp->input_mit_tcall != NULL) {
2178 if (thread_call_isactive(inp->input_mit_tcall))
2179 thread_call_cancel(inp->input_mit_tcall);
2180 }
316670eb 2181 lck_mtx_unlock(&inp->input_lck);
2d21ac55 2182
316670eb
A
2183 dlil_terminate_input_thread(inp);
2184 /* NOTREACHED */
2185 return;
2d21ac55 2186 }
2d21ac55 2187
316670eb
A
2188 /* Total count of all packets */
2189 m_cnt = qlen(&inp->rcvq_pkts);
2190
2191 /* Total bytes of all packets */
2192 m_size = qsize(&inp->rcvq_pkts);
2193
2194 /* Packets for this interface */
39037602 2195 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
316670eb
A
2196 VERIFY(m != NULL || m_cnt == 0);
2197
2198 nanouptime(&now);
2199 if (!net_timerisset(&inp->sample_lasttime))
2200 *(&inp->sample_lasttime) = *(&now);
2201
2202 net_timersub(&now, &inp->sample_lasttime, &delta);
2203 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2204 u_int32_t ptot, btot;
2205
2206 /* Accumulate statistics for current sampling */
2207 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2208
2209 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2210 goto skip;
2211
2212 *(&inp->sample_lasttime) = *(&now);
2213
2214 /* Calculate min/max of inbound bytes */
2215 btot = (u_int32_t)inp->sstats.bytes;
2216 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2217 inp->rxpoll_bmin = btot;
2218 if (btot > inp->rxpoll_bmax)
2219 inp->rxpoll_bmax = btot;
2220
2221 /* Calculate EWMA of inbound bytes */
2222 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2223
2224 /* Calculate min/max of inbound packets */
2225 ptot = (u_int32_t)inp->sstats.packets;
2226 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2227 inp->rxpoll_pmin = ptot;
2228 if (ptot > inp->rxpoll_pmax)
2229 inp->rxpoll_pmax = ptot;
2230
2231 /* Calculate EWMA of inbound packets */
2232 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2233
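			/*
			 * Aside (added for clarity): DLIL_EWMA keeps an
			 * integer exponentially weighted moving average,
			 * roughly avg += (sample - avg) / 2^decay.  With
			 * the default decay of 2, an average of 100 packets
			 * and a fresh sample of 180 gives
			 * ((100 << 2) - 100 + 180) >> 2 = 120, i.e. the
			 * average moves a quarter of the way toward each new
			 * sample.  (Numbers illustrative; see the DLIL_EWMA
			 * definition elsewhere in this file for the exact
			 * integer form.)
			 */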
2234 /* Reset sampling statistics */
2235 PKTCNTR_CLEAR(&inp->sstats);
2236
2237 /* Calculate EWMA of wakeup requests */
2238 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2239 inp->wtot = 0;
2240
2241 if (dlil_verbose) {
2242 if (!net_timerisset(&inp->dbg_lasttime))
2243 *(&inp->dbg_lasttime) = *(&now);
2244 net_timersub(&now, &inp->dbg_lasttime, &delta);
2245 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2246 *(&inp->dbg_lasttime) = *(&now);
39236c6e 2247 printf("%s: [%s] pkts avg %d max %d "
316670eb
A
2248 "limits [%d/%d], wreq avg %d "
2249 "limits [%d/%d], bytes avg %d "
39236c6e
A
2250 "limits [%d/%d]\n", if_name(ifp),
2251 (inp->mode ==
316670eb
A
2252 IFNET_MODEL_INPUT_POLL_ON) ?
2253 "ON" : "OFF", inp->rxpoll_pavg,
2254 inp->rxpoll_pmax,
2255 inp->rxpoll_plowat,
2256 inp->rxpoll_phiwat,
2257 inp->rxpoll_wavg,
2258 inp->rxpoll_wlowat,
2259 inp->rxpoll_whiwat,
2260 inp->rxpoll_bavg,
2261 inp->rxpoll_blowat,
2262 inp->rxpoll_bhiwat);
2263 }
2264 }
2d21ac55 2265
316670eb
A
2266 /* Perform mode transition, if necessary */
2267 if (!net_timerisset(&inp->mode_lasttime))
2268 *(&inp->mode_lasttime) = *(&now);
2269
2270 net_timersub(&now, &inp->mode_lasttime, &delta);
2271 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2272 goto skip;
2273
2274 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2275 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
316670eb
A
2276 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2277 mode = IFNET_MODEL_INPUT_POLL_OFF;
2278 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2279 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2280 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2281 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2282 mode = IFNET_MODEL_INPUT_POLL_ON;
2283 }
6d2010ae 2284
316670eb
A
2285 if (mode != inp->mode) {
2286 inp->mode = mode;
2287 *(&inp->mode_lasttime) = *(&now);
2288 poll_req++;
2289 }
2290 }
2291skip:
2292 dlil_input_stats_sync(ifp, inp);
6d2010ae 2293
316670eb 2294 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2295
316670eb
A
2296 /*
2297 * If there's a mode change and interface is still attached,
2298 * perform a downcall to the driver for the new mode. Also
2299 * hold an IO refcnt on the interface to prevent it from
2300 * being detached (will be released below.)
2301 */
2302 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2303 struct ifnet_model_params p = { mode, { 0 } };
2304 errno_t err;
2305
2306 if (dlil_verbose) {
39236c6e 2307 printf("%s: polling is now %s, "
316670eb
A
2308 "pkts avg %d max %d limits [%d/%d], "
2309 "wreq avg %d limits [%d/%d], "
2310 "bytes avg %d limits [%d/%d]\n",
39236c6e 2311 if_name(ifp),
316670eb
A
2312 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2313 "ON" : "OFF", inp->rxpoll_pavg,
2314 inp->rxpoll_pmax, inp->rxpoll_plowat,
2315 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2316 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2317 inp->rxpoll_bavg, inp->rxpoll_blowat,
2318 inp->rxpoll_bhiwat);
2319 }
2d21ac55 2320
316670eb
A
2321 if ((err = ((*ifp->if_input_ctl)(ifp,
2322 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
39236c6e
A
2323 printf("%s: error setting polling mode "
2324 "to %s (%d)\n", if_name(ifp),
316670eb
A
2325 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2326 "ON" : "OFF", err);
2327 }
1c79356b 2328
316670eb
A
2329 switch (mode) {
2330 case IFNET_MODEL_INPUT_POLL_OFF:
2331 ifnet_set_poll_cycle(ifp, NULL);
2332 inp->rxpoll_offreq++;
2333 if (err != 0)
2334 inp->rxpoll_offerr++;
2335 break;
2d21ac55 2336
316670eb 2337 case IFNET_MODEL_INPUT_POLL_ON:
39236c6e 2338 net_nsectimer(&ival, &ts);
316670eb
A
2339 ifnet_set_poll_cycle(ifp, &ts);
2340 ifnet_poll(ifp);
2341 inp->rxpoll_onreq++;
2342 if (err != 0)
2343 inp->rxpoll_onerr++;
2344 break;
2345
2346 default:
2347 VERIFY(0);
2348 /* NOTREACHED */
2349 }
2350
2351 /* Release the IO refcnt */
2352 ifnet_decr_iorefcnt(ifp);
2353 }
2354
2355 /*
39037602
A
2356 * NOTE warning %%% attention !!!!
2357 * We should think about putting some thread starvation
2358 * safeguards if we deal with long chains of packets.
2359 */
316670eb
A
2360 if (m != NULL)
2361 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2362 }
2363
2364 /* NOTREACHED */
2365 VERIFY(0); /* we should never get here */
2366}
2367
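/*
 * Sketch (illustrative; the driver-side counterpart is assumed, not taken
 * from this file): the IFNET_CTL_SET_INPUT_MODEL downcall above lands in
 * the driver's input_ctl callback, which flips polling on or off in
 * hardware.  A minimal handler could look like:
 */
static errno_t
mydrv_input_ctl_sketch(struct ifnet *ifp, ifnet_ctl_cmd_t cmd,
    u_int32_t arglen, void *arg)
{
#pragma unused(ifp)
	struct ifnet_model_params *p = arg;

	if (cmd != IFNET_CTL_SET_INPUT_MODEL || arglen < sizeof (*p))
		return (EINVAL);

	if (p->model == IFNET_MODEL_INPUT_POLL_ON) {
		/* mask RX interrupts; rely on ifnet_poll() downcalls */
	} else {
		/* unmask RX interrupts; resume per-interrupt input */
	}
	return (0);
}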
39236c6e
A
2368/*
2369 * Must be called on an attached ifnet (caller is expected to check.)
2370 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2371 */
2372errno_t
2373dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2374 boolean_t locked)
316670eb 2375{
39236c6e 2376 struct dlil_threading_info *inp;
316670eb
A
2377 u_int64_t sample_holdtime, inbw;
2378
39236c6e
A
2379 VERIFY(ifp != NULL);
2380 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2381 return (ENXIO);
2382
2383 if (p != NULL) {
2384 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2385 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2386 return (EINVAL);
2387 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2388 p->packets_lowat >= p->packets_hiwat)
2389 return (EINVAL);
2390 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2391 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2392 return (EINVAL);
2393 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2394 p->bytes_lowat >= p->bytes_hiwat)
2395 return (EINVAL);
2396 if (p->interval_time != 0 &&
2397 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2398 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2399 }
2400
2401 if (!locked)
2402 lck_mtx_lock(&inp->input_lck);
2403
5ba3f43e 2404 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
39236c6e
A
2405
2406 /*
2407 * Normally, we'd reset the parameters to the auto-tuned values
2408 * if the input thread detects a change in link rate. If the
2409 * driver provides its own parameters right after a link rate
2410 * changes, but before the input thread gets to run, we want to
2411 * make sure to keep the driver's values. Clearing if_poll_update
2412 * will achieve that.
2413 */
2414 if (p != NULL && !locked && ifp->if_poll_update != 0)
2415 ifp->if_poll_update = 0;
316670eb 2416
39236c6e 2417 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
316670eb
A
2418 sample_holdtime = 0; /* polling is disabled */
2419 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2420 inp->rxpoll_blowat = 0;
2421 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2422 inp->rxpoll_bhiwat = (u_int32_t)-1;
39236c6e
A
2423 inp->rxpoll_plim = 0;
2424 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
316670eb 2425 } else {
39236c6e
A
2426 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2427 u_int64_t ival;
316670eb
A
2428 unsigned int n, i;
2429
39236c6e 2430 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
316670eb
A
2431 if (inbw < rxpoll_tbl[i].speed)
2432 break;
2433 n = i;
2434 }
39236c6e
A
2435 /* auto-tune if caller didn't specify a value */
2436 plowat = ((p == NULL || p->packets_lowat == 0) ?
2437 rxpoll_tbl[n].plowat : p->packets_lowat);
2438 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2439 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2440 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2441 rxpoll_tbl[n].blowat : p->bytes_lowat);
2442 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2443 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2444 plim = ((p == NULL || p->packets_limit == 0) ?
2445 if_rxpoll_max : p->packets_limit);
2446 ival = ((p == NULL || p->interval_time == 0) ?
2447 if_rxpoll_interval_time : p->interval_time);
2448
2449 VERIFY(plowat != 0 && phiwat != 0);
2450 VERIFY(blowat != 0 && bhiwat != 0);
2451 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2452
316670eb
A
2453 sample_holdtime = if_rxpoll_sample_holdtime;
2454 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2455 inp->rxpoll_whiwat = if_rxpoll_whiwat;
39236c6e
A
2456 inp->rxpoll_plowat = plowat;
2457 inp->rxpoll_phiwat = phiwat;
2458 inp->rxpoll_blowat = blowat;
2459 inp->rxpoll_bhiwat = bhiwat;
2460 inp->rxpoll_plim = plim;
2461 inp->rxpoll_ival = ival;
316670eb
A
2462 }
2463
2464 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2465 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2466
2467 if (dlil_verbose) {
39236c6e
A
2468 printf("%s: speed %llu bps, sample per %llu nsec, "
2469 "poll interval %llu nsec, pkts per poll %u, "
2470 "pkt limits [%u/%u], wreq limits [%u/%u], "
2471 "bytes limits [%u/%u]\n", if_name(ifp),
2472 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2473 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2474 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
316670eb 2475 }
39236c6e
A
2476
2477 if (!locked)
2478 lck_mtx_unlock(&inp->input_lck);
2479
2480 return (0);
2481}
2482
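/*
 * Usage sketch (illustrative; assumes the driver-facing wrapper
 * ifnet_set_poll_params() forwards to dlil_rxpoll_set_params() on an
 * attached interface).  Passing zero in any field keeps that value
 * auto-tuned from the link-rate table; the numbers below are made up.
 */
static void
mydrv_tune_poll_sketch(ifnet_t ifp)
{
	struct ifnet_poll_params p;

	bzero(&p, sizeof (p));
	p.packets_lowat = 8;		/* leave polling below this rate */
	p.packets_hiwat = 64;		/* enter polling above this rate */
	p.interval_time = 1000 * 1000;	/* 1 ms, in nanoseconds */
	(void) ifnet_set_poll_params(ifp, &p);
}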
2483/*
2484 * Must be called on an attached ifnet (caller is expected to check.)
2485 */
2486errno_t
2487dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2488{
2489 struct dlil_threading_info *inp;
2490
2491 VERIFY(ifp != NULL && p != NULL);
2492 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2493 return (ENXIO);
2494
2495 bzero(p, sizeof (*p));
2496
2497 lck_mtx_lock(&inp->input_lck);
2498 p->packets_limit = inp->rxpoll_plim;
2499 p->packets_lowat = inp->rxpoll_plowat;
2500 p->packets_hiwat = inp->rxpoll_phiwat;
2501 p->bytes_lowat = inp->rxpoll_blowat;
2502 p->bytes_hiwat = inp->rxpoll_bhiwat;
2503 p->interval_time = inp->rxpoll_ival;
2504 lck_mtx_unlock(&inp->input_lck);
2505
2506 return (0);
316670eb
A
2507}
2508
2509errno_t
2510ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2511 const struct ifnet_stat_increment_param *s)
2512{
2513 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2514}
2515
2516errno_t
2517ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2518 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2519{
2520 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2521}
2522
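/*
 * Usage sketch (illustrative): how a driver hands a received chain to the
 * stack.  With the extended variant the caller must supply an accurate
 * packet count in the stat increment parameter (see the count assertion
 * in ifnet_input_common() below); the byte count may include link-layer
 * headers and is treated as approximate.
 */
static void
mydrv_rx_sketch(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = cnt;	/* must match the chain exactly */
	s.bytes_in = bytes;
	(void) ifnet_input_extended(ifp, head, tail, &s);
}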
2523static errno_t
2524ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2525 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2526{
5ba3f43e 2527 dlil_input_func input_func;
39037602 2528 struct ifnet_stat_increment_param _s;
316670eb 2529 u_int32_t m_cnt = 0, m_size = 0;
39037602
A
2530 struct mbuf *last;
2531 errno_t err = 0;
316670eb 2532
39236c6e
A
2533 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2534 if (m_head != NULL)
2535 mbuf_freem_list(m_head);
2536 return (EINVAL);
2537 }
2538
2539 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2540 VERIFY(m_tail == NULL || ext);
2541 VERIFY(s != NULL || !ext);
2542
316670eb
A
2543 /*
2544 * Drop the packet(s) if the parameters are invalid, or if the
2545 * interface is no longer attached; else hold an IO refcnt to
2546 * prevent it from being detached (will be released below.)
2547 */
39236c6e 2548 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
316670eb
A
2549 if (m_head != NULL)
2550 mbuf_freem_list(m_head);
2551 return (EINVAL);
2552 }
2553
5ba3f43e
A
2554 input_func = ifp->if_input_dlil;
2555 VERIFY(input_func != NULL);
39037602 2556
316670eb
A
2557 if (m_tail == NULL) {
2558 last = m_head;
39236c6e 2559 while (m_head != NULL) {
316670eb
A
2560#if IFNET_INPUT_SANITY_CHK
2561 if (dlil_input_sanity_check != 0)
2562 DLIL_INPUT_CHECK(last, ifp);
2563#endif /* IFNET_INPUT_SANITY_CHK */
2564 m_cnt++;
2565 m_size += m_length(last);
2566 if (mbuf_nextpkt(last) == NULL)
2567 break;
2568 last = mbuf_nextpkt(last);
2569 }
2570 m_tail = last;
2571 } else {
2572#if IFNET_INPUT_SANITY_CHK
2573 if (dlil_input_sanity_check != 0) {
2574 last = m_head;
2575 while (1) {
2576 DLIL_INPUT_CHECK(last, ifp);
2577 m_cnt++;
2578 m_size += m_length(last);
2579 if (mbuf_nextpkt(last) == NULL)
2580 break;
2581 last = mbuf_nextpkt(last);
2582 }
2583 } else {
2584 m_cnt = s->packets_in;
2585 m_size = s->bytes_in;
2586 last = m_tail;
2587 }
2588#else
2589 m_cnt = s->packets_in;
2590 m_size = s->bytes_in;
2591 last = m_tail;
2592#endif /* IFNET_INPUT_SANITY_CHK */
2593 }
2594
2595 if (last != m_tail) {
39236c6e
A
2596 panic_plain("%s: invalid input packet chain for %s, "
2597 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2598 m_tail, last);
316670eb
A
2599 }
2600
2601 /*
2602 * Assert packet count only for the extended variant, for backwards
2603 * compatibility, since this came directly from the device driver.
2604 * Relax this assertion for input bytes, as the driver may have
2605 * included the link-layer headers in the computation; hence
2606 * m_size is just an approximation.
2607 */
2608 if (ext && s->packets_in != m_cnt) {
39236c6e
A
2609 panic_plain("%s: input packet count mismatch for %s, "
2610 "%d instead of %d\n", __func__, if_name(ifp),
2611 s->packets_in, m_cnt);
316670eb
A
2612 }
2613
39037602
A
2614 if (s == NULL) {
2615 bzero(&_s, sizeof (_s));
2616 s = &_s;
2617 } else {
2618 _s = *s;
2619 }
2620 _s.packets_in = m_cnt;
2621 _s.bytes_in = m_size;
2622
5ba3f43e 2623 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
39037602
A
2624
2625 if (ifp != lo_ifp) {
2626 /* Release the IO refcnt */
2627 ifnet_decr_iorefcnt(ifp);
2628 }
2629
2630 return (err);
2631}
2632
39037602
A
2633
2634errno_t
2635dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2636{
2637 return (ifp->if_output(ifp, m));
2638}
2639
2640errno_t
2641dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2642 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2643 boolean_t poll, struct thread *tp)
2644{
2645 struct dlil_threading_info *inp;
2646 u_int32_t m_cnt = s->packets_in;
2647 u_int32_t m_size = s->bytes_in;
2648
316670eb
A
2649 if ((inp = ifp->if_inp) == NULL)
2650 inp = dlil_main_input_thread;
2651
2652 /*
2653 * If there is a matching DLIL input thread associated with an
2654 * affinity set, associate this thread with the same set. We
2655 * will only do this once.
2656 */
2657 lck_mtx_lock_spin(&inp->input_lck);
39037602 2658 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
316670eb
A
2659 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2660 (poll && inp->poll_thr == THREAD_NULL))) {
2661 u_int32_t tag = inp->tag;
2662
2663 if (poll) {
2664 VERIFY(inp->poll_thr == THREAD_NULL);
2665 inp->poll_thr = tp;
2666 } else {
2667 VERIFY(inp->wloop_thr == THREAD_NULL);
2668 inp->wloop_thr = tp;
2669 }
2670 lck_mtx_unlock(&inp->input_lck);
2671
2672 /* Associate the current thread with the new affinity tag */
2673 (void) dlil_affinity_set(tp, tag);
2674
2675 /*
2676 * Take a reference on the current thread; during detach,
5ba3f43e 2677 * we will need to refer to it in order to tear down its
316670eb
A
2678 * affinity.
2679 */
2680 thread_reference(tp);
2681 lck_mtx_lock_spin(&inp->input_lck);
2682 }
2683
39236c6e
A
2684 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2685
39037602 2686 /*
316670eb
A
2687 * Because of looped-back multicast we cannot stuff the ifp in
2688 * the rcvif of the packet header: loopback (lo0) packets use a
2689 * dedicated list so that we can later associate them with lo_ifp
2690 * on their way up the stack. Packets for other interfaces without
2691 * dedicated input threads go to the regular list.
2692 */
39236c6e
A
2693 if (m_head != NULL) {
2694 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2695 struct dlil_main_threading_info *inpm =
2696 (struct dlil_main_threading_info *)inp;
2697 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2698 m_cnt, m_size);
2699 } else {
2700 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2701 m_cnt, m_size);
2702 }
316670eb
A
2703 }
2704
2705#if IFNET_INPUT_SANITY_CHK
2706 if (dlil_input_sanity_check != 0) {
2707 u_int32_t count;
2708 struct mbuf *m0;
2709
2710 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2711 count++;
2712
2713 if (count != m_cnt) {
39236c6e
A
2714 panic_plain("%s: invalid packet count %d "
2715 "(expected %d)\n", if_name(ifp),
316670eb
A
2716 count, m_cnt);
2717 /* NOTREACHED */
2718 }
2719
2720 inp->input_mbuf_cnt += m_cnt;
2721 }
2722#endif /* IFNET_INPUT_SANITY_CHK */
2723
39037602
A
2724 dlil_input_stats_add(s, inp, poll);
2725 /*
2726 * If we're using the main input thread, synchronize the
2727 * stats now since we have the interface context. All
2728 * other cases involving dedicated input threads will
2729 * have their stats synchronized there.
2730 */
2731 if (inp == dlil_main_input_thread)
2732 dlil_input_stats_sync(ifp, inp);
316670eb 2733
a39ff7e2
A
2734 if (inp->input_mit_tcall &&
2735 qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
5ba3f43e
A
2736 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2737 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2738 ifp->if_type == IFT_CELLULAR)
2739 ) {
2740 if (!thread_call_isactive(inp->input_mit_tcall)) {
2741 uint64_t deadline;
2742 clock_interval_to_deadline(dlil_rcv_mit_interval,
2743 1, &deadline);
2744 (void) thread_call_enter_delayed(
2745 inp->input_mit_tcall, deadline);
2746 }
2747 } else {
2748 inp->input_waiting |= DLIL_INPUT_WAITING;
2749 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2750 inp->wtot++;
2751 wakeup_one((caddr_t)&inp->input_waiting);
2752 }
316670eb
A
2753 }
2754 lck_mtx_unlock(&inp->input_lck);
2755
316670eb
A
2756 return (0);
2757}
2758
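/*
 * Aside (added for clarity): the thread-call branch above implements
 * receive mitigation: when the backlog sits between the min and max
 * thresholds, the wakeup is deferred by dlil_rcv_mit_interval so several
 * small bursts coalesce into one pass of the input thread.  Outside that
 * window the thread is woken immediately, trading latency for fewer
 * context switches only when that trade is cheap.
 */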
5ba3f43e 2759
39236c6e 2760static void
5c9f4661 2761ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
316670eb 2762{
39236c6e
A
2763 if (!(ifp->if_eflags & IFEF_TXSTART))
2764 return;
316670eb 2765 /*
39236c6e
A
2766 * If the starter thread is inactive, signal it to do work,
2767 * unless the interface is being flow controlled from below,
2768 * e.g. a virtual interface being flow controlled by a real
5c9f4661
A
2769 * network interface beneath it, or it's been disabled via
2770 * a call to ifnet_disable_output().
316670eb
A
2771 */
2772 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e
A
2773 if (resetfc) {
2774 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2775 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2776 lck_mtx_unlock(&ifp->if_start_lock);
2777 return;
2778 }
316670eb 2779 ifp->if_start_req++;
3e170ce0
A
2780 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2781 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
39037602
A
2782 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2783 ifp->if_start_delayed == 0)) {
5ba3f43e
A
2784 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2785 ifp->if_start_thread);
316670eb
A
2786 }
2787 lck_mtx_unlock(&ifp->if_start_lock);
2788}
2789
39236c6e
A
2790void
2791ifnet_start(struct ifnet *ifp)
2792{
5c9f4661 2793 ifnet_start_common(ifp, FALSE);
39236c6e
A
2794}
2795
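/*
 * Usage sketch (illustrative): in the new output model (IFEF_TXSTART),
 * a driver registers a start callback at attach time (e.g. via the
 * ifnet_init_eparams passed to ifnet_allocate_extended), and the stack
 * kicks the dedicated starter thread through ifnet_start().  The
 * callback then drains if_snd until empty:
 */
static void
mydrv_start_sketch(ifnet_t ifp)
{
	mbuf_t m;

	/* ifnet_dequeue() is the driver-facing accessor for if_snd */
	while (ifnet_dequeue(ifp, &m) == 0) {
		/* ... program the packet into hardware ... */
		mbuf_freem(m);	/* placeholder for the actual TX path */
	}
}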
316670eb
A
2796static void
2797ifnet_start_thread_fn(void *v, wait_result_t w)
2798{
2799#pragma unused(w)
2800 struct ifnet *ifp = v;
2801 char ifname[IFNAMSIZ + 1];
39037602 2802 char thread_name[MAXTHREADNAMESIZE];
316670eb
A
2803 struct timespec *ts = NULL;
2804 struct ifclassq *ifq = &ifp->if_snd;
3e170ce0 2805 struct timespec delay_start_ts;
316670eb 2806
39037602
A
2807 /* Construct the name for this thread, and then apply it. */
2808 bzero(thread_name, sizeof(thread_name));
5ba3f43e
A
2809 (void) snprintf(thread_name, sizeof (thread_name),
2810 "ifnet_start_%s", ifp->if_xname);
39037602
A
2811 thread_set_thread_name(ifp->if_start_thread, thread_name);
2812
316670eb
A
2813 /*
2814 * Treat the dedicated starter thread for lo0 as equivalent to
2815 * the driver workloop thread; if net_affinity is enabled for
2816 * the main input thread, associate this starter thread to it
2817 * by binding them with the same affinity tag. This is done
2818 * only once (as we only have one lo_ifp which never goes away.)
2819 */
2820 if (ifp == lo_ifp) {
2821 struct dlil_threading_info *inp = dlil_main_input_thread;
2822 struct thread *tp = current_thread();
2823
2824 lck_mtx_lock(&inp->input_lck);
2825 if (inp->net_affinity) {
2826 u_int32_t tag = inp->tag;
2827
2828 VERIFY(inp->wloop_thr == THREAD_NULL);
2829 VERIFY(inp->poll_thr == THREAD_NULL);
2830 inp->wloop_thr = tp;
2831 lck_mtx_unlock(&inp->input_lck);
2832
2833 /* Associate this thread with the affinity tag */
2834 (void) dlil_affinity_set(tp, tag);
2835 } else {
2836 lck_mtx_unlock(&inp->input_lck);
2837 }
2838 }
2839
5ba3f43e 2840 (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));
316670eb
A
2841
2842 lck_mtx_lock_spin(&ifp->if_start_lock);
2843
2844 for (;;) {
5ba3f43e 2845 if (ifp->if_start_thread != NULL) {
39037602
A
2846 (void) msleep(&ifp->if_start_thread,
2847 &ifp->if_start_lock,
3e170ce0 2848 (PZERO - 1) | PSPIN, ifname, ts);
5ba3f43e 2849 }
316670eb
A
2850 /* interface is detached? */
2851 if (ifp->if_start_thread == THREAD_NULL) {
2852 ifnet_set_start_cycle(ifp, NULL);
2853 lck_mtx_unlock(&ifp->if_start_lock);
2854 ifnet_purge(ifp);
2855
2856 if (dlil_verbose) {
39236c6e
A
2857 printf("%s: starter thread terminated\n",
2858 if_name(ifp));
316670eb
A
2859 }
2860
2861 /* for the extra refcnt from kernel_thread_start() */
2862 thread_deallocate(current_thread());
2863 /* this is the end */
2864 thread_terminate(current_thread());
2865 /* NOTREACHED */
2866 return;
2867 }
2868
2869 ifp->if_start_active = 1;
3e170ce0 2870
316670eb
A
2871 for (;;) {
2872 u_int32_t req = ifp->if_start_req;
3e170ce0
A
2873 if (!IFCQ_IS_EMPTY(ifq) &&
2874 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2875 ifp->if_start_delayed == 0 &&
2876 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2877 (ifp->if_eflags & IFEF_DELAY_START)) {
2878 ifp->if_start_delayed = 1;
2879 ifnet_start_delayed++;
2880 break;
2881 } else {
2882 ifp->if_start_delayed = 0;
2883 }
316670eb 2884 lck_mtx_unlock(&ifp->if_start_lock);
3e170ce0
A
2885
2886 /*
2887 * If no longer attached, don't call start because ifp
2888 * is being destroyed; else hold an IO refcnt to
2889 * prevent the interface from being detached (will be
2890 * released below.)
2891 */
2892 if (!ifnet_is_attached(ifp, 1)) {
2893 lck_mtx_lock_spin(&ifp->if_start_lock);
2894 break;
2895 }
2896
316670eb
A
2897 /* invoke the driver's start routine */
2898 ((*ifp->if_start)(ifp));
3e170ce0
A
2899
2900 /*
2901 * Release the io ref count taken by ifnet_is_attached.
2902 */
2903 ifnet_decr_iorefcnt(ifp);
2904
316670eb
A
2905 lck_mtx_lock_spin(&ifp->if_start_lock);
2906
5c9f4661
A
2907 /*
2908 * If there's no pending request or if the
2909 * interface has been disabled, we're done.
2910 */
2911 if (req == ifp->if_start_req ||
2912 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
316670eb 2913 break;
5c9f4661 2914 }
316670eb 2915 }
3e170ce0 2916
316670eb
A
2917 ifp->if_start_req = 0;
2918 ifp->if_start_active = 0;
3e170ce0 2919
316670eb
A
2920 /*
2921 * Wakeup N ns from now if rate-controlled by TBR, and if
2922 * there are still packets in the send queue which haven't
2923 * been dequeued so far; else sleep indefinitely (ts = NULL)
2924 * until ifnet_start() is called again.
2925 */
2926 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2927 &ifp->if_start_cycle : NULL);
2928
3e170ce0
A
2929 if (ts == NULL && ifp->if_start_delayed == 1) {
2930 delay_start_ts.tv_sec = 0;
2931 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2932 ts = &delay_start_ts;
2933 }
2934
316670eb
A
2935 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2936 ts = NULL;
2937 }
2938
2939 /* NOTREACHED */
316670eb
A
2940}
2941
2942void
2943ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2944{
2945 if (ts == NULL)
2946 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2947 else
2948 *(&ifp->if_start_cycle) = *ts;
2949
2950 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
39236c6e
A
2951 printf("%s: restart interval set to %lu nsec\n",
2952 if_name(ifp), ts->tv_nsec);
316670eb
A
2953}
2954
2955static void
2956ifnet_poll(struct ifnet *ifp)
2957{
2958 /*
2959 * If the poller thread is inactive, signal it to do work.
2960 */
2961 lck_mtx_lock_spin(&ifp->if_poll_lock);
2962 ifp->if_poll_req++;
2963 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2964 wakeup_one((caddr_t)&ifp->if_poll_thread);
2965 }
2966 lck_mtx_unlock(&ifp->if_poll_lock);
2967}
2968
2969static void
2970ifnet_poll_thread_fn(void *v, wait_result_t w)
2971{
2972#pragma unused(w)
2973 struct dlil_threading_info *inp;
2974 struct ifnet *ifp = v;
2975 char ifname[IFNAMSIZ + 1];
2976 struct timespec *ts = NULL;
2977 struct ifnet_stat_increment_param s;
2978
39236c6e
A
2979 snprintf(ifname, sizeof (ifname), "%s_poller",
2980 if_name(ifp));
316670eb
A
2981 bzero(&s, sizeof (s));
2982
2983 lck_mtx_lock_spin(&ifp->if_poll_lock);
2984
2985 inp = ifp->if_inp;
2986 VERIFY(inp != NULL);
2987
2988 for (;;) {
2989 if (ifp->if_poll_thread != THREAD_NULL) {
2990 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2991 (PZERO - 1) | PSPIN, ifname, ts);
2992 }
2993
2994 /* interface is detached (maybe while asleep)? */
2995 if (ifp->if_poll_thread == THREAD_NULL) {
2996 ifnet_set_poll_cycle(ifp, NULL);
2997 lck_mtx_unlock(&ifp->if_poll_lock);
2998
2999 if (dlil_verbose) {
39236c6e
A
3000 printf("%s: poller thread terminated\n",
3001 if_name(ifp));
316670eb
A
3002 }
3003
3004 /* for the extra refcnt from kernel_thread_start() */
3005 thread_deallocate(current_thread());
3006 /* this is the end */
3007 thread_terminate(current_thread());
3008 /* NOTREACHED */
3009 return;
3010 }
3011
3012 ifp->if_poll_active = 1;
3013 for (;;) {
3014 struct mbuf *m_head, *m_tail;
3015 u_int32_t m_lim, m_cnt, m_totlen;
3016 u_int16_t req = ifp->if_poll_req;
3017
3018 lck_mtx_unlock(&ifp->if_poll_lock);
3019
3020 /*
3021 * If no longer attached, there's nothing to do;
3022 * else hold an IO refcnt to prevent the interface
3023 * from being detached (will be released below.)
3024 */
db609669
A
3025 if (!ifnet_is_attached(ifp, 1)) {
3026 lck_mtx_lock_spin(&ifp->if_poll_lock);
316670eb 3027 break;
db609669 3028 }
316670eb 3029
39236c6e 3030 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
316670eb
A
3031 MAX((qlimit(&inp->rcvq_pkts)),
3032 (inp->rxpoll_phiwat << 2));
3033
3034 if (dlil_verbose > 1) {
39236c6e 3035 printf("%s: polling up to %d pkts, "
316670eb
A
3036 "pkts avg %d max %d, wreq avg %d, "
3037 "bytes avg %d\n",
39236c6e 3038 if_name(ifp), m_lim,
316670eb
A
3039 inp->rxpoll_pavg, inp->rxpoll_pmax,
3040 inp->rxpoll_wavg, inp->rxpoll_bavg);
3041 }
3042
3043 /* invoke the driver's input poll routine */
3044 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3045 &m_cnt, &m_totlen));
3046
3047 if (m_head != NULL) {
3048 VERIFY(m_tail != NULL && m_cnt > 0);
3049
3050 if (dlil_verbose > 1) {
39236c6e 3051 printf("%s: polled %d pkts, "
316670eb
A
3052 "pkts avg %d max %d, wreq avg %d, "
3053 "bytes avg %d\n",
39236c6e 3054 if_name(ifp), m_cnt,
316670eb
A
3055 inp->rxpoll_pavg, inp->rxpoll_pmax,
3056 inp->rxpoll_wavg, inp->rxpoll_bavg);
3057 }
3058
3059 /* stats are required for extended variant */
3060 s.packets_in = m_cnt;
3061 s.bytes_in = m_totlen;
3062
3063 (void) ifnet_input_common(ifp, m_head, m_tail,
3064 &s, TRUE, TRUE);
39236c6e
A
3065 } else {
3066 if (dlil_verbose > 1) {
3067 printf("%s: no packets, "
3068 "pkts avg %d max %d, wreq avg %d, "
3069 "bytes avg %d\n",
3070 if_name(ifp), inp->rxpoll_pavg,
3071 inp->rxpoll_pmax, inp->rxpoll_wavg,
3072 inp->rxpoll_bavg);
3073 }
3074
3075 (void) ifnet_input_common(ifp, NULL, NULL,
3076 NULL, FALSE, TRUE);
316670eb
A
3077 }
3078
3079 /* Release the io ref count */
3080 ifnet_decr_iorefcnt(ifp);
3081
3082 lck_mtx_lock_spin(&ifp->if_poll_lock);
3083
3084 /* if there's no pending request, we're done */
5c9f4661 3085 if (req == ifp->if_poll_req) {
316670eb 3086 break;
5c9f4661 3087 }
316670eb
A
3088 }
3089 ifp->if_poll_req = 0;
3090 ifp->if_poll_active = 0;
3091
3092 /*
3093 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3094 * until ifnet_poll() is called again.
3095 */
3096 ts = &ifp->if_poll_cycle;
3097 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
3098 ts = NULL;
3099 }
3100
3101 /* NOTREACHED */
316670eb
A
3102}
3103
3104void
3105ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3106{
3107 if (ts == NULL)
3108 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
3109 else
3110 *(&ifp->if_poll_cycle) = *ts;
3111
3112 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
39236c6e
A
3113 printf("%s: poll interval set to %lu nsec\n",
3114 if_name(ifp), ts->tv_nsec);
316670eb
A
3115}

void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}

void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}

errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}

errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}

errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}

errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);
	return (0);
}

void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}

static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
    boolean_t flush, boolean_t *pdrop)
{
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct mbuf *m = p;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);

	/*
	 * If the packet already carries a timestamp, either from
	 * dlil_output() or from the flowswitch, use it here.  Otherwise,
	 * record a timestamp now.  PKTF_TS_VALID is always cleared prior
	 * to entering classq, i.e. the timestamp value is used internally
	 * there.
	 */
	switch (ptype) {
	case QP_MBUF:
		ASSERT(m->m_flags & M_PKTHDR);
		ASSERT(m->m_nextpkt == NULL);

		if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    m->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			m->m_pkthdr.pkt_timestamp = now_nsec;
		}
		m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = _net_uptime;
				if (fg_ts != NULL)
					*fg_ts = _net_uptime;
			}
			if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = _net_uptime;
				if (rt_ts != NULL)
					*rt_ts = _net_uptime;
			}
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay the start callback for
		 * coalescing multiple packets, then use the following
		 * heuristics to make sure that the start callback is
		 * delayed only when bulk data transfer is detected:
		 * 1. The number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled, it will stay enabled for
		 * another 10 idle windows.  This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since the last enqueue is more
		 * than 200ms, we disable delaying the start callback.
		 * This is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					ifp->if_eflags &=
					    ~(IFEF_DELAY_START);
					ifnet_delay_start_disabled++;
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					ifp->if_eflags |= IFEF_DELAY_START;
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						ifp->if_eflags &=
						    ~(IFEF_DELAY_START);
						ifnet_delay_start_disabled++;
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			ifp->if_eflags &= ~(IFEF_DELAY_START);
		}
	} else {
		ifp->if_eflags &= ~(IFEF_DELAY_START);
	}

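	/*
	 * Worked example (illustrative): with ifnet_enqueue_multi_setup(ifp,
	 * 16, 1000), if_start_delay_timeout is 1,000,000 ns and dwin above
	 * is therefore 2,000,000 ns.  If 16 or more packets are enqueued
	 * within one such 2 ms window, IFEF_DELAY_START is set and the start
	 * callback is deferred; a 200 ms gap since the window began tears
	 * the deferral down again.
	 */
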
	switch (ptype) {
	case QP_MBUF:
		/* enqueue the packet (caller consumes object) */
		error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
		m = NULL;
		break;

	default:
		break;
	}

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could
	 * still be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
		ifnet_start(ifp);

	return (error);
}

errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;
	return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL) {
			m_freem_list(m);
			*pdrop = TRUE;
		}
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return (ENETDOWN);
	}

	return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
}

errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    (void **)mp, NULL, NULL, NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
	    NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);
	return (rc);
}

errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || pkt_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
	    len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);
	return (rc);
}

errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || byte_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);
	return (rc);
}

errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail,
    u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
	    (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);
	return (rc);
}

#if !CONFIG_EMBEDDED
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}
#endif /* !CONFIG_EMBEDDED */

static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away the M_PROTO1 bit prior to sending the packet up the
	 * stack, as it is meant to be local to a subsystem -- if_bridge
	 * in the case of M_PROTO1.
	 */
	if (*m_p != NULL)
		(*m_p)->m_flags &= ~M_PROTO1;

	return (0);
}

static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return (0);
}

static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char * frame_header;
			mbuf_t next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}

static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0)
		d->packets_in += s->packets_in;
	if (s->bytes_in != 0)
		d->bytes_in += s->bytes_in;
	if (s->errors_in != 0)
		d->errors_in += s->errors_in;

	if (s->packets_out != 0)
		d->packets_out += s->packets_out;
	if (s->bytes_out != 0)
		d->bytes_out += s->bytes_out;
	if (s->errors_out != 0)
		d->errors_out += s->errors_out;

	if (s->collisions != 0)
		d->collisions += s->collisions;
	if (s->dropped != 0)
		d->dropped += s->dropped;

	if (poll)
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}

static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	if (ifp->if_data_threshold != 0) {
		lck_mtx_convert_spin(&inp->input_lck);
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}

__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return (dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE));
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
}

static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char * frame_header;
	struct if_proto * last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t * pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int iorefcnt = 0;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			/*
			 * Preserve the time stamp if it was set.
			 */
			pktf_mask = PKTF_TS_VALID;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;

			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

errno_t
if_mcasts_update(struct ifnet *ifp)
{
	errno_t err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}

/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	return (kev_post_msg(event));
}

__private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)
{
	struct kev_msg ev_msg;
	struct net_event_data ev_data;

	bzero(&ev_data, sizeof (ev_data));
	bzero(&ev_msg, sizeof (ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
	ev_msg.event_code = KEV_DL_SIFFLAGS;
	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
	ev_data.if_family = ifp->if_family;
	ev_data.if_unit = (u_int32_t) ifp->if_unit;
	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
	ev_msg.dv[0].data_ptr = &ev_data;
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(ifp, &ev_msg);
}

#define TMP_IF_PROTO_ARR_SIZE 10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event,
    bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up the ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof (*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
}

errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return (result);
}

#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof(struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof(struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}
#endif

static void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_next;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}

/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking.  In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both.  This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null.  This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;
	struct timespec now;
	u_int64_t now_nsec;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Get an io refcnt if the interface is attached to prevent
	 * ifnet_detach from happening while this operation is in progress.
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	VERIFY(ifp->if_output_dlil != NULL);

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				goto cleanup;
			}
		}
	}

#if CONFIG_MACF_NET
	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}
#endif

	do {
#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp.  This will allow
			 * us to determine that it is a legitimate packet
			 * for the system.  Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context.  If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away the M_PROTO1 bit prior to sending the packet
		 * to the driver, as this field may be used by the driver.
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary.  If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertises TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Count the number of elements in the mbuf chain
		 */
		if (tx_chain_len_count) {
			dlil_count_chain_len(m, &tx_chain_len_stats);
		}

		/*
		 * Record timestamp; ifnet_enqueue() will use this info
		 * rather than redoing the work.  An optimization could
		 * involve doing this just once at the top, if there are
		 * no interface filters attached, but that's probably
		 * not a big deal.
		 */
		nanouptime(&now);
		net_timernsec(&now, &now_nsec);
		(void) mbuf_set_timestamp(m, now_nsec, TRUE);

		/*
		 * Discard partial sum information if this packet originated
		 * from another interface; the packet would already have the
		 * final checksum and we shouldn't recompute it.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
		    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID|CSUM_PARTIAL)) {
			m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
			m->m_pkthdr.csum_data = 0;
		}

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*ifp->if_output_dlil)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
	} while (m != NULL);

	if (send_head != NULL) {
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		if (ifp->if_eflags & IFEF_SENDLIST) {
			retval = (*ifp->if_output_dlil)(ifp, send_head);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp), retval);
			}
		} else {
			struct mbuf *send_m;
			int enq_cnt = 0;
			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
			while (send_head != NULL) {
				send_m = send_head;
				send_head = send_m->m_nextpkt;
				send_m->m_nextpkt = NULL;
				retval = (*ifp->if_output_dlil)(ifp, send_m);
				if (retval == EQFULL || retval == EQSUSPENDED) {
					if (adv != NULL) {
						adv->code = (retval == EQFULL ?
						    FADV_FLOW_CONTROLLED :
						    FADV_SUSPENDED);
					}
					retval = 0;
				}
				if (retval == 0) {
					enq_cnt++;
					if (flen > 0)
						fpkts++;
				}
				if (retval != 0 && dlil_verbose) {
					printf("%s: output error on %s "
					    "retval = %d\n",
					    __func__, if_name(ifp), retval);
				}
			}
			if (enq_cnt > 0) {
				fbytes += flen;
				ifnet_start(ifp);
			}
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist)	/* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);

	return (retval);
}

errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	if (retval == EJUSTRETURN)
		retval = 0;

	ifnet_decr_iorefcnt(ifp);

	return (retval);
}

__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}

errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return (result);
}

__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}

struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else
		return (0);
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else
		return (0);
}

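/*
 * Design note: the push/pop tokens above are not allocations; each one
 * encodes the set of newly-toggled mark bits as a byte offset from
 * net_thread_marks_base, so the matching pop can undo exactly the bits
 * its push changed without any per-call storage.
 */
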
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}

__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only ARP on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP source, don't ARP */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}
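
/*
 * Annotation on the fan-out above: an IPv4 link-local (169.254/16)
 * address is only meaningful on the link where its owner sits, and the
 * routing table may not identify that link, so the resolution request
 * is replicated to every interface marked IFEF_ARPLL.  An announcement
 * asserts ownership of the address and therefore must stay on the one
 * interface it was issued for.
 */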

/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}

/*
 * Caller has to pass a non-zero refio argument to get an I/O reference
 * count.  Holding such a reference prevents ifnet_detach from completing
 * while there are outstanding I/O references on the interface.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED(ifp))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}

/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding I/O reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifp->if_refio--;

	/*
	 * If there are no more outstanding I/O references, wake up the
	 * ifnet_detach thread if the detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
		wakeup(&(ifp->if_refio));

	lck_mtx_unlock(&ifp->if_ref_lock);
}
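
/*
 * Illustrative usage of the I/O refcount API (hypothetical caller, not
 * from this file): take a reference to pin the ifnet across a blocking
 * operation, then drop it so a pending detach can proceed.
 *
 *	if (ifnet_is_attached(ifp, 1)) {
 *		error = do_something_with(ifp);	(detach held off here)
 *		ifnet_decr_iorefcnt(ifp);	(may wake the detacher)
 *	}
 */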

static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}

errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
		break;
	case 1:
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);
	if (need_release) {
		dlil_if_release(ifp);
	}
	return (0);
}

static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocols still
	 * attached (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
	return (retval);
}
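
/*
 * Annotation: insertion is at the tail of the per-bucket collision
 * chain, so protocols of different families that hash to the same
 * bucket are kept in attach order; a duplicate of an already-attached
 * family is rejected earlier via find_attached_proto().
 */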

errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	} else {
		if (dlil_verbose) {
			printf("%s: attached v1 protocol %d (count = %d)\n",
			    if_name(ifp),
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but
		 * that is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return (retval);
}

errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	} else {
		if (dlil_verbose) {
			printf("%s: attached v2 protocol %d (count = %d)\n",
			    if_name(ifp),
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but
		 * that is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return (retval);
}
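
/*
 * Annotation: the v1 and v2 attach paths differ mainly in the shape of
 * the protocol input callback they register; a v2 input callback may be
 * handed an mbuf packet chain with no separate frame-header pointer,
 * whereas v1 receives one packet at a time along with its header.
 */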

errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}
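
/*
 * Annotation: a detached protocol's callbacks are swapped to the
 * ifproto_media_* stubs below rather than cleared.  A caller still
 * holding a proto reference then gets a harmless ENXIO (or a no-op for
 * events) instead of calling through a dangling pointer into a module
 * that may already be gone.
 */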

static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}

extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care about) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	if (ifp->if_output_sched_model ==
	    IFNET_SCHED_MODEL_DRIVER_MANAGED)
		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling.  Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
		ifp->if_inp->input_mit_tcall =
		    thread_call_allocate_with_priority(dlil_mit_tcall_fn,
		    ifp, THREAD_CALL_PRIORITY_KERNEL);
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		if ((err = kernel_thread_start(ifnet_start_thread_fn,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			printf("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
			printf("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;	/* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}
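
/*
 * For orientation, the overall shape of a driver-side attach against
 * the public ifnet KPI (an illustrative sketch; the foo_* callbacks
 * and lladdr_sdl are hypothetical):
 *
 *	struct ifnet_init_params init;
 *	ifnet_t ifp;
 *
 *	bzero(&init, sizeof (init));
 *	init.name = "foo";
 *	init.unit = 0;
 *	init.family = IFNET_FAMILY_ETHERNET;
 *	init.type = IFT_ETHER;
 *	init.output = foo_output;
 *	init.demux = foo_demux;
 *	init.add_proto = foo_add_proto;
 *	init.del_proto = foo_del_proto;
 *
 *	if (ifnet_allocate(&init, &ifp) == 0)
 *		(void) ifnet_attach(ifp, lladdr_sdl);
 */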

/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof (asdl->sdl_data)));
		asdl->sdl_nlen = namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen > 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}
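
/*
 * Sizing example for the code above (illustrative): for "en0" with a
 * 6-byte Ethernet address, namelen is 3 and masklen is
 * offsetof(struct sockaddr_dl, sdl_data[0]) + 3; socksize adds the 6
 * address bytes and is then rounded up to a 4-byte boundary by
 * ROUNDUP().  The mask is namelen bytes of 0xff, i.e. it covers only
 * the interface-name portion of the link-layer sockaddr.
 */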

static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}

errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL)
		return (EINVAL);

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi)
		ndi->cga_initialized = FALSE;

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose) {
		printf("%s: detaching\n", if_name(ifp));
	}

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL)
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL)
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}
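
/*
 * Annotation: detach is deliberately two-phase.  ifnet_detach() above
 * merely unlinks the interface and marks it IFRF_DETACHING; the
 * heavyweight teardown runs later in ifnet_detach_final() on the
 * dedicated detacher thread, once all outstanding I/O references have
 * drained, which avoids reentrancy in the calling context.
 */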

static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}

static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
}
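
/*
 * Annotation on the continuation style above: msleep0() is passed
 * ifnet_detacher_thread_cont as a continuation, so when the detacher
 * blocks its kernel stack can be discarded, and the continuation is
 * re-entered from the top on wakeup.  That is why the sleep sites are
 * annotated NOTREACHED and why a normal return from msleep0() here
 * trips the VERIFY(0).
 */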
b0d623f7 6354
6d2010ae
A
6355static void
6356ifnet_detach_final(struct ifnet *ifp)
6357{
6358 struct ifnet_filter *filter, *filter_next;
6359 struct ifnet_filter_head fhead;
316670eb 6360 struct dlil_threading_info *inp;
6d2010ae
A
6361 struct ifaddr *ifa;
6362 ifnet_detached_func if_free;
6363 int i;
6364
6365 lck_mtx_lock(&ifp->if_ref_lock);
6366 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6367 panic("%s: flags mismatch (detaching not set) ifp=%p",
6368 __func__, ifp);
6369 /* NOTREACHED */
6370 }
6371
316670eb
A
6372 /*
6373 * Wait until the existing IO references get released
6374 * before we proceed with ifnet_detach. This is not a
6375 * common case, so block without using a continuation.
b0d623f7 6376 */
6d2010ae 6377 while (ifp->if_refio > 0) {
39236c6e
A
6378 printf("%s: Waiting for IO references on %s interface "
6379 "to be released\n", __func__, if_name(ifp));
6d2010ae
A
6380 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
6381 (PZERO - 1), "ifnet_ioref_wait", NULL);
6382 }
6383 lck_mtx_unlock(&ifp->if_ref_lock);
6384
fe8ab488
A
6385 /* Drain and destroy send queue */
6386 ifclassq_teardown(ifp);
6387
6d2010ae
A
6388 /* Detach interface filters */
6389 lck_mtx_lock(&ifp->if_flt_lock);
6390 if_flt_monitor_enter(ifp);
b0d623f7 6391
5ba3f43e 6392 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636
A
6393 fhead = ifp->if_flt_head;
6394 TAILQ_INIT(&ifp->if_flt_head);
2d21ac55 6395
6d2010ae
A
6396 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
6397 filter_next = TAILQ_NEXT(filter, filt_next);
6398 lck_mtx_unlock(&ifp->if_flt_lock);
6399
6400 dlil_detach_filter_internal(filter, 1);
6401 lck_mtx_lock(&ifp->if_flt_lock);
6402 }
6403 if_flt_monitor_leave(ifp);
6404 lck_mtx_unlock(&ifp->if_flt_lock);
6405
6406 /* Tell upper layers to drop their network addresses */
6407 if_purgeaddrs(ifp);
6408
6409 ifnet_lock_exclusive(ifp);
6410
6411 /* Uplumb all protocols */
6412 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6413 struct if_proto *proto;
6414
6415 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6416 while (proto != NULL) {
6417 protocol_family_t family = proto->protocol_family;
6418 ifnet_lock_done(ifp);
6419 proto_unplumb(family, ifp);
6420 ifnet_lock_exclusive(ifp);
6421 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6422 }
6423 /* There should not be any protocols left */
6424 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
6425 }
6426 zfree(dlif_phash_zone, ifp->if_proto_hash);
6427 ifp->if_proto_hash = NULL;
6428
6429 /* Detach (permanent) link address from if_addrhead */
6430 ifa = TAILQ_FIRST(&ifp->if_addrhead);
6431 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
6432 IFA_LOCK(ifa);
6433 if_detach_link_ifa(ifp, ifa);
6434 IFA_UNLOCK(ifa);
6435
6436 /* Remove (permanent) link address from ifnet_addrs[] */
6437 IFA_REMREF(ifa);
6438 ifnet_addrs[ifp->if_index - 1] = NULL;
6439
6440 /* This interface should not be on {ifnet_head,detaching} */
6441 VERIFY(ifp->if_link.tqe_next == NULL);
6442 VERIFY(ifp->if_link.tqe_prev == NULL);
6443 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6444 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
39037602
A
6445 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
6446 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6d2010ae
A
6447
6448 /* The slot should have been emptied */
6449 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6450
6451 /* There should not be any addresses left */
6452 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
1c79356b 6453
316670eb
A
6454 /*
6455 * Signal the starter thread to terminate itself.
6456 */
6457 if (ifp->if_start_thread != THREAD_NULL) {
6458 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e 6459 ifp->if_start_flags = 0;
316670eb
A
6460 ifp->if_start_thread = THREAD_NULL;
6461 wakeup_one((caddr_t)&ifp->if_start_thread);
6462 lck_mtx_unlock(&ifp->if_start_lock);
6463 }
6464
6465 /*
6466 * Signal the poller thread to terminate itself.
6467 */
6468 if (ifp->if_poll_thread != THREAD_NULL) {
6469 lck_mtx_lock_spin(&ifp->if_poll_lock);
6470 ifp->if_poll_thread = THREAD_NULL;
6471 wakeup_one((caddr_t)&ifp->if_poll_thread);
6472 lck_mtx_unlock(&ifp->if_poll_lock);
6473 }
6474
2d21ac55
A
6475 /*
6476 * If thread affinity was set for the workloop thread, we will need
6477 * to tear down the affinity and release the extra reference count
316670eb
A
6478 * taken at attach time. Does not apply to lo0 or other interfaces
6479 * without dedicated input threads.
2d21ac55 6480 */
316670eb
A
6481 if ((inp = ifp->if_inp) != NULL) {
6482 VERIFY(inp != dlil_main_input_thread);
6483
6484 if (inp->net_affinity) {
6485 struct thread *tp, *wtp, *ptp;
6486
6487 lck_mtx_lock_spin(&inp->input_lck);
6488 wtp = inp->wloop_thr;
6489 inp->wloop_thr = THREAD_NULL;
6490 ptp = inp->poll_thr;
6491 inp->poll_thr = THREAD_NULL;
6492 tp = inp->input_thr; /* don't nullify now */
6493 inp->tag = 0;
6494 inp->net_affinity = FALSE;
6495 lck_mtx_unlock(&inp->input_lck);
6496
6497 /* Tear down poll thread affinity */
6498 if (ptp != NULL) {
6499 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
6500 (void) dlil_affinity_set(ptp,
6501 THREAD_AFFINITY_TAG_NULL);
6502 thread_deallocate(ptp);
6d2010ae 6503 }
2d21ac55 6504
2d21ac55 6505 /* Tear down workloop thread affinity */
316670eb
A
6506 if (wtp != NULL) {
6507 (void) dlil_affinity_set(wtp,
2d21ac55 6508 THREAD_AFFINITY_TAG_NULL);
316670eb 6509 thread_deallocate(wtp);
2d21ac55 6510 }
			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_done(ifp);

		/* wait for the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
		    == 0) {
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_exclusive(ifp);

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	/* QoS marking gets cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL)
		if_free(ifp);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
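
/*
 * Stub handlers for a detached ifnet.  ifnet_detach_final() above points
 * the interface's dispatch vectors at these so that any straggling
 * callers after the driver has unloaded are absorbed harmlessly: output
 * and demux simply free the packets, input rejects chains with ENXIO,
 * and the remaining control entry points fail with EOPNOTSUPP or EINVAL.
 */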
errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return (ENXIO);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
#else
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}

int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	VERIFY(*ifp == NULL);
	dlil_if_lock();
	/*
	 * We absolutely can't have an interface with the same name
	 * in an in-use state; to make sure of that, the list has to
	 * be traversed completely.
	 */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		/*
		 * If the interface is in use, return EBUSY if either
		 * the unique id or the interface extended name matches
		 * an existing one.
		 */
		lck_mtx_lock(&dlifp1->dl_if_lock);
		if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			}
		}

		if (uniqueid_len) {
			if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
			    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
				if (dlifp1->dl_if_flags & DLIF_INUSE) {
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					ret = EBUSY;
					goto end;
				} else {
					dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
					/* Cache the first interface that can be recycled */
					if (*ifp == NULL)
						*ifp = ifp1;
					/*
					 * XXX Do not break or jump to end as we have to traverse
					 * the whole list to ensure there are no name collisions
					 */
				}
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* If there's an interface that can be recycled, use that */
	if (*ifp != NULL)
		goto end;

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}
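
/*
 * Release an ifnet obtained via dlil_if_acquire(): clear DLIF_INUSE and
 * reset the name storage (the extended name becomes "name?").  The
 * allocation itself stays on dlil_ifnet_head so that a later
 * dlil_if_acquire() with a matching unique id can recycle it.
 */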
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK. Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}

__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}
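
/*
 * Accessors for the per-interface cached source route.  copyout takes
 * a snapshot of the cache under if_cached_route_lock; copyin stores an
 * updated route back, or releases it if route caching has been
 * disabled (if_fwd_cacheok cleared) in the meantime.
 */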
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */
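
/*
 * Cached route lookup: reuse the interface's cached source route when
 * it is still usable and matches the requested source address;
 * otherwise perform a scoped rtalloc1 lookup, store the result back in
 * the cache, and return it with an extra reference held for the caller
 * (route_copyin consumes one).
 */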
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return (src_rt.ro_rt);
}

#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */
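
/*
 * Record a new link-quality metric for the interface.  Raw values are
 * normalized up to the nearest threshold edge; dropping to the abort
 * threshold also schedules the TCP timer to abort connections on this
 * interface.  A KEV_DL_LINK_QUALITY_METRIC_CHANGED event is posted on
 * any change.  'locked' indicates that the caller already holds the
 * ifnet lock exclusively; the lock is dropped around the event post
 * and reacquired on the caller's behalf.
 */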
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}
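
/*
 * Caller must hold the ifnet lock exclusively; the lock is dropped
 * while KEV_DL_RRC_STATE_CHANGED is posted and reacquired before
 * returning.
 */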
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}

errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return (ENOTSUP);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);

	return (0);
}

void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}

errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}

/* for uuid.c */
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
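
/*
 * sysctl handlers for the opportunistic polling and interface queue
 * tunables; each one validates or clamps the proposed value against
 * its legal range before committing it.
 */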
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}
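
/*
 * Node presence/absence notifications: decompose the link-layer or
 * IPv6 address into its sockaddr_dl/sockaddr_in6 parts, update the
 * ND6 state, and post KEV_DL_NODE_PRESENCE or KEV_DL_NODE_ABSENCE.
 */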
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}
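
/*
 * Return the raw link-layer address bytes of an interface.  When MAC
 * enforcement of link-address reads is in effect, Ethernet and
 * FireWire addresses are subject to a "net.link.addr" check; callers
 * that fail it are handed a fixed placeholder address (first byte
 * 0x02, the rest zero) instead of the real bytes.
 */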
const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		}

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL)
		*sizep = size;
	return (bytes);
}

void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}

errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}

int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}

int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}

errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}

int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}

int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}

int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}

errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, TRUE);
	return (0);
}

void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}
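
/*
 * Register the interface in the flow-control tree, keyed by its flow
 * hash, so that a subsequent flow advisory (ifnet_flowadv) can locate
 * the interface and re-enable its output.
 */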
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}

static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
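
/*
 * Compute the interface flow hash over identifying fields plus random
 * salt.  Zero is reserved to mean "no flow hash", hence the
 * reseed-and-retry loop until a non-zero value is produced.
 */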
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}

int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}

int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL)
		*flags = 0;

	return (error);
}

#if INET6
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			/* Allow clearing the prefix */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0)
		necp_update_all_clients();

	return (error);
}

int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL)
		return (EINVAL);

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
			found_one = 1;
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes)
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));

out:
	if_inet6data_lock_done(ifp);

	return (error);
}
#endif
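
/*
 * Hardware checksum debugging support: when enabled via the
 * hwcksum_dbg sysctls, these hooks force software finalization of
 * outbound checksums, and verify (or deliberately re-offset) the
 * partial 16-bit 1's complement sums reported for inbound packets.
 */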
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}

static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardwares that
		 * perform 16-bit 1's complement sum beginning at various
		 * start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}

static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}
8569
5ba3f43e 8570#if DEBUG || DEVELOPMENT
39236c6e
A
8571/* Blob for sum16 verification */
8572static uint8_t sumdata[] = {
8573 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
8574 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
8575 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
8576 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
8577 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
8578 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
8579 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
8580 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
8581 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
8582 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
8583 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
8584 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
8585 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
8586 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
8587 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
8588 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
8589 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
8590 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
8591 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
8592 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
8593 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
8594 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
8595 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
8596 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
8597 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
8598 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
8599 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
8600 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
8601 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
8602 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
8603 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
8604 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
8605 0xc8, 0x28, 0x02, 0x00, 0x00
8606};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t	init;
	uint16_t	len;
	uint16_t	sumr;	/* reference */
	uint16_t	sumrp;	/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
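
/*
 * For reference: sumrp above is the 16-bit one's complement sum of the
 * first len bytes of sumdata, with 16-bit words taken in little-endian
 * byte order (the table was evidently generated on a little-endian
 * host).  A minimal, unoptimized sketch of that computation --
 * illustrative only, not the optimized b_sum16()/m_sum16() this file
 * actually tests:
 *
 *	static uint16_t
 *	sum16_ref(const uint8_t *buf, int len)
 *	{
 *		uint32_t sum = 0;
 *		int i;
 *
 *		for (i = 0; i + 1 < len; i += 2)
 *			sum += (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8);
 *		if (len & 1)			// trailing odd byte
 *			sum += (uint32_t)buf[len - 1];
 *		while (sum > 0xffff)		// fold carries back in
 *			sum = (sum & 0xffff) + (sum >> 16);
 *		return ((uint16_t)sum);
 *	}
 *
 * E.g. len=2 gives 0x8b1f (0x1f | 0x8b << 8), and len=3 adds the odd
 * byte 0x08 for 0x8b27, matching the table entries above.
 */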

static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sumr, sumtbl[n].sumrp);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */

#define	CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}
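
/*
 * Illustrative only -- a consumer holding a struct kern_event_msg read
 * from the kernel event socket might decode DLIL event codes for
 * logging like so (hypothetical snippet, not part of this file):
 *
 *	struct kern_event_msg *ev = ...;
 *
 *	if (ev->kev_class == KEV_NETWORK_CLASS &&
 *	    ev->kev_subclass == KEV_DL_SUBCLASS)
 *		printf("dlil event: %s\n",
 *		    dlil_kev_dl_code_str(ev->event_code));
 *
 * Unrecognized codes yield an empty string rather than NULL, so the
 * result is always safe to hand to a %s conversion.
 */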

static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}

void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
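
/*
 * Design note: the OSCompareAndSwap64() above does double duty.  It
 * re-checks that if_dt_bytes still holds the value sampled into
 * oldbytes and, on success, publishes the new byte count, so of several
 * threads racing past the same threshold crossing exactly one wins the
 * swap and goes on to arm if_dt_tcall; the thread_call_isactive() test
 * then keeps a still-pending call from being re-armed, bounding
 * notifications to roughly one per threshold interval.
 */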

#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl name carries the input parameters of
 * ifnet_get_keepalive_offload_frames():
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The returned length gives used_frames_count.
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root may look at other processes' TCP frames.
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters.
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);
	return (error);
}
#endif /* DEVELOPMENT || DEBUG */
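
/*
 * Illustrative userspace sketch (DEVELOPMENT/DEBUG kernels only).  The
 * OID name below is an assumption about how this node is registered,
 * the caller must be root, and the three trailing name words carry the
 * parameters described in the comment above:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <net/if.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int mib[CTL_MAXNAME];
 *		size_t miblen = CTL_MAXNAME, len;
 *		char buf[8192];		// must hold count * sizeof (frame)
 *
 *		if (sysctlnametomib("net.link.generic.system.get_kao_frames",
 *		    mib, &miblen) == -1)
 *			return (1);
 *		mib[miblen + 0] = (int)if_nametoindex("en0");	// ifp index
 *		mib[miblen + 1] = 8;	// frames_array_count
 *		mib[miblen + 2] = 0;	// frame_data_offset
 *		len = sizeof (buf);
 *		if (sysctl(mib, (u_int)miblen + 3, buf, &len, NULL, 0) == -1) {
 *			perror("get_kao_frames");
 *			return (1);
 *		}
 *		printf("%zu bytes of keepalive offload frames\n", len);
 *		return (0);
 *	}
 */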

void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
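
/*
 * Receive interrupt-mitigation thread call: when the timer fires, wake
 * the interface's DLIL input thread if it is not already running or if
 * packets are waiting in its receive queue.
 */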
static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}