/*
 * Copyright (c) 1999-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <stddef.h>
#include <ptrauth.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#endif /* INET */

#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_netem.h>

#if NECP
#include <net/necp.h>
#endif /* NECP */

#include <os/log.h>

#define DBG_LAYER_BEG           DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END           DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE     4 /* LONGWORDS */
#define MAX_LINKADDR            4 /* LONGWORDS */
#define M_NKE                   M_IFADDR

#if 1
#define DLIL_PRINTF     printf
#else
#define DLIL_PRINTF     kprintf
#endif

#define IF_DATA_REQUIRE_ALIGNED_64(f)   \
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)     \
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))

enum {
	kProtoKPI_v1    = 1,
	kProtoKPI_v2    = 2
};

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)   next_hash;
	u_int32_t               refcount;
	u_int32_t               detached;
	struct ifnet            *ifp;
	protocol_family_t       protocol_family;
	int                     proto_kpi;
	union {
		struct {
			proto_media_input       input;
			proto_media_preout      pre_output;
			proto_media_event       event;
			proto_media_ioctl       ioctl;
			proto_media_detached    detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp    send_arp;
		} v1;
		struct {
			proto_media_input_v2    input;
			proto_media_preout      pre_output;
			proto_media_event       event;
			proto_media_ioctl       ioctl;
			proto_media_detached    detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp    send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);

#define DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet    dl_if;                  /* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnet link */
	u_int32_t dl_if_flags;                  /* flags (below) */
	u_int32_t dl_if_refcnt;                 /* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void    *dl_if_uniqueid;                /* unique interface id */
	size_t  dl_if_uniqueid_len;             /* length of the unique id */
	char    dl_if_namestorage[IFNAMSIZ];    /* interface name storage */
	char    dl_if_xnamestorage[IFXNAMSIZ];  /* external name storage */
	struct {
		struct ifaddr   ifa;                  /* lladdr ifa */
		u_int8_t        asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t        msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	u_int8_t dl_if_permanent_ether[ETHER_ADDR_LEN]; /* permanent address */
	u_int8_t dl_if_permanent_ether_is_set;
	u_int8_t dl_if_unused;
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t        dl_if_attach;           /* attach PC stacktrace */
	ctrace_t        dl_if_detach;           /* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE      0x1     /* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE      0x2     /* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG      0x4     /* has debugging info */

#define IF_REF_TRACE_HIST_SIZE  8 /* size of ref trace history */

/* For gdb */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet       dldbg_dlif;             /* dlil_ifnet */
	u_int16_t               dldbg_if_refhold_cnt;   /* # ifnet references */
	u_int16_t               dldbg_if_refrele_cnt;   /* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t                dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t                dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)  (&s->dl_if)
#define IFP_TO_DLIL(s)  ((struct dlil_ifnet *)s)
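
/*
 * Note: both conversions above rely on dl_if being the first member of
 * struct dlil_ifnet, so a dlil_ifnet and its embedded public ifnet share
 * the same address; IFP_TO_DLIL() is therefore only valid for ifnets that
 * were allocated as part of a struct dlil_ifnet.
 */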

struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)       filt_next;
	u_int32_t                       filt_skip;
	u_int32_t                       filt_flags;
	ifnet_t                         filt_ifp;
	const char                      *filt_name;
	void                            *filt_cookie;
	protocol_family_t               filt_protocol;
	iff_input_func                  filt_input;
	iff_output_func                 filt_output;
	iff_event_func                  filt_event;
	iff_ioctl_func                  filt_ioctl;
	iff_detached_func               filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;    /* debugging (enabled) */
#else
static unsigned int ifnet_debug;        /* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;          /* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;       /* size of dlif_size + headroom */
static struct zone *dlif_zone;          /* zone for dlil_ifnet */
#define DLIF_ZONE_NAME          "ifnet" /* zone name */

static ZONE_DECLARE(dlif_filt_zone, "ifnet_filter",
    sizeof(struct ifnet_filter), ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(dlif_phash_zone, "ifnet_proto_hash",
    sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS, ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(dlif_proto_zone, "ifnet_proto",
    sizeof(struct if_proto), ZC_ZFREE_CLEARMEM);

static unsigned int dlif_tcpstat_size;  /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;  /* zone for tcpstat_local */
#define DLIF_TCPSTAT_ZONE_NAME  "ifnet_tcpstat" /* zone name */

static unsigned int dlif_udpstat_size;  /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;  /* zone for udpstat_local */
#define DLIF_UDPSTAT_ZONE_NAME  "ifnet_udpstat" /* zone name */

static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;

static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if !XNU_TARGET_OS_OSX
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else /* XNU_TARGET_OS_OSX */
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* XNU_TARGET_OS_OSX */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static errno_t dlil_input_async(struct dlil_threading_info *, struct ifnet *,
    struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, struct thread *);
static errno_t dlil_input_sync(struct dlil_threading_info *, struct ifnet *,
    struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, struct thread *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_main_input_thread_cont(void *, wait_result_t);

static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_cont(void *, wait_result_t);

static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);

static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *,
    thread_continue_t *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, struct ifnet *, boolean_t);
static boolean_t dlil_input_stats_sync(struct ifnet *,
    struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void dlil_incr_pending_thread_count(void);
static void dlil_decr_pending_thread_count(void);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static void ifnet_detacher_thread_cont(void *, wait_result_t);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_func(void *, wait_result_t);
static void ifnet_start_thread_cont(void *, wait_result_t);

static void ifnet_poll_thread_func(void *, wait_result_t);
static void ifnet_poll_thread_cont(void *, wait_result_t);

static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
    boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */

/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static boolean_t ifnet_detaching_embryonic;
static void *ifnet_delayed_run; /* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char            ifk_name[IFNAMSIZ];
	uint32_t        ifk_unit;
	uint32_t        ifk_flags;
	uint32_t        ifk_eflags;
	uint32_t        ifk_capabilities;
	uint32_t        ifk_capenable;
	uint32_t        ifk_output_sched_model;
	uint32_t        ifk_rand1;
	uint32_t        ifk_rand2;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t       ifce_flowhash;
	struct ifnet    *ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static ZONE_DECLARE(ifnet_fc_zone, "ifnet_fc_zone",
    sizeof(struct ifnet_fc_entry), ZC_ZFREE_CLEARMEM);

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
#if !XNU_TARGET_OS_OSX
int dlil_lladdr_ckreq = 1;
#else /* XNU_TARGET_OS_OSX */
int dlil_lladdr_ckreq = 0;
#endif /* XNU_TARGET_OS_OSX */
#endif /* CONFIG_MACF */

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };

SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define IF_SNDQ_MINLEN  32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN  32
#define IF_RCVQ_MAXLEN  256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define IF_RXPOLL_DECAY 2       /* ilog2 of EWMA decay rate (4) */
u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN     (10ULL * 1000 * 1000)   /* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
#define IF_RXPOLL_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT        10
static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT        100
static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times delayed start was disabled");

static inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}

#define HWCKSUM_DBG_PARTIAL_FORCED      0x1     /* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ   0x2     /* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED     0x10    /* forced finalize */
#define HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |   \
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;           /* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;         /* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
unsigned int net_async = 1;     /* 0: synchronous, 1: asynchronous */

static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

void
ifnet_filter_update_tso(boolean_t filter_enable)
{
	/*
	 * Update the filter count and the route_generation ID to let
	 * TCP know it should reevaluate whether to do TSO or not.
	 */
	OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
	routegenid_update();
}

#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                    \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}

#define DLIL_EWMA(old, new, decay) do {                                 \
	u_int32_t _avg;                                                 \
	if ((_avg = (old)) > 0)                                         \
	        _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
	else                                                            \
	        _avg = (new);                                           \
	(old) = _avg;                                                   \
} while (0)
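
/*
 * A worked example of the EWMA above (illustrative numbers, not taken
 * from the source): with decay = IF_RXPOLL_DECAY (2), the macro computes
 * avg = (3 * avg + new) / 4 using shifts.  For old = 96 and new = 16:
 *
 *	((96 << 2) - 96 + 16) >> 2 == (384 - 96 + 16) >> 2 == 76
 *
 * i.e. three quarters of the history plus one quarter of the new sample;
 * a zero history is simply seeded with the new sample.
 */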

#define MBPS    (1ULL * 1000 * 1000)
#define GBPS    (MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t       speed;  /* downlink speed */
	u_int32_t       plowat; /* packets low watermark */
	u_int32_t       phiwat; /* packets high watermark */
	u_int32_t       blowat; /* bytes low watermark */
	u_int32_t       bhiwat; /* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
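
/*
 * Reading the table (an interpretation; dlil_rxpoll_set_params() is the
 * authoritative consumer): entries are selected by downlink speed, with
 * the zero-speed row acting as the terminator.  For a 1 Gbps link, for
 * instance, the row above suggests switching the input thread to poll
 * mode once more than 40 packets (or 64 KB) are outstanding, and back to
 * interrupt mode once the backlog drains below 10 packets (or 4 KB).
 */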

decl_lck_mtx_data(static, dlil_thread_sync_lock);
static uint32_t dlil_pending_thread_cnt = 0;

static void
dlil_incr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	dlil_pending_thread_cnt++;
	lck_mtx_unlock(&dlil_thread_sync_lock);
}

static void
dlil_decr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	VERIFY(dlil_pending_thread_cnt > 0);
	dlil_pending_thread_cnt--;
	if (dlil_pending_thread_cnt == 0) {
		wakeup(&dlil_pending_thread_cnt);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
}
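
/*
 * A note on the rendezvous above (editorial sketch, not normative):
 * each DLIL thread being spawned bumps the pending count and drops it
 * once its setup is complete; when the count reaches zero, wakeup()
 * releases whoever is sleeping on &dlil_pending_thread_cnt --
 * presumably an initialization path that must not proceed until all
 * freshly created DLIL threads have checked in.
 */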

int
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return 0;
	case PF_INET6:
		return 1;
	case PF_VLAN:
		return 2;
	case PF_802154:
		return 3;
	case PF_UNSPEC:
	default:
		return 4;
	}
}

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL) {
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
	}

	while (proto != NULL && proto->protocol_family != protocol_family) {
		proto = SLIST_NEXT(proto, next_hash);
	}

	if (proto != NULL) {
		if_proto_ref(proto);
	}

	return proto;
}
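
/*
 * Typical caller pattern (a sketch for illustration; actual call sites
 * vary): the hash lookup happens under the ifnet lock, the reference
 * taken by find_attached_proto() keeps the if_proto alive after the
 * lock is dropped, and if_proto_free() releases it:
 *
 *	ifnet_lock_shared(ifp);
 *	proto = find_attached_proto(ifp, PF_INET);	// takes a ref
 *	ifnet_lock_done(ifp);
 *	if (proto != NULL) {
 *		// ... use proto->kpi.v1 or proto->kpi.v2 callbacks ...
 *		if_proto_free(proto);			// drops the ref
 *	}
 */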

static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) {
		return;
	}

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	ifnet_lock_shared(ifp);

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	/*
	 * The reserved field carries the number of protocols still
	 * attached (subject to change).
	 */
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}

__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass) {
		LCK_RW_ASSERT(&ifp->if_lock, type);
	}
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}
#endif

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}

/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 *
 * Note:
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
	u_int32_t       count = 0;
	int             i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL) {
		goto done;
	}

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return count;
}

__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
{
	ifnet_lock_shared(ifp);
	count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return count;
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	_FREE(list, M_TEMP);
}

__private_extern__ int
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	bzero(&ev_data, sizeof(ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
	ev_msg.kev_class        = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass     = event_subclass;
	ev_msg.event_code       = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof(struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	bool update_generation = true;
	if (event_subclass == KEV_DL_SUBCLASS) {
		/* Don't update interface generation for frequent link quality and state changes */
		switch (event_code) {
		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
		case KEV_DL_RRC_STATE_CHANGED:
		case KEV_DL_NODE_PRESENCE:
		case KEV_DL_NODE_ABSENCE:
		case KEV_DL_MASTER_ELECTED:
			update_generation = false;
			break;
		default:
			break;
		}
	}

	return dlil_event_internal(ifp, &ev_msg, update_generation);
}

__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc_flags(dlif_tcpstat_zone, Z_WAITOK | Z_ZERO);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc_flags(dlif_udpstat_zone, Z_WAITOK | Z_ZERO);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ifp != NULL && ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof(void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return ret;
}
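
/*
 * Illustrative memory layout for the aligned-stats trick above (a
 * sketch; the exact headroom depends on where the zone allocation
 * happens to land):
 *
 *	buf                pbuf       base (64-bit aligned)
 *	 |<--- headroom --->|<- ptr ->|<---- tcpstat_local ---->|
 *	                        ^
 *	                        +-- *pbuf = buf, recovered at zfree() time
 *
 * Adding sizeof(u_int64_t) to buf before rounding up guarantees at
 * least a pointer's worth of headroom below base, which is where the
 * original allocation address is stashed so the error path can free it.
 */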

static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}

static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp,
    thread_continue_t *thfunc)
{
	boolean_t dlil_rxpoll_input;
	thread_continue_t func = NULL;
	u_int32_t limit;
	int error = 0;

	dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
	    (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));

	/* default strategy utilizes the DLIL worker thread */
	inp->dlth_strategy = dlil_input_async;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		/*
		 * Main input thread only.
		 */
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->dlth_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (dlil_rxpoll_input) {
		/*
		 * Legacy (non-netif) hybrid polling.
		 */
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else if (net_async || (ifp->if_xflags & IFXF_LEGACY)) {
		/*
		 * Asynchronous strategy.
		 */
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	} else {
		/*
		 * Synchronous strategy if there's a netif below and
		 * the device isn't capable of hybrid polling.
		 */
		ASSERT(func == NULL);
		ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
		VERIFY(inp != dlil_main_input_thread);
		ASSERT(!inp->dlth_affinity);
		inp->dlth_strategy = dlil_input_sync;
	}
	VERIFY(inp->dlth_thread == THREAD_NULL);

	/* let caller know */
	if (thfunc != NULL) {
		*thfunc = func;
	}

	inp->dlth_lock_grp = lck_grp_alloc_init(inp->dlth_name,
	    dlil_grp_attributes);
	lck_mtx_init(&inp->dlth_lock, inp->dlth_lock_grp, dlil_lck_attributes);

	inp->dlth_ifp = ifp; /* NULL for main input thread */
	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		if (ifp->if_xflags & IFXF_LEGACY) {
			(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
		}
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->dlth_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	if (func == NULL) {
		ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
		ASSERT(error == 0);
		error = ENODEV;
		goto done;
	}

	error = kernel_thread_start(func, inp, &inp->dlth_thread);
	if (error == KERN_SUCCESS) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		bzero(&info, sizeof(info));
		info.importance = 0;
		kret = thread_policy_set(inp->dlth_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->dlth_thread;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof(tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->dlth_affinity_tag = tag;
				inp->dlth_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

done:
	return error;
}
1455
5ba3f43e
A
1456#if TEST_INPUT_THREAD_TERMINATION
1457static int
1458sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
316670eb 1459{
5ba3f43e
A
1460#pragma unused(arg1, arg2)
1461 uint32_t i;
1462 int err;
316670eb 1463
5ba3f43e 1464 i = if_input_thread_termination_spin;
316670eb 1465
5ba3f43e 1466 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
1467 if (err != 0 || req->newptr == USER_ADDR_NULL) {
1468 return err;
1469 }
5ba3f43e 1470
0a7de745
A
1471 if (net_rxpoll == 0) {
1472 return ENXIO;
1473 }
316670eb 1474
5ba3f43e 1475 if_input_thread_termination_spin = i;
0a7de745 1476 return err;
5ba3f43e
A
1477}
1478#endif /* TEST_INPUT_THREAD_TERMINATION */
1479
1480static void
1481dlil_clean_threading_info(struct dlil_threading_info *inp)
1482{
f427ee49
A
1483 lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
1484 lck_grp_free(inp->dlth_lock_grp);
1485 inp->dlth_lock_grp = NULL;
1486
1487 inp->dlth_flags = 0;
1488 inp->dlth_wtot = 0;
1489 bzero(inp->dlth_name, sizeof(inp->dlth_name));
1490 inp->dlth_ifp = NULL;
1491 VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
1492 qlimit(&inp->dlth_pkts) = 0;
1493 bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));
1494
1495 VERIFY(!inp->dlth_affinity);
1496 inp->dlth_thread = THREAD_NULL;
1497 inp->dlth_strategy = NULL;
1498 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
1499 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
1500 VERIFY(inp->dlth_affinity_tag == 0);
316670eb 1501#if IFNET_INPUT_SANITY_CHK
f427ee49 1502 inp->dlth_pkts_cnt = 0;
316670eb 1503#endif /* IFNET_INPUT_SANITY_CHK */
5ba3f43e 1504}
316670eb 1505
5ba3f43e
A
1506static void
1507dlil_terminate_input_thread(struct dlil_threading_info *inp)
1508{
f427ee49 1509 struct ifnet *ifp = inp->dlth_ifp;
cb323159 1510 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
5ba3f43e 1511
f427ee49 1512 VERIFY(current_thread() == inp->dlth_thread);
5ba3f43e
A
1513 VERIFY(inp != dlil_main_input_thread);
1514
1515 OSAddAtomic(-1, &cur_dlil_input_threads);
1516
1517#if TEST_INPUT_THREAD_TERMINATION
1518 { /* do something useless that won't get optimized away */
0a7de745 1519 uint32_t v = 1;
5ba3f43e 1520 for (uint32_t i = 0;
0a7de745
A
1521 i < if_input_thread_termination_spin;
1522 i++) {
5ba3f43e
A
1523 v = (i + 1) * v;
1524 }
cb323159 1525 DLIL_PRINTF("the value is %d\n", v);
316670eb 1526 }
5ba3f43e
A
1527#endif /* TEST_INPUT_THREAD_TERMINATION */
1528
f427ee49
A
1529 lck_mtx_lock_spin(&inp->dlth_lock);
1530 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
1531 VERIFY((inp->dlth_flags & DLIL_INPUT_TERMINATE) != 0);
1532 inp->dlth_flags |= DLIL_INPUT_TERMINATE_COMPLETE;
1533 wakeup_one((caddr_t)&inp->dlth_flags);
1534 lck_mtx_unlock(&inp->dlth_lock);
316670eb 1535
cb323159
A
1536 /* free up pending packets */
1537 if (pkt.cp_mbuf != NULL) {
1538 mbuf_freem_list(pkt.cp_mbuf);
1539 }
1540
316670eb
A
1541 /* for the extra refcnt from kernel_thread_start() */
1542 thread_deallocate(current_thread());
1543
5ba3f43e 1544 if (dlil_verbose) {
cb323159 1545 DLIL_PRINTF("%s: input thread terminated\n",
5ba3f43e
A
1546 if_name(ifp));
1547 }
1548
316670eb
A
1549 /* this is the end */
1550 thread_terminate(current_thread());
1551 /* NOTREACHED */
1552}
1553
2d21ac55
A
1554static kern_return_t
1555dlil_affinity_set(struct thread *tp, u_int32_t tag)
1556{
1557 thread_affinity_policy_data_t policy;
1558
0a7de745 1559 bzero(&policy, sizeof(policy));
2d21ac55 1560 policy.affinity_tag = tag;
0a7de745
A
1561 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1562 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
2d21ac55
A
1563}
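
/*
 * Illustrative sketch (not part of dlil.c): how an affinity tag is
 * meant to be used. Two cooperating threads that share the same
 * randomized tag end up in one affinity set, hinting the scheduler
 * to keep them on the same processor set. The thread pointers below
 * are hypothetical placeholders.
 */
#if 0 /* example only */
static void
example_pair_threads(struct thread *input_tp, struct thread *driver_tp)
{
        u_int32_t tag;

        /* randomize to avoid colliding with other affinity namespaces */
        read_frandom(&tag, sizeof(tag));

        /* both threads join the same set; order does not matter */
        if (dlil_affinity_set(input_tp, tag) == KERN_SUCCESS &&
            dlil_affinity_set(driver_tp, tag) == KERN_SUCCESS) {
                /* the scheduler may now co-locate the two threads */
        }
}
#endif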
1564
91447636
A
1565void
1566dlil_init(void)
1567{
6d2010ae
A
1568 thread_t thread = THREAD_NULL;
1569
1570 /*
1571 * The following fields must be 64-bit aligned for atomic operations.
1572 */
1573 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1574 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1575 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1576 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1577 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1578 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1579 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1580 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1581 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1582 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1583 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1584 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1585 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1586 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1587 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1588
1589 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1590 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1591 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1592 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1593 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1594 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1595 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1596 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1597 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1598 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1599 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1600 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1601 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1602 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1603 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1604
1605 /*
1606 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1607 */
1608 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1609 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1610 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1611 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1612 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
39236c6e
A
1613 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1614 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1615 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1616 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
5ba3f43e 1617 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
6d2010ae
A
1618 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1619 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1620 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1621 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1622
39236c6e
A
1623 /*
1624 * ... as well as the mbuf checksum flags counterparts.
1625 */
1626 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1627 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1628 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1629 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1630 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1631 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1632 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1633 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1634 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
5ba3f43e 1635 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
39236c6e
A
1636 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1637
6d2010ae
A
1638 /*
1639 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1640 */
1641 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
316670eb 1642 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
6d2010ae 1643
39236c6e
A
1644 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1645 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1646 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1647 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1648
1649 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1650 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1651 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1652
1653 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1654 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1655 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1656 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1657 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1658 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1659 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1660 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1661 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1662 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1663 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1664 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1665 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1666 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1667 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1668 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
cb323159
A
1669 _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
1670 _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
1671 _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
39236c6e
A
1672
1673 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1674 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1675 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1676 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1677 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
fe8ab488 1678 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
39037602 1679 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
cb323159
A
1680 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
1681 _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);
39236c6e
A
1682
1683 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1684 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1685
6d2010ae 1686 PE_parse_boot_argn("net_affinity", &net_affinity,
0a7de745 1687 sizeof(net_affinity));
b0d623f7 1688
0a7de745 1689 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
316670eb 1690
0a7de745 1691 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
6d2010ae 1692
f427ee49
A
1693 PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));
1694
0a7de745 1695 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
6d2010ae 1696
cb323159 1697 VERIFY(dlil_pending_thread_cnt == 0);
0a7de745
A
1698 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1699 sizeof(struct dlil_ifnet_dbg);
6d2010ae 1700 /* Enforce 64-bit alignment for dlil_ifnet structure */
0a7de745 1701 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
f427ee49
A
1702 dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
1703 dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);
6d2010ae 1704
0a7de745 1705 dlif_tcpstat_size = sizeof(struct tcpstat_local);
316670eb
A
1706 /* Enforce 64-bit alignment for tcpstat_local structure */
1707 dlif_tcpstat_bufsize =
0a7de745 1708 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
f427ee49 1709 dlif_tcpstat_bufsize = (uint32_t)
0a7de745 1710 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
f427ee49
A
1711 dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
1712 dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);
316670eb 1713
0a7de745 1714 dlif_udpstat_size = sizeof(struct udpstat_local);
316670eb
A
1715 /* Enforce 64-bit alignment for udpstat_local structure */
1716 dlif_udpstat_bufsize =
0a7de745 1717 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
f427ee49 1718 dlif_udpstat_bufsize = (uint32_t)
0a7de745 1719 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
f427ee49
A
1720 dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
1721 dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);
316670eb 1722
5ba3f43e 1723 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
d1ecb069 1724
91447636 1725 TAILQ_INIT(&dlil_ifnet_head);
91447636 1726 TAILQ_INIT(&ifnet_head);
6d2010ae 1727 TAILQ_INIT(&ifnet_detaching_head);
39037602 1728 TAILQ_INIT(&ifnet_ordered_head);
6d2010ae 1729
91447636 1730 /* Setup the lock groups we will use */
2d21ac55 1731 dlil_grp_attributes = lck_grp_attr_alloc_init();
91447636 1732
316670eb 1733 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
6d2010ae
A
1734 dlil_grp_attributes);
1735 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1736 dlil_grp_attributes);
1737 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1738 dlil_grp_attributes);
316670eb
A
1739 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1740 dlil_grp_attributes);
1741 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
6d2010ae
A
1742 dlil_grp_attributes);
1743
91447636 1744 /* Setup the lock attributes we will use */
2d21ac55 1745 dlil_lck_attributes = lck_attr_alloc_init();
6d2010ae 1746
91447636 1747 ifnet_lock_attr = lck_attr_alloc_init();
6d2010ae
A
1748
1749 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1750 dlil_lck_attributes);
1751 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
cb323159 1752 lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);
6d2010ae 1753
39236c6e
A
1754 /* Setup interface flow control related items */
1755 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
316670eb 1756
39236c6e 1757 /* Initialize interface address subsystem */
6d2010ae 1758 ifa_init();
39236c6e
A
1759
1760#if PF
1761 /* Initialize the packet filter */
1762 pfinit();
1763#endif /* PF */
1764
1765 /* Initialize queue algorithms */
1766 classq_init();
1767
1768 /* Initialize packet schedulers */
1769 pktsched_init();
1770
1771 /* Initialize flow advisory subsystem */
1772 flowadv_init();
1773
1774 /* Initialize the pktap virtual interface */
1775 pktap_init();
1776
39037602
A
1777 /* Initialize the service class to dscp map */
1778 net_qos_map_init();
1779
a39ff7e2
A
1780 /* Initialize the interface port list */
1781 if_ports_used_init();
1782
d9a64523
A
1783 /* Initialize the interface low power mode event handler */
1784 if_low_power_evhdlr_init();
1785
5ba3f43e 1786#if DEBUG || DEVELOPMENT
39236c6e
A
1787 /* Run self-tests */
1788 dlil_verify_sum16();
5ba3f43e
A
1789#endif /* DEBUG || DEVELOPMENT */
1790
1791 /* Initialize link layer table */
1792 lltable_glbl_init();
39236c6e 1793
91447636 1794 /*
316670eb
A
1795 * Create and start up the main DLIL input thread and the interface
1796 * detacher threads once everything is initialized.
91447636 1797 */
cb323159 1798 dlil_incr_pending_thread_count();
f427ee49 1799 (void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);
2d21ac55 1800
cb323159
A
1801 /*
1802 * Create ifnet detacher thread.
1803 * When an interface gets detached, part of the detach processing
1804 * is delayed. The interface is added to the delayed detach list
1805 * and this thread is woken up to call ifnet_detach_final
1806 * on these interfaces.
1807 */
1808 dlil_incr_pending_thread_count();
316670eb
A
1809 if (kernel_thread_start(ifnet_detacher_thread_func,
1810 NULL, &thread) != KERN_SUCCESS) {
1811 panic_plain("%s: couldn't create detacher thread", __func__);
6d2010ae
A
1812 /* NOTREACHED */
1813 }
b0d623f7 1814 thread_deallocate(thread);
cb323159
A
1815
1816 /*
1817 * Wait for the dlil kernel threads we created to get
1818 * scheduled and run at least once before we proceed
1819 */
1820 lck_mtx_lock(&dlil_thread_sync_lock);
1821 while (dlil_pending_thread_cnt != 0) {
f427ee49
A
1822 DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
1823 "threads to get scheduled at least once.\n", __func__);
1824 (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
1825 (PZERO - 1), __func__, NULL);
cb323159
A
1826 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
1827 }
1828 lck_mtx_unlock(&dlil_thread_sync_lock);
f427ee49
A
1829 DLIL_PRINTF("%s: All the created dlil kernel threads have been "
1830 "scheduled at least once. Proceeding.\n", __func__);
91447636 1831}
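
/*
 * Illustrative sketch (not part of dlil.c): the startup handshake used
 * above, reduced to its essence. A counter is bumped before each
 * kernel_thread_start(); each new thread decrements it and issues a
 * wakeup once it has actually run, while the spawner msleep()s until
 * the count drains to zero. Names prefixed with "example_" are
 * hypothetical; the lock is assumed to be initialized elsewhere.
 */
#if 0 /* example only */
static uint32_t example_pending_cnt;
static lck_mtx_t example_sync_lock;

static void
example_thread_func(void *arg, wait_result_t w)
{
#pragma unused(arg, w)
        lck_mtx_lock(&example_sync_lock);
        if (--example_pending_cnt == 0) {
                wakeup(&example_pending_cnt);   /* release the spawner */
        }
        lck_mtx_unlock(&example_sync_lock);
        /* ... thread body ... */
}

static void
example_spawn_and_wait(void)
{
        thread_t thread = THREAD_NULL;

        lck_mtx_lock(&example_sync_lock);
        example_pending_cnt++;
        lck_mtx_unlock(&example_sync_lock);

        if (kernel_thread_start(example_thread_func, NULL,
            &thread) != KERN_SUCCESS) {
                panic("couldn't create example thread");
        }
        /* drop the extra ref from kernel_thread_start() */
        thread_deallocate(thread);

        lck_mtx_lock(&example_sync_lock);
        while (example_pending_cnt != 0) {
                (void) msleep(&example_pending_cnt, &example_sync_lock,
                    (PZERO - 1), __func__, NULL);
        }
        lck_mtx_unlock(&example_sync_lock);
}
#endif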
1c79356b 1832
6d2010ae
A
1833static void
1834if_flt_monitor_busy(struct ifnet *ifp)
1835{
5ba3f43e 1836 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1837
1838 ++ifp->if_flt_busy;
1839 VERIFY(ifp->if_flt_busy != 0);
1840}
1841
1842static void
1843if_flt_monitor_unbusy(struct ifnet *ifp)
1844{
1845 if_flt_monitor_leave(ifp);
1846}
1847
1848static void
1849if_flt_monitor_enter(struct ifnet *ifp)
1850{
5ba3f43e 1851 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1852
1853 while (ifp->if_flt_busy) {
1854 ++ifp->if_flt_waiters;
1855 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1856 (PZERO - 1), "if_flt_monitor", NULL);
1857 }
1858 if_flt_monitor_busy(ifp);
1859}
1860
1861static void
1862if_flt_monitor_leave(struct ifnet *ifp)
1863{
5ba3f43e 1864 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1865
1866 VERIFY(ifp->if_flt_busy != 0);
1867 --ifp->if_flt_busy;
1868
1869 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1870 ifp->if_flt_waiters = 0;
1871 wakeup(&ifp->if_flt_head);
1872 }
1873}
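
/*
 * Illustrative sketch (not part of dlil.c): how the monitor above is
 * typically used. A walker marks the filter list busy while it calls
 * out (possibly blocking), and a mutator enters the monitor so the
 * list only changes while no walk is in flight. "example_walk" and
 * "example_mutate" are hypothetical callers.
 */
#if 0 /* example only */
static void
example_walk(struct ifnet *ifp)
{
        lck_mtx_lock(&ifp->if_flt_lock);
        if_flt_monitor_busy(ifp);       /* block mutators */
        lck_mtx_unlock(&ifp->if_flt_lock);

        /* ... walk if_flt_head and invoke filter callbacks ... */

        lck_mtx_lock(&ifp->if_flt_lock);
        if_flt_monitor_unbusy(ifp);     /* wake any waiting mutator */
        lck_mtx_unlock(&ifp->if_flt_lock);
}

static void
example_mutate(struct ifnet *ifp, struct ifnet_filter *filter)
{
        lck_mtx_lock(&ifp->if_flt_lock);
        if_flt_monitor_enter(ifp);      /* wait for walkers to drain */
        TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
        if_flt_monitor_leave(ifp);
        lck_mtx_unlock(&ifp->if_flt_lock);
}
#endif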
1874
2d21ac55 1875__private_extern__ int
0a7de745 1876dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
39236c6e 1877 interface_filter_t *filter_ref, u_int32_t flags)
6d2010ae
A
1878{
1879 int retval = 0;
1880 struct ifnet_filter *filter = NULL;
9bccf70c 1881
6d2010ae
A
1882 ifnet_head_lock_shared();
1883 /* Check that the interface is in the global list */
1884 if (!ifnet_lookup(ifp)) {
1885 retval = ENXIO;
1886 goto done;
1887 }
1888
f427ee49 1889 filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO);
6d2010ae
A
1890 if (filter == NULL) {
1891 retval = ENOMEM;
1892 goto done;
1893 }
6d2010ae
A
1894
1895 /* refcnt held above during lookup */
39236c6e 1896 filter->filt_flags = flags;
91447636
A
1897 filter->filt_ifp = ifp;
1898 filter->filt_cookie = if_filter->iff_cookie;
1899 filter->filt_name = if_filter->iff_name;
1900 filter->filt_protocol = if_filter->iff_protocol;
743345f9
A
1901 /*
1902 * Do not install filter callbacks for the internal coproc interface
1903 */
1904 if (!IFNET_IS_INTCOPROC(ifp)) {
1905 filter->filt_input = if_filter->iff_input;
1906 filter->filt_output = if_filter->iff_output;
1907 filter->filt_event = if_filter->iff_event;
1908 filter->filt_ioctl = if_filter->iff_ioctl;
1909 }
91447636 1910 filter->filt_detached = if_filter->iff_detached;
6d2010ae
A
1911
1912 lck_mtx_lock(&ifp->if_flt_lock);
1913 if_flt_monitor_enter(ifp);
1914
5ba3f43e 1915 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 1916 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
6d2010ae
A
1917
1918 if_flt_monitor_leave(ifp);
1919 lck_mtx_unlock(&ifp->if_flt_lock);
1920
91447636 1921 *filter_ref = filter;
b0d623f7
A
1922
1923 /*
1924 * Bump filter count and route_generation ID to let TCP
1925 * know it shouldn't do TSO on this connection
1926 */
39236c6e 1927 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
cb323159 1928 ifnet_filter_update_tso(TRUE);
39236c6e 1929 }
5ba3f43e
A
1930 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1931 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1932 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1933 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1934 }
6d2010ae 1935 if (dlil_verbose) {
cb323159 1936 DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
39236c6e 1937 if_filter->iff_name);
6d2010ae
A
1938 }
1939done:
1940 ifnet_head_done();
1941 if (retval != 0 && ifp != NULL) {
39236c6e
A
1942 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1943 if_name(ifp), if_filter->iff_name, retval);
6d2010ae 1944 }
0a7de745 1945 if (retval != 0 && filter != NULL) {
6d2010ae 1946 zfree(dlif_filt_zone, filter);
0a7de745 1947 }
6d2010ae 1948
0a7de745 1949 return retval;
1c79356b
A
1950}
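
/*
 * Illustrative sketch (not part of dlil.c): what a caller-side filter
 * registration looks like. In practice this function is reached via
 * the iflt_attach() family of KPIs; the callbacks below are
 * hypothetical no-ops and the filter name is made up.
 */
#if 0 /* example only */
static errno_t
example_iff_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, proto, data, frame_ptr)
        return 0;       /* 0 lets the packet continue up the stack */
}

static void
example_iff_detached(void *cookie, ifnet_t ifp)
{
#pragma unused(cookie, ifp)
        /* last callback issued; safe to free the cookie here */
}

static errno_t
example_attach(struct ifnet *ifp, interface_filter_t *ref)
{
        struct iff_filter filt = {
                .iff_cookie   = NULL,
                .iff_name     = "com.example.filter",
                .iff_protocol = 0,              /* all protocols */
                .iff_input    = example_iff_input,
                .iff_detached = example_iff_detached,
        };
        /* DLIL_IFF_TSO: don't force TSO off for this filter */
        return dlil_attach_filter(ifp, &filt, ref, DLIL_IFF_TSO);
}
#endif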
1951
91447636 1952static int
0a7de745 1953dlil_detach_filter_internal(interface_filter_t filter, int detached)
1c79356b 1954{
91447636 1955 int retval = 0;
6d2010ae 1956
3a60a9f5 1957 if (detached == 0) {
6d2010ae
A
1958 ifnet_t ifp = NULL;
1959
3a60a9f5
A
1960 ifnet_head_lock_shared();
1961 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6d2010ae
A
1962 interface_filter_t entry = NULL;
1963
1964 lck_mtx_lock(&ifp->if_flt_lock);
3a60a9f5 1965 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
0a7de745 1966 if (entry != filter || entry->filt_skip) {
6d2010ae 1967 continue;
0a7de745 1968 }
6d2010ae
A
1969 /*
1970 * We've found a match; since it's possible
1971 * that the thread gets blocked in the monitor,
1972 * we do the lock dance. The interface should
1973 * not be detached, since we still hold a use
1974 * count taken during filter attach.
1975 */
0a7de745 1976 entry->filt_skip = 1; /* skip input/output */
6d2010ae
A
1977 lck_mtx_unlock(&ifp->if_flt_lock);
1978 ifnet_head_done();
1979
1980 lck_mtx_lock(&ifp->if_flt_lock);
1981 if_flt_monitor_enter(ifp);
5ba3f43e 1982 LCK_MTX_ASSERT(&ifp->if_flt_lock,
6d2010ae
A
1983 LCK_MTX_ASSERT_OWNED);
1984
1985 /* Remove the filter from the list */
1986 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1987 filt_next);
1988
1989 if_flt_monitor_leave(ifp);
1990 lck_mtx_unlock(&ifp->if_flt_lock);
1991 if (dlil_verbose) {
cb323159 1992 DLIL_PRINTF("%s: %s filter detached\n",
39236c6e 1993 if_name(ifp), filter->filt_name);
6d2010ae
A
1994 }
1995 goto destroy;
3a60a9f5 1996 }
6d2010ae 1997 lck_mtx_unlock(&ifp->if_flt_lock);
3a60a9f5
A
1998 }
1999 ifnet_head_done();
6d2010ae
A
2000
2001 /* filter parameter is not a valid filter ref */
2002 retval = EINVAL;
2003 goto done;
3a60a9f5 2004 }
6d2010ae 2005
0a7de745 2006 if (dlil_verbose) {
cb323159 2007 DLIL_PRINTF("%s filter detached\n", filter->filt_name);
0a7de745 2008 }
6d2010ae
A
2009
2010destroy:
2011
2012 /* Call the detached function if there is one */
0a7de745 2013 if (filter->filt_detached) {
91447636 2014 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
0a7de745 2015 }
9bccf70c 2016
b0d623f7
A
2017 /*
2018 * Decrease filter count and route_generation ID to let TCP
2019 * know it should reevaluate whether to do TSO or not
2020 */
39236c6e 2021 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
cb323159 2022 ifnet_filter_update_tso(FALSE);
39236c6e 2023 }
39037602 2024
5ba3f43e
A
2025 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
2026
39037602
A
2027 /* Free the filter */
2028 zfree(dlif_filt_zone, filter);
2029 filter = NULL;
6d2010ae 2030done:
39037602 2031 if (retval != 0 && filter != NULL) {
6d2010ae
A
2032 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
2033 filter->filt_name, retval);
2034 }
39037602 2035
0a7de745 2036 return retval;
1c79356b
A
2037}
2038
2d21ac55 2039__private_extern__ void
91447636
A
2040dlil_detach_filter(interface_filter_t filter)
2041{
0a7de745 2042 if (filter == NULL) {
3a60a9f5 2043 return;
0a7de745 2044 }
91447636
A
2045 dlil_detach_filter_internal(filter, 0);
2046}
1c79356b 2047
f427ee49
A
2048__private_extern__ boolean_t
2049dlil_has_ip_filter(void)
2050{
2051 boolean_t has_filter = (net_api_stats.nas_ipf_add_count > 0);
2052 DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
2053 return has_filter;
2054}
2055
2056__private_extern__ boolean_t
2057dlil_has_if_filter(struct ifnet *ifp)
2058{
2059 boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
2060 DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
2061 return has_filter;
2062}
2063
2064static inline void
2065dlil_input_wakeup(struct dlil_threading_info *inp)
2066{
2067 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2068
2069 inp->dlth_flags |= DLIL_INPUT_WAITING;
2070 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
2071 inp->dlth_wtot++;
2072 wakeup_one((caddr_t)&inp->dlth_flags);
2073 }
2074}
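
/*
 * Illustrative sketch (not part of dlil.c): the wait side that pairs
 * with the wakeup above. A consumer assert_wait()s on the same event
 * (the address of dlth_flags) and then blocks; dlil_input_wakeup()
 * only issues wakeup_one() when no run is already in progress, so
 * redundant wakeups are not queued. Reduced model with a made-up name:
 */
#if 0 /* example only */
static void
example_consumer_wait(struct dlil_threading_info *inp)
{
        lck_mtx_lock_spin(&inp->dlth_lock);
        /* arm the wait while still holding the lock... */
        (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
        lck_mtx_unlock(&inp->dlth_lock);
        /* ...then block; a concurrent wakeup_one() is not lost */
        (void) thread_block(THREAD_CONTINUE_NULL);
}
#endif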
2075
cb323159
A
2076__attribute__((noreturn))
2077static void
2078dlil_main_input_thread_func(void *v, wait_result_t w)
2079{
2080#pragma unused(w)
2081 struct dlil_threading_info *inp = v;
2082
2083 VERIFY(inp == dlil_main_input_thread);
f427ee49
A
2084 VERIFY(inp->dlth_ifp == NULL);
2085 VERIFY(current_thread() == inp->dlth_thread);
2086
2087 lck_mtx_lock(&inp->dlth_lock);
2088 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2089 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2090 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2091 /* wake up once to get out of embryonic state */
2092 dlil_input_wakeup(inp);
2093 lck_mtx_unlock(&inp->dlth_lock);
cb323159
A
2094 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2095 /* NOTREACHED */
2096 __builtin_unreachable();
2097}
2098
316670eb
A
2099/*
2100 * Main input thread:
2101 *
2102 * a) handles all inbound packets for lo0
2103 * b) handles all inbound packets for interfaces with no dedicated
2104 * input thread (e.g. anything but Ethernet/PDP, or those that
2105 * support opportunistic polling)
2106 * c) protocol registrations
2107 * d) packet injections
2108 */
39037602 2109__attribute__((noreturn))
91447636 2110static void
cb323159 2111dlil_main_input_thread_cont(void *v, wait_result_t wres)
91447636 2112{
316670eb
A
2113 struct dlil_main_threading_info *inpm = v;
2114 struct dlil_threading_info *inp = v;
2115
cb323159
A
2116 /* main input thread is uninterruptible */
2117 VERIFY(wres != THREAD_INTERRUPTED);
f427ee49
A
2118 lck_mtx_lock_spin(&inp->dlth_lock);
2119 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE |
cb323159 2120 DLIL_INPUT_RUNNING)));
f427ee49 2121 inp->dlth_flags |= DLIL_INPUT_RUNNING;
316670eb 2122
91447636 2123 while (1) {
2d21ac55 2124 struct mbuf *m = NULL, *m_loop = NULL;
316670eb 2125 u_int32_t m_cnt, m_cnt_loop;
cb323159 2126 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
316670eb 2127 boolean_t proto_req;
f427ee49 2128 boolean_t embryonic;
6d2010ae 2129
f427ee49 2130 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2d21ac55 2131
f427ee49
A
2132 if (__improbable(embryonic =
2133 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2134 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2135 }
2136
2137 proto_req = (inp->dlth_flags &
316670eb 2138 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
6d2010ae 2139
316670eb 2140 /* Packets for non-dedicated interfaces other than lo0 */
f427ee49
A
2141 m_cnt = qlen(&inp->dlth_pkts);
2142 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
cb323159 2143 m = pkt.cp_mbuf;
6d2010ae 2144
39236c6e 2145 /* Packets exclusive to lo0 */
316670eb 2146 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
cb323159
A
2147 _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
2148 m_loop = pkt.cp_mbuf;
6d2010ae 2149
f427ee49 2150 inp->dlth_wtot = 0;
6d2010ae 2151
f427ee49
A
2152 lck_mtx_unlock(&inp->dlth_lock);
2153
2154 if (__improbable(embryonic)) {
2155 dlil_decr_pending_thread_count();
2156 }
6d2010ae 2157
316670eb 2158 /*
39037602
A
2159 * NOTE warning %%% attention !!!!
2160 * We should think about putting thread-starvation
2161 * safeguards in place when dealing with long chains of packets.
2162 */
f427ee49 2163 if (__probable(m_loop != NULL)) {
316670eb 2164 dlil_input_packet_list_extended(lo_ifp, m_loop,
cb323159 2165 m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
0a7de745 2166 }
6d2010ae 2167
f427ee49 2168 if (__probable(m != NULL)) {
316670eb 2169 dlil_input_packet_list_extended(NULL, m,
cb323159 2170 m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
0a7de745 2171 }
316670eb 2172
f427ee49 2173 if (__improbable(proto_req)) {
316670eb 2174 proto_input_run();
0a7de745 2175 }
cb323159 2176
f427ee49
A
2177 lck_mtx_lock_spin(&inp->dlth_lock);
2178 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
cb323159 2179 /* main input thread cannot be terminated */
f427ee49
A
2180 VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE));
2181 if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) {
cb323159
A
2182 break;
2183 }
316670eb
A
2184 }
2185
f427ee49
A
2186 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2187 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2188 lck_mtx_unlock(&inp->dlth_lock);
cb323159
A
2189 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2190
0a7de745 2191 VERIFY(0); /* we should never get here */
cb323159
A
2192 /* NOTREACHED */
2193 __builtin_unreachable();
316670eb
A
2194}
2195
2196/*
2197 * Input thread for interfaces with legacy input model.
2198 */
cb323159 2199__attribute__((noreturn))
316670eb
A
2200static void
2201dlil_input_thread_func(void *v, wait_result_t w)
2202{
2203#pragma unused(w)
39037602 2204 char thread_name[MAXTHREADNAMESIZE];
316670eb 2205 struct dlil_threading_info *inp = v;
f427ee49 2206 struct ifnet *ifp = inp->dlth_ifp;
316670eb 2207
cb323159
A
2208 VERIFY(inp != dlil_main_input_thread);
2209 VERIFY(ifp != NULL);
2210 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
2211 !(ifp->if_xflags & IFXF_LEGACY));
2212 VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
2213 !(ifp->if_xflags & IFXF_LEGACY));
f427ee49 2214 VERIFY(current_thread() == inp->dlth_thread);
cb323159
A
2215
2216 /* construct the name for this thread, and then apply it */
39037602 2217 bzero(thread_name, sizeof(thread_name));
cb323159
A
2218 (void) snprintf(thread_name, sizeof(thread_name),
2219 "dlil_input_%s", ifp->if_xname);
f427ee49
A
2220 thread_set_thread_name(inp->dlth_thread, thread_name);
2221
2222 lck_mtx_lock(&inp->dlth_lock);
2223 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2224 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2225 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2226 /* wake up once to get out of embryonic state */
2227 dlil_input_wakeup(inp);
2228 lck_mtx_unlock(&inp->dlth_lock);
cb323159
A
2229 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2230 /* NOTREACHED */
2231 __builtin_unreachable();
2232}
2233
2234__attribute__((noreturn))
2235static void
2236dlil_input_thread_cont(void *v, wait_result_t wres)
2237{
2238 struct dlil_threading_info *inp = v;
f427ee49 2239 struct ifnet *ifp = inp->dlth_ifp;
cb323159 2240
f427ee49 2241 lck_mtx_lock_spin(&inp->dlth_lock);
cb323159 2242 if (__improbable(wres == THREAD_INTERRUPTED ||
f427ee49 2243 (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
cb323159
A
2244 goto terminate;
2245 }
2246
f427ee49
A
2247 VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2248 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2d21ac55 2249
316670eb
A
2250 while (1) {
2251 struct mbuf *m = NULL;
cb323159
A
2252 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2253 boolean_t notify = FALSE;
f427ee49 2254 boolean_t embryonic;
316670eb
A
2255 u_int32_t m_cnt;
2256
f427ee49
A
2257 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2258
2259 if (__improbable(embryonic =
2260 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2261 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2262 }
6d2010ae 2263
316670eb
A
2264 /*
2265 * Protocol registration and injection must always use
2266 * the main input thread; in theory the latter can utilize
2267 * the corresponding input thread where the packet arrived
2268 * on, but that requires our knowing the interface in advance
2269 * (and the benefits might not be worth the trouble.)
2270 */
f427ee49 2271 VERIFY(!(inp->dlth_flags &
0a7de745 2272 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
6d2010ae 2273
316670eb 2274 /* Packets for this interface */
f427ee49
A
2275 m_cnt = qlen(&inp->dlth_pkts);
2276 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
cb323159 2277 m = pkt.cp_mbuf;
2d21ac55 2278
f427ee49 2279 inp->dlth_wtot = 0;
316670eb 2280
cb323159 2281 notify = dlil_input_stats_sync(ifp, inp);
316670eb 2282
f427ee49 2283 lck_mtx_unlock(&inp->dlth_lock);
2d21ac55 2284
f427ee49
A
2285 if (__improbable(embryonic)) {
2286 ifnet_decr_pending_thread_count(ifp);
2287 }
2288
2289 if (__improbable(notify)) {
cb323159
A
2290 ifnet_notify_data_threshold(ifp);
2291 }
2292
91447636 2293 /*
39037602
A
2294 * NOTE warning %%% attention !!!!
2295 * We should think about putting thread-starvation
2296 * safeguards in place when dealing with long chains of packets.
2297 */
f427ee49 2298 if (__probable(m != NULL)) {
316670eb 2299 dlil_input_packet_list_extended(NULL, m,
cb323159
A
2300 m_cnt, ifp->if_poll_mode);
2301 }
2302
f427ee49
A
2303 lck_mtx_lock_spin(&inp->dlth_lock);
2304 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2305 if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
ea3f0419 2306 DLIL_INPUT_TERMINATE))) {
cb323159 2307 break;
0a7de745 2308 }
2d21ac55 2309 }
316670eb 2310
f427ee49 2311 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
cb323159 2312
f427ee49 2313 if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
cb323159 2314terminate:
f427ee49 2315 lck_mtx_unlock(&inp->dlth_lock);
cb323159
A
2316 dlil_terminate_input_thread(inp);
2317 /* NOTREACHED */
2318 } else {
f427ee49
A
2319 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2320 lck_mtx_unlock(&inp->dlth_lock);
cb323159
A
2321 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2322 /* NOTREACHED */
2323 }
2324
0a7de745 2325 VERIFY(0); /* we should never get here */
cb323159
A
2326 /* NOTREACHED */
2327 __builtin_unreachable();
2d21ac55
A
2328}
2329
316670eb
A
2330/*
2331 * Input thread for interfaces with opportunistic polling input model.
2332 */
cb323159 2333__attribute__((noreturn))
316670eb
A
2334static void
2335dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2d21ac55 2336{
316670eb 2337#pragma unused(w)
cb323159 2338 char thread_name[MAXTHREADNAMESIZE];
316670eb 2339 struct dlil_threading_info *inp = v;
f427ee49 2340 struct ifnet *ifp = inp->dlth_ifp;
2d21ac55 2341
316670eb 2342 VERIFY(inp != dlil_main_input_thread);
cb323159
A
2343 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
2344 (ifp->if_xflags & IFXF_LEGACY));
f427ee49 2345 VERIFY(current_thread() == inp->dlth_thread);
cb323159
A
2346
2347 /* construct the name for this thread, and then apply it */
2348 bzero(thread_name, sizeof(thread_name));
2349 (void) snprintf(thread_name, sizeof(thread_name),
2350 "dlil_input_poll_%s", ifp->if_xname);
f427ee49
A
2351 thread_set_thread_name(inp->dlth_thread, thread_name);
2352
2353 lck_mtx_lock(&inp->dlth_lock);
2354 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2355 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2356 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2357 /* wake up once to get out of embryonic state */
2358 dlil_input_wakeup(inp);
2359 lck_mtx_unlock(&inp->dlth_lock);
cb323159
A
2360 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
2361 /* NOTREACHED */
2362 __builtin_unreachable();
2363}
2364
2365__attribute__((noreturn))
2366static void
2367dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
2368{
2369 struct dlil_threading_info *inp = v;
f427ee49 2370 struct ifnet *ifp = inp->dlth_ifp;
cb323159
A
2371 struct timespec ts;
2372
f427ee49 2373 lck_mtx_lock_spin(&inp->dlth_lock);
cb323159 2374 if (__improbable(wres == THREAD_INTERRUPTED ||
f427ee49 2375 (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
cb323159
A
2376 goto terminate;
2377 }
2378
f427ee49
A
2379 VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2380 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2d21ac55 2381
2d21ac55 2382 while (1) {
316670eb 2383 struct mbuf *m = NULL;
f427ee49
A
2384 uint32_t m_cnt, poll_req = 0;
2385 uint64_t m_size = 0;
316670eb
A
2386 ifnet_model_t mode;
2387 struct timespec now, delta;
cb323159
A
2388 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2389 boolean_t notify;
f427ee49
A
2390 boolean_t embryonic;
2391 uint64_t ival;
6d2010ae 2392
f427ee49
A
2393 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2394
2395 if (__improbable(embryonic =
2396 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2397 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2398 goto skip;
2399 }
6d2010ae 2400
cb323159 2401 if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 2402 ival = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 2403 }
39236c6e 2404
316670eb
A
2405 /* Link parameters changed? */
2406 if (ifp->if_poll_update != 0) {
2407 ifp->if_poll_update = 0;
39236c6e 2408 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
91447636 2409 }
1c79356b 2410
316670eb 2411 /* Current operating mode */
cb323159 2412 mode = ifp->if_poll_mode;
2d21ac55
A
2413
2414 /*
316670eb
A
2415 * Protocol registration and injection must always use
2416 * the main input thread; in theory the latter can utilize
2417 * the corresponding input thread where the packet arrived
2418 * on, but that requires our knowing the interface in advance
2419 * (and the benefits might not be worth the trouble.)
2d21ac55 2420 */
f427ee49 2421 VERIFY(!(inp->dlth_flags &
0a7de745 2422 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2d21ac55 2423
316670eb 2424 /* Total count of all packets */
f427ee49 2425 m_cnt = qlen(&inp->dlth_pkts);
316670eb
A
2426
2427 /* Total bytes of all packets */
f427ee49 2428 m_size = qsize(&inp->dlth_pkts);
316670eb
A
2429
2430 /* Packets for this interface */
f427ee49 2431 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
cb323159 2432 m = pkt.cp_mbuf;
316670eb
A
2433 VERIFY(m != NULL || m_cnt == 0);
2434
2435 nanouptime(&now);
cb323159
A
2436 if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
2437 *(&ifp->if_poll_sample_lasttime) = *(&now);
0a7de745 2438 }
316670eb 2439
cb323159
A
2440 net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
2441 if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
316670eb
A
2442 u_int32_t ptot, btot;
2443
2444 /* Accumulate statistics for current sampling */
cb323159 2445 PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
316670eb 2446
cb323159 2447 if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
316670eb 2448 goto skip;
0a7de745 2449 }
316670eb 2450
cb323159 2451 *(&ifp->if_poll_sample_lasttime) = *(&now);
316670eb
A
2452
2453 /* Calculate min/max of inbound bytes */
cb323159
A
2454 btot = (u_int32_t)ifp->if_poll_sstats.bytes;
2455 if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
2456 ifp->if_rxpoll_bmin = btot;
0a7de745 2457 }
cb323159
A
2458 if (btot > ifp->if_rxpoll_bmax) {
2459 ifp->if_rxpoll_bmax = btot;
0a7de745 2460 }
316670eb
A
2461
2462 /* Calculate EWMA of inbound bytes */
cb323159 2463 DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
316670eb
A
2464
2465 /* Calculate min/max of inbound packets */
cb323159
A
2466 ptot = (u_int32_t)ifp->if_poll_sstats.packets;
2467 if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
2468 ifp->if_rxpoll_pmin = ptot;
0a7de745 2469 }
cb323159
A
2470 if (ptot > ifp->if_rxpoll_pmax) {
2471 ifp->if_rxpoll_pmax = ptot;
0a7de745 2472 }
316670eb
A
2473
2474 /* Calculate EWMA of inbound packets */
cb323159 2475 DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
316670eb
A
2476
2477 /* Reset sampling statistics */
cb323159 2478 PKTCNTR_CLEAR(&ifp->if_poll_sstats);
316670eb
A
2479
2480 /* Calculate EWMA of wakeup requests */
f427ee49
A
2481 DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot,
2482 if_rxpoll_decay);
2483 inp->dlth_wtot = 0;
316670eb
A
2484
2485 if (dlil_verbose) {
cb323159
A
2486 if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
2487 *(&ifp->if_poll_dbg_lasttime) = *(&now);
0a7de745 2488 }
cb323159 2489 net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
316670eb 2490 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
cb323159
A
2491 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2492 DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
316670eb
A
2493 "limits [%d/%d], wreq avg %d "
2494 "limits [%d/%d], bytes avg %d "
39236c6e 2495 "limits [%d/%d]\n", if_name(ifp),
cb323159 2496 (ifp->if_poll_mode ==
316670eb 2497 IFNET_MODEL_INPUT_POLL_ON) ?
cb323159
A
2498 "ON" : "OFF", ifp->if_rxpoll_pavg,
2499 ifp->if_rxpoll_pmax,
2500 ifp->if_rxpoll_plowat,
2501 ifp->if_rxpoll_phiwat,
2502 ifp->if_rxpoll_wavg,
2503 ifp->if_rxpoll_wlowat,
2504 ifp->if_rxpoll_whiwat,
2505 ifp->if_rxpoll_bavg,
2506 ifp->if_rxpoll_blowat,
2507 ifp->if_rxpoll_bhiwat);
316670eb
A
2508 }
2509 }
2d21ac55 2510
316670eb 2511 /* Perform mode transition, if necessary */
cb323159
A
2512 if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
2513 *(&ifp->if_poll_mode_lasttime) = *(&now);
0a7de745 2514 }
316670eb 2515
cb323159
A
2516 net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
2517 if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
316670eb 2518 goto skip;
0a7de745 2519 }
316670eb 2520
cb323159
A
2521 if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
2522 ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
2523 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
316670eb 2524 mode = IFNET_MODEL_INPUT_POLL_OFF;
cb323159
A
2525 } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
2526 (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
2527 ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
2528 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
316670eb
A
2529 mode = IFNET_MODEL_INPUT_POLL_ON;
2530 }
6d2010ae 2531
cb323159
A
2532 if (mode != ifp->if_poll_mode) {
2533 ifp->if_poll_mode = mode;
2534 *(&ifp->if_poll_mode_lasttime) = *(&now);
316670eb
A
2535 poll_req++;
2536 }
2537 }
2538skip:
cb323159 2539 notify = dlil_input_stats_sync(ifp, inp);
6d2010ae 2540
f427ee49
A
2541 lck_mtx_unlock(&inp->dlth_lock);
2542
2543 if (__improbable(embryonic)) {
2544 ifnet_decr_pending_thread_count(ifp);
2545 }
6d2010ae 2546
f427ee49 2547 if (__improbable(notify)) {
cb323159
A
2548 ifnet_notify_data_threshold(ifp);
2549 }
2550
316670eb
A
2551 /*
2552 * If there's a mode change and the interface is still attached,
2553 * perform a downcall to the driver for the new mode. Also
2554 * hold an IO refcnt on the interface to prevent it from
2555 * being detached (it will be released below.)
2556 */
2557 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
cb323159
A
2558 struct ifnet_model_params p = {
2559 .model = mode, .reserved = { 0 }
2560 };
316670eb
A
2561 errno_t err;
2562
2563 if (dlil_verbose) {
cb323159 2564 DLIL_PRINTF("%s: polling is now %s, "
316670eb
A
2565 "pkts avg %d max %d limits [%d/%d], "
2566 "wreq avg %d limits [%d/%d], "
2567 "bytes avg %d limits [%d/%d]\n",
39236c6e 2568 if_name(ifp),
316670eb 2569 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
cb323159
A
2570 "ON" : "OFF", ifp->if_rxpoll_pavg,
2571 ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
2572 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
2573 ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
2574 ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
2575 ifp->if_rxpoll_bhiwat);
316670eb 2576 }
2d21ac55 2577
316670eb 2578 if ((err = ((*ifp->if_input_ctl)(ifp,
0a7de745 2579 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
cb323159 2580 DLIL_PRINTF("%s: error setting polling mode "
39236c6e 2581 "to %s (%d)\n", if_name(ifp),
316670eb
A
2582 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2583 "ON" : "OFF", err);
2584 }
1c79356b 2585
316670eb
A
2586 switch (mode) {
2587 case IFNET_MODEL_INPUT_POLL_OFF:
2588 ifnet_set_poll_cycle(ifp, NULL);
cb323159 2589 ifp->if_rxpoll_offreq++;
0a7de745 2590 if (err != 0) {
cb323159 2591 ifp->if_rxpoll_offerr++;
0a7de745 2592 }
316670eb 2593 break;
2d21ac55 2594
316670eb 2595 case IFNET_MODEL_INPUT_POLL_ON:
39236c6e 2596 net_nsectimer(&ival, &ts);
316670eb
A
2597 ifnet_set_poll_cycle(ifp, &ts);
2598 ifnet_poll(ifp);
cb323159 2599 ifp->if_rxpoll_onreq++;
0a7de745 2600 if (err != 0) {
cb323159 2601 ifp->if_rxpoll_onerr++;
0a7de745 2602 }
316670eb
A
2603 break;
2604
2605 default:
2606 VERIFY(0);
2607 /* NOTREACHED */
2608 }
2609
2610 /* Release the IO refcnt */
2611 ifnet_decr_iorefcnt(ifp);
2612 }
2613
2614 /*
39037602
A
2615 * NOTE warning %%% attention !!!!
2616 * We should think about putting thread-starvation
2617 * safeguards in place when dealing with long chains of packets.
2618 */
f427ee49 2619 if (__probable(m != NULL)) {
316670eb 2620 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
0a7de745 2621 }
cb323159 2622
f427ee49
A
2623 lck_mtx_lock_spin(&inp->dlth_lock);
2624 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2625 if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
ea3f0419 2626 DLIL_INPUT_TERMINATE))) {
cb323159
A
2627 break;
2628 }
2629 }
2630
f427ee49 2631 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
cb323159 2632
f427ee49 2633 if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
cb323159 2634terminate:
f427ee49 2635 lck_mtx_unlock(&inp->dlth_lock);
cb323159
A
2636 dlil_terminate_input_thread(inp);
2637 /* NOTREACHED */
2638 } else {
f427ee49
A
2639 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2640 lck_mtx_unlock(&inp->dlth_lock);
cb323159
A
2641 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
2642 inp);
2643 /* NOTREACHED */
316670eb
A
2644 }
2645
0a7de745 2646 VERIFY(0); /* we should never get here */
cb323159
A
2647 /* NOTREACHED */
2648 __builtin_unreachable();
316670eb
A
2649}
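
/*
 * Illustrative sketch (not part of dlil.c): a standalone model of the
 * exponentially-weighted moving averages kept by the poller above.
 * Assuming a power-of-two decay 'd', each sample moves the average by
 * (sample - avg) / 2^d; e.g. avg = 5, sample = 21, d = 2 yields
 * 5 + (21 - 5)/4 = 9. This models the behavior of DLIL_EWMA; the real
 * macro lives elsewhere in this file and may differ in rounding.
 */
#if 0 /* example only */
static uint32_t
example_ewma(uint32_t avg, uint32_t sample, uint32_t decay)
{
        if (avg == 0) {
                return sample;  /* first sample primes the average */
        }
        /* avg + (sample - avg) / 2^decay, in unsigned-safe form */
        return (uint32_t)((((uint64_t)avg << decay) - avg + sample) >> decay);
}
#endif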
2650
39236c6e 2651errno_t
cb323159 2652dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
316670eb 2653{
39236c6e
A
2654 if (p != NULL) {
2655 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
0a7de745
A
2656 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2657 return EINVAL;
2658 }
2659 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2660 p->packets_lowat >= p->packets_hiwat) {
2661 return EINVAL;
2662 }
39236c6e 2663 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
0a7de745
A
2664 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2665 return EINVAL;
2666 }
2667 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2668 p->bytes_lowat >= p->bytes_hiwat) {
2669 return EINVAL;
2670 }
39236c6e 2671 if (p->interval_time != 0 &&
0a7de745 2672 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 2673 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 2674 }
39236c6e 2675 }
cb323159
A
2676 return 0;
2677}
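
/*
 * Illustrative sketch (not part of dlil.c): a parameter block that
 * satisfies the invariants checked above -- each low/high watermark
 * pair must be both zero (auto-tune) or both non-zero with
 * lowat < hiwat, and a non-zero interval is clamped up to
 * IF_RXPOLL_INTERVALTIME_MIN. The values themselves are made up.
 */
#if 0 /* example only */
static void
example_fill_poll_params(struct ifnet_poll_params *p)
{
        bzero(p, sizeof(*p));
        p->packets_lowat = 8;           /* both non-zero, lowat < hiwat */
        p->packets_hiwat = 64;
        p->bytes_lowat = 0;             /* both zero: auto-tuned */
        p->bytes_hiwat = 0;
        p->interval_time = 0;           /* 0: keep the default interval */
        VERIFY(dlil_rxpoll_validate_params(p) == 0);
}
#endif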
39236c6e 2678
cb323159
A
2679void
2680dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2681{
2682 u_int64_t sample_holdtime, inbw;
316670eb 2683
39236c6e 2684 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
0a7de745 2685 sample_holdtime = 0; /* polling is disabled */
cb323159
A
2686 ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
2687 ifp->if_rxpoll_blowat = 0;
2688 ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
2689 ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
2690 ifp->if_rxpoll_plim = 0;
2691 ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
316670eb 2692 } else {
39236c6e
A
2693 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2694 u_int64_t ival;
316670eb
A
2695 unsigned int n, i;
2696
39236c6e 2697 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
0a7de745 2698 if (inbw < rxpoll_tbl[i].speed) {
316670eb 2699 break;
0a7de745 2700 }
316670eb
A
2701 n = i;
2702 }
39236c6e
A
2703 /* auto-tune if caller didn't specify a value */
2704 plowat = ((p == NULL || p->packets_lowat == 0) ?
2705 rxpoll_tbl[n].plowat : p->packets_lowat);
2706 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2707 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2708 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2709 rxpoll_tbl[n].blowat : p->bytes_lowat);
2710 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2711 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2712 plim = ((p == NULL || p->packets_limit == 0) ?
2713 if_rxpoll_max : p->packets_limit);
2714 ival = ((p == NULL || p->interval_time == 0) ?
2715 if_rxpoll_interval_time : p->interval_time);
2716
2717 VERIFY(plowat != 0 && phiwat != 0);
2718 VERIFY(blowat != 0 && bhiwat != 0);
2719 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2720
316670eb 2721 sample_holdtime = if_rxpoll_sample_holdtime;
cb323159
A
2722 ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
2723 ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
2724 ifp->if_rxpoll_plowat = plowat;
2725 ifp->if_rxpoll_phiwat = phiwat;
2726 ifp->if_rxpoll_blowat = blowat;
2727 ifp->if_rxpoll_bhiwat = bhiwat;
2728 ifp->if_rxpoll_plim = plim;
2729 ifp->if_rxpoll_ival = ival;
316670eb
A
2730 }
2731
cb323159
A
2732 net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
2733 net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
316670eb
A
2734
2735 if (dlil_verbose) {
cb323159 2736 DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
39236c6e
A
2737 "poll interval %llu nsec, pkts per poll %u, "
2738 "pkt limits [%u/%u], wreq limits [%u/%u], "
2739 "bytes limits [%u/%u]\n", if_name(ifp),
cb323159
A
2740 inbw, sample_holdtime, ifp->if_rxpoll_ival,
2741 ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
2742 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
2743 ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
2744 ifp->if_rxpoll_bhiwat);
2745 }
2746}
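
/*
 * Illustrative sketch (not part of dlil.c): the table lookup used by
 * the auto-tuner above, in isolation. The highest table entry whose
 * speed does not exceed the measured input link rate wins; the table
 * contents and names here are hypothetical stand-ins for rxpoll_tbl.
 */
#if 0 /* example only */
struct example_rxpoll_row {
        uint64_t speed;                 /* link rate, bits per second */
        uint32_t plowat, phiwat;        /* packet watermarks */
};

static const struct example_rxpoll_row example_tbl[] = {
        { 10ULL * 1000 * 1000, 8, 32 },         /* 10 Mbps */
        { 100ULL * 1000 * 1000, 16, 64 },       /* 100 Mbps */
        { 1000ULL * 1000 * 1000, 32, 128 },     /* 1 Gbps */
        { 0, 0, 0 }                             /* terminator */
};

static const struct example_rxpoll_row *
example_pick_row(uint64_t inbw)
{
        unsigned int n = 0, i;

        for (i = 0; example_tbl[i].speed != 0; i++) {
                if (inbw < example_tbl[i].speed) {
                        break;
                }
                n = i;          /* last entry not exceeding inbw */
        }
        return &example_tbl[n];
}
#endif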
2747
2748/*
2749 * Must be called on an attached ifnet (caller is expected to check.)
2750 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2751 */
2752errno_t
2753dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2754 boolean_t locked)
2755{
2756 errno_t err;
2757 struct dlil_threading_info *inp;
2758
2759 VERIFY(ifp != NULL);
2760 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2761 return ENXIO;
2762 }
2763 err = dlil_rxpoll_validate_params(p);
2764 if (err != 0) {
2765 return err;
316670eb 2766 }
39236c6e 2767
cb323159 2768 if (!locked) {
f427ee49 2769 lck_mtx_lock(&inp->dlth_lock);
cb323159 2770 }
f427ee49 2771 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
cb323159
A
2772 /*
2773 * Normally, we'd reset the parameters to the auto-tuned values
2774 * if the input thread detects a change in link rate. If the
2775 * driver provides its own parameters right after a link rate
2776 * changes, but before the input thread gets to run, we want to
2777 * make sure to keep the driver's values. Clearing if_poll_update
2778 * will achieve that.
2779 */
2780 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2781 ifp->if_poll_update = 0;
2782 }
2783 dlil_rxpoll_update_params(ifp, p);
0a7de745 2784 if (!locked) {
f427ee49 2785 lck_mtx_unlock(&inp->dlth_lock);
0a7de745 2786 }
0a7de745 2787 return 0;
39236c6e
A
2788}
2789
2790/*
2791 * Must be called on an attached ifnet (caller is expected to check.)
2792 */
2793errno_t
2794dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2795{
2796 struct dlil_threading_info *inp;
2797
2798 VERIFY(ifp != NULL && p != NULL);
0a7de745
A
2799 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2800 return ENXIO;
2801 }
39236c6e 2802
0a7de745 2803 bzero(p, sizeof(*p));
39236c6e 2804
f427ee49 2805 lck_mtx_lock(&inp->dlth_lock);
cb323159
A
2806 p->packets_limit = ifp->if_rxpoll_plim;
2807 p->packets_lowat = ifp->if_rxpoll_plowat;
2808 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2809 p->bytes_lowat = ifp->if_rxpoll_blowat;
2810 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2811 p->interval_time = ifp->if_rxpoll_ival;
f427ee49 2812 lck_mtx_unlock(&inp->dlth_lock);
39236c6e 2813
0a7de745 2814 return 0;
316670eb
A
2815}
2816
2817errno_t
2818ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2819 const struct ifnet_stat_increment_param *s)
2820{
0a7de745 2821 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
316670eb
A
2822}
2823
2824errno_t
2825ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2826 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2827{
0a7de745 2828 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
316670eb
A
2829}
2830
cb323159
A
2831errno_t
2832ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2833 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2834{
2835 return ifnet_input_common(ifp, m_head, m_tail, s,
2836 (m_head != NULL), TRUE);
2837}
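
/*
 * Illustrative sketch (not part of dlil.c): how a driver hands a
 * received chain to DLIL. The basic ifnet_input() counts the chain
 * itself; a driver that already knows the totals can use
 * ifnet_input_extended() with a stat increment block instead. The
 * mbuf chain and counts are assumed to come from the driver's RX ring.
 */
#if 0 /* example only */
static void
example_rx_deliver(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t pkt_cnt, u_int32_t byte_cnt)
{
        struct ifnet_stat_increment_param s;

        bzero(&s, sizeof(s));
        s.packets_in = pkt_cnt;         /* must match the chain length */
        s.bytes_in = byte_cnt;          /* an approximation is acceptable */

        /* tail and counts let DLIL skip walking the chain */
        (void) ifnet_input_extended(ifp, head, tail, &s);
}
#endif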
2838
316670eb
A
2839static errno_t
2840ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2841 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2842{
5ba3f43e 2843 dlil_input_func input_func;
39037602 2844 struct ifnet_stat_increment_param _s;
316670eb 2845 u_int32_t m_cnt = 0, m_size = 0;
39037602
A
2846 struct mbuf *last;
2847 errno_t err = 0;
316670eb 2848
39236c6e 2849 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
0a7de745 2850 if (m_head != NULL) {
39236c6e 2851 mbuf_freem_list(m_head);
0a7de745
A
2852 }
2853 return EINVAL;
39236c6e
A
2854 }
2855
2856 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2857 VERIFY(m_tail == NULL || ext);
2858 VERIFY(s != NULL || !ext);
2859
316670eb
A
2860 /*
2861 * Drop the packet(s) if the parameters are invalid, or if the
2862 * interface is no longer attached; else hold an IO refcnt to
2863 * prevent it from being detached (will be released below.)
2864 */
cb323159 2865 if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
0a7de745 2866 if (m_head != NULL) {
316670eb 2867 mbuf_freem_list(m_head);
0a7de745
A
2868 }
2869 return EINVAL;
316670eb
A
2870 }
2871
5ba3f43e
A
2872 input_func = ifp->if_input_dlil;
2873 VERIFY(input_func != NULL);
39037602 2874
316670eb
A
2875 if (m_tail == NULL) {
2876 last = m_head;
39236c6e 2877 while (m_head != NULL) {
316670eb 2878#if IFNET_INPUT_SANITY_CHK
f427ee49 2879 if (__improbable(dlil_input_sanity_check != 0)) {
316670eb 2880 DLIL_INPUT_CHECK(last, ifp);
0a7de745 2881 }
316670eb
A
2882#endif /* IFNET_INPUT_SANITY_CHK */
2883 m_cnt++;
2884 m_size += m_length(last);
0a7de745 2885 if (mbuf_nextpkt(last) == NULL) {
316670eb 2886 break;
0a7de745 2887 }
316670eb
A
2888 last = mbuf_nextpkt(last);
2889 }
2890 m_tail = last;
2891 } else {
2892#if IFNET_INPUT_SANITY_CHK
f427ee49 2893 if (__improbable(dlil_input_sanity_check != 0)) {
316670eb
A
2894 last = m_head;
2895 while (1) {
2896 DLIL_INPUT_CHECK(last, ifp);
2897 m_cnt++;
2898 m_size += m_length(last);
0a7de745 2899 if (mbuf_nextpkt(last) == NULL) {
316670eb 2900 break;
0a7de745 2901 }
316670eb
A
2902 last = mbuf_nextpkt(last);
2903 }
2904 } else {
2905 m_cnt = s->packets_in;
2906 m_size = s->bytes_in;
2907 last = m_tail;
2908 }
2909#else
2910 m_cnt = s->packets_in;
2911 m_size = s->bytes_in;
2912 last = m_tail;
2913#endif /* IFNET_INPUT_SANITY_CHK */
2914 }
2915
2916 if (last != m_tail) {
39236c6e
A
2917 panic_plain("%s: invalid input packet chain for %s, "
2918 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2919 m_tail, last);
316670eb
A
2920 }
2921
2922 /*
2923 * Assert packet count only for the extended variant, for backwards
2924 * compatibility, since this came directly from the device driver.
2925 * Relax this assertion for input bytes, as the driver may have
2926 * included the link-layer headers in the computation; hence
2927 * m_size is just an approximation.
2928 */
2929 if (ext && s->packets_in != m_cnt) {
39236c6e
A
2930 panic_plain("%s: input packet count mismatch for %s, "
2931 "%d instead of %d\n", __func__, if_name(ifp),
2932 s->packets_in, m_cnt);
316670eb
A
2933 }
2934
39037602 2935 if (s == NULL) {
0a7de745 2936 bzero(&_s, sizeof(_s));
39037602
A
2937 s = &_s;
2938 } else {
2939 _s = *s;
2940 }
2941 _s.packets_in = m_cnt;
2942 _s.bytes_in = m_size;
2943
5ba3f43e 2944 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
39037602
A
2945
2946 if (ifp != lo_ifp) {
2947 /* Release the IO refcnt */
cb323159 2948 ifnet_datamov_end(ifp);
39037602
A
2949 }
2950
0a7de745 2951 return err;
39037602
A
2952}
2953
39037602
A
2954
2955errno_t
2956dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2957{
0a7de745 2958 return ifp->if_output(ifp, m);
39037602
A
2959}
2960
2961errno_t
2962dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2963 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2964 boolean_t poll, struct thread *tp)
2965{
f427ee49 2966 struct dlil_threading_info *inp = ifp->if_inp;
39037602 2967
f427ee49 2968 if (__improbable(inp == NULL)) {
316670eb 2969 inp = dlil_main_input_thread;
0a7de745 2970 }
316670eb 2971
f427ee49
A
2972 return inp->dlth_strategy(inp, ifp, m_head, m_tail, s, poll, tp);
2973}
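
/*
 * Illustrative sketch (not part of dlil.c): the dispatch above is a
 * strategy pattern -- dlil_create_input_thread() stores one of
 * dlil_input_async/dlil_input_sync in dlth_strategy, so this handler
 * stays branch-free on the hot path. A reduced model, with
 * hypothetical names:
 */
#if 0 /* example only */
typedef int (*example_strategy_t)(void *ctx, struct mbuf *m);

struct example_input {
        example_strategy_t strategy;    /* chosen once, at thread setup */
        void *ctx;
};

static int
example_handler(struct example_input *in, struct mbuf *m)
{
        /* no mode test here; the function pointer encodes the mode */
        return in->strategy(in->ctx, m);
}
#endif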

static errno_t
dlil_input_async(struct dlil_threading_info *inp,
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t poll,
    struct thread *tp)
{
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->dlth_lock);
	if (inp != dlil_main_input_thread && inp->dlth_affinity && tp != NULL &&
	    ((!poll && inp->dlth_driver_thread == THREAD_NULL) ||
	    (poll && inp->dlth_poller_thread == THREAD_NULL))) {
		u_int32_t tag = inp->dlth_affinity_tag;

		if (poll) {
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_poller_thread = tp;
		} else {
			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
		}
		lck_mtx_unlock(&inp->dlth_lock);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->dlth_lock);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		classq_pkt_t head, tail;
		CLASSQ_PKT_INIT_MBUF(&head, m_head);
		CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->dlth_pkts, &head, &tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (__improbable(dlil_input_sanity_check != 0)) {
		u_int32_t count = 0, size = 0;
		struct mbuf *m0;

		for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
			size += m_length(m0);
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid total packet count %u "
			    "(expected %u)\n", if_name(ifp), count, m_cnt);
			/* NOTREACHED */
			__builtin_unreachable();
		} else if (size != m_size) {
			panic_plain("%s: invalid total packet size %u "
			    "(expected %u)\n", if_name(ifp), size, m_size);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		inp->dlth_pkts_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread) {
		notify = dlil_input_stats_sync(ifp, inp);
	}

	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	return 0;
}

static errno_t
dlil_input_sync(struct dlil_threading_info *inp,
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t poll,
    struct thread *tp)
{
#pragma unused(tp)
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;
	classq_pkt_t head, tail;

	ASSERT(inp != dlil_main_input_thread);

	/* XXX: should we just assert instead? */
	if (__improbable(m_head == NULL)) {
		return 0;
	}

	CLASSQ_PKT_INIT_MBUF(&head, m_head);
	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);

	lck_mtx_lock_spin(&inp->dlth_lock);
	_addq_multi(&inp->dlth_pkts, &head, &tail, m_cnt, m_size);

#if IFNET_INPUT_SANITY_CHK
	if (__improbable(dlil_input_sanity_check != 0)) {
		u_int32_t count = 0, size = 0;
		struct mbuf *m0;

		for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
			size += m_length(m0);
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid total packet count %u "
			    "(expected %u)\n", if_name(ifp), count, m_cnt);
			/* NOTREACHED */
			__builtin_unreachable();
		} else if (size != m_size) {
			panic_plain("%s: invalid total packet size %u "
			    "(expected %u)\n", if_name(ifp), size, m_size);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		inp->dlth_pkts_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);

	m_cnt = qlen(&inp->dlth_pkts);
	_getq_all(&inp->dlth_pkts, &head, NULL, NULL, NULL);

	notify = dlil_input_stats_sync(ifp, inp);

	lck_mtx_unlock(&inp->dlth_lock);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * NOTE: we should consider adding thread-starvation safeguards
	 * here when dealing with long chains of packets.
	 */
	if (head.cp_mbuf != NULL) {
		dlil_input_packet_list_extended(NULL, head.cp_mbuf,
		    m_cnt, ifp->if_poll_mode);
	}

	return 0;
}
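
/*
 * The two strategies above differ mainly in where the protocol demux runs:
 * dlil_input_async() parks the chain on the input thread's queue and wakes
 * that thread, while dlil_input_sync() drains the queue immediately and
 * calls dlil_input_packet_list_extended() on the caller's (driver) thread.
 * Both funnel through dlil_input_stats_add()/dlil_input_stats_sync(), so
 * the per-interface counters stay consistent either way.
 */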

static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}

__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
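
/*
 * Note on the wait/continuation idiom above: assert_wait() is armed while
 * if_start_lock is held, so a wakeup_one() issued from ifnet_start_common()
 * (which takes the same lock) cannot slip between arming the wait and
 * blocking -- the thread is already on the wait queue and the wakeup is
 * absorbed rather than lost.  thread_block_parameter() then resumes in
 * ifnet_start_thread_cont() instead of returning here.  A condensed sketch
 * with placeholder names:
 */
#if 0	/* illustrative only; not compiled */
static void example_cont(void *v, wait_result_t wres);	/* continuation */

static void
example_wait_then_continue(void *chan, void *v)
{
	(void) assert_wait(chan, THREAD_UNINT);		/* 1: arm the wait */
	/* 2: publish state a waker may test (while still holding the lock) */
	(void) thread_block_parameter(example_cont, v);	/* 3: never returns */
}
#endif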

__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *ifp = v;
	struct ifclassq *ifq = &ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    ifp->if_start_thread == THREAD_NULL)) {
		goto terminate;
	}

	if (__improbable(ifp->if_start_embryonic)) {
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep servicing until there are no more requests.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		if (!IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		} else {
			ifp->if_start_delayed = 0;
		}
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the IO ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

	if (__probable(ifp->if_start_thread != THREAD_NULL)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_purge(ifp);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
	} else {
		*(&ifp->if_start_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}

static inline void
ifnet_poll_wakeup(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);

	ifp->if_poll_req++;
	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
	    ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
}

void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
}

__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct ifnet *ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, thread_name);

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}

__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    ifp->if_poll_thread == THREAD_NULL)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep servicing until there are no more requests.
	 */
	for (;;) {
		struct mbuf *m_head, *m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		    &m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the IO ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    ifp->if_poll_thread == THREAD_NULL) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (ifp->if_poll_thread != THREAD_NULL) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
	} else {
		*(&ifp->if_poll_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
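
/*
 * Worked example for the deadline math used by the starter and poller
 * continuations above: a cycle of { .tv_sec = 0, .tv_nsec = 5000000 }
 * yields clock_interval_to_deadline(5000000, 1, &deadline), i.e. an
 * interval of 5,000,000 units at a scale factor of 1 ns per unit, so the
 * thread re-wakes roughly 5 ms from now.  A zero timespec is normalized
 * to ts == NULL, which means TIMEOUT_WAIT_FOREVER: sleep until the next
 * explicit ifnet_start()/ifnet_poll() wakeup.
 */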

void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
		if_qflush(ifp, 0);
	}
}

void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		return;
	}

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = {
			.rate = ifq->ifcq_tbr.tbr_rate_raw,
			.percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
		};
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
			ifp->if_poll_update++;
		}
		break;

	default:
		break;
	}
}

errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
		ifp->if_output_sched_model = omodel;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}

errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return 0;
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return 0;
}

errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);
	}

	return err;
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL)) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
	}

	return err;
}

errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	if (maxqlen == 0) {
		maxqlen = if_rcvq_maxlen;
	} else if (maxqlen < IF_RCVQ_MINLEN) {
		maxqlen = IF_RCVQ_MINLEN;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->dlth_lock);
	qlimit(&inp->dlth_pkts) = maxqlen;
	lck_mtx_unlock(&inp->dlth_lock);

	return 0;
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->dlth_lock);
	*maxqlen = qlimit(&inp->dlth_pkts);
	lck_mtx_unlock(&inp->dlth_lock);
	return 0;
}

void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
		ifp->if_start_delay_qlen = MIN(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
	}
}
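
/*
 * Example (values hypothetical): ifnet_enqueue_multi_setup(ifp, 16, 1000)
 * caps the queue-length trigger at MIN(100, 16) == 16 packets and the
 * timeout at min(20000, 1000) == 1000, then scales the latter by 1000 to
 * 1,000,000 ns -- the x1000 step implies the delay_timeout argument is
 * expressed in microseconds, with an effective ceiling of 20 ms.
 */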

/*
 * This function clears the DSCP bits in the IPv4/v6 header pointed to by buf.
 * While buf need not be 32-bit aligned, the caller must ensure that it holds
 * the full header.
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;
		ASSERT(ip_ver == IPV6_VERSION);

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
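
/*
 * Worked example of the incremental IPv4 checksum update above (RFC 1141
 * style, shown in network byte order): clearing DSCP EF with old_tos = 0xb8
 * and a new tos of 0x00, starting from ip_sum = 0xff60:
 *
 *	sum = 0xff60 + 0x00b8 - 0x0000 = 0x10018
 *	sum = (0x10018 >> 16) + (0x10018 & 0xffff) = 0x0019
 *
 * The end-around carry fold preserves the one's-complement property; and
 * because a one's-complement sum is byte-order independent, the same
 * arithmetic holds on little-endian hosts with the htons()-swapped operands.
 */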

static inline errno_t
ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
    boolean_t *pdrop)
{
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;
	uint8_t *mcast_buf = NULL;
	uint8_t ip_ver;
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);

	/*
	 * If the packet already carries a timestamp, either from dlil_output()
	 * or from the flowswitch, use it here.  Otherwise, record a timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)_net_uptime;
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)_net_uptime;
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)_net_uptime;
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)_net_uptime;
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep the
		 * service class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    (int)hlen)) == NULL) {
					return ENOMEM;
				}

				eh = (struct ether_header *)mbuf_data(
					p->cp_mbuf);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay the start callback in order
		 * to coalesce multiple packets, use the following heuristics
		 * to make sure that the start callback is delayed only when
		 * a bulk data transfer is detected:
		 * 1. The number of packets enqueued in (delay_win * 2) is
		 *    greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled, it stays enabled for another
		 *    10 idle windows.  This is to take into account variable
		 *    RTT and burst traffic.
		 * 3. If the time elapsed since the last enqueue is more than
		 *    200ms, we disable delaying the start callback.  This
		 *    takes idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(&ifp->if_snd, p, p, 1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
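
/*
 * Example timeline for the delay-start heuristic above (numbers
 * hypothetical; if_start_delay_timeout = 1 ms, so dwin = 2 ms, and
 * if_start_delay_qlen = 16): while enqueues keep landing within a 2 ms
 * window, if_start_delay_cnt grows; once it reaches 16, IFEF_DELAY_START
 * is set and start callbacks are coalesced.  A gap of 200 ms or more since
 * the window opened clears the flag immediately, and 10 consecutive
 * windows that never reach the threshold clear it as well, so sparse
 * traffic reverts to immediate starts.
 */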

static inline errno_t
ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	int error;

	/* enqueue the packet chain (caller consumes objects) */
	error = ifclassq_enqueue(&ifp->if_snd, head, tail, cnt, bytes, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
		ifnet_start(ifp);
	}
	return error;
}

int
ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
{
	struct ifnet *ifp = handle;
	boolean_t pdrop;	/* dummy */
	uint32_t i;

	ASSERT(n_pkts >= 1);
	for (i = 0; i < n_pkts - 1; i++) {
		(void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
		    FALSE, &pdrop);
	}
	/* flush with the last packet */
	(void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);

	return 0;
}

static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp->if_output_netem != NULL) {
		return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
	} else {
		return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
	}
}

errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;
	return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	classq_pkt_t pkt;

	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL) {
			m_freem_list(m);
			*pdrop = TRUE;
		}
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return ENXIO;
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return ENETDOWN;
	}

	CLASSQ_PKT_INIT_MBUF(&pkt, m);
	return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
}
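
/*
 * Minimal usage sketch for the enqueue KPI above (the driver function name
 * is hypothetical): an output path hands a single packet to the ifnet send
 * queue and lets DLIL decide when to kick the start thread.  This mirrors
 * what ifnet_enqueue() itself does with flush == TRUE.
 */
#if 0	/* illustrative only; not compiled */
static errno_t
example_driver_output(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;

	/* flush == TRUE: wake ifnet_start() once the packet is queued */
	return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
}
#endif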

errno_t
ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	classq_pkt_t head, tail;

	ASSERT(m_head != NULL);
	ASSERT((m_head->m_flags & M_PKTHDR) != 0);
	ASSERT(m_tail != NULL);
	ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
	ASSERT(ifp != NULL);
	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);

	if (!IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem_list(m_head);
		*pdrop = TRUE;
		return ENXIO;
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem_list(m_head);
		*pdrop = TRUE;
		return ENETDOWN;
	}

	CLASSQ_PKT_INIT_MBUF(&head, m_head);
	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
	return ifnet_enqueue_ifclassq_chain(ifp, &head, &tail, cnt, bytes,
	    flush, pdrop);
}

errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	if (ifp == NULL || mp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    &pkt, NULL, NULL, NULL);
	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*mp = pkt.cp_mbuf;
	return rc;
}

errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*mp = pkt.cp_mbuf;
	return rc;
}

errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || pkt_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}
	return rc;
}
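
/*
 * Sketch of a driver if_start routine built on the dequeue KPI above
 * (example_hw_tx and the batch size of 32 are hypothetical): pull up to a
 * hardware-ring-sized batch, transmit it, and stop once the scheduler runs
 * dry.  The guard on head handles either way an empty queue may be reported.
 */
#if 0	/* illustrative only; not compiled */
static void
example_if_start(struct ifnet *ifp)
{
	struct mbuf *head, *tail;
	u_int32_t cnt, len;

	while (ifnet_dequeue_multi(ifp, 32, &head, &tail, &cnt, &len) == 0 &&
	    head != NULL) {
		example_hw_tx(ifp, head, cnt);	/* hypothetical TX routine */
	}
}
#endif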

errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || byte_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, &pkt_head, &pkt_tail, cnt, len);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}
	return rc;
}

errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
	    cnt, len);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}
	return rc;
}

#if XNU_TARGET_OS_OSX
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
#endif /* XNU_TARGET_OS_OSX */

static boolean_t
packet_has_vlan_tag(struct mbuf * m)
{
	u_int tag = 0;

	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
		tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
		if (tag == 0) {
			/* the packet is just priority-tagged, clear the bit */
			m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
		}
	}
	return tag != 0;
}

static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	boolean_t is_vlan_packet = FALSE;
	struct ifnet_filter *filter;
	struct mbuf *m = *m_p;

	is_vlan_packet = packet_has_vlan_tag(m);

	if (TAILQ_EMPTY(&ifp->if_flt_head)) {
		return 0;
	}

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		/* exclude VLAN packets from external filters PR-3586856 */
		if (is_vlan_packet &&
		    (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
			continue;
		}

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return result;
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away the M_PROTO1 bit prior to sending the packet up the
	 * stack, as it is meant to be local to a subsystem -- if_bridge
	 * in the case of M_PROTO1.
	 */
	if (*m_p != NULL) {
		(*m_p)->m_flags &= ~M_PROTO1;
	}

	return 0;
}
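
/*
 * Note on the locking pattern above: if_flt_lock cannot be held across a
 * filter callback (the filter may block or re-enter DLIL), so the lock is
 * dropped around filt_input.  if_flt_monitor_busy() marks the list busy
 * beforehand, which holds off filter detach until the matching
 * if_flt_monitor_unbusy(), keeping the TAILQ_FOREACH traversal safe while
 * the lock is released.
 */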
4440
6d2010ae
A
4441static int
4442dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
4443 protocol_family_t protocol_family)
4444{
cb323159
A
4445 boolean_t is_vlan_packet;
4446 struct ifnet_filter *filter;
4447 struct mbuf *m = *m_p;
4448
4449 is_vlan_packet = packet_has_vlan_tag(m);
6d2010ae
A
4450
4451 /*
4452 * Pass the outbound packet to the interface filters
4453 */
4454 lck_mtx_lock_spin(&ifp->if_flt_lock);
4455 /* prevent filter list from changing in case we drop the lock */
4456 if_flt_monitor_busy(ifp);
4457 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4458 int result;
4459
cb323159
A
4460 /* exclude VLAN packets from external filters PR-3586856 */
4461 if (is_vlan_packet &&
4462 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4463 continue;
4464 }
4465
6d2010ae
A
4466 if (!filter->filt_skip && filter->filt_output != NULL &&
4467 (filter->filt_protocol == 0 ||
4468 filter->filt_protocol == protocol_family)) {
4469 lck_mtx_unlock(&ifp->if_flt_lock);
4470
4471 result = filter->filt_output(filter->filt_cookie, ifp,
4472 protocol_family, m_p);
4473
4474 lck_mtx_lock_spin(&ifp->if_flt_lock);
4475 if (result != 0) {
4476 /* we're done with the filter list */
4477 if_flt_monitor_unbusy(ifp);
4478 lck_mtx_unlock(&ifp->if_flt_lock);
0a7de745 4479 return result;
6d2010ae
A
4480 }
4481 }
4482 }
4483 /* we're done with the filter list */
4484 if_flt_monitor_unbusy(ifp);
4485 lck_mtx_unlock(&ifp->if_flt_lock);
4486
0a7de745 4487 return 0;
6d2010ae
A
4488}
4489
2d21ac55
A
4490static void
4491dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
1c79356b 4492{
2d21ac55 4493 int error;
1c79356b 4494
2d21ac55
A
4495 if (ifproto->proto_kpi == kProtoKPI_v1) {
4496 /* Version 1 protocols get one packet at a time */
4497 while (m != NULL) {
0a7de745
A
4498 char * frame_header;
4499 mbuf_t next_packet;
6d2010ae 4500
2d21ac55
A
4501 next_packet = m->m_nextpkt;
4502 m->m_nextpkt = NULL;
39236c6e
A
4503 frame_header = m->m_pkthdr.pkt_hdr;
4504 m->m_pkthdr.pkt_hdr = NULL;
6d2010ae
A
4505 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
4506 ifproto->protocol_family, m, frame_header);
0a7de745 4507 if (error != 0 && error != EJUSTRETURN) {
2d21ac55 4508 m_freem(m);
0a7de745 4509 }
2d21ac55
A
4510 m = next_packet;
4511 }
6d2010ae 4512 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
2d21ac55
A
4513 /* Version 2 protocols support packet lists */
4514 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
6d2010ae 4515 ifproto->protocol_family, m);
0a7de745 4516 if (error != 0 && error != EJUSTRETURN) {
2d21ac55 4517 m_freem_list(m);
0a7de745 4518 }
91447636 4519 }
2d21ac55 4520}
1c79356b 4521
316670eb
A
4522static void
4523dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
cb323159 4524 struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
316670eb 4525{
f427ee49 4526 struct ifnet_stat_increment_param *d = &inp->dlth_stats;
316670eb 4527
0a7de745 4528 if (s->packets_in != 0) {
316670eb 4529 d->packets_in += s->packets_in;
0a7de745
A
4530 }
4531 if (s->bytes_in != 0) {
316670eb 4532 d->bytes_in += s->bytes_in;
0a7de745
A
4533 }
4534 if (s->errors_in != 0) {
316670eb 4535 d->errors_in += s->errors_in;
0a7de745 4536 }
316670eb 4537
0a7de745 4538 if (s->packets_out != 0) {
316670eb 4539 d->packets_out += s->packets_out;
0a7de745
A
4540 }
4541 if (s->bytes_out != 0) {
316670eb 4542 d->bytes_out += s->bytes_out;
0a7de745
A
4543 }
4544 if (s->errors_out != 0) {
316670eb 4545 d->errors_out += s->errors_out;
0a7de745 4546 }
316670eb 4547
0a7de745 4548 if (s->collisions != 0) {
316670eb 4549 d->collisions += s->collisions;
0a7de745
A
4550 }
4551 if (s->dropped != 0) {
316670eb 4552 d->dropped += s->dropped;
0a7de745 4553 }
316670eb 4554
0a7de745 4555 if (poll) {
cb323159 4556 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
0a7de745 4557 }
316670eb
A
4558}
4559
cb323159 4560static boolean_t
316670eb
A
4561dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
4562{
f427ee49 4563 struct ifnet_stat_increment_param *s = &inp->dlth_stats;
316670eb
A
4564
4565 /*
4566 * Use of atomic operations is unavoidable here because
4567 * these stats may also be incremented elsewhere via KPIs.
4568 */
4569 if (s->packets_in != 0) {
4570 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
4571 s->packets_in = 0;
4572 }
4573 if (s->bytes_in != 0) {
4574 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
4575 s->bytes_in = 0;
4576 }
4577 if (s->errors_in != 0) {
4578 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
4579 s->errors_in = 0;
4580 }
4581
4582 if (s->packets_out != 0) {
4583 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
4584 s->packets_out = 0;
4585 }
4586 if (s->bytes_out != 0) {
4587 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
4588 s->bytes_out = 0;
4589 }
4590 if (s->errors_out != 0) {
4591 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
4592 s->errors_out = 0;
4593 }
4594
4595 if (s->collisions != 0) {
4596 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
4597 s->collisions = 0;
4598 }
4599 if (s->dropped != 0) {
4600 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
4601 s->dropped = 0;
4602 }
39037602 4603
316670eb
A
4604 /*
4605 * No need for atomic operations as they are modified here
4606 * only from within the DLIL input thread context.
4607 */
cb323159
A
4608 if (ifp->if_poll_tstats.packets != 0) {
4609 ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
4610 ifp->if_poll_tstats.packets = 0;
316670eb 4611 }
cb323159
A
4612 if (ifp->if_poll_tstats.bytes != 0) {
4613 ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
4614 ifp->if_poll_tstats.bytes = 0;
316670eb 4615 }
cb323159
A
4616
4617 return ifp->if_data_threshold != 0;
316670eb
A
4618}
4619
4620__private_extern__ void
4621dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
4622{
0a7de745
A
4623 return dlil_input_packet_list_common(ifp, m, 0,
4624 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
316670eb
A
4625}
4626
2d21ac55 4627__private_extern__ void
316670eb
A
4628dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
4629 u_int32_t cnt, ifnet_model_t mode)
4630{
0a7de745 4631 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
316670eb
A
4632}
4633
4634static void
4635dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
4636 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
2d21ac55 4637{
d9a64523
A
4638 int error = 0;
4639 protocol_family_t protocol_family;
4640 mbuf_t next_packet;
0a7de745 4641 ifnet_t ifp = ifp_param;
d9a64523 4642 char *frame_header = NULL;
0a7de745 4643 struct if_proto *last_ifproto = NULL;
d9a64523
A
4644 mbuf_t pkt_first = NULL;
4645 mbuf_t *pkt_next = NULL;
4646 u_int32_t poll_thresh = 0, poll_ival = 0;
f427ee49 4647 int iorefcnt = 0;
2d21ac55 4648
39037602 4649 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2d21ac55 4650
316670eb 4651 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
0a7de745 4652 (poll_ival = if_rxpoll_interval_pkts) > 0) {
316670eb 4653 poll_thresh = cnt;
0a7de745 4654 }
6d2010ae 4655
2d21ac55 4656 while (m != NULL) {
6d2010ae 4657 struct if_proto *ifproto = NULL;
0a7de745 4658 uint32_t pktf_mask; /* pkt flags to preserve */
2d21ac55 4659
0a7de745 4660 if (ifp_param == NULL) {
2d21ac55 4661 ifp = m->m_pkthdr.rcvif;
0a7de745 4662 }
6d2010ae 4663
cb323159
A
4664 if ((ifp->if_eflags & IFEF_RXPOLL) &&
4665 (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
0a7de745 4666 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
316670eb 4667 ifnet_poll(ifp);
0a7de745 4668 }
316670eb 4669
6d2010ae 4670 /* Check if this mbuf looks valid */
316670eb 4671 MBUF_INPUT_CHECK(m, ifp);
6d2010ae
A
4672
4673 next_packet = m->m_nextpkt;
4674 m->m_nextpkt = NULL;
39236c6e
A
4675 frame_header = m->m_pkthdr.pkt_hdr;
4676 m->m_pkthdr.pkt_hdr = NULL;
2d21ac55 4677
316670eb
A
4678 /*
4679 * Get an IO reference count if the interface is not
4680 * loopback (lo0) and it is attached; lo0 never goes
4681 * away, so optimize for that.
6d2010ae
A
4682 */
4683 if (ifp != lo_ifp) {
f427ee49
A
4684 /* iorefcnt is 0 if it hasn't been taken yet */
4685 if (iorefcnt == 0) {
4686 if (!ifnet_datamov_begin(ifp)) {
4687 m_freem(m);
4688 goto next;
4689 }
6d2010ae
A
4690 }
4691 iorefcnt = 1;
5ba3f43e 4692 /*
f427ee49 4693 * Preserve the time stamp and skip pktap flags.
5ba3f43e 4694 */
f427ee49 4695 pktf_mask = PKTF_TS_VALID | PKTF_SKIP_PKTAP;
39236c6e
A
4696 } else {
4697 /*
4698 * If this arrived on lo0, preserve interface addr
4699 * info to allow for connectivity between loopback
4700 * and local interface addresses.
4701 */
0a7de745 4702 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
2d21ac55 4703 }
d41d1dae 4704
39236c6e
A
4705 /* make sure packet comes in clean */
4706 m_classifier_init(m, pktf_mask);
4707
316670eb 4708 ifp_inc_traffic_class_in(ifp, m);
d41d1dae 4709
2d21ac55 4710 /* find which protocol family this packet is for */
6d2010ae 4711 ifnet_lock_shared(ifp);
2d21ac55 4712 error = (*ifp->if_demux)(ifp, m, frame_header,
6d2010ae
A
4713 &protocol_family);
4714 ifnet_lock_done(ifp);
2d21ac55 4715 if (error != 0) {
0a7de745 4716 if (error == EJUSTRETURN) {
2d21ac55 4717 goto next;
0a7de745 4718 }
2d21ac55
A
4719 protocol_family = 0;
4720 }
6d2010ae 4721
d9a64523
A
4722 pktap_input(ifp, protocol_family, m, frame_header);
4723
4724 /* Drop v4 packets received on CLAT46 enabled interface */
4725 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4726 m_freem(m);
4727 ip6stat.ip6s_clat464_in_v4_drop++;
4728 goto next;
4729 }
4730
4731 /* Translate the packet if it is received on CLAT interface */
4732 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4733 && dlil_is_clat_needed(protocol_family, m)) {
4734 char *data = NULL;
4735 struct ether_header eh;
4736 struct ether_header *ehp = NULL;
4737
4738 if (ifp->if_type == IFT_ETHER) {
4739 ehp = (struct ether_header *)(void *)frame_header;
4740 /* Skip RX Ethernet packets if they are not IPV6 */
0a7de745 4741 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
d9a64523 4742 goto skip_clat;
0a7de745 4743 }
d9a64523
A
4744
4745 /* Keep a copy of frame_header for Ethernet packets */
4746 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4747 }
4748 error = dlil_clat64(ifp, &protocol_family, &m);
4749 data = (char *) mbuf_data(m);
4750 if (error != 0) {
4751 m_freem(m);
4752 ip6stat.ip6s_clat464_in_drop++;
4753 goto next;
4754 }
4755 /* Native v6 should be No-op */
0a7de745 4756 if (protocol_family != PF_INET) {
d9a64523 4757 goto skip_clat;
0a7de745 4758 }
d9a64523
A
4759
4760 /* Do this only for translated v4 packets. */
4761 switch (ifp->if_type) {
4762 case IFT_CELLULAR:
4763 frame_header = data;
4764 break;
4765 case IFT_ETHER:
4766 /*
4767 * Drop if the mbuf doesn't have enough
4768 * space for Ethernet header
4769 */
4770 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4771 m_free(m);
4772 ip6stat.ip6s_clat464_in_drop++;
4773 goto next;
4774 }
4775 /*
4776 * Set the frame_header ETHER_HDR_LEN bytes
4777 * preceeding the data pointer. Change
4778 * the ether_type too.
4779 */
4780 frame_header = data - ETHER_HDR_LEN;
4781 eh.ether_type = htons(ETHERTYPE_IP);
4782 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4783 break;
4784 }
4785 }
4786skip_clat:
39236c6e 4787 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
0a7de745 4788 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
39236c6e
A
4789 dlil_input_cksum_dbg(ifp, m, frame_header,
4790 protocol_family);
0a7de745 4791 }
39236c6e
A
4792 /*
4793 * For partial checksum offload, we expect the driver to
4794 * set the start offset indicating the start of the span
4795 * that is covered by the hardware-computed checksum;
4796 * adjust this start offset accordingly because the data
4797 * pointer has been advanced beyond the link-layer header.
4798 *
cb323159
A
4799 * Virtual LAN types (bridge, vlan, bond) can call
4800 * dlil_input_packet_list() more than once with the same packet
4801 * and its checksum flags still set. Set a flag indicating that the
4802 * adjustment has already been done.
39236c6e 4803 */
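 /*
  * Worked example (illustrative): for a plain Ethernet frame,
  * m->m_data has been advanced ETHER_HDR_LEN (14) bytes past
  * frame_header by the demux, so adj below evaluates to 14 and
  * csum_rx_start is pulled back by 14 to stay relative to the
  * current data pointer.
  */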
cb323159
A
4804 if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
4805 /* adjustment has already been done */
4806 } else if ((m->m_pkthdr.csum_flags &
39236c6e
A
4807 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4808 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4809 int adj;
39236c6e
A
4810 if (frame_header == NULL ||
4811 frame_header < (char *)mbuf_datastart(m) ||
4812 frame_header > (char *)m->m_data ||
f427ee49 4813 (adj = (int)(m->m_data - frame_header)) >
39236c6e
A
4814 m->m_pkthdr.csum_rx_start) {
4815 m->m_pkthdr.csum_data = 0;
4816 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4817 hwcksum_in_invalidated++;
4818 } else {
4819 m->m_pkthdr.csum_rx_start -= adj;
4820 }
cb323159
A
4821 /* make sure we don't adjust more than once */
4822 m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
39236c6e 4823 }
0a7de745 4824 if (clat_debug) {
d9a64523 4825 pktap_input(ifp, protocol_family, m, frame_header);
0a7de745 4826 }
316670eb 4827
0a7de745 4828 if (m->m_flags & (M_BCAST | M_MCAST)) {
6d2010ae 4829 atomic_add_64(&ifp->if_imcasts, 1);
0a7de745 4830 }
1c79356b 4831
cb323159
A
4832 /* run interface filters */
4833 error = dlil_interface_filters_input(ifp, &m,
4834 &frame_header, protocol_family);
4835 if (error != 0) {
4836 if (error != EJUSTRETURN) {
4837 m_freem(m);
91447636 4838 }
cb323159 4839 goto next;
91447636 4840 }
c3c9b80d
A
4841 /*
4842 * A VLAN interface receives VLAN-tagged packets by attaching
4843 * its PF_VLAN protocol to a parent interface. When a VLAN
4844 * interface is a member of a bridge, the parent interface
4845 * receives VLAN-tagged M_PROMISC packets. A VLAN-tagged
4846 * M_PROMISC packet must be processed by the VLAN protocol
4847 * so that it can be sent up the stack via
4848 * dlil_input_packet_list(). That allows the bridge interface's
4849 * input filter, attached to the VLAN interface, to process
4850 * the packet.
4851 */
4852 if (protocol_family != PF_VLAN &&
4853 (m->m_flags & M_PROMISC) != 0) {
91447636 4854 m_freem(m);
2d21ac55 4855 goto next;
91447636 4856 }
6d2010ae 4857
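 /*
  * Batch delivery: consecutive packets destined to the same
  * protocol are accumulated on pkt_first/pkt_next and handed to
  * the protocol in a single dlil_ifproto_input() call; the chain
  * is flushed whenever the protocol changes or the input list is
  * exhausted.
  */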
2d21ac55
A
4858 /* Lookup the protocol attachment to this interface */
4859 if (protocol_family == 0) {
4860 ifproto = NULL;
6d2010ae
A
4861 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4862 (last_ifproto->protocol_family == protocol_family)) {
4863 VERIFY(ifproto == NULL);
2d21ac55 4864 ifproto = last_ifproto;
6d2010ae
A
4865 if_proto_ref(last_ifproto);
4866 } else {
4867 VERIFY(ifproto == NULL);
4868 ifnet_lock_shared(ifp);
4869 /* callee holds a proto refcnt upon success */
0a7de745 4870 ifproto = find_attached_proto(ifp, protocol_family);
6d2010ae 4871 ifnet_lock_done(ifp);
2d21ac55
A
4872 }
4873 if (ifproto == NULL) {
4874 /* no protocol for this packet, discard */
4875 m_freem(m);
4876 goto next;
4877 }
4878 if (ifproto != last_ifproto) {
2d21ac55
A
4879 if (last_ifproto != NULL) {
4880 /* pass up the list for the previous protocol */
2d21ac55
A
4881 dlil_ifproto_input(last_ifproto, pkt_first);
4882 pkt_first = NULL;
4883 if_proto_free(last_ifproto);
2d21ac55
A
4884 }
4885 last_ifproto = ifproto;
6d2010ae 4886 if_proto_ref(ifproto);
2d21ac55
A
4887 }
4888 /* extend the list */
39236c6e 4889 m->m_pkthdr.pkt_hdr = frame_header;
2d21ac55
A
4890 if (pkt_first == NULL) {
4891 pkt_first = m;
4892 } else {
4893 *pkt_next = m;
4894 }
4895 pkt_next = &m->m_nextpkt;
1c79356b 4896
6d2010ae 4897next:
2d21ac55
A
4898 if (next_packet == NULL && last_ifproto != NULL) {
4899 /* pass up the last list of packets */
2d21ac55
A
4900 dlil_ifproto_input(last_ifproto, pkt_first);
4901 if_proto_free(last_ifproto);
6d2010ae
A
4902 last_ifproto = NULL;
4903 }
4904 if (ifproto != NULL) {
4905 if_proto_free(ifproto);
4906 ifproto = NULL;
2d21ac55 4907 }
316670eb 4908
2d21ac55 4909 m = next_packet;
1c79356b 4910
6d2010ae 4911 /* update the driver's multicast filter, if needed */
0a7de745 4912 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
6d2010ae 4913 ifp->if_updatemcasts = 0;
0a7de745
A
4914 }
4915 if (iorefcnt == 1) {
f427ee49
A
4916 /* If the next mbuf is on a different interface, unlock data-mov */
4917 if (!m || (ifp != ifp_param && ifp != m->m_pkthdr.rcvif)) {
4918 ifnet_datamov_end(ifp);
4919 iorefcnt = 0;
4920 }
0a7de745 4921 }
91447636 4922 }
6d2010ae 4923
39037602 4924 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b
A
4925}
4926
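/*
 * Replay suspended link-layer multicast memberships by issuing
 * SIOCADDMULTI with a NULL argument to the driver. EAFNOSUPPORT is
 * treated as success, and the routine always returns 0 so the caller
 * can clear if_updatemcasts unconditionally.
 */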
6d2010ae
A
4927errno_t
4928if_mcasts_update(struct ifnet *ifp)
4929{
4930 errno_t err;
4931
4932 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
0a7de745 4933 if (err == EAFNOSUPPORT) {
6d2010ae 4934 err = 0;
0a7de745 4935 }
cb323159 4936 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
39236c6e 4937 "(err=%d)\n", if_name(ifp),
6d2010ae
A
4938 (err == 0 ? "successfully restored" : "failed to restore"),
4939 ifp->if_updatemcasts, err);
4940
4941 /* just return success */
0a7de745 4942 return 0;
6d2010ae
A
4943}
4944
39037602
A
4945/* If ifp is set, we will increment the generation for the interface */
4946int
4947dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4948{
4949 if (ifp != NULL) {
4950 ifnet_increment_generation(ifp);
4951 }
4952
4953#if NECP
4954 necp_update_all_clients();
4955#endif /* NECP */
4956
0a7de745 4957 return kev_post_msg(event);
39037602
A
4958}
4959
a39ff7e2
A
4960__private_extern__ void
4961dlil_post_sifflags_msg(struct ifnet * ifp)
4962{
4963 struct kev_msg ev_msg;
4964 struct net_event_data ev_data;
4965
0a7de745
A
4966 bzero(&ev_data, sizeof(ev_data));
4967 bzero(&ev_msg, sizeof(ev_msg));
a39ff7e2
A
4968 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4969 ev_msg.kev_class = KEV_NETWORK_CLASS;
4970 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4971 ev_msg.event_code = KEV_DL_SIFFLAGS;
4972 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4973 ev_data.if_family = ifp->if_family;
4974 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4975 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4976 ev_msg.dv[0].data_ptr = &ev_data;
4977 ev_msg.dv[1].data_length = 0;
4978 dlil_post_complete_msg(ifp, &ev_msg);
4979}
4980
0a7de745 4981#define TMP_IF_PROTO_ARR_SIZE 10
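/*
 * Fan an interface event out, in order, to: the attached interface
 * filters, every attached protocol's event callback, the interface's
 * own if_event handler, and finally the kernel event subsystem via
 * dlil_post_complete_msg().
 */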
91447636 4982static int
39037602 4983dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
1c79356b 4984{
a1c7dba1
A
4985 struct ifnet_filter *filter = NULL;
4986 struct if_proto *proto = NULL;
4987 int if_proto_count = 0;
4988 struct if_proto **tmp_ifproto_arr = NULL;
4989 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4990 int tmp_ifproto_arr_idx = 0;
4991 bool tmp_malloc = false;
6d2010ae 4992
6d2010ae
A
4993 /*
4994 * Pass the event to the interface filters
4995 */
4996 lck_mtx_lock_spin(&ifp->if_flt_lock);
4997 /* prevent filter list from changing in case we drop the lock */
4998 if_flt_monitor_busy(ifp);
4999 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5000 if (filter->filt_event != NULL) {
5001 lck_mtx_unlock(&ifp->if_flt_lock);
5002
5003 filter->filt_event(filter->filt_cookie, ifp,
5004 filter->filt_protocol, event);
5005
5006 lck_mtx_lock_spin(&ifp->if_flt_lock);
91447636 5007 }
6d2010ae
A
5008 }
5009 /* we're done with the filter list */
5010 if_flt_monitor_unbusy(ifp);
5011 lck_mtx_unlock(&ifp->if_flt_lock);
5012
3e170ce0 5013 /* Get an io ref count if the interface is attached */
0a7de745 5014 if (!ifnet_is_attached(ifp, 1)) {
3e170ce0 5015 goto done;
0a7de745 5016 }
3e170ce0 5017
a1c7dba1
A
5018 /*
5019 * An embedded tmp_list_entry in if_proto may still get
5020 * overwritten by another thread after we give up the ifnet lock;
5021 * therefore we avoid embedded pointers here.
5022 */
6d2010ae 5023 ifnet_lock_shared(ifp);
a39ff7e2 5024 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
a1c7dba1 5025 if (if_proto_count) {
6d2010ae 5026 int i;
a1c7dba1
A
5027 VERIFY(ifp->if_proto_hash != NULL);
5028 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
5029 tmp_ifproto_arr = tmp_ifproto_stack_arr;
5030 } else {
5031 MALLOC(tmp_ifproto_arr, struct if_proto **,
0a7de745 5032 sizeof(*tmp_ifproto_arr) * if_proto_count,
a1c7dba1
A
5033 M_TEMP, M_ZERO);
5034 if (tmp_ifproto_arr == NULL) {
5035 ifnet_lock_done(ifp);
5036 goto cleanup;
5037 }
5038 tmp_malloc = true;
5039 }
6d2010ae
A
5040
5041 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6d2010ae
A
5042 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
5043 next_hash) {
a1c7dba1
A
5044 if_proto_ref(proto);
5045 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
5046 tmp_ifproto_arr_idx++;
91447636
A
5047 }
5048 }
a1c7dba1 5049 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
91447636 5050 }
6d2010ae
A
5051 ifnet_lock_done(ifp);
5052
a1c7dba1
A
5053 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
5054 tmp_ifproto_arr_idx++) {
5055 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
5056 VERIFY(proto != NULL);
5057 proto_media_event eventp =
5058 (proto->proto_kpi == kProtoKPI_v1 ?
5059 proto->kpi.v1.event :
5060 proto->kpi.v2.event);
5061
5062 if (eventp != NULL) {
5063 eventp(ifp, proto->protocol_family,
5064 event);
5065 }
5066 if_proto_free(proto);
5067 }
5068
39037602 5069cleanup:
a1c7dba1
A
5070 if (tmp_malloc) {
5071 FREE(tmp_ifproto_arr, M_TEMP);
5072 }
5073
6d2010ae 5074 /* Pass the event to the interface */
0a7de745 5075 if (ifp->if_event != NULL) {
6d2010ae 5076 ifp->if_event(ifp, event);
0a7de745 5077 }
6d2010ae
A
5078
5079 /* Release the io ref count */
5080 ifnet_decr_iorefcnt(ifp);
6d2010ae 5081done:
0a7de745 5082 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
1c79356b
A
5083}
5084
2d21ac55 5085errno_t
6d2010ae 5086ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
1c79356b 5087{
39037602 5088 struct kev_msg kev_msg;
2d21ac55
A
5089 int result = 0;
5090
0a7de745
A
5091 if (ifp == NULL || event == NULL) {
5092 return EINVAL;
5093 }
1c79356b 5094
0a7de745 5095 bzero(&kev_msg, sizeof(kev_msg));
39037602
A
5096 kev_msg.vendor_code = event->vendor_code;
5097 kev_msg.kev_class = event->kev_class;
5098 kev_msg.kev_subclass = event->kev_subclass;
5099 kev_msg.event_code = event->event_code;
91447636
A
5100 kev_msg.dv[0].data_ptr = &event->event_data[0];
5101 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
5102 kev_msg.dv[1].data_length = 0;
6d2010ae 5103
39037602 5104 result = dlil_event_internal(ifp, &kev_msg, TRUE);
1c79356b 5105
0a7de745 5106 return result;
91447636 5107}
1c79356b 5108
3e170ce0
A
5109static void
5110dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
5111{
0a7de745 5112 mbuf_t n = m;
3e170ce0
A
5113 int chainlen = 0;
5114
5115 while (n != NULL) {
5116 chainlen++;
5117 n = n->m_next;
5118 }
5119 switch (chainlen) {
0a7de745
A
5120 case 0:
5121 break;
5122 case 1:
5123 atomic_add_64(&cls->cls_one, 1);
5124 break;
5125 case 2:
5126 atomic_add_64(&cls->cls_two, 1);
5127 break;
5128 case 3:
5129 atomic_add_64(&cls->cls_three, 1);
5130 break;
5131 case 4:
5132 atomic_add_64(&cls->cls_four, 1);
5133 break;
5134 case 5:
5135 default:
5136 atomic_add_64(&cls->cls_five_or_more, 1);
5137 break;
3e170ce0
A
5138 }
5139}
5140
1c79356b 5141/*
91447636
A
5142 * dlil_output
5143 *
5144 * Caller should have a lock on the protocol domain if the protocol
5145 * doesn't support finer grained locking. In most cases, the lock
5146 * will be held from the socket layer and won't be released until
5147 * we return back to the socket layer.
5148 *
5149 * This does mean that we must take a protocol lock before we take
5150 * an interface lock if we're going to take both. This makes sense
5151 * because a protocol is likely to interact with an ifp while it
5152 * is under the protocol lock.
316670eb
A
5153 *
5154 * An advisory code will be returned if adv is not null. This
39236c6e 5155 * can be used to provide feedback about interface queues to the
316670eb 5156 * application.
1c79356b 5157 */
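/*
 * The advisory works as follows (see the EQFULL/EQSUSPENDED handling
 * below): the caller passes a struct flowadv initialized to
 * FADV_SUCCESS; if the driver queue reports EQFULL or EQSUSPENDED,
 * the code is rewritten to FADV_FLOW_CONTROLLED or FADV_SUSPENDED
 * respectively and the return value is reset to 0, letting the
 * caller distinguish flow control from a hard error.
 */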
6d2010ae
A
5158errno_t
5159dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
316670eb 5160 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
6d2010ae
A
5161{
5162 char *frame_type = NULL;
5163 char *dst_linkaddr = NULL;
5164 int retval = 0;
5165 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
5166 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
0a7de745
A
5167 struct if_proto *proto = NULL;
5168 mbuf_t m = NULL;
5169 mbuf_t send_head = NULL;
5170 mbuf_t *send_tail = &send_head;
6d2010ae 5171 int iorefcnt = 0;
316670eb 5172 u_int32_t pre = 0, post = 0;
39236c6e
A
5173 u_int32_t fpkts = 0, fbytes = 0;
5174 int32_t flen = 0;
5ba3f43e
A
5175 struct timespec now;
5176 u_int64_t now_nsec;
d9a64523
A
5177 boolean_t did_clat46 = FALSE;
5178 protocol_family_t old_proto_family = proto_family;
cb323159 5179 struct sockaddr_in6 dest6;
d9a64523 5180 struct rtentry *rt = NULL;
cb323159 5181 u_int32_t m_loop_set = 0;
6d2010ae 5182
39236c6e 5183 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
6d2010ae 5184
39037602
A
5185 /*
5186 * Get an io refcnt if the interface is attached to prevent ifnet_detach
5187 * from happening while this operation is in progress
5188 */
cb323159 5189 if (!ifnet_datamov_begin(ifp)) {
6d2010ae
A
5190 retval = ENXIO;
5191 goto cleanup;
5192 }
5193 iorefcnt = 1;
5194
5ba3f43e 5195 VERIFY(ifp->if_output_dlil != NULL);
39037602 5196
6d2010ae 5197 /* update the driver's multicast filter, if needed */
0a7de745 5198 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
6d2010ae 5199 ifp->if_updatemcasts = 0;
0a7de745 5200 }
6d2010ae
A
5201
5202 frame_type = frame_type_buffer;
5203 dst_linkaddr = dst_linkaddr_buffer;
5204
91447636 5205 if (raw == 0) {
6d2010ae
A
5206 ifnet_lock_shared(ifp);
5207 /* callee holds a proto refcnt upon success */
91447636
A
5208 proto = find_attached_proto(ifp, proto_family);
5209 if (proto == NULL) {
6d2010ae 5210 ifnet_lock_done(ifp);
91447636
A
5211 retval = ENXIO;
5212 goto cleanup;
5213 }
6d2010ae 5214 ifnet_lock_done(ifp);
2d21ac55 5215 }
6d2010ae 5216
2d21ac55 5217preout_again:
0a7de745 5218 if (packetlist == NULL) {
2d21ac55 5219 goto cleanup;
0a7de745 5220 }
6d2010ae 5221
2d21ac55
A
5222 m = packetlist;
5223 packetlist = packetlist->m_nextpkt;
5224 m->m_nextpkt = NULL;
6d2010ae 5225
d9a64523
A
5226 /*
5227 * Perform address family translation for the first
5228 * packet outside the loop in order to perform address
5229 * lookup for the translated proto family.
5230 */
5231 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5232 (ifp->if_type == IFT_CELLULAR ||
0a7de745 5233 dlil_is_clat_needed(proto_family, m))) {
d9a64523
A
5234 retval = dlil_clat46(ifp, &proto_family, &m);
5235 /*
5236 * Go to the next packet if translation fails
5237 */
5238 if (retval != 0) {
5239 m_freem(m);
5240 m = NULL;
5241 ip6stat.ip6s_clat464_out_drop++;
5242 /* Make sure that the proto family is PF_INET */
5243 ASSERT(proto_family == PF_INET);
5244 goto preout_again;
5245 }
5246 /*
5247 * Free the old one and make it point to the IPv6 proto structure.
5248 *
5249 * Change proto for the first time we have successfully
5250 * performed address family translation.
5251 */
5252 if (!did_clat46 && proto_family == PF_INET6) {
d9a64523
A
5253 did_clat46 = TRUE;
5254
0a7de745 5255 if (proto != NULL) {
d9a64523 5256 if_proto_free(proto);
0a7de745 5257 }
d9a64523
A
5258 ifnet_lock_shared(ifp);
5259 /* callee holds a proto refcnt upon success */
5260 proto = find_attached_proto(ifp, proto_family);
5261 if (proto == NULL) {
5262 ifnet_lock_done(ifp);
5263 retval = ENXIO;
5264 m_freem(m);
5265 m = NULL;
5266 goto cleanup;
5267 }
5268 ifnet_lock_done(ifp);
5269 if (ifp->if_type == IFT_ETHER) {
5270 /* Update the dest to translated v6 address */
5271 dest6.sin6_len = sizeof(struct sockaddr_in6);
5272 dest6.sin6_family = AF_INET6;
5273 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
5274 dest = (const struct sockaddr *)&dest6;
5275
5276 /*
5277 * Look up the route to the translated destination.
5278 * Free this route ref during cleanup.
5279 */
5280 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
5281 0, 0, ifp->if_index);
5282
5283 route = rt;
5284 }
5285 }
5286 }
5287
5288 /*
5289 * This path handles a packet chain going to the same destination.
5290 * The pre-output routine is used to either trigger resolution of
5291 * the next hop or retrieve the next hop's link-layer addressing,
5292 * e.g. the ether_inet(6)_pre_output routines.
5293 *
5294 * If the routine returns EJUSTRETURN, it implies that the packet
5295 * has been queued, and therefore we have to call preout_again for
5296 * the following packet in the chain.
5297 *
5298 * For errors other than EJUSTRETURN, the current packet is freed
5299 * and the rest of the chain (pointed to by packetlist) is freed
5300 * as part of cleanup.
5301 *
5302 * Otherwise, the retrieved information is used for all the
5303 * packets in the chain.
5304 */
2d21ac55 5305 if (raw == 0) {
6d2010ae
A
5306 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
5307 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
91447636 5308 retval = 0;
6d2010ae
A
5309 if (preoutp != NULL) {
5310 retval = preoutp(ifp, proto_family, &m, dest, route,
5311 frame_type, dst_linkaddr);
5312
5313 if (retval != 0) {
0a7de745 5314 if (retval == EJUSTRETURN) {
6d2010ae 5315 goto preout_again;
0a7de745 5316 }
6d2010ae 5317 m_freem(m);
d9a64523 5318 m = NULL;
6d2010ae 5319 goto cleanup;
91447636 5320 }
1c79356b 5321 }
1c79356b 5322 }
2d21ac55 5323
2d21ac55 5324 do {
f427ee49
A
5325 /*
5326 * pkt_hdr is set here to point to m_data prior to
5327 * calling into the framer. This value of pkt_hdr is
5328 * used by the netif gso logic to retrieve the ip header
5329 * for the TCP packets, offloaded for TSO processing.
5330 */
5331 if ((raw != 0) && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
5332 uint8_t vlan_encap_len = 0;
5333
c3c9b80d 5334 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_ENCAP_PRESENT) != 0) {
f427ee49
A
5335 vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
5336 }
5337 m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
5338 } else {
5339 m->m_pkthdr.pkt_hdr = mtod(m, void *);
5340 }
5341
d9a64523
A
5342 /*
5343 * Perform address family translation if needed.
5344 * For now we only support stateless 4 to 6 translation
5345 * on the out path.
5346 *
5347 * The routine below translates IP header, updates protocol
5348 * checksum and also translates ICMP.
5349 *
5350 * We skip the first packet as it is already translated and
5351 * the proto family is set to PF_INET6.
5352 */
5353 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5354 (ifp->if_type == IFT_CELLULAR ||
0a7de745 5355 dlil_is_clat_needed(proto_family, m))) {
d9a64523 5356 retval = dlil_clat46(ifp, &proto_family, &m);
0a7de745 5357 /* Go to the next packet if the translation fails */
d9a64523
A
5358 if (retval != 0) {
5359 m_freem(m);
5360 m = NULL;
5361 ip6stat.ip6s_clat464_out_drop++;
5362 goto next;
5363 }
5364 }
5365
6d2010ae 5366#if CONFIG_DTRACE
316670eb 5367 if (!raw && proto_family == PF_INET) {
39037602
A
5368 struct ip *ip = mtod(m, struct ip *);
5369 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
0a7de745
A
5370 struct ip *, ip, struct ifnet *, ifp,
5371 struct ip *, ip, struct ip6_hdr *, NULL);
316670eb 5372 } else if (!raw && proto_family == PF_INET6) {
39037602
A
5373 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
5374 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
0a7de745
A
5375 struct ip6_hdr *, ip6, struct ifnet *, ifp,
5376 struct ip *, NULL, struct ip6_hdr *, ip6);
6d2010ae
A
5377 }
5378#endif /* CONFIG_DTRACE */
5379
39236c6e 5380 if (raw == 0 && ifp->if_framer != NULL) {
7e4a7d39
A
5381 int rcvif_set = 0;
5382
5383 /*
5384 * If this is a broadcast packet that needs to be
5385 * looped back into the system, set the inbound ifp
5386 * to that of the outbound ifp. This will allow
5387 * us to determine that it is a legitimate packet
5388 * for the system. Only set the ifp if it's not
5389 * already set, just to be safe.
5390 */
5391 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
5392 m->m_pkthdr.rcvif == NULL) {
5393 m->m_pkthdr.rcvif = ifp;
5394 rcvif_set = 1;
5395 }
cb323159 5396 m_loop_set = m->m_flags & M_LOOP;
6d2010ae 5397 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
39236c6e
A
5398 frame_type, &pre, &post);
5399 if (retval != 0) {
0a7de745 5400 if (retval != EJUSTRETURN) {
2d21ac55 5401 m_freem(m);
0a7de745 5402 }
2d21ac55 5403 goto next;
91447636 5404 }
7e4a7d39 5405
39236c6e
A
5406 /*
5407 * For partial checksum offload, adjust the start
5408 * and stuff offsets based on the prepended header.
5409 */
5410 if ((m->m_pkthdr.csum_flags &
5411 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5412 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5413 m->m_pkthdr.csum_tx_stuff += pre;
5414 m->m_pkthdr.csum_tx_start += pre;
5415 }
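 /*
  * Worked example (illustrative): if the framer prepended a
  * 14-byte Ethernet header, pre == 14 and both csum_tx_start and
  * csum_tx_stuff move forward by 14 so that they remain offsets
  * from the new start of the frame.
  */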
5416
0a7de745 5417 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
39236c6e
A
5418 dlil_output_cksum_dbg(ifp, m, pre,
5419 proto_family);
0a7de745 5420 }
39236c6e 5421
7e4a7d39
A
5422 /*
5423 * Clear the ifp if it was set above, and to be
5424 * safe, only if it is still the same as the
5425 * outbound ifp we have in context. If it was
5426 * looped back, then a copy of it was sent to the
5427 * loopback interface with the rcvif set, and we
5428 * are clearing the one that will go down to the
5429 * layer below.
5430 */
0a7de745 5431 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
7e4a7d39 5432 m->m_pkthdr.rcvif = NULL;
0a7de745 5433 }
91447636 5434 }
6d2010ae
A
5435
5436 /*
2d21ac55
A
5437 * Let interface filters (if any) do their thing ...
5438 */
cb323159
A
5439 retval = dlil_interface_filters_output(ifp, &m, proto_family);
5440 if (retval != 0) {
5441 if (retval != EJUSTRETURN) {
5442 m_freem(m);
1c79356b 5443 }
cb323159 5444 goto next;
1c79356b 5445 }
b7266188 5446 /*
39236c6e
A
5447 * Strip away M_PROTO1 bit prior to sending packet
5448 * to the driver as this field may be used by the driver
b7266188
A
5449 */
5450 m->m_flags &= ~M_PROTO1;
5451
2d21ac55
A
5452 /*
5453 * If the underlying interface is not capable of handling a
5454 * packet whose data portion spans across physically disjoint
5455 * pages, we need to "normalize" the packet so that we pass
5456 * down a chain of mbufs where each mbuf points to a span that
5457 * resides in the system page boundary. If the packet does
5458 * not cross page(s), the following is a no-op.
5459 */
5460 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
0a7de745 5461 if ((m = m_normalize(m)) == NULL) {
2d21ac55 5462 goto next;
0a7de745 5463 }
2d21ac55
A
5464 }
5465
6d2010ae
A
5466 /*
5467 * If this is a TSO packet, make sure the interface still
5468 * advertises TSO capability.
b0d623f7 5469 */
39236c6e 5470 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
6d2010ae
A
5471 retval = EMSGSIZE;
5472 m_freem(m);
5473 goto cleanup;
b0d623f7
A
5474 }
5475
39236c6e 5476 ifp_inc_traffic_class_out(ifp, m);
f427ee49 5477
39236c6e 5478 pktap_output(ifp, proto_family, m, pre, post);
6d2010ae 5479
3e170ce0
A
5480 /*
5481 * Count the number of elements in the mbuf chain
5482 */
5483 if (tx_chain_len_count) {
5484 dlil_count_chain_len(m, &tx_chain_len_stats);
5485 }
5486
5ba3f43e
A
5487 /*
5488 * Record timestamp; ifnet_enqueue() will use this info
5489 * rather than redoing the work. An optimization could
5490 * involve doing this just once at the top, if there are
5491 * no interface filters attached, but that's probably
5492 * not a big deal.
5493 */
5494 nanouptime(&now);
5495 net_timernsec(&now, &now_nsec);
5496 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
5497
5498 /*
5499 * Discard partial sum information if this packet originated
5500 * from another interface; the packet would already have the
5501 * final checksum and we shouldn't recompute it.
5502 */
5503 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
0a7de745
A
5504 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5505 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5ba3f43e
A
5506 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5507 m->m_pkthdr.csum_data = 0;
5508 }
5509
2d21ac55
A
5510 /*
5511 * Finally, call the driver.
5512 */
3e170ce0 5513 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
39236c6e
A
5514 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5515 flen += (m_pktlen(m) - (pre + post));
5516 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5517 }
2d21ac55
A
5518 *send_tail = m;
5519 send_tail = &m->m_nextpkt;
6d2010ae 5520 } else {
39236c6e
A
5521 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5522 flen = (m_pktlen(m) - (pre + post));
5523 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5524 } else {
5525 flen = 0;
5526 }
6d2010ae 5527 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
39236c6e 5528 0, 0, 0, 0, 0);
5ba3f43e 5529 retval = (*ifp->if_output_dlil)(ifp, m);
316670eb
A
5530 if (retval == EQFULL || retval == EQSUSPENDED) {
5531 if (adv != NULL && adv->code == FADV_SUCCESS) {
5532 adv->code = (retval == EQFULL ?
5533 FADV_FLOW_CONTROLLED :
5534 FADV_SUSPENDED);
5535 }
5536 retval = 0;
5537 }
39236c6e
A
5538 if (retval == 0 && flen > 0) {
5539 fbytes += flen;
5540 fpkts++;
5541 }
5542 if (retval != 0 && dlil_verbose) {
cb323159 5543 DLIL_PRINTF("%s: output error on %s retval = %d\n",
39236c6e 5544 __func__, if_name(ifp),
6d2010ae 5545 retval);
2d21ac55 5546 }
6d2010ae 5547 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
39236c6e 5548 0, 0, 0, 0, 0);
2d21ac55 5549 }
39236c6e 5550 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
2d21ac55
A
5551
5552next:
5553 m = packetlist;
39236c6e 5554 if (m != NULL) {
cb323159 5555 m->m_flags |= m_loop_set;
2d21ac55
A
5556 packetlist = packetlist->m_nextpkt;
5557 m->m_nextpkt = NULL;
5558 }
d9a64523 5559 /* Reset the proto family to old proto family for CLAT */
0a7de745 5560 if (did_clat46) {
d9a64523 5561 proto_family = old_proto_family;
0a7de745 5562 }
39236c6e 5563 } while (m != NULL);
d41d1dae 5564
39236c6e 5565 if (send_head != NULL) {
39236c6e
A
5566 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5567 0, 0, 0, 0, 0);
3e170ce0 5568 if (ifp->if_eflags & IFEF_SENDLIST) {
5ba3f43e 5569 retval = (*ifp->if_output_dlil)(ifp, send_head);
3e170ce0
A
5570 if (retval == EQFULL || retval == EQSUSPENDED) {
5571 if (adv != NULL) {
5572 adv->code = (retval == EQFULL ?
5573 FADV_FLOW_CONTROLLED :
5574 FADV_SUSPENDED);
5575 }
5576 retval = 0;
5577 }
5578 if (retval == 0 && flen > 0) {
5579 fbytes += flen;
5580 fpkts++;
5581 }
5582 if (retval != 0 && dlil_verbose) {
cb323159 5583 DLIL_PRINTF("%s: output error on %s retval = %d\n",
3e170ce0
A
5584 __func__, if_name(ifp), retval);
5585 }
5586 } else {
5587 struct mbuf *send_m;
5588 int enq_cnt = 0;
5589 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
5590 while (send_head != NULL) {
5591 send_m = send_head;
5592 send_head = send_m->m_nextpkt;
5593 send_m->m_nextpkt = NULL;
5ba3f43e 5594 retval = (*ifp->if_output_dlil)(ifp, send_m);
3e170ce0
A
5595 if (retval == EQFULL || retval == EQSUSPENDED) {
5596 if (adv != NULL) {
5597 adv->code = (retval == EQFULL ?
5598 FADV_FLOW_CONTROLLED :
5599 FADV_SUSPENDED);
5600 }
5601 retval = 0;
5602 }
5603 if (retval == 0) {
5604 enq_cnt++;
0a7de745 5605 if (flen > 0) {
3e170ce0 5606 fpkts++;
0a7de745 5607 }
3e170ce0
A
5608 }
5609 if (retval != 0 && dlil_verbose) {
cb323159 5610 DLIL_PRINTF("%s: output error on %s "
39037602 5611 "retval = %d\n",
3e170ce0
A
5612 __func__, if_name(ifp), retval);
5613 }
5614 }
5615 if (enq_cnt > 0) {
5616 fbytes += flen;
5617 ifnet_start(ifp);
316670eb 5618 }
39236c6e
A
5619 }
5620 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b 5621 }
6d2010ae 5622
39236c6e 5623 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b 5624
91447636 5625cleanup:
0a7de745 5626 if (fbytes > 0) {
39236c6e 5627 ifp->if_fbytes += fbytes;
0a7de745
A
5628 }
5629 if (fpkts > 0) {
39236c6e 5630 ifp->if_fpackets += fpkts;
0a7de745
A
5631 }
5632 if (proto != NULL) {
6d2010ae 5633 if_proto_free(proto);
0a7de745
A
5634 }
5635 if (packetlist) { /* if any packets are left, clean up */
2d21ac55 5636 mbuf_freem_list(packetlist);
0a7de745
A
5637 }
5638 if (retval == EJUSTRETURN) {
91447636 5639 retval = 0;
0a7de745
A
5640 }
5641 if (iorefcnt == 1) {
cb323159 5642 ifnet_datamov_end(ifp);
0a7de745 5643 }
d9a64523
A
5644 if (rt != NULL) {
5645 rtfree(rt);
5646 rt = NULL;
5647 }
6d2010ae 5648
0a7de745 5649 return retval;
1c79356b
A
5650}
5651
d9a64523
A
5652/*
5653 * This routine checks that the destination address is not a loopback, link-local,
5654 * multicast or broadcast address.
5655 */
5656static int
5657dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
5658{
5659 int ret = 0;
0a7de745 5660 switch (proto_family) {
d9a64523
A
5661 case PF_INET: {
5662 struct ip *iph = mtod(m, struct ip *);
0a7de745 5663 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
d9a64523 5664 ret = 1;
0a7de745 5665 }
d9a64523
A
5666 break;
5667 }
5668 case PF_INET6: {
5669 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
5670 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
0a7de745 5671 CLAT64_NEEDED(&ip6h->ip6_dst)) {
d9a64523 5672 ret = 1;
0a7de745 5673 }
d9a64523
A
5674 break;
5675 }
5676 }
5677
0a7de745 5678 return ret;
d9a64523
A
5679}
5680/*
5681 * @brief This routine translates an IPv4 packet to an IPv6 packet,
5682 * updates the protocol checksum and also translates ICMP,
5683 * including the inner header.
5684 *
5685 * @param ifp Pointer to the interface
5686 * @param proto_family pointer to the protocol family. It is updated if
5687 * the function performs the translation successfully.
5688 * @param m Pointer to the pointer pointing to the packet. Needed because this
5689 * routine can end up changing the mbuf to a different one.
5690 *
5691 * @return 0 on success or else a negative value.
5692 */
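/*
 * The translation below proceeds in three steps: synthesize the IPv6
 * source and destination (the reserved CLAT46 address plus the NAT64
 * prefix learned from PLAT discovery), rewrite the IP header via
 * nat464_translate_46(), then rewrite the transport/ICMP layer via
 * nat464_translate_proto(), appending a fragment header last when the
 * original packet was fragmented.
 */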
5693static errno_t
5694dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5695{
5696 VERIFY(*proto_family == PF_INET);
5697 VERIFY(IS_INTF_CLAT46(ifp));
5698
5699 pbuf_t pbuf_store, *pbuf = NULL;
5700 struct ip *iph = NULL;
5701 struct in_addr osrc, odst;
5702 uint8_t proto = 0;
5703 struct in6_ifaddr *ia6_clat_src = NULL;
5704 struct in6_addr *src = NULL;
5705 struct in6_addr dst;
5706 int error = 0;
f427ee49
A
5707 uint16_t off = 0;
5708 uint16_t tot_len = 0;
d9a64523
A
5709 uint16_t ip_id_val = 0;
5710 uint16_t ip_frag_off = 0;
5711
5712 boolean_t is_frag = FALSE;
5713 boolean_t is_first_frag = TRUE;
5714 boolean_t is_last_frag = TRUE;
5715
5716 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5717 pbuf = &pbuf_store;
5718 iph = pbuf->pb_data;
5719
5720 osrc = iph->ip_src;
5721 odst = iph->ip_dst;
5722 proto = iph->ip_p;
f427ee49 5723 off = (uint16_t)(iph->ip_hl << 2);
d9a64523
A
5724 ip_id_val = iph->ip_id;
5725 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5726
5727 tot_len = ntohs(iph->ip_len);
5728
5729 /*
5730 * For packets that are not first fragments
5731 * we only need to adjust the checksum.
5732 * For 4-to-6 translation, the fragmentation header
5733 * gets appended after protocol translation.
5734 */
5735 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5736 is_frag = TRUE;
5737
5738 /* If the offset is not zero, it is not first frag */
0a7de745 5739 if (ip_frag_off != 0) {
d9a64523 5740 is_first_frag = FALSE;
0a7de745 5741 }
d9a64523
A
5742
5743 /* If IP_MF is set, then it is not last frag */
0a7de745 5744 if (ntohs(iph->ip_off) & IP_MF) {
d9a64523 5745 is_last_frag = FALSE;
0a7de745 5746 }
d9a64523
A
5747 }
5748
5749 /*
5750 * Retrieve the local IPv6 CLAT46 address reserved for stateless
5751 * translation.
5752 */
5753 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5754 if (ia6_clat_src == NULL) {
5755 ip6stat.ip6s_clat464_out_nov6addr_drop++;
5756 error = -1;
5757 goto cleanup;
5758 }
5759
5760 src = &ia6_clat_src->ia_addr.sin6_addr;
5761
5762 /*
5763 * Translate IPv4 destination to IPv6 destination by using the
5764 * prefixes learned through prior PLAT discovery.
5765 */
5766 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5767 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5768 goto cleanup;
5769 }
5770
5771 /* Translate the IP header part first */
5772 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5773 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5774
0a7de745 5775 iph = NULL; /* Invalidate iph as pbuf has been modified */
d9a64523
A
5776
5777 if (error != 0) {
5778 ip6stat.ip6s_clat464_out_46transfail_drop++;
5779 goto cleanup;
5780 }
5781
5782 /*
5783 * Translate protocol header, update checksum, checksum flags
5784 * and related fields.
5785 */
5786 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5787 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5788
5789 if (error != 0) {
5790 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5791 goto cleanup;
5792 }
5793
5794 /* Now insert the IPv6 fragment header */
5795 if (is_frag) {
5796 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5797
5798 if (error != 0) {
5799 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5800 goto cleanup;
5801 }
5802 }
5803
5804cleanup:
0a7de745 5805 if (ia6_clat_src != NULL) {
d9a64523 5806 IFA_REMREF(&ia6_clat_src->ia_ifa);
0a7de745 5807 }
d9a64523
A
5808
5809 if (pbuf_is_valid(pbuf)) {
5810 *m = pbuf->pb_mbuf;
5811 pbuf->pb_mbuf = NULL;
5812 pbuf_destroy(pbuf);
5813 } else {
5814 error = -1;
5815 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5816 }
5817
5818 if (error == 0) {
5819 *proto_family = PF_INET6;
5820 ip6stat.ip6s_clat464_out_success++;
5821 }
5822
0a7de745 5823 return error;
d9a64523
A
5824}
5825
5826/*
5827 * @brief This routine translates an incoming IPv6 packet to IPv4,
5828 * updates the protocol checksum and also translates the ICMPv6
5829 * outer and inner headers
5830 *
5831 * @return 0 on success or else a negative value.
5832 */
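/*
 * Unlike the 4-to-6 path, translation only happens when the packet's
 * destination matches the interface's reserved CLAT46 IPv6 address;
 * all other IPv6 traffic is passed through untouched with error 0.
 */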
5833static errno_t
5834dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5835{
5836 VERIFY(*proto_family == PF_INET6);
5837 VERIFY(IS_INTF_CLAT46(ifp));
5838
5839 struct ip6_hdr *ip6h = NULL;
5840 struct in6_addr osrc, odst;
5841 uint8_t proto = 0;
5842 struct in6_ifaddr *ia6_clat_dst = NULL;
5843 struct in_ifaddr *ia4_clat_dst = NULL;
5844 struct in_addr *dst = NULL;
5845 struct in_addr src;
5846 int error = 0;
5847 uint32_t off = 0;
5848 u_int64_t tot_len = 0;
5849 uint8_t tos = 0;
5850 boolean_t is_first_frag = TRUE;
5851
5852 /* Incoming mbuf does not contain a valid IPv6 header */
5853 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5854 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5855 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5856 ip6stat.ip6s_clat464_in_tooshort_drop++;
0a7de745 5857 return -1;
d9a64523
A
5858 }
5859
5860 ip6h = mtod(*m, struct ip6_hdr *);
5861 /* Validate that mbuf contains IP payload equal to ip6_plen */
5862 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5863 ip6stat.ip6s_clat464_in_tooshort_drop++;
0a7de745 5864 return -1;
d9a64523
A
5865 }
5866
5867 osrc = ip6h->ip6_src;
5868 odst = ip6h->ip6_dst;
5869
5870 /*
5871 * Retrieve the local CLAT46 reserved IPv6 address.
5872 * Let the packet pass if we don't find one, as the flag
5873 * may get set before IPv6 configuration has taken place.
5874 */
5875 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
0a7de745 5876 if (ia6_clat_dst == NULL) {
d9a64523 5877 goto done;
0a7de745 5878 }
d9a64523
A
5879
5880 /*
5881 * Check if the original dest in the packet is same as the reserved
5882 * CLAT46 IPv6 address
5883 */
5884 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5885 pbuf_t pbuf_store, *pbuf = NULL;
5886 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5887 pbuf = &pbuf_store;
5888
5889 /*
5890 * Retrieve the local CLAT46 IPv4 address reserved for stateless
5891 * translation.
5892 */
5893 ia4_clat_dst = inifa_ifpclatv4(ifp);
5894 if (ia4_clat_dst == NULL) {
5895 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5896 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5897 error = -1;
5898 goto cleanup;
5899 }
5900 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5901
5902 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5903 dst = &ia4_clat_dst->ia_addr.sin_addr;
5904 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5905 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5906 error = -1;
5907 goto cleanup;
5908 }
5909
5910 ip6h = pbuf->pb_data;
5911 off = sizeof(struct ip6_hdr);
5912 proto = ip6h->ip6_nxt;
5913 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5914 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5915
5916 /*
5917 * Translate the IP header and update the fragmentation
5918 * header if needed
5919 */
5920 error = (nat464_translate_64(pbuf, off, tos, &proto,
5921 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5922 0 : -1;
5923
5924 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5925
5926 if (error != 0) {
5927 ip6stat.ip6s_clat464_in_64transfail_drop++;
5928 goto cleanup;
5929 }
5930
5931 /*
5932 * Translate protocol header, update checksum, checksum flags
5933 * and related fields.
5934 */
5935 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5936 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5937 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5938
5939 if (error != 0) {
5940 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5941 goto cleanup;
5942 }
5943
5944cleanup:
0a7de745 5945 if (ia4_clat_dst != NULL) {
d9a64523 5946 IFA_REMREF(&ia4_clat_dst->ia_ifa);
0a7de745 5947 }
d9a64523
A
5948
5949 if (pbuf_is_valid(pbuf)) {
5950 *m = pbuf->pb_mbuf;
5951 pbuf->pb_mbuf = NULL;
5952 pbuf_destroy(pbuf);
5953 } else {
5954 error = -1;
5955 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5956 }
5957
5958 if (error == 0) {
5959 *proto_family = PF_INET;
5960 ip6stat.ip6s_clat464_in_success++;
5961 }
5962 } /* CLAT traffic */
5963
5964done:
0a7de745 5965 return error;
d9a64523
A
5966}
5967
2d21ac55 5968errno_t
6d2010ae
A
5969ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5970 void *ioctl_arg)
5971{
5972 struct ifnet_filter *filter;
5973 int retval = EOPNOTSUPP;
5974 int result = 0;
5975
0a7de745
A
5976 if (ifp == NULL || ioctl_code == 0) {
5977 return EINVAL;
5978 }
6d2010ae
A
5979
5980 /* Get an io ref count if the interface is attached */
0a7de745
A
5981 if (!ifnet_is_attached(ifp, 1)) {
5982 return EOPNOTSUPP;
5983 }
6d2010ae 5984
39037602
A
5985 /*
5986 * Run the interface filters first.
91447636
A
5987 * We want to run all filters before calling the protocol,
5988 * interface family, or interface.
5989 */
6d2010ae
A
5990 lck_mtx_lock_spin(&ifp->if_flt_lock);
5991 /* prevent filter list from changing in case we drop the lock */
5992 if_flt_monitor_busy(ifp);
91447636 5993 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
6d2010ae
A
5994 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5995 filter->filt_protocol == proto_fam)) {
5996 lck_mtx_unlock(&ifp->if_flt_lock);
5997
5998 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5999 proto_fam, ioctl_code, ioctl_arg);
6000
6001 lck_mtx_lock_spin(&ifp->if_flt_lock);
6002
91447636
A
6003 /* Only update retval if no one has handled the ioctl */
6004 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 6005 if (result == ENOTSUP) {
91447636 6006 result = EOPNOTSUPP;
0a7de745 6007 }
91447636 6008 retval = result;
6d2010ae
A
6009 if (retval != 0 && retval != EOPNOTSUPP) {
6010 /* we're done with the filter list */
6011 if_flt_monitor_unbusy(ifp);
6012 lck_mtx_unlock(&ifp->if_flt_lock);
91447636
A
6013 goto cleanup;
6014 }
6015 }
6016 }
6017 }
6d2010ae
A
6018 /* we're done with the filter list */
6019 if_flt_monitor_unbusy(ifp);
6020 lck_mtx_unlock(&ifp->if_flt_lock);
6021
91447636 6022 /* Allow the protocol to handle the ioctl */
6d2010ae 6023 if (proto_fam != 0) {
0a7de745 6024 struct if_proto *proto;
6d2010ae
A
6025
6026 /* callee holds a proto refcnt upon success */
6027 ifnet_lock_shared(ifp);
6028 proto = find_attached_proto(ifp, proto_fam);
6029 ifnet_lock_done(ifp);
6030 if (proto != NULL) {
6031 proto_media_ioctl ioctlp =
6032 (proto->proto_kpi == kProtoKPI_v1 ?
6033 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
91447636 6034 result = EOPNOTSUPP;
0a7de745 6035 if (ioctlp != NULL) {
6d2010ae
A
6036 result = ioctlp(ifp, proto_fam, ioctl_code,
6037 ioctl_arg);
0a7de745 6038 }
6d2010ae
A
6039 if_proto_free(proto);
6040
91447636
A
6041 /* Only update retval if no one has handled the ioctl */
6042 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 6043 if (result == ENOTSUP) {
91447636 6044 result = EOPNOTSUPP;
0a7de745 6045 }
91447636 6046 retval = result;
0a7de745 6047 if (retval && retval != EOPNOTSUPP) {
91447636 6048 goto cleanup;
0a7de745 6049 }
91447636
A
6050 }
6051 }
6052 }
6d2010ae 6053
91447636 6054 /* retval is either 0 or EOPNOTSUPP */
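 /*
  * Every filter and the protocol have now had a chance to claim the
  * ioctl. Each stage only overwrites retval while it is still
  * EOPNOTSUPP (or when a handler returns EJUSTRETURN to force the
  * issue), and ENOTSUP is normalized to EOPNOTSUPP so the two
  * spellings of "not supported" cannot mask a real handler result.
  * The interface itself gets the final word below.
  */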
6d2010ae 6055
91447636
A
6056 /*
6057 * Let the interface handle this ioctl.
6058 * If it returns EOPNOTSUPP, ignore that, we may have
6059 * already handled this in the protocol or family.
6060 */
0a7de745 6061 if (ifp->if_ioctl) {
91447636 6062 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
0a7de745 6063 }
6d2010ae 6064
91447636
A
6065 /* Only update retval if no one has handled the ioctl */
6066 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 6067 if (result == ENOTSUP) {
91447636 6068 result = EOPNOTSUPP;
0a7de745 6069 }
91447636
A
6070 retval = result;
6071 if (retval && retval != EOPNOTSUPP) {
6072 goto cleanup;
6073 }
6074 }
1c79356b 6075
6d2010ae 6076cleanup:
0a7de745 6077 if (retval == EJUSTRETURN) {
91447636 6078 retval = 0;
0a7de745 6079 }
6d2010ae
A
6080
6081 ifnet_decr_iorefcnt(ifp);
6082
0a7de745 6083 return retval;
91447636 6084}
1c79356b 6085
91447636 6086__private_extern__ errno_t
6d2010ae 6087dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
91447636 6088{
0a7de745 6089 errno_t error = 0;
6d2010ae
A
6090
6091
6092 if (ifp->if_set_bpf_tap) {
6093 /* Get an io reference on the interface if it is attached */
0a7de745
A
6094 if (!ifnet_is_attached(ifp, 1)) {
6095 return ENXIO;
6096 }
91447636 6097 error = ifp->if_set_bpf_tap(ifp, mode, callback);
6d2010ae
A
6098 ifnet_decr_iorefcnt(ifp);
6099 }
0a7de745 6100 return error;
1c79356b
A
6101}
6102
2d21ac55 6103errno_t
6d2010ae
A
6104dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
6105 struct sockaddr *ll_addr, size_t ll_len)
1c79356b 6106{
0a7de745 6107 errno_t result = EOPNOTSUPP;
91447636
A
6108 struct if_proto *proto;
6109 const struct sockaddr *verify;
2d21ac55 6110 proto_media_resolve_multi resolvep;
6d2010ae 6111
0a7de745
A
6112 if (!ifnet_is_attached(ifp, 1)) {
6113 return result;
6114 }
6d2010ae 6115
91447636 6116 bzero(ll_addr, ll_len);
6d2010ae
A
6117
6118 /* Call the protocol first; callee holds a proto refcnt upon success */
6119 ifnet_lock_shared(ifp);
91447636 6120 proto = find_attached_proto(ifp, proto_addr->sa_family);
6d2010ae 6121 ifnet_lock_done(ifp);
2d21ac55 6122 if (proto != NULL) {
6d2010ae
A
6123 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
6124 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
0a7de745 6125 if (resolvep != NULL) {
6d2010ae 6126 result = resolvep(ifp, proto_addr,
39037602 6127 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
0a7de745 6128 }
6d2010ae 6129 if_proto_free(proto);
91447636 6130 }
6d2010ae 6131
91447636
A
6132 /* Let the interface verify the multicast address */
6133 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
0a7de745 6134 if (result == 0) {
91447636 6135 verify = ll_addr;
0a7de745 6136 } else {
91447636 6137 verify = proto_addr;
0a7de745 6138 }
91447636
A
6139 result = ifp->if_check_multi(ifp, verify);
6140 }
6d2010ae
A
6141
6142 ifnet_decr_iorefcnt(ifp);
0a7de745 6143 return result;
91447636 6144}
1c79356b 6145
91447636 6146__private_extern__ errno_t
6d2010ae 6147dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
39037602
A
6148 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6149 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
91447636
A
6150{
6151 struct if_proto *proto;
0a7de745 6152 errno_t result = 0;
6d2010ae
A
6153
6154 /* callee holds a proto refcnt upon success */
6155 ifnet_lock_shared(ifp);
91447636 6156 proto = find_attached_proto(ifp, target_proto->sa_family);
6d2010ae 6157 ifnet_lock_done(ifp);
2d21ac55 6158 if (proto == NULL) {
91447636 6159 result = ENOTSUP;
6d2010ae 6160 } else {
0a7de745 6161 proto_media_send_arp arpp;
6d2010ae
A
6162 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
6163 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
39236c6e 6164 if (arpp == NULL) {
2d21ac55 6165 result = ENOTSUP;
39236c6e
A
6166 } else {
6167 switch (arpop) {
6168 case ARPOP_REQUEST:
6169 arpstat.txrequests++;
0a7de745 6170 if (target_hw != NULL) {
39236c6e 6171 arpstat.txurequests++;
0a7de745 6172 }
39236c6e
A
6173 break;
6174 case ARPOP_REPLY:
6175 arpstat.txreplies++;
6176 break;
6177 }
6d2010ae
A
6178 result = arpp(ifp, arpop, sender_hw, sender_proto,
6179 target_hw, target_proto);
39236c6e 6180 }
6d2010ae 6181 if_proto_free(proto);
91447636 6182 }
6d2010ae 6183
0a7de745 6184 return result;
91447636 6185}
1c79356b 6186
39236c6e
A
6187struct net_thread_marks { };
6188static const struct net_thread_marks net_thread_marks_base = { };
6189
6190__private_extern__ const net_thread_marks_t net_thread_marks_none =
0a7de745 6191 &net_thread_marks_base;
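/*
 * A net_thread_marks_t does not point at a real object: the bits just
 * pushed are encoded as a byte offset from net_thread_marks_base, so
 * net_thread_marks_pop() can recover them by pointer subtraction.
 * Pushing bits that were already set yields offset 0, i.e.
 * net_thread_marks_none, which makes push/pop pairs naturally
 * idempotent.
 */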
39236c6e
A
6192
6193__private_extern__ net_thread_marks_t
6194net_thread_marks_push(u_int32_t push)
316670eb 6195{
39236c6e
A
6196 static const char *const base = (const void*)&net_thread_marks_base;
6197 u_int32_t pop = 0;
6198
6199 if (push != 0) {
6200 struct uthread *uth = get_bsdthread_info(current_thread());
6201
6202 pop = push & ~uth->uu_network_marks;
0a7de745 6203 if (pop != 0) {
39236c6e 6204 uth->uu_network_marks |= pop;
0a7de745 6205 }
39236c6e
A
6206 }
6207
0a7de745 6208 return (net_thread_marks_t)&base[pop];
316670eb
A
6209}
6210
39236c6e
A
6211__private_extern__ net_thread_marks_t
6212net_thread_unmarks_push(u_int32_t unpush)
316670eb 6213{
39236c6e
A
6214 static const char *const base = (const void*)&net_thread_marks_base;
6215 u_int32_t unpop = 0;
6216
6217 if (unpush != 0) {
6218 struct uthread *uth = get_bsdthread_info(current_thread());
316670eb 6219
39236c6e 6220 unpop = unpush & uth->uu_network_marks;
0a7de745 6221 if (unpop != 0) {
39236c6e 6222 uth->uu_network_marks &= ~unpop;
0a7de745 6223 }
39236c6e
A
6224 }
6225
0a7de745 6226 return (net_thread_marks_t)&base[unpop];
316670eb
A
6227}
6228
6229__private_extern__ void
39236c6e 6230net_thread_marks_pop(net_thread_marks_t popx)
316670eb 6231{
39236c6e 6232 static const char *const base = (const void*)&net_thread_marks_base;
3e170ce0 6233 const ptrdiff_t pop = (const char *)popx - (const char *)base;
316670eb 6234
39236c6e
A
6235 if (pop != 0) {
6236 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6237 struct uthread *uth = get_bsdthread_info(current_thread());
6238
6239 VERIFY((pop & ones) == pop);
6240 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
6241 uth->uu_network_marks &= ~pop;
6242 }
6243}
6244
6245__private_extern__ void
6246net_thread_unmarks_pop(net_thread_marks_t unpopx)
6247{
6248 static const char *const base = (const void*)&net_thread_marks_base;
3e170ce0 6249 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
39236c6e
A
6250
6251 if (unpop != 0) {
6252 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6253 struct uthread *uth = get_bsdthread_info(current_thread());
6254
6255 VERIFY((unpop & ones) == unpop);
6256 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
6257 uth->uu_network_marks |= unpop;
6258 }
6259}
6260
6261__private_extern__ u_int32_t
6262net_thread_is_marked(u_int32_t check)
6263{
6264 if (check != 0) {
6265 struct uthread *uth = get_bsdthread_info(current_thread());
0a7de745
A
6266 return uth->uu_network_marks & check;
6267 } else {
6268 return 0;
39236c6e 6269 }
39236c6e
A
6270}
6271
6272__private_extern__ u_int32_t
6273net_thread_is_unmarked(u_int32_t check)
6274{
6275 if (check != 0) {
6276 struct uthread *uth = get_bsdthread_info(current_thread());
0a7de745
A
6277 return ~uth->uu_network_marks & check;
6278 } else {
6279 return 0;
39236c6e 6280 }
316670eb
A
6281}
6282
2d21ac55
A
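/*
 * An ARP announcement (gratuitous ARP) carries the same IPv4 address
 * as both sender and target; that equality is all this helper checks.
 */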
6283static __inline__ int
6284_is_announcement(const struct sockaddr_in * sender_sin,
6d2010ae 6285 const struct sockaddr_in * target_sin)
2d21ac55 6286{
cb323159 6287 if (target_sin == NULL || sender_sin == NULL) {
0a7de745 6288 return FALSE;
2d21ac55 6289 }
cb323159 6290
0a7de745 6291 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
2d21ac55
A
6292}
6293
91447636 6294__private_extern__ errno_t
39037602
A
6295dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
6296 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
6297 const struct sockaddr *target_proto0, u_int32_t rtflags)
91447636 6298{
0a7de745 6299 errno_t result = 0;
2d21ac55
A
6300 const struct sockaddr_in * sender_sin;
6301 const struct sockaddr_in * target_sin;
316670eb
A
6302 struct sockaddr_inarp target_proto_sinarp;
6303 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6d2010ae 6304
cb323159
A
6305 if (target_proto == NULL || sender_proto == NULL) {
6306 return EINVAL;
6307 }
6308
6309 if (sender_proto->sa_family != target_proto->sa_family) {
0a7de745
A
6310 return EINVAL;
6311 }
6d2010ae 6312
316670eb
A
6313 /*
6314 * If the target is a (default) router, provide that
6315 * information to the send_arp callback routine.
6316 */
6317 if (rtflags & RTF_ROUTER) {
6318 bcopy(target_proto, &target_proto_sinarp,
0a7de745 6319 sizeof(struct sockaddr_in));
316670eb
A
6320 target_proto_sinarp.sin_other |= SIN_ROUTER;
6321 target_proto = (struct sockaddr *)&target_proto_sinarp;
6322 }
6323
91447636
A
6324 /*
6325 * If this is an ARP request and the target IP is IPv4LL,
2d21ac55
A
6326 * send the request on all interfaces. The exception is
6327 * an announcement, which must only appear on the specific
6328 * interface.
91447636 6329 */
316670eb
A
6330 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
6331 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6d2010ae
A
6332 if (target_proto->sa_family == AF_INET &&
6333 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
6334 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
cb323159 6335 !_is_announcement(sender_sin, target_sin)) {
0a7de745
A
6336 ifnet_t *ifp_list;
6337 u_int32_t count;
6338 u_int32_t ifp_on;
6d2010ae 6339
91447636
A
6340 result = ENOTSUP;
6341
6342 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
6343 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6d2010ae
A
6344 errno_t new_result;
6345 ifaddr_t source_hw = NULL;
6346 ifaddr_t source_ip = NULL;
6347 struct sockaddr_in source_ip_copy;
6348 struct ifnet *cur_ifp = ifp_list[ifp_on];
6349
91447636 6350 /*
6d2010ae
A
6351 * Only arp on interfaces marked for IPv4LL
6352 * ARPing. This may mean that we don't ARP on
6353 * the interface the subnet route points to.
91447636 6354 */
0a7de745 6355 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
91447636 6356 continue;
0a7de745 6357 }
b0d623f7 6358
91447636 6359 /* Find the source IP address */
6d2010ae
A
6360 ifnet_lock_shared(cur_ifp);
6361 source_hw = cur_ifp->if_lladdr;
6362 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
6363 ifa_link) {
6364 IFA_LOCK(source_ip);
6365 if (source_ip->ifa_addr != NULL &&
6366 source_ip->ifa_addr->sa_family ==
6367 AF_INET) {
6368 /* Copy the source IP address */
6369 source_ip_copy =
6370 *(struct sockaddr_in *)
316670eb 6371 (void *)source_ip->ifa_addr;
6d2010ae 6372 IFA_UNLOCK(source_ip);
91447636
A
6373 break;
6374 }
6d2010ae 6375 IFA_UNLOCK(source_ip);
91447636 6376 }
6d2010ae 6377
91447636
A
6378 /* No IP Source, don't arp */
6379 if (source_ip == NULL) {
6d2010ae 6380 ifnet_lock_done(cur_ifp);
91447636
A
6381 continue;
6382 }
6d2010ae
A
6383
6384 IFA_ADDREF(source_hw);
6385 ifnet_lock_done(cur_ifp);
6386
91447636 6387 /* Send the ARP */
6d2010ae 6388 new_result = dlil_send_arp_internal(cur_ifp,
316670eb
A
6389 arpop, (struct sockaddr_dl *)(void *)
6390 source_hw->ifa_addr,
6d2010ae
A
6391 (struct sockaddr *)&source_ip_copy, NULL,
6392 target_proto);
b0d623f7 6393
6d2010ae 6394 IFA_REMREF(source_hw);
91447636
A
6395 if (result == ENOTSUP) {
6396 result = new_result;
6397 }
6398 }
6d2010ae 6399 ifnet_list_free(ifp_list);
91447636 6400 }
6d2010ae
A
6401 } else {
6402 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
6403 sender_proto, target_hw, target_proto);
91447636 6404 }
6d2010ae 6405
0a7de745 6406 return result;
91447636 6407}
1c79356b 6408
6d2010ae
A
6409/*
6410 * Caller must hold ifnet head lock.
6411 */
6412static int
6413ifnet_lookup(struct ifnet *ifp)
91447636 6414{
6d2010ae
A
6415 struct ifnet *_ifp;
6416
5ba3f43e 6417 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
6d2010ae 6418 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
0a7de745 6419 if (_ifp == ifp) {
91447636 6420 break;
0a7de745 6421 }
6d2010ae 6422 }
0a7de745 6423 return _ifp != NULL;
91447636 6424}
39037602 6425
6d2010ae
A
6426/*
6427 * Caller has to pass a non-zero refio argument to get an
6428 * IO reference count. This will prevent ifnet_detach from
39037602 6429 * being called when there are outstanding io reference counts.
91447636 6430 */
6d2010ae
A
6431int
6432ifnet_is_attached(struct ifnet *ifp, int refio)
6433{
6434 int ret;
6435
6436 lck_mtx_lock_spin(&ifp->if_ref_lock);
5ba3f43e 6437 if ((ret = IF_FULLY_ATTACHED(ifp))) {
0a7de745 6438 if (refio > 0) {
6d2010ae 6439 ifp->if_refio++;
0a7de745 6440 }
6d2010ae
A
6441 }
6442 lck_mtx_unlock(&ifp->if_ref_lock);
6443
0a7de745 6444 return ret;
6d2010ae
A
6445}
6446
cb323159
A
6447void
6448ifnet_incr_pending_thread_count(struct ifnet *ifp)
6449{
6450 lck_mtx_lock_spin(&ifp->if_ref_lock);
6451 ifp->if_threads_pending++;
6452 lck_mtx_unlock(&ifp->if_ref_lock);
6453}
6454
6455void
6456ifnet_decr_pending_thread_count(struct ifnet *ifp)
6457{
6458 lck_mtx_lock_spin(&ifp->if_ref_lock);
6459 VERIFY(ifp->if_threads_pending > 0);
6460 ifp->if_threads_pending--;
6461 if (ifp->if_threads_pending == 0) {
6462 wakeup(&ifp->if_threads_pending);
6463 }
6464 lck_mtx_unlock(&ifp->if_ref_lock);
6465}
6466
39037602
A
6467/*
6468 * Caller must ensure the interface is attached; the assumption is that
6469 * there is at least an outstanding IO reference count held already.
cb323159 6470 * Most callers would call ifnet_is_{attached,data_ready}() instead.
39037602
A
6471 */
6472void
6473ifnet_incr_iorefcnt(struct ifnet *ifp)
6474{
6475 lck_mtx_lock_spin(&ifp->if_ref_lock);
5ba3f43e 6476 VERIFY(IF_FULLY_ATTACHED(ifp));
39037602
A
6477 VERIFY(ifp->if_refio > 0);
6478 ifp->if_refio++;
6479 lck_mtx_unlock(&ifp->if_ref_lock);
6480}
6481
cb323159
A
6482__attribute__((always_inline))
6483static void
6484ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
6d2010ae 6485{
cb323159
A
6486 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
6487
6d2010ae 6488 VERIFY(ifp->if_refio > 0);
5ba3f43e 6489 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
cb323159 6490
6d2010ae 6491 ifp->if_refio--;
cb323159 6492 VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);
6d2010ae 6493
39037602
A
6494 /*
6495 * if there are no more outstanding io references, wakeup the
6d2010ae
A
6496 * ifnet_detach thread if detaching flag is set.
6497 */
0a7de745 6498 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
6d2010ae 6499 wakeup(&(ifp->if_refio));
0a7de745 6500 }
cb323159
A
6501}
6502
6503void
6504ifnet_decr_iorefcnt(struct ifnet *ifp)
6505{
6506 lck_mtx_lock_spin(&ifp->if_ref_lock);
6507 ifnet_decr_iorefcnt_locked(ifp);
6508 lck_mtx_unlock(&ifp->if_ref_lock);
6509}
6510
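/*
 * Data-movement accounting: ifnet_datamov_begin()/end() bracket every
 * thread actively moving packets through the interface. A suspender
 * calls ifnet_datamov_suspend() to clear IFRF_READY (so new begins
 * fail), ifnet_datamov_drain() to sleep until if_datamov drops to
 * zero, and ifnet_datamov_resume() to restore IFRF_READY once done.
 */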
6511boolean_t
6512ifnet_datamov_begin(struct ifnet *ifp)
6513{
6514 boolean_t ret;
6515
6516 lck_mtx_lock_spin(&ifp->if_ref_lock);
6517 if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
6518 ifp->if_refio++;
6519 ifp->if_datamov++;
6520 }
6521 lck_mtx_unlock(&ifp->if_ref_lock);
6522
6523 return ret;
6524}
6525
6526void
6527ifnet_datamov_end(struct ifnet *ifp)
6528{
6529 lck_mtx_lock_spin(&ifp->if_ref_lock);
6530 VERIFY(ifp->if_datamov > 0);
6531 /*
 6532 * If there are no more threads moving data, wake up any
 6533 * drainers blocked waiting for this.
6534 */
6535 if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
6536 wakeup(&(ifp->if_datamov));
6537 }
6538 ifnet_decr_iorefcnt_locked(ifp);
6539 lck_mtx_unlock(&ifp->if_ref_lock);
6540}
6541
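/*
 * Illustrative sketch (not part of dlil.c): the data path brackets
 * packet movement with ifnet_datamov_begin()/ifnet_datamov_end(), so
 * that a drainer in ifnet_datamov_drain() below can wait out any
 * in-flight work. example_move_packet is hypothetical.
 */
static errno_t
example_move_packet(struct ifnet *ifp, struct mbuf *m)
{
	if (!ifnet_datamov_begin(ifp)) {
		m_freem(m);
		return ENXIO;		/* not fully attached and ready */
	}
	/* ... move the packet; if_datamov and if_refio are held ... */
	ifnet_datamov_end(ifp);
	return 0;
}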
6542void
6543ifnet_datamov_suspend(struct ifnet *ifp)
6544{
6545 lck_mtx_lock_spin(&ifp->if_ref_lock);
6546 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6547 ifp->if_refio++;
6548 if (ifp->if_suspend++ == 0) {
6549 VERIFY(ifp->if_refflags & IFRF_READY);
6550 ifp->if_refflags &= ~IFRF_READY;
6551 }
6552 lck_mtx_unlock(&ifp->if_ref_lock);
6553}
6554
6555void
6556ifnet_datamov_drain(struct ifnet *ifp)
6557{
6558 lck_mtx_lock(&ifp->if_ref_lock);
6559 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6560 /* data movement must already be suspended */
6561 VERIFY(ifp->if_suspend > 0);
6562 VERIFY(!(ifp->if_refflags & IFRF_READY));
6563 ifp->if_drainers++;
6564 while (ifp->if_datamov != 0) {
6565 (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
6566 (PZERO - 1), __func__, NULL);
6567 }
6568 VERIFY(!(ifp->if_refflags & IFRF_READY));
6569 VERIFY(ifp->if_drainers > 0);
6570 ifp->if_drainers--;
6571 lck_mtx_unlock(&ifp->if_ref_lock);
6572
6573 /* purge the interface queues */
6574 if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
6575 if_qflush(ifp, 0);
6576 }
6577}
5ba3f43e 6578
cb323159
A
6579void
6580ifnet_datamov_resume(struct ifnet *ifp)
6581{
6582 lck_mtx_lock(&ifp->if_ref_lock);
6583 /* data movement must already be suspended */
6584 VERIFY(ifp->if_suspend > 0);
6585 if (--ifp->if_suspend == 0) {
6586 VERIFY(!(ifp->if_refflags & IFRF_READY));
6587 ifp->if_refflags |= IFRF_READY;
6588 }
6589 ifnet_decr_iorefcnt_locked(ifp);
6d2010ae
A
6590 lck_mtx_unlock(&ifp->if_ref_lock);
6591}
b0d623f7 6592
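/*
 * Illustrative sketch (not part of dlil.c): the full quiesce sequence
 * built from the three primitives above. Suspend clears IFRF_READY so
 * new ifnet_datamov_begin() calls fail, drain waits for in-flight
 * movers to finish, and resume restores IFRF_READY once the last
 * suspender is done. example_quiesce_and_reconfigure is hypothetical.
 */
static void
example_quiesce_and_reconfigure(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);	/* block new data movement */
	ifnet_datamov_drain(ifp);	/* wait for if_datamov to hit 0 */
	/* ... safely reconfigure the interface ... */
	ifnet_datamov_resume(ifp);	/* allow data movement again */
}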
6d2010ae
A
6593static void
6594dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
6595{
6596 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
6597 ctrace_t *tr;
6598 u_int32_t idx;
6599 u_int16_t *cnt;
1c79356b 6600
6d2010ae
A
6601 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
6602 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
6603 /* NOTREACHED */
6604 }
6605
6606 if (refhold) {
6607 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
6608 tr = dl_if_dbg->dldbg_if_refhold;
6609 } else {
6610 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
6611 tr = dl_if_dbg->dldbg_if_refrele;
6612 }
6613
6614 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
6615 ctrace_record(&tr[idx]);
91447636 6616}
1c79356b 6617
6d2010ae
A
6618errno_t
6619dlil_if_ref(struct ifnet *ifp)
6620{
6621 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6622
0a7de745
A
6623 if (dl_if == NULL) {
6624 return EINVAL;
6625 }
6d2010ae
A
6626
6627 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6628 ++dl_if->dl_if_refcnt;
6629 if (dl_if->dl_if_refcnt == 0) {
6630 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
6631 /* NOTREACHED */
6632 }
0a7de745 6633 if (dl_if->dl_if_trace != NULL) {
6d2010ae 6634 (*dl_if->dl_if_trace)(dl_if, TRUE);
0a7de745 6635 }
6d2010ae
A
6636 lck_mtx_unlock(&dl_if->dl_if_lock);
6637
0a7de745 6638 return 0;
91447636 6639}
1c79356b 6640
6d2010ae
A
6641errno_t
6642dlil_if_free(struct ifnet *ifp)
6643{
6644 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5ba3f43e 6645 bool need_release = FALSE;
6d2010ae 6646
0a7de745
A
6647 if (dl_if == NULL) {
6648 return EINVAL;
6649 }
6d2010ae
A
6650
6651 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5ba3f43e
A
6652 switch (dl_if->dl_if_refcnt) {
6653 case 0:
6d2010ae
A
6654 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
6655 /* NOTREACHED */
5ba3f43e
A
6656 break;
6657 case 1:
6658 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
6659 need_release = TRUE;
6660 }
6661 break;
6662 default:
6663 break;
6d2010ae
A
6664 }
6665 --dl_if->dl_if_refcnt;
0a7de745 6666 if (dl_if->dl_if_trace != NULL) {
6d2010ae 6667 (*dl_if->dl_if_trace)(dl_if, FALSE);
0a7de745 6668 }
6d2010ae 6669 lck_mtx_unlock(&dl_if->dl_if_lock);
5ba3f43e
A
6670 if (need_release) {
6671 dlil_if_release(ifp);
6672 }
0a7de745 6673 return 0;
6d2010ae 6674}
1c79356b 6675
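/*
 * Illustrative sketch (not part of dlil.c): dlil_if_ref() and
 * dlil_if_free() must be strictly paired; the underlying dlil_ifnet
 * storage cannot be recycled while the count is nonzero.
 * example_hold_dlil_storage is hypothetical.
 */
static errno_t
example_hold_dlil_storage(struct ifnet *ifp)
{
	errno_t err;

	if ((err = dlil_if_ref(ifp)) != 0) {
		return err;
	}
	/* ... dl_if storage stays valid in this window ... */
	return dlil_if_free(ifp);	/* drops the reference taken above */
}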
2d21ac55 6676static errno_t
6d2010ae 6677dlil_attach_protocol_internal(struct if_proto *proto,
5ba3f43e
A
6678 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
6679 uint32_t * proto_count)
91447636 6680{
6d2010ae 6681 struct kev_dl_proto_data ev_pr_data;
91447636
A
6682 struct ifnet *ifp = proto->ifp;
6683 int retval = 0;
b0d623f7 6684 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6d2010ae
A
6685 struct if_proto *prev_proto;
6686 struct if_proto *_proto;
6687
6688 /* callee holds a proto refcnt upon success */
6689 ifnet_lock_exclusive(ifp);
6690 _proto = find_attached_proto(ifp, proto->protocol_family);
6691 if (_proto != NULL) {
91447636 6692 ifnet_lock_done(ifp);
6d2010ae 6693 if_proto_free(_proto);
0a7de745 6694 return EEXIST;
91447636 6695 }
6d2010ae 6696
91447636
A
6697 /*
 6698 * Call the family module's add_proto routine so it can refine the
6699 * demux descriptors as it wishes.
6700 */
6d2010ae
A
6701 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
6702 demux_count);
91447636 6703 if (retval) {
6d2010ae 6704 ifnet_lock_done(ifp);
0a7de745 6705 return retval;
91447636 6706 }
6d2010ae 6707
91447636
A
6708 /*
6709 * Insert the protocol in the hash
6710 */
6d2010ae 6711 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
0a7de745 6712 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
6d2010ae 6713 prev_proto = SLIST_NEXT(prev_proto, next_hash);
0a7de745
A
6714 }
6715 if (prev_proto) {
6d2010ae 6716 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
0a7de745 6717 } else {
6d2010ae
A
6718 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
6719 proto, next_hash);
0a7de745 6720 }
6d2010ae
A
6721
6722 /* hold a proto refcnt for attach */
6723 if_proto_ref(proto);
1c79356b 6724
91447636 6725 /*
6d2010ae
A
 6726 * The reserved field carries the number of protocols still attached
6727 * (subject to change)
91447636 6728 */
91447636 6729 ev_pr_data.proto_family = proto->protocol_family;
a39ff7e2
A
6730 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
6731
6d2010ae
A
6732 ifnet_lock_done(ifp);
6733
6734 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
6735 (struct net_event_data *)&ev_pr_data,
0a7de745 6736 sizeof(struct kev_dl_proto_data));
5ba3f43e
A
6737 if (proto_count != NULL) {
6738 *proto_count = ev_pr_data.proto_remaining_count;
6739 }
0a7de745 6740 return retval;
91447636 6741}
0b4e3aa0 6742
2d21ac55
A
6743errno_t
6744ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 6745 const struct ifnet_attach_proto_param *proto_details)
91447636
A
6746{
6747 int retval = 0;
6748 struct if_proto *ifproto = NULL;
5ba3f43e 6749 uint32_t proto_count = 0;
6d2010ae
A
6750
6751 ifnet_head_lock_shared();
6752 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6753 retval = EINVAL;
6754 goto end;
6755 }
6756 /* Check that the interface is in the global list */
6757 if (!ifnet_lookup(ifp)) {
6758 retval = ENXIO;
6759 goto end;
6760 }
6761
f427ee49 6762 ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO);
6d2010ae 6763 if (ifproto == NULL) {
91447636
A
6764 retval = ENOMEM;
6765 goto end;
6766 }
6d2010ae
A
6767
6768 /* refcnt held above during lookup */
91447636
A
6769 ifproto->ifp = ifp;
6770 ifproto->protocol_family = protocol;
6771 ifproto->proto_kpi = kProtoKPI_v1;
6772 ifproto->kpi.v1.input = proto_details->input;
6773 ifproto->kpi.v1.pre_output = proto_details->pre_output;
6774 ifproto->kpi.v1.event = proto_details->event;
6775 ifproto->kpi.v1.ioctl = proto_details->ioctl;
6776 ifproto->kpi.v1.detached = proto_details->detached;
6777 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
6778 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6d2010ae 6779
2d21ac55 6780 retval = dlil_attach_protocol_internal(ifproto,
0a7de745
A
6781 proto_details->demux_list, proto_details->demux_count,
6782 &proto_count);
6d2010ae 6783
9bccf70c 6784end:
cb323159 6785 if (retval != 0 && retval != EEXIST) {
39236c6e 6786 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
cb323159 6787 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
5ba3f43e
A
6788 } else {
6789 if (dlil_verbose) {
cb323159
A
6790 DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
6791 ifp != NULL ? if_name(ifp) : "N/A",
0a7de745 6792 protocol, proto_count);
5ba3f43e 6793 }
6d2010ae
A
6794 }
6795 ifnet_head_done();
5ba3f43e 6796 if (retval == 0) {
a39ff7e2
A
6797 /*
6798 * A protocol has been attached, mark the interface up.
6799 * This used to be done by configd.KernelEventMonitor, but that
6800 * is inherently prone to races (rdar://problem/30810208).
6801 */
6802 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6803 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6804 dlil_post_sifflags_msg(ifp);
5ba3f43e 6805 } else if (ifproto != NULL) {
6d2010ae 6806 zfree(dlif_proto_zone, ifproto);
5ba3f43e 6807 }
0a7de745 6808 return retval;
1c79356b
A
6809}
6810
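/*
 * Illustrative sketch (not part of dlil.c), assuming hypothetical
 * my_proto_input() and my_proto_preout() callbacks: attaching a v1
 * protocol with a single DLIL_DESC_ETYPE2 demux descriptor. The
 * matching teardown is ifnet_detach_protocol(ifp, family).
 */
extern errno_t my_proto_input(ifnet_t, protocol_family_t, mbuf_t, char *);
extern errno_t my_proto_preout(ifnet_t, protocol_family_t, mbuf_t *,
    const struct sockaddr *, void *, char *, char *);

static errno_t
example_attach_proto(ifnet_t ifp, protocol_family_t family,
    u_int16_t ethertype)
{
	struct ifnet_demux_desc dd;
	struct ifnet_attach_proto_param pr;

	bzero(&dd, sizeof(dd));
	dd.type = DLIL_DESC_ETYPE2;
	dd.data = &ethertype;		/* network byte order */
	dd.datalen = sizeof(ethertype);

	bzero(&pr, sizeof(pr));
	pr.demux_list = &dd;
	pr.demux_count = 1;
	pr.input = my_proto_input;	/* required */
	pr.pre_output = my_proto_preout; /* required */

	return ifnet_attach_protocol(ifp, family, &pr);
}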
2d21ac55
A
6811errno_t
6812ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 6813 const struct ifnet_attach_proto_param_v2 *proto_details)
91447636 6814{
2d21ac55 6815 int retval = 0;
91447636 6816 struct if_proto *ifproto = NULL;
5ba3f43e 6817 uint32_t proto_count = 0;
6d2010ae
A
6818
6819 ifnet_head_lock_shared();
6820 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6821 retval = EINVAL;
6822 goto end;
6823 }
6824 /* Check that the interface is in the global list */
6825 if (!ifnet_lookup(ifp)) {
6826 retval = ENXIO;
6827 goto end;
6828 }
6829
6830 ifproto = zalloc(dlif_proto_zone);
6831 if (ifproto == NULL) {
91447636
A
6832 retval = ENOMEM;
6833 goto end;
6834 }
2d21ac55 6835 bzero(ifproto, sizeof(*ifproto));
6d2010ae
A
6836
6837 /* refcnt held above during lookup */
2d21ac55
A
6838 ifproto->ifp = ifp;
6839 ifproto->protocol_family = protocol;
6840 ifproto->proto_kpi = kProtoKPI_v2;
6841 ifproto->kpi.v2.input = proto_details->input;
6842 ifproto->kpi.v2.pre_output = proto_details->pre_output;
6843 ifproto->kpi.v2.event = proto_details->event;
6844 ifproto->kpi.v2.ioctl = proto_details->ioctl;
6845 ifproto->kpi.v2.detached = proto_details->detached;
6846 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
6847 ifproto->kpi.v2.send_arp = proto_details->send_arp;
1c79356b 6848
6d2010ae 6849 retval = dlil_attach_protocol_internal(ifproto,
0a7de745
A
6850 proto_details->demux_list, proto_details->demux_count,
6851 &proto_count);
6d2010ae
A
6852
6853end:
cb323159 6854 if (retval != 0 && retval != EEXIST) {
39236c6e 6855 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
cb323159 6856 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
5ba3f43e
A
6857 } else {
6858 if (dlil_verbose) {
cb323159
A
6859 DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
6860 ifp != NULL ? if_name(ifp) : "N/A",
0a7de745 6861 protocol, proto_count);
5ba3f43e 6862 }
2d21ac55 6863 }
6d2010ae 6864 ifnet_head_done();
5ba3f43e 6865 if (retval == 0) {
a39ff7e2
A
6866 /*
6867 * A protocol has been attached, mark the interface up.
6868 * This used to be done by configd.KernelEventMonitor, but that
6869 * is inherently prone to races (rdar://problem/30810208).
6870 */
6871 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6872 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6873 dlil_post_sifflags_msg(ifp);
5ba3f43e 6874 } else if (ifproto != NULL) {
6d2010ae 6875 zfree(dlif_proto_zone, ifproto);
5ba3f43e 6876 }
0a7de745 6877 return retval;
91447636 6878}
1c79356b 6879
2d21ac55
A
6880errno_t
6881ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
91447636
A
6882{
6883 struct if_proto *proto = NULL;
0a7de745 6884 int retval = 0;
6d2010ae
A
6885
6886 if (ifp == NULL || proto_family == 0) {
6887 retval = EINVAL;
91447636
A
6888 goto end;
6889 }
6d2010ae
A
6890
6891 ifnet_lock_exclusive(ifp);
6892 /* callee holds a proto refcnt upon success */
91447636 6893 proto = find_attached_proto(ifp, proto_family);
91447636
A
6894 if (proto == NULL) {
6895 retval = ENXIO;
6d2010ae 6896 ifnet_lock_done(ifp);
91447636
A
6897 goto end;
6898 }
6d2010ae
A
6899
6900 /* call family module del_proto */
0a7de745 6901 if (ifp->if_del_proto) {
91447636 6902 ifp->if_del_proto(ifp, proto->protocol_family);
0a7de745 6903 }
1c79356b 6904
6d2010ae
A
6905 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
6906 proto, if_proto, next_hash);
6907
6908 if (proto->proto_kpi == kProtoKPI_v1) {
6909 proto->kpi.v1.input = ifproto_media_input_v1;
39037602 6910 proto->kpi.v1.pre_output = ifproto_media_preout;
6d2010ae
A
6911 proto->kpi.v1.event = ifproto_media_event;
6912 proto->kpi.v1.ioctl = ifproto_media_ioctl;
6913 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
6914 proto->kpi.v1.send_arp = ifproto_media_send_arp;
6915 } else {
6916 proto->kpi.v2.input = ifproto_media_input_v2;
6917 proto->kpi.v2.pre_output = ifproto_media_preout;
6918 proto->kpi.v2.event = ifproto_media_event;
6919 proto->kpi.v2.ioctl = ifproto_media_ioctl;
6920 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
6921 proto->kpi.v2.send_arp = ifproto_media_send_arp;
6922 }
6923 proto->detached = 1;
6924 ifnet_lock_done(ifp);
6925
6926 if (dlil_verbose) {
cb323159 6927 DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
39236c6e 6928 (proto->proto_kpi == kProtoKPI_v1) ?
6d2010ae
A
6929 "v1" : "v2", proto_family);
6930 }
6931
6932 /* release proto refcnt held during protocol attach */
6933 if_proto_free(proto);
91447636
A
6934
6935 /*
6d2010ae
A
6936 * Release proto refcnt held during lookup; the rest of
 6937 * the protocol detach steps will happen when the last proto
6938 * reference is released.
91447636 6939 */
6d2010ae
A
6940 if_proto_free(proto);
6941
91447636 6942end:
0a7de745 6943 return retval;
91447636 6944}
1c79356b 6945
6d2010ae
A
6946
6947static errno_t
6948ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
6949 struct mbuf *packet, char *header)
91447636 6950{
6d2010ae 6951#pragma unused(ifp, protocol, packet, header)
0a7de745 6952 return ENXIO;
6d2010ae
A
6953}
6954
6955static errno_t
6956ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
6957 struct mbuf *packet)
6958{
6959#pragma unused(ifp, protocol, packet)
0a7de745 6960 return ENXIO;
6d2010ae
A
6961}
6962
6963static errno_t
6964ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
6965 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
6966 char *link_layer_dest)
6967{
6968#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
0a7de745 6969 return ENXIO;
91447636 6970}
9bccf70c 6971
91447636 6972static void
6d2010ae
A
6973ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6974 const struct kev_msg *event)
6975{
6976#pragma unused(ifp, protocol, event)
6977}
6978
6979static errno_t
6980ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6981 unsigned long command, void *argument)
6982{
6983#pragma unused(ifp, protocol, command, argument)
0a7de745 6984 return ENXIO;
6d2010ae
A
6985}
6986
6987static errno_t
6988ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6989 struct sockaddr_dl *out_ll, size_t ll_len)
6990{
6991#pragma unused(ifp, proto_addr, out_ll, ll_len)
0a7de745 6992 return ENXIO;
6d2010ae
A
6993}
6994
6995static errno_t
6996ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6997 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6998 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6999{
7000#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
0a7de745 7001 return ENXIO;
91447636 7002}
9bccf70c 7003
91447636 7004extern int if_next_index(void);
4bd07ac2 7005extern int tcp_ecn_outbound;
91447636 7006
2d21ac55 7007errno_t
6d2010ae 7008ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
91447636 7009{
91447636 7010 struct ifnet *tmp_if;
6d2010ae
A
7011 struct ifaddr *ifa;
7012 struct if_data_internal if_data_saved;
7013 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
316670eb 7014 struct dlil_threading_info *dl_inp;
f427ee49 7015 thread_continue_t thfunc = NULL;
316670eb
A
7016 u_int32_t sflags = 0;
7017 int err;
1c79356b 7018
0a7de745
A
7019 if (ifp == NULL) {
7020 return EINVAL;
7021 }
6d2010ae 7022
7ddcb079
A
7023 /*
7024 * Serialize ifnet attach using dlil_ifnet_lock, in order to
7025 * prevent the interface from being configured while it is
7026 * embryonic, as ifnet_head_lock is dropped and reacquired
7027 * below prior to marking the ifnet with IFRF_ATTACHED.
7028 */
7029 dlil_if_lock();
6d2010ae 7030 ifnet_head_lock_exclusive();
91447636
A
7031 /* Verify we aren't already on the list */
7032 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
7033 if (tmp_if == ifp) {
7034 ifnet_head_done();
7ddcb079 7035 dlil_if_unlock();
0a7de745 7036 return EEXIST;
91447636
A
7037 }
7038 }
0b4e3aa0 7039
6d2010ae 7040 lck_mtx_lock_spin(&ifp->if_ref_lock);
5ba3f43e
A
7041 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
7042 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
6d2010ae
A
7043 __func__, ifp);
7044 /* NOTREACHED */
91447636 7045 }
6d2010ae 7046 lck_mtx_unlock(&ifp->if_ref_lock);
1c79356b 7047
6d2010ae 7048 ifnet_lock_exclusive(ifp);
b0d623f7 7049
6d2010ae
A
7050 /* Sanity check */
7051 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7052 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
cb323159 7053 VERIFY(ifp->if_threads_pending == 0);
6d2010ae
A
7054
7055 if (ll_addr != NULL) {
7056 if (ifp->if_addrlen == 0) {
7057 ifp->if_addrlen = ll_addr->sdl_alen;
7058 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
7059 ifnet_lock_done(ifp);
7060 ifnet_head_done();
7ddcb079 7061 dlil_if_unlock();
0a7de745 7062 return EINVAL;
b0d623f7
A
7063 }
7064 }
7065
91447636 7066 /*
b0d623f7 7067 * Allow interfaces without protocol families to attach
91447636
A
7068 * only if they have the necessary fields filled out.
7069 */
6d2010ae
A
7070 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
7071 DLIL_PRINTF("%s: Attempt to attach interface without "
7072 "family module - %d\n", __func__, ifp->if_family);
7073 ifnet_lock_done(ifp);
7074 ifnet_head_done();
7ddcb079 7075 dlil_if_unlock();
0a7de745 7076 return ENODEV;
1c79356b
A
7077 }
7078
6d2010ae
A
7079 /* Allocate protocol hash table */
7080 VERIFY(ifp->if_proto_hash == NULL);
f427ee49 7081 ifp->if_proto_hash = zalloc_flags(dlif_phash_zone, Z_WAITOK | Z_ZERO);
6d2010ae
A
7082 if (ifp->if_proto_hash == NULL) {
7083 ifnet_lock_done(ifp);
7084 ifnet_head_done();
7ddcb079 7085 dlil_if_unlock();
0a7de745 7086 return ENOBUFS;
6d2010ae 7087 }
91447636 7088
6d2010ae
A
7089 lck_mtx_lock_spin(&ifp->if_flt_lock);
7090 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
91447636 7091 TAILQ_INIT(&ifp->if_flt_head);
6d2010ae
A
7092 VERIFY(ifp->if_flt_busy == 0);
7093 VERIFY(ifp->if_flt_waiters == 0);
7094 lck_mtx_unlock(&ifp->if_flt_lock);
7095
6d2010ae
A
7096 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
7097 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
91447636 7098 LIST_INIT(&ifp->if_multiaddrs);
6d2010ae 7099 }
1c79356b 7100
6d2010ae
A
7101 VERIFY(ifp->if_allhostsinm == NULL);
7102 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
7103 TAILQ_INIT(&ifp->if_addrhead);
7104
6d2010ae
A
7105 if (ifp->if_index == 0) {
7106 int idx = if_next_index();
7107
7108 if (idx == -1) {
7109 ifp->if_index = 0;
7110 ifnet_lock_done(ifp);
7111 ifnet_head_done();
7ddcb079 7112 dlil_if_unlock();
0a7de745 7113 return ENOBUFS;
1c79356b 7114 }
f427ee49
A
7115 ifp->if_index = (uint16_t)idx;
7116
7117 /* the lladdr passed at attach time is the permanent address */
7118 if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
7119 ll_addr->sdl_alen == ETHER_ADDR_LEN) {
7120 bcopy(CONST_LLADDR(ll_addr),
7121 dl_if->dl_if_permanent_ether,
7122 ETHER_ADDR_LEN);
7123 dl_if->dl_if_permanent_ether_is_set = 1;
7124 }
6d2010ae
A
7125 }
7126 /* There should not be anything occupying this slot */
7127 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7128
7129 /* allocate (if needed) and initialize a link address */
6d2010ae
A
7130 ifa = dlil_alloc_lladdr(ifp, ll_addr);
7131 if (ifa == NULL) {
7132 ifnet_lock_done(ifp);
7133 ifnet_head_done();
7ddcb079 7134 dlil_if_unlock();
0a7de745 7135 return ENOBUFS;
6d2010ae
A
7136 }
7137
7138 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
7139 ifnet_addrs[ifp->if_index - 1] = ifa;
7140
7141 /* make this address the first on the list */
7142 IFA_LOCK(ifa);
7143 /* hold a reference for ifnet_addrs[] */
7144 IFA_ADDREF_LOCKED(ifa);
7145 /* if_attach_link_ifa() holds a reference for ifa_link */
7146 if_attach_link_ifa(ifp, ifa);
7147 IFA_UNLOCK(ifa);
7148
6d2010ae
A
7149 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
7150 ifindex2ifnet[ifp->if_index] = ifp;
2d21ac55 7151
6d2010ae
A
7152 /* Hold a reference to the underlying dlil_ifnet */
7153 ifnet_reference(ifp);
7154
316670eb
A
 7155 /* Clear stats (save and restore other fields we care about) */
7156 if_data_saved = ifp->if_data;
0a7de745 7157 bzero(&ifp->if_data, sizeof(ifp->if_data));
316670eb
A
7158 ifp->if_data.ifi_type = if_data_saved.ifi_type;
7159 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
7160 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
7161 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
7162 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
7163 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
7164 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
7165 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
7166 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
7167 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
7168 ifnet_touch_lastchange(ifp);
7169
7170 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
39037602
A
7171 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
7172 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
316670eb
A
7173
7174 /* By default, use SFB and enable flow advisory */
7175 sflags = PKTSCHEDF_QALG_SFB;
0a7de745 7176 if (if_flowadv) {
316670eb 7177 sflags |= PKTSCHEDF_QALG_FLOWCTL;
0a7de745 7178 }
316670eb 7179
0a7de745 7180 if (if_delaybased_queue) {
fe8ab488 7181 sflags |= PKTSCHEDF_QALG_DELAYBASED;
0a7de745 7182 }
fe8ab488 7183
5ba3f43e 7184 if (ifp->if_output_sched_model ==
0a7de745 7185 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
5ba3f43e 7186 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
0a7de745 7187 }
5ba3f43e 7188
316670eb
A
7189 /* Initialize transmit queue(s) */
7190 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
7191 if (err != 0) {
7192 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
7193 "err=%d", __func__, ifp, err);
7194 /* NOTREACHED */
7195 }
7196
7197 /* Sanity checks on the input thread storage */
7198 dl_inp = &dl_if->dl_if_inpstorage;
f427ee49
A
7199 bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
7200 VERIFY(dl_inp->dlth_flags == 0);
7201 VERIFY(dl_inp->dlth_wtot == 0);
7202 VERIFY(dl_inp->dlth_ifp == NULL);
7203 VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
7204 VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
7205 VERIFY(!dl_inp->dlth_affinity);
316670eb 7206 VERIFY(ifp->if_inp == NULL);
f427ee49
A
7207 VERIFY(dl_inp->dlth_thread == THREAD_NULL);
7208 VERIFY(dl_inp->dlth_strategy == NULL);
7209 VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
7210 VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
7211 VERIFY(dl_inp->dlth_affinity_tag == 0);
cb323159 7212
316670eb 7213#if IFNET_INPUT_SANITY_CHK
f427ee49 7214 VERIFY(dl_inp->dlth_pkts_cnt == 0);
316670eb
A
7215#endif /* IFNET_INPUT_SANITY_CHK */
7216
cb323159
A
7217 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7218 dlil_reset_rxpoll_params(ifp);
316670eb 7219 /*
cb323159 7220 * A specific DLIL input thread is created per non-loopback interface.
316670eb 7221 */
cb323159 7222 if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
316670eb 7223 ifp->if_inp = dl_inp;
cb323159 7224 ifnet_incr_pending_thread_count(ifp);
f427ee49
A
7225 err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
7226 if (err == ENODEV) {
7227 VERIFY(thfunc == NULL);
7228 ifnet_decr_pending_thread_count(ifp);
7229 } else if (err != 0) {
316670eb
A
7230 panic_plain("%s: ifp=%p couldn't get an input thread; "
7231 "err=%d", __func__, ifp, err);
7232 /* NOTREACHED */
7233 }
7234 }
6d2010ae 7235 /*
39236c6e
A
 7236 * If the driver supports the new transmit model, calculate the flow hash
7237 * and create a workloop starter thread to invoke the if_start callback
7238 * where the packets may be dequeued and transmitted.
6d2010ae 7239 */
316670eb 7240 if (ifp->if_eflags & IFEF_TXSTART) {
f427ee49
A
7241 thread_precedence_policy_data_t info;
7242 __unused kern_return_t kret;
7243
39236c6e
A
7244 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
7245 VERIFY(ifp->if_flowhash != 0);
316670eb
A
7246 VERIFY(ifp->if_start_thread == THREAD_NULL);
7247
7248 ifnet_set_start_cycle(ifp, NULL);
7249 ifp->if_start_active = 0;
7250 ifp->if_start_req = 0;
39236c6e 7251 ifp->if_start_flags = 0;
5ba3f43e 7252 VERIFY(ifp->if_start != NULL);
cb323159
A
7253 ifnet_incr_pending_thread_count(ifp);
7254 if ((err = kernel_thread_start(ifnet_start_thread_func,
5ba3f43e
A
7255 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
7256 panic_plain("%s: "
7257 "ifp=%p couldn't get a start thread; "
316670eb 7258 "err=%d", __func__, ifp, err);
0a7de745 7259 /* NOTREACHED */
6d2010ae 7260 }
f427ee49
A
7261 bzero(&info, sizeof(info));
7262 info.importance = 1;
7263 kret = thread_policy_set(ifp->if_start_thread,
7264 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
7265 THREAD_PRECEDENCE_POLICY_COUNT);
7266 ASSERT(kret == KERN_SUCCESS);
39236c6e
A
7267 } else {
7268 ifp->if_flowhash = 0;
316670eb
A
7269 }
7270
cb323159
A
7271 /* Reset polling parameters */
7272 ifnet_set_poll_cycle(ifp, NULL);
7273 ifp->if_poll_update = 0;
7274 ifp->if_poll_flags = 0;
7275 ifp->if_poll_req = 0;
7276 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7277
316670eb
A
7278 /*
7279 * If the driver supports the new receive model, create a poller
 7280 * thread to invoke the if_input_poll callback where packets may
 7281 * be dequeued from the driver and processed for reception.
 f427ee49
 A
 7282 * If the interface is netif-compat, then the poller thread is
 7283 * managed by netif.
316670eb 7284 */
f427ee49
A
7285 if (thfunc == dlil_rxpoll_input_thread_func) {
7286 thread_precedence_policy_data_t info;
7287 __unused kern_return_t kret;
316670eb
A
7288 VERIFY(ifp->if_input_poll != NULL);
7289 VERIFY(ifp->if_input_ctl != NULL);
cb323159
A
7290 ifnet_incr_pending_thread_count(ifp);
7291 if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
316670eb
A
7292 &ifp->if_poll_thread)) != KERN_SUCCESS) {
7293 panic_plain("%s: ifp=%p couldn't get a poll thread; "
6d2010ae
A
7294 "err=%d", __func__, ifp, err);
7295 /* NOTREACHED */
7296 }
f427ee49
A
7297 bzero(&info, sizeof(info));
7298 info.importance = 1;
7299 kret = thread_policy_set(ifp->if_poll_thread,
7300 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
7301 THREAD_PRECEDENCE_POLICY_COUNT);
7302 ASSERT(kret == KERN_SUCCESS);
91447636 7303 }
6d2010ae 7304
316670eb
A
7305 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7306 VERIFY(ifp->if_desc.ifd_len == 0);
7307 VERIFY(ifp->if_desc.ifd_desc != NULL);
6d2010ae
A
7308
7309 /* Record attach PC stacktrace */
7310 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
7311
7312 ifp->if_updatemcasts = 0;
7313 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
7314 struct ifmultiaddr *ifma;
7315 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
7316 IFMA_LOCK(ifma);
7317 if (ifma->ifma_addr->sa_family == AF_LINK ||
0a7de745 7318 ifma->ifma_addr->sa_family == AF_UNSPEC) {
6d2010ae 7319 ifp->if_updatemcasts++;
0a7de745 7320 }
6d2010ae
A
7321 IFMA_UNLOCK(ifma);
7322 }
7323
cb323159 7324 DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
39236c6e 7325 "membership(s)\n", if_name(ifp),
6d2010ae
A
7326 ifp->if_updatemcasts);
7327 }
7328
39236c6e 7329 /* Clear logging parameters */
0a7de745 7330 bzero(&ifp->if_log, sizeof(ifp->if_log));
5ba3f43e
A
7331
7332 /* Clear foreground/realtime activity timestamps */
39236c6e 7333 ifp->if_fg_sendts = 0;
5ba3f43e 7334 ifp->if_rt_sendts = 0;
39236c6e
A
7335
7336 VERIFY(ifp->if_delegated.ifp == NULL);
7337 VERIFY(ifp->if_delegated.type == 0);
7338 VERIFY(ifp->if_delegated.family == 0);
7339 VERIFY(ifp->if_delegated.subfamily == 0);
fe8ab488 7340 VERIFY(ifp->if_delegated.expensive == 0);
cb323159 7341 VERIFY(ifp->if_delegated.constrained == 0);
39236c6e 7342
39037602
A
7343 VERIFY(ifp->if_agentids == NULL);
7344 VERIFY(ifp->if_agentcount == 0);
3e170ce0
A
7345
7346 /* Reset interface state */
7347 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
39037602 7348 ifp->if_interface_state.valid_bitmask |=
0a7de745 7349 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
3e170ce0 7350 ifp->if_interface_state.interface_availability =
0a7de745 7351 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
3e170ce0
A
7352
7353 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
7354 if (ifp == lo_ifp) {
7355 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
7356 ifp->if_interface_state.valid_bitmask |=
7357 IF_INTERFACE_STATE_LQM_STATE_VALID;
7358 } else {
7359 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
7360 }
4bd07ac2
A
7361
7362 /*
7363 * Enable ECN capability on this interface depending on the
7364 * value of ECN global setting
7365 */
7366 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
f427ee49
A
7367 if_set_eflags(ifp, IFEF_ECN_ENABLE);
7368 if_clear_eflags(ifp, IFEF_ECN_DISABLE);
4bd07ac2
A
7369 }
7370
39037602
A
7371 /*
7372 * Built-in Cyclops always on policy for WiFi infra
7373 */
7374 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
7375 errno_t error;
7376
7377 error = if_set_qosmarking_mode(ifp,
7378 IFRTYPE_QOSMARKING_FASTLANE);
7379 if (error != 0) {
cb323159 7380 DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
39037602
A
7381 __func__, ifp->if_xname, error);
7382 } else {
f427ee49 7383 if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
39037602 7384#if (DEVELOPMENT || DEBUG)
cb323159 7385 DLIL_PRINTF("%s fastlane enabled on %s\n",
0a7de745 7386 __func__, ifp->if_xname);
39037602
A
7387#endif /* (DEVELOPMENT || DEBUG) */
7388 }
7389 }
7390
0c530ab8 7391 ifnet_lock_done(ifp);
b0d623f7 7392 ifnet_head_done();
6d2010ae 7393
5ba3f43e 7394
6d2010ae
A
7395 lck_mtx_lock(&ifp->if_cached_route_lock);
7396 /* Enable forwarding cached route */
7397 ifp->if_fwd_cacheok = 1;
7398 /* Clean up any existing cached routes */
39236c6e 7399 ROUTE_RELEASE(&ifp->if_fwd_route);
0a7de745 7400 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
39236c6e 7401 ROUTE_RELEASE(&ifp->if_src_route);
0a7de745 7402 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
39236c6e 7403 ROUTE_RELEASE(&ifp->if_src_route6);
0a7de745 7404 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
6d2010ae
A
7405 lck_mtx_unlock(&ifp->if_cached_route_lock);
7406
7407 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
7408
b0d623f7 7409 /*
6d2010ae
A
7410 * Allocate and attach IGMPv3/MLDv2 interface specific variables
7411 * and trees; do this before the ifnet is marked as attached.
7412 * The ifnet keeps the reference to the info structures even after
7413 * the ifnet is detached, since the network-layer records still
7414 * refer to the info structures even after that. This also
7415 * makes it possible for them to still function after the ifnet
7416 * is recycled or reattached.
b0d623f7 7417 */
6d2010ae
A
7418#if INET
7419 if (IGMP_IFINFO(ifp) == NULL) {
f427ee49 7420 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
6d2010ae
A
7421 VERIFY(IGMP_IFINFO(ifp) != NULL);
7422 } else {
7423 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
7424 igmp_domifreattach(IGMP_IFINFO(ifp));
7425 }
7426#endif /* INET */
6d2010ae 7427 if (MLD_IFINFO(ifp) == NULL) {
f427ee49 7428 MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
6d2010ae
A
7429 VERIFY(MLD_IFINFO(ifp) != NULL);
7430 } else {
7431 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
7432 mld_domifreattach(MLD_IFINFO(ifp));
7433 }
b0d623f7 7434
39236c6e 7435 VERIFY(ifp->if_data_threshold == 0);
5ba3f43e 7436 VERIFY(ifp->if_dt_tcall != NULL);
39236c6e 7437
6d2010ae 7438 /*
cb323159
A
7439 * Wait for the created kernel threads for I/O to get
7440 * scheduled and run at least once before we proceed
7441 * to mark interface as attached.
6d2010ae 7442 */
cb323159
A
7443 lck_mtx_lock(&ifp->if_ref_lock);
7444 while (ifp->if_threads_pending != 0) {
7445 DLIL_PRINTF("%s: Waiting for all kernel threads created for "
7446 "interface %s to get scheduled at least once.\n",
7447 __func__, ifp->if_xname);
7448 (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
7449 __func__, NULL);
7450 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
7451 }
7452 lck_mtx_unlock(&ifp->if_ref_lock);
7453 DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
7454 "at least once. Proceeding.\n", __func__, ifp->if_xname);
7455
 7456 /* Finally, mark this ifnet as attached. */
6d2010ae
A
7457 lck_mtx_lock(rnh_lock);
7458 ifnet_lock_exclusive(ifp);
7459 lck_mtx_lock_spin(&ifp->if_ref_lock);
cb323159 7460 ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
6d2010ae 7461 lck_mtx_unlock(&ifp->if_ref_lock);
d1ecb069 7462 if (net_rtref) {
6d2010ae
A
7463 /* boot-args override; enable idle notification */
7464 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
d1ecb069 7465 IFRF_IDLE_NOTIFY);
6d2010ae
A
7466 } else {
7467 /* apply previous request(s) to set the idle flags, if any */
7468 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
7469 ifp->if_idle_new_flags_mask);
d1ecb069 7470 }
6d2010ae
A
7471 ifnet_lock_done(ifp);
7472 lck_mtx_unlock(rnh_lock);
7ddcb079 7473 dlil_if_unlock();
6d2010ae
A
7474
7475#if PF
7476 /*
7477 * Attach packet filter to this interface, if enabled.
7478 */
7479 pf_ifnet_hook(ifp, 1);
7480#endif /* PF */
d1ecb069 7481
2d21ac55 7482 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
1c79356b 7483
6d2010ae 7484 if (dlil_verbose) {
cb323159 7485 DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
6d2010ae
A
7486 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
7487 }
7488
0a7de745 7489 return 0;
6d2010ae
A
7490}
7491
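/*
 * Illustrative sketch (not part of dlil.c), assuming hypothetical
 * driver callbacks my_output/my_demux/my_add_proto/my_del_proto and
 * interface name "exmpl": the minimal allocate-then-attach sequence
 * that reaches ifnet_attach() above. Note that if_add_proto and
 * if_del_proto must be set, or the attach fails with ENODEV as
 * checked above.
 */
extern errno_t my_output(ifnet_t, mbuf_t);
extern errno_t my_demux(ifnet_t, mbuf_t, char *, protocol_family_t *);
extern errno_t my_add_proto(ifnet_t, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
extern errno_t my_del_proto(ifnet_t, protocol_family_t);

static errno_t
example_create_and_attach(ifnet_t *out)
{
	struct ifnet_init_params init;
	errno_t err;

	bzero(&init, sizeof(init));
	init.name = "exmpl";			/* hypothetical name */
	init.unit = 0;
	init.family = IFNET_FAMILY_ETHERNET;
	init.type = IFT_ETHER;
	init.output = my_output;		/* required */
	init.demux = my_demux;			/* required */
	init.add_proto = my_add_proto;		/* required */
	init.del_proto = my_del_proto;		/* required */

	if ((err = ifnet_allocate(&init, out)) != 0) {
		return err;
	}
	/* ll_addr may be NULL; dlil_alloc_lladdr() below copes with that */
	if ((err = ifnet_attach(*out, NULL)) != 0) {
		ifnet_release(*out);
	}
	return err;
}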
7492/*
 7493 * Prepare the storage for the first/permanent link address, which
 7494 * must have the same lifetime as the ifnet itself. Although the link
7495 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
7496 * its location in memory must never change as it may still be referred
7497 * to by some parts of the system afterwards (unfortunate implementation
7498 * artifacts inherited from BSD.)
7499 *
7500 * Caller must hold ifnet lock as writer.
7501 */
7502static struct ifaddr *
7503dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
7504{
7505 struct ifaddr *ifa, *oifa;
7506 struct sockaddr_dl *asdl, *msdl;
0a7de745 7507 char workbuf[IFNAMSIZ * 2];
6d2010ae
A
7508 int namelen, masklen, socksize;
7509 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7510
7511 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
7512 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
7513
4ba76501 7514 namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
39236c6e 7515 if_name(ifp));
39037602
A
7516 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
7517 + ((namelen > 0) ? namelen : 0);
6d2010ae 7518 socksize = masklen + ifp->if_addrlen;
0a7de745
A
7519#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
7520 if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
6d2010ae 7521 socksize = sizeof(struct sockaddr_dl);
0a7de745 7522 }
6d2010ae
A
7523 socksize = ROUNDUP(socksize);
7524#undef ROUNDUP
7525
7526 ifa = ifp->if_lladdr;
7527 if (socksize > DLIL_SDLMAXLEN ||
7528 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
7529 /*
7530 * Rare, but in the event that the link address requires
7531 * more storage space than DLIL_SDLMAXLEN, allocate the
7532 * largest possible storages for address and mask, such
7533 * that we can reuse the same space when if_addrlen grows.
7534 * This same space will be used when if_addrlen shrinks.
7535 */
7536 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
0a7de745 7537 int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
6d2010ae 7538 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
0a7de745
A
7539 if (ifa == NULL) {
7540 return NULL;
7541 }
6d2010ae
A
7542 ifa_lock_init(ifa);
7543 /* Don't set IFD_ALLOC, as this is permanent */
7544 ifa->ifa_debug = IFD_LINK;
7545 }
7546 IFA_LOCK(ifa);
7547 /* address and mask sockaddr_dl locations */
7548 asdl = (struct sockaddr_dl *)(ifa + 1);
7549 bzero(asdl, SOCK_MAXADDRLEN);
316670eb
A
7550 msdl = (struct sockaddr_dl *)(void *)
7551 ((char *)asdl + SOCK_MAXADDRLEN);
6d2010ae
A
7552 bzero(msdl, SOCK_MAXADDRLEN);
7553 } else {
7554 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
7555 /*
7556 * Use the storage areas for address and mask within the
7557 * dlil_ifnet structure. This is the most common case.
7558 */
7559 if (ifa == NULL) {
7560 ifa = &dl_if->dl_if_lladdr.ifa;
7561 ifa_lock_init(ifa);
7562 /* Don't set IFD_ALLOC, as this is permanent */
7563 ifa->ifa_debug = IFD_LINK;
7564 }
7565 IFA_LOCK(ifa);
7566 /* address and mask sockaddr_dl locations */
316670eb 7567 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
0a7de745 7568 bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
316670eb 7569 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
0a7de745 7570 bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
6d2010ae
A
7571 }
7572
7573 /* hold a permanent reference for the ifnet itself */
7574 IFA_ADDREF_LOCKED(ifa);
7575 oifa = ifp->if_lladdr;
7576 ifp->if_lladdr = ifa;
7577
7578 VERIFY(ifa->ifa_debug == IFD_LINK);
7579 ifa->ifa_ifp = ifp;
7580 ifa->ifa_rtrequest = link_rtrequest;
7581 ifa->ifa_addr = (struct sockaddr *)asdl;
f427ee49 7582 asdl->sdl_len = (u_char)socksize;
6d2010ae 7583 asdl->sdl_family = AF_LINK;
39037602
A
7584 if (namelen > 0) {
7585 bcopy(workbuf, asdl->sdl_data, min(namelen,
0a7de745 7586 sizeof(asdl->sdl_data)));
f427ee49 7587 asdl->sdl_nlen = (u_char)namelen;
39037602
A
7588 } else {
7589 asdl->sdl_nlen = 0;
7590 }
6d2010ae
A
7591 asdl->sdl_index = ifp->if_index;
7592 asdl->sdl_type = ifp->if_type;
7593 if (ll_addr != NULL) {
7594 asdl->sdl_alen = ll_addr->sdl_alen;
7595 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
7596 } else {
7597 asdl->sdl_alen = 0;
7598 }
39037602 7599 ifa->ifa_netmask = (struct sockaddr *)msdl;
f427ee49 7600 msdl->sdl_len = (u_char)masklen;
0a7de745 7601 while (namelen > 0) {
6d2010ae 7602 msdl->sdl_data[--namelen] = 0xff;
0a7de745 7603 }
6d2010ae
A
7604 IFA_UNLOCK(ifa);
7605
0a7de745 7606 if (oifa != NULL) {
6d2010ae 7607 IFA_REMREF(oifa);
0a7de745 7608 }
6d2010ae 7609
0a7de745 7610 return ifa;
6d2010ae
A
7611}
7612
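/*
 * Worked example of the sizing logic above (a sketch, not compiled
 * code): for "en0" with a 6-byte Ethernet address, namelen = 3, so
 * masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + 3 = 11 and
 * socksize = 11 + 6 = 17. That is smaller than
 * sizeof (struct sockaddr_dl) (20 bytes), so socksize is bumped to
 * 20, which ROUNDUP leaves at 20 -- comfortably within
 * DLIL_SDLMAXLEN, so the embedded dl_if_lladdr storage is used
 * rather than a separate allocation.
 */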
7613static void
7614if_purgeaddrs(struct ifnet *ifp)
7615{
7616#if INET
7617 in_purgeaddrs(ifp);
7618#endif /* INET */
6d2010ae 7619 in6_purgeaddrs(ifp);
1c79356b
A
7620}
7621
2d21ac55 7622errno_t
6d2010ae 7623ifnet_detach(ifnet_t ifp)
1c79356b 7624{
39236c6e 7625 struct ifnet *delegated_ifp;
39037602 7626 struct nd_ifinfo *ndi = NULL;
39236c6e 7627
0a7de745
A
7628 if (ifp == NULL) {
7629 return EINVAL;
7630 }
6d2010ae 7631
39037602 7632 ndi = ND_IFINFO(ifp);
0a7de745 7633 if (NULL != ndi) {
39037602 7634 ndi->cga_initialized = FALSE;
0a7de745 7635 }
39037602 7636
6d2010ae 7637 lck_mtx_lock(rnh_lock);
316670eb 7638 ifnet_head_lock_exclusive();
91447636 7639 ifnet_lock_exclusive(ifp);
6d2010ae 7640
cb323159
A
7641 if (ifp->if_output_netem != NULL) {
7642 netem_destroy(ifp->if_output_netem);
7643 ifp->if_output_netem = NULL;
7644 }
7645
6d2010ae
A
7646 /*
7647 * Check to see if this interface has previously triggered
7648 * aggressive protocol draining; if so, decrement the global
7649 * refcnt and clear PR_AGGDRAIN on the route domain if
7650 * there are no more of such an interface around.
7651 */
7652 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
7653
7654 lck_mtx_lock_spin(&ifp->if_ref_lock);
39037602 7655 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
6d2010ae
A
7656 lck_mtx_unlock(&ifp->if_ref_lock);
7657 ifnet_lock_done(ifp);
6d2010ae 7658 ifnet_head_done();
13f56ec4 7659 lck_mtx_unlock(rnh_lock);
0a7de745 7660 return EINVAL;
6d2010ae 7661 } else if (ifp->if_refflags & IFRF_DETACHING) {
91447636 7662 /* Interface has already been detached */
6d2010ae 7663 lck_mtx_unlock(&ifp->if_ref_lock);
91447636 7664 ifnet_lock_done(ifp);
6d2010ae 7665 ifnet_head_done();
13f56ec4 7666 lck_mtx_unlock(rnh_lock);
0a7de745 7667 return ENXIO;
55e303ae 7668 }
5ba3f43e 7669 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
6d2010ae
A
7670 /* Indicate this interface is being detached */
7671 ifp->if_refflags &= ~IFRF_ATTACHED;
7672 ifp->if_refflags |= IFRF_DETACHING;
7673 lck_mtx_unlock(&ifp->if_ref_lock);
7674
5c9f4661 7675 if (dlil_verbose) {
cb323159 7676 DLIL_PRINTF("%s: detaching\n", if_name(ifp));
5c9f4661
A
7677 }
7678
7679 /* clean up flow control entry object if there's any */
7680 if (ifp->if_eflags & IFEF_TXSTART) {
7681 ifnet_flowadv(ifp->if_flowhash);
7682 }
6d2010ae 7683
490019cf 7684 /* Reset ECN enable/disable flags */
d9a64523 7685 /* Reset CLAT46 flag */
f427ee49 7686 if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);
d9a64523 7687
cb323159
A
7688 /*
 7689 * We do not reset the TCP keep-alive counters in case
 7690 * a TCP connection stays connected after the interface
 7691 * goes down.
7692 */
7693 if (ifp->if_tcp_kao_cnt > 0) {
7694 os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
7695 __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
7696 }
7697 ifp->if_tcp_kao_max = 0;
7698
91447636 7699 /*
6d2010ae
A
7700 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
7701 * no longer be visible during lookups from this point.
91447636 7702 */
6d2010ae
A
7703 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
7704 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
7705 ifp->if_link.tqe_next = NULL;
7706 ifp->if_link.tqe_prev = NULL;
39037602 7707 if (ifp->if_ordered_link.tqe_next != NULL ||
0a7de745 7708 ifp->if_ordered_link.tqe_prev != NULL) {
39037602
A
7709 ifnet_remove_from_ordered_list(ifp);
7710 }
6d2010ae
A
7711 ifindex2ifnet[ifp->if_index] = NULL;
7712
f427ee49
A
7713 /* 18717626 - reset router mode */
7714 if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
7715 ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;
3e170ce0 7716
6d2010ae
A
7717 /* Record detach PC stacktrace */
7718 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
7719
39236c6e 7720 /* Clear logging parameters */
0a7de745 7721 bzero(&ifp->if_log, sizeof(ifp->if_log));
39236c6e
A
7722
7723 /* Clear delegated interface info (reference released below) */
7724 delegated_ifp = ifp->if_delegated.ifp;
0a7de745 7725 bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
39236c6e 7726
3e170ce0
A
7727 /* Reset interface state */
7728 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7729
91447636 7730 ifnet_lock_done(ifp);
6d2010ae 7731 ifnet_head_done();
13f56ec4 7732 lck_mtx_unlock(rnh_lock);
6d2010ae 7733
5ba3f43e 7734
39236c6e 7735 /* Release reference held on the delegated interface */
0a7de745 7736 if (delegated_ifp != NULL) {
39236c6e 7737 ifnet_release(delegated_ifp);
0a7de745 7738 }
39236c6e 7739
316670eb 7740 /* Reset Link Quality Metric (unless loopback [lo0]) */
0a7de745 7741 if (ifp != lo_ifp) {
3e170ce0 7742 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
0a7de745 7743 }
316670eb
A
7744
7745 /* Reset TCP local statistics */
0a7de745 7746 if (ifp->if_tcp_stat != NULL) {
316670eb 7747 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
0a7de745 7748 }
316670eb
A
7749
7750 /* Reset UDP local statistics */
0a7de745 7751 if (ifp->if_udp_stat != NULL) {
316670eb 7752 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
0a7de745 7753 }
316670eb 7754
4bd07ac2 7755 /* Reset ifnet IPv4 stats */
0a7de745 7756 if (ifp->if_ipv4_stat != NULL) {
4bd07ac2 7757 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
0a7de745 7758 }
4bd07ac2
A
7759
7760 /* Reset ifnet IPv6 stats */
0a7de745 7761 if (ifp->if_ipv6_stat != NULL) {
4bd07ac2 7762 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
0a7de745 7763 }
4bd07ac2 7764
3e170ce0
A
7765 /* Release memory held for interface link status report */
7766 if (ifp->if_link_status != NULL) {
7767 FREE(ifp->if_link_status, M_TEMP);
7768 ifp->if_link_status = NULL;
7769 }
7770
39037602
A
7771 /* Clear agent IDs */
7772 if (ifp->if_agentids != NULL) {
7773 FREE(ifp->if_agentids, M_NETAGENT);
7774 ifp->if_agentids = NULL;
7775 }
7776 ifp->if_agentcount = 0;
7777
7778
2d21ac55
A
7779 /* Let BPF know we're detaching */
7780 bpfdetach(ifp);
6d2010ae
A
7781
7782 /* Mark the interface as DOWN */
7783 if_down(ifp);
7784
7785 /* Disable forwarding cached route */
7786 lck_mtx_lock(&ifp->if_cached_route_lock);
7787 ifp->if_fwd_cacheok = 0;
7788 lck_mtx_unlock(&ifp->if_cached_route_lock);
7789
5ba3f43e 7790 /* Disable data threshold and wait for any pending event posting */
39236c6e 7791 ifp->if_data_threshold = 0;
5ba3f43e
A
7792 VERIFY(ifp->if_dt_tcall != NULL);
7793 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
7794
d1ecb069 7795 /*
6d2010ae
A
7796 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
7797 * references to the info structures and leave them attached to
7798 * this ifnet.
d1ecb069 7799 */
6d2010ae
A
7800#if INET
7801 igmp_domifdetach(ifp);
7802#endif /* INET */
6d2010ae 7803 mld_domifdetach(ifp);
6d2010ae
A
7804
7805 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
7806
7807 /* Let worker thread take care of the rest, to avoid reentrancy */
7ddcb079 7808 dlil_if_lock();
6d2010ae 7809 ifnet_detaching_enqueue(ifp);
7ddcb079 7810 dlil_if_unlock();
6d2010ae 7811
0a7de745 7812 return 0;
6d2010ae
A
7813}
7814
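/*
 * Illustrative sketch (not part of dlil.c): because the heavy lifting
 * is deferred to the detacher thread, ifnet_detach() returning 0 does
 * not mean the interface is gone. A driver must wait for its detached
 * callback (my_detached below is hypothetical, registered via the
 * detach field at ifnet_allocate() time) before tearing down its own
 * state; it fires as if_free from ifnet_detach_final().
 */
static void
example_destroy(ifnet_t ifp)
{
	(void) ifnet_detach(ifp);	/* queues final detach; asynchronous */
	/*
	 * Resources shared with the ifnet must stay valid until
	 * my_detached(ifp) is invoked by the detacher thread.
	 */
}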
7815static void
7816ifnet_detaching_enqueue(struct ifnet *ifp)
7817{
7ddcb079 7818 dlil_if_lock_assert();
6d2010ae
A
7819
7820 ++ifnet_detaching_cnt;
7821 VERIFY(ifnet_detaching_cnt != 0);
7822 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
7823 wakeup((caddr_t)&ifnet_delayed_run);
7824}
7825
7826static struct ifnet *
7827ifnet_detaching_dequeue(void)
7828{
7829 struct ifnet *ifp;
7830
7ddcb079 7831 dlil_if_lock_assert();
6d2010ae
A
7832
7833 ifp = TAILQ_FIRST(&ifnet_detaching_head);
7834 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
7835 if (ifp != NULL) {
7836 VERIFY(ifnet_detaching_cnt != 0);
7837 --ifnet_detaching_cnt;
7838 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
7839 ifp->if_detaching_link.tqe_next = NULL;
7840 ifp->if_detaching_link.tqe_prev = NULL;
7841 }
0a7de745 7842 return ifp;
6d2010ae
A
7843}
7844
f427ee49
A
7845__attribute__((noreturn))
7846static void
7847ifnet_detacher_thread_cont(void *v, wait_result_t wres)
6d2010ae 7848{
f427ee49 7849#pragma unused(v, wres)
6d2010ae
A
7850 struct ifnet *ifp;
7851
f427ee49
A
7852 dlil_if_lock();
7853 if (__improbable(ifnet_detaching_embryonic)) {
7854 ifnet_detaching_embryonic = FALSE;
 7855 /* there's no lock ordering constraint so OK to do this here */
7856 dlil_decr_pending_thread_count();
7857 }
7858
6d2010ae 7859 for (;;) {
316670eb 7860 dlil_if_lock_assert();
f427ee49
A
7861
7862 if (ifnet_detaching_cnt == 0) {
7863 break;
6d2010ae
A
7864 }
7865
cb323159
A
7866 net_update_uptime();
7867
6d2010ae
A
7868 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
7869
7870 /* Take care of detaching ifnet */
7871 ifp = ifnet_detaching_dequeue();
316670eb
A
7872 if (ifp != NULL) {
7873 dlil_if_unlock();
6d2010ae 7874 ifnet_detach_final(ifp);
316670eb
A
7875 dlil_if_lock();
7876 }
55e303ae 7877 }
f427ee49
A
7878
7879 (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
7880 dlil_if_unlock();
7881 (void) thread_block(ifnet_detacher_thread_cont);
7882
7883 VERIFY(0); /* we should never get here */
7884 /* NOTREACHED */
7885 __builtin_unreachable();
316670eb
A
7886}
7887
cb323159 7888__dead2
316670eb
A
7889static void
7890ifnet_detacher_thread_func(void *v, wait_result_t w)
7891{
7892#pragma unused(v, w)
7893 dlil_if_lock();
f427ee49
A
7894 (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
7895 ifnet_detaching_embryonic = TRUE;
7896 /* wake up once to get out of embryonic state */
7897 wakeup((caddr_t)&ifnet_delayed_run);
316670eb 7898 dlil_if_unlock();
f427ee49 7899 (void) thread_block(ifnet_detacher_thread_cont);
316670eb 7900 VERIFY(0);
f427ee49
A
7901 /* NOTREACHED */
7902 __builtin_unreachable();
6d2010ae 7903}
b0d623f7 7904
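/*
 * Illustrative sketch (not part of dlil.c) of the continuation
 * pattern used by the detacher thread above: the thread parks with
 * assert_wait() + thread_block(cont) and re-enters the continuation
 * from the top every time wakeup(&channel) fires; it never returns.
 * example_cont is hypothetical; v doubles as the wait channel.
 */
static void
example_cont(void *v, wait_result_t w)
{
#pragma unused(w)
	/* ... drain all queued work for this channel ... */
	(void) assert_wait(v, THREAD_UNINT);
	(void) thread_block(example_cont);
	/* NOTREACHED */
	__builtin_unreachable();
}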
6d2010ae
A
7905static void
7906ifnet_detach_final(struct ifnet *ifp)
7907{
7908 struct ifnet_filter *filter, *filter_next;
7909 struct ifnet_filter_head fhead;
316670eb 7910 struct dlil_threading_info *inp;
6d2010ae
A
7911 struct ifaddr *ifa;
7912 ifnet_detached_func if_free;
7913 int i;
7914
7915 lck_mtx_lock(&ifp->if_ref_lock);
7916 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7917 panic("%s: flags mismatch (detaching not set) ifp=%p",
7918 __func__, ifp);
7919 /* NOTREACHED */
7920 }
7921
316670eb
A
7922 /*
7923 * Wait until the existing IO references get released
7924 * before we proceed with ifnet_detach. This is not a
7925 * common case, so block without using a continuation.
b0d623f7 7926 */
6d2010ae 7927 while (ifp->if_refio > 0) {
cb323159 7928 DLIL_PRINTF("%s: Waiting for IO references on %s interface "
39236c6e 7929 "to be released\n", __func__, if_name(ifp));
6d2010ae 7930 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
0a7de745 7931 (PZERO - 1), "ifnet_ioref_wait", NULL);
6d2010ae 7932 }
cb323159
A
7933
7934 VERIFY(ifp->if_datamov == 0);
7935 VERIFY(ifp->if_drainers == 0);
7936 VERIFY(ifp->if_suspend == 0);
7937 ifp->if_refflags &= ~IFRF_READY;
6d2010ae
A
7938 lck_mtx_unlock(&ifp->if_ref_lock);
7939
fe8ab488
A
7940 /* Drain and destroy send queue */
7941 ifclassq_teardown(ifp);
7942
6d2010ae
A
7943 /* Detach interface filters */
7944 lck_mtx_lock(&ifp->if_flt_lock);
7945 if_flt_monitor_enter(ifp);
b0d623f7 7946
5ba3f43e 7947 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636
A
7948 fhead = ifp->if_flt_head;
7949 TAILQ_INIT(&ifp->if_flt_head);
2d21ac55 7950
6d2010ae
A
7951 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
7952 filter_next = TAILQ_NEXT(filter, filt_next);
7953 lck_mtx_unlock(&ifp->if_flt_lock);
7954
7955 dlil_detach_filter_internal(filter, 1);
7956 lck_mtx_lock(&ifp->if_flt_lock);
7957 }
7958 if_flt_monitor_leave(ifp);
7959 lck_mtx_unlock(&ifp->if_flt_lock);
7960
7961 /* Tell upper layers to drop their network addresses */
7962 if_purgeaddrs(ifp);
7963
7964 ifnet_lock_exclusive(ifp);
7965
f427ee49 7966 /* Unplumb all protocols */
6d2010ae
A
7967 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
7968 struct if_proto *proto;
7969
7970 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7971 while (proto != NULL) {
7972 protocol_family_t family = proto->protocol_family;
7973 ifnet_lock_done(ifp);
7974 proto_unplumb(family, ifp);
7975 ifnet_lock_exclusive(ifp);
7976 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7977 }
7978 /* There should not be any protocols left */
7979 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
7980 }
7981 zfree(dlif_phash_zone, ifp->if_proto_hash);
7982 ifp->if_proto_hash = NULL;
7983
7984 /* Detach (permanent) link address from if_addrhead */
7985 ifa = TAILQ_FIRST(&ifp->if_addrhead);
7986 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
7987 IFA_LOCK(ifa);
7988 if_detach_link_ifa(ifp, ifa);
7989 IFA_UNLOCK(ifa);
7990
7991 /* Remove (permanent) link address from ifnet_addrs[] */
7992 IFA_REMREF(ifa);
7993 ifnet_addrs[ifp->if_index - 1] = NULL;
7994
7995 /* This interface should not be on {ifnet_head,detaching} */
7996 VERIFY(ifp->if_link.tqe_next == NULL);
7997 VERIFY(ifp->if_link.tqe_prev == NULL);
7998 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7999 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
39037602
A
8000 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
8001 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6d2010ae
A
8002
8003 /* The slot should have been emptied */
8004 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
8005
8006 /* There should not be any addresses left */
8007 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
1c79356b 8008
316670eb
A
8009 /*
8010 * Signal the starter thread to terminate itself.
8011 */
8012 if (ifp->if_start_thread != THREAD_NULL) {
8013 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e 8014 ifp->if_start_flags = 0;
316670eb
A
8015 ifp->if_start_thread = THREAD_NULL;
8016 wakeup_one((caddr_t)&ifp->if_start_thread);
8017 lck_mtx_unlock(&ifp->if_start_lock);
8018 }
8019
8020 /*
8021 * Signal the poller thread to terminate itself.
8022 */
8023 if (ifp->if_poll_thread != THREAD_NULL) {
8024 lck_mtx_lock_spin(&ifp->if_poll_lock);
8025 ifp->if_poll_thread = THREAD_NULL;
8026 wakeup_one((caddr_t)&ifp->if_poll_thread);
8027 lck_mtx_unlock(&ifp->if_poll_lock);
8028 }
8029
2d21ac55
A
8030 /*
8031 * If thread affinity was set for the workloop thread, we will need
8032 * to tear down the affinity and release the extra reference count
316670eb
A
8033 * taken at attach time. Does not apply to lo0 or other interfaces
8034 * without dedicated input threads.
2d21ac55 8035 */
316670eb
A
8036 if ((inp = ifp->if_inp) != NULL) {
8037 VERIFY(inp != dlil_main_input_thread);
8038
f427ee49 8039 if (inp->dlth_affinity) {
316670eb
A
8040 struct thread *tp, *wtp, *ptp;
8041
f427ee49
A
8042 lck_mtx_lock_spin(&inp->dlth_lock);
8043 wtp = inp->dlth_driver_thread;
8044 inp->dlth_driver_thread = THREAD_NULL;
8045 ptp = inp->dlth_poller_thread;
8046 inp->dlth_poller_thread = THREAD_NULL;
8047 ASSERT(inp->dlth_thread != THREAD_NULL);
8048 tp = inp->dlth_thread; /* don't nullify now */
8049 inp->dlth_affinity_tag = 0;
8050 inp->dlth_affinity = FALSE;
8051 lck_mtx_unlock(&inp->dlth_lock);
316670eb
A
8052
8053 /* Tear down poll thread affinity */
8054 if (ptp != NULL) {
8055 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
cb323159 8056 VERIFY(ifp->if_xflags & IFXF_LEGACY);
316670eb
A
8057 (void) dlil_affinity_set(ptp,
8058 THREAD_AFFINITY_TAG_NULL);
8059 thread_deallocate(ptp);
6d2010ae 8060 }
2d21ac55 8061
2d21ac55 8062 /* Tear down workloop thread affinity */
316670eb
A
8063 if (wtp != NULL) {
8064 (void) dlil_affinity_set(wtp,
2d21ac55 8065 THREAD_AFFINITY_TAG_NULL);
316670eb 8066 thread_deallocate(wtp);
2d21ac55 8067 }
1c79356b 8068
316670eb 8069 /* Tear down DLIL input thread affinity */
2d21ac55
A
8070 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
8071 thread_deallocate(tp);
9bccf70c 8072 }
1c79356b 8073
316670eb
A
8074 /* disassociate ifp DLIL input thread */
8075 ifp->if_inp = NULL;
6d2010ae 8076
f427ee49
A
8077 /* if the worker thread was created, tell it to terminate */
8078 if (inp->dlth_thread != THREAD_NULL) {
8079 lck_mtx_lock_spin(&inp->dlth_lock);
8080 inp->dlth_flags |= DLIL_INPUT_TERMINATE;
8081 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
8082 wakeup_one((caddr_t)&inp->dlth_flags);
8083 }
8084 lck_mtx_unlock(&inp->dlth_lock);
8085 ifnet_lock_done(ifp);
5ba3f43e 8086
f427ee49
A
8087 /* wait for the input thread to terminate */
8088 lck_mtx_lock_spin(&inp->dlth_lock);
8089 while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
8090 == 0) {
8091 (void) msleep(&inp->dlth_flags, &inp->dlth_lock,
8092 (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
8093 }
8094 lck_mtx_unlock(&inp->dlth_lock);
8095 ifnet_lock_exclusive(ifp);
5ba3f43e 8096 }
5ba3f43e
A
8097
8098 /* clean-up input thread state */
8099 dlil_clean_threading_info(inp);
cb323159
A
8100 /* clean-up poll parameters */
8101 VERIFY(ifp->if_poll_thread == THREAD_NULL);
8102 dlil_reset_rxpoll_params(ifp);
55e303ae 8103 }
6d2010ae
A
8104
8105 /* The driver might unload, so point these to ourselves */
8106 if_free = ifp->if_free;
5ba3f43e 8107 ifp->if_output_dlil = ifp_if_output;
6d2010ae 8108 ifp->if_output = ifp_if_output;
316670eb
A
8109 ifp->if_pre_enqueue = ifp_if_output;
8110 ifp->if_start = ifp_if_start;
8111 ifp->if_output_ctl = ifp_if_ctl;
5ba3f43e 8112 ifp->if_input_dlil = ifp_if_input;
316670eb
A
8113 ifp->if_input_poll = ifp_if_input_poll;
8114 ifp->if_input_ctl = ifp_if_ctl;
6d2010ae
A
8115 ifp->if_ioctl = ifp_if_ioctl;
8116 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
8117 ifp->if_free = ifp_if_free;
8118 ifp->if_demux = ifp_if_demux;
8119 ifp->if_event = ifp_if_event;
39236c6e
A
8120 ifp->if_framer_legacy = ifp_if_framer;
8121 ifp->if_framer = ifp_if_framer_extended;
6d2010ae
A
8122 ifp->if_add_proto = ifp_if_add_proto;
8123 ifp->if_del_proto = ifp_if_del_proto;
8124 ifp->if_check_multi = ifp_if_check_multi;
8125
316670eb
A
8126 /* wipe out interface description */
8127 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
8128 ifp->if_desc.ifd_len = 0;
8129 VERIFY(ifp->if_desc.ifd_desc != NULL);
8130 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
8131
39236c6e
A
8132 /* there shouldn't be any delegation by now */
8133 VERIFY(ifp->if_delegated.ifp == NULL);
8134 VERIFY(ifp->if_delegated.type == 0);
8135 VERIFY(ifp->if_delegated.family == 0);
8136 VERIFY(ifp->if_delegated.subfamily == 0);
fe8ab488 8137 VERIFY(ifp->if_delegated.expensive == 0);
cb323159 8138 VERIFY(ifp->if_delegated.constrained == 0);
39236c6e 8139
39037602 8140 /* QoS marking gets cleared */
f427ee49 8141 if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
39037602
A
8142 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
8143
5ba3f43e 8144
6d2010ae
A
8145 ifnet_lock_done(ifp);
8146
8147#if PF
8148 /*
8149 * Detach this interface from packet filter, if enabled.
8150 */
8151 pf_ifnet_hook(ifp, 0);
8152#endif /* PF */
8153
8154 /* Filter list should be empty */
8155 lck_mtx_lock_spin(&ifp->if_flt_lock);
8156 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
8157 VERIFY(ifp->if_flt_busy == 0);
8158 VERIFY(ifp->if_flt_waiters == 0);
8159 lck_mtx_unlock(&ifp->if_flt_lock);
8160
316670eb
A
8161 /* Last chance to drain send queue */
8162 if_qflush(ifp, 0);
8163
6d2010ae
A
8164 /* Last chance to cleanup any cached route */
8165 lck_mtx_lock(&ifp->if_cached_route_lock);
8166 VERIFY(!ifp->if_fwd_cacheok);
39236c6e 8167 ROUTE_RELEASE(&ifp->if_fwd_route);
0a7de745 8168 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
39236c6e 8169 ROUTE_RELEASE(&ifp->if_src_route);
0a7de745 8170 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
39236c6e 8171 ROUTE_RELEASE(&ifp->if_src_route6);
0a7de745 8172 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
6d2010ae
A
8173 lck_mtx_unlock(&ifp->if_cached_route_lock);
8174
39236c6e 8175 VERIFY(ifp->if_data_threshold == 0);
5ba3f43e
A
8176 VERIFY(ifp->if_dt_tcall != NULL);
8177 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
39236c6e 8178
6d2010ae
A
8179 ifnet_llreach_ifdetach(ifp);
8180
8181 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
8182
6d2010ae
A
8183 /*
8184 * Finally, mark this ifnet as detached.
8185 */
8186 lck_mtx_lock_spin(&ifp->if_ref_lock);
8187 if (!(ifp->if_refflags & IFRF_DETACHING)) {
8188 panic("%s: flags mismatch (detaching not set) ifp=%p",
8189 __func__, ifp);
8190 /* NOTREACHED */
55e303ae 8191 }
6d2010ae
A
8192 ifp->if_refflags &= ~IFRF_DETACHING;
8193 lck_mtx_unlock(&ifp->if_ref_lock);
0a7de745 8194 if (if_free != NULL) {
39037602 8195 if_free(ifp);
0a7de745 8196 }
6d2010ae 8197
0a7de745 8198 if (dlil_verbose) {
cb323159 8199 DLIL_PRINTF("%s: detached\n", if_name(ifp));
0a7de745 8200 }
6d2010ae
A
8201
8202 /* Release reference held during ifnet attach */
8203 ifnet_release(ifp);
1c79356b 8204}
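/*
 * Editor's aside: the detach path above stops the dedicated input thread
 * with a two-flag handshake -- the detaching context sets
 * DLIL_INPUT_TERMINATE (waking the thread only if it is not RUNNING) and
 * then sleeps until the thread posts DLIL_INPUT_TERMINATE_COMPLETE.
 * Below is a minimal userland sketch of that same handshake built on
 * pthreads; every name in it is illustrative, not a kernel API.
 */
#include <pthread.h>
#include <stdio.h>

#define INP_RUNNING            0x1
#define INP_TERMINATE          0x2
#define INP_TERMINATE_COMPLETE 0x4

static pthread_mutex_t inp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t inp_cv = PTHREAD_COND_INITIALIZER;
static unsigned int inp_flags;

static void *
input_thread(void *arg)
{
#pragma unused(arg)
	pthread_mutex_lock(&inp_lock);
	while (!(inp_flags & INP_TERMINATE)) {
		inp_flags &= ~INP_RUNNING;
		pthread_cond_wait(&inp_cv, &inp_lock);  /* msleep() analogue */
		inp_flags |= INP_RUNNING;
		/* ... drain pending work here ... */
	}
	inp_flags |= INP_TERMINATE_COMPLETE;    /* acknowledge the teardown */
	pthread_cond_broadcast(&inp_cv);        /* wakeup() analogue */
	pthread_mutex_unlock(&inp_lock);
	return NULL;
}

static void
input_thread_terminate(void)
{
	pthread_mutex_lock(&inp_lock);
	inp_flags |= INP_TERMINATE;
	if (!(inp_flags & INP_RUNNING)) {
		pthread_cond_broadcast(&inp_cv);    /* wakeup_one() analogue */
	}
	while (!(inp_flags & INP_TERMINATE_COMPLETE)) {
		pthread_cond_wait(&inp_cv, &inp_lock);
	}
	pthread_mutex_unlock(&inp_lock);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, input_thread, NULL);
	input_thread_terminate();
	pthread_join(t, NULL);
	printf("input thread terminated cleanly\n");
	return 0;
}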
9bccf70c 8205
5ba3f43e 8206errno_t
6d2010ae 8207ifp_if_output(struct ifnet *ifp, struct mbuf *m)
9bccf70c 8208{
6d2010ae 8209#pragma unused(ifp)
39037602 8210 m_freem_list(m);
0a7de745 8211 return 0;
9bccf70c
A
8212}
8213
5ba3f43e 8214void
316670eb
A
8215ifp_if_start(struct ifnet *ifp)
8216{
8217 ifnet_purge(ifp);
8218}
8219
39037602
A
8220static errno_t
8221ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
8222 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
8223 boolean_t poll, struct thread *tp)
8224{
8225#pragma unused(ifp, m_tail, s, poll, tp)
8226 m_freem_list(m_head);
0a7de745 8227 return ENXIO;
39037602
A
8228}
8229
316670eb
A
8230static void
8231ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
8232 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
8233{
8234#pragma unused(ifp, flags, max_cnt)
0a7de745 8235 if (m_head != NULL) {
316670eb 8236 *m_head = NULL;
0a7de745
A
8237 }
8238 if (m_tail != NULL) {
316670eb 8239 *m_tail = NULL;
0a7de745
A
8240 }
8241 if (cnt != NULL) {
316670eb 8242 *cnt = 0;
0a7de745
A
8243 }
8244 if (len != NULL) {
316670eb 8245 *len = 0;
0a7de745 8246 }
316670eb
A
8247}
8248
8249static errno_t
8250ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
8251{
8252#pragma unused(ifp, cmd, arglen, arg)
0a7de745 8253 return EOPNOTSUPP;
316670eb
A
8254}
8255
6d2010ae
A
8256static errno_t
8257ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
9bccf70c 8258{
6d2010ae
A
8259#pragma unused(ifp, fh, pf)
8260 m_freem(m);
0a7de745 8261 return EJUSTRETURN;
9bccf70c
A
8262}
8263
6d2010ae
A
8264static errno_t
8265ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
8266 const struct ifnet_demux_desc *da, u_int32_t dc)
9bccf70c 8267{
6d2010ae 8268#pragma unused(ifp, pf, da, dc)
0a7de745 8269 return EINVAL;
9bccf70c
A
8270}
8271
91447636 8272static errno_t
6d2010ae 8273ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
9bccf70c 8274{
6d2010ae 8275#pragma unused(ifp, pf)
0a7de745 8276 return EINVAL;
6d2010ae
A
8277}
8278
8279static errno_t
8280ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
8281{
8282#pragma unused(ifp, sa)
0a7de745 8283 return EOPNOTSUPP;
6d2010ae
A
8284}
8285
f427ee49 8286#if !XNU_TARGET_OS_OSX
5ba3f43e
A
8287static errno_t
8288ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8289 const struct sockaddr *sa, const char *ll, const char *t,
8290 u_int32_t *pre, u_int32_t *post)
f427ee49 8291#else /* XNU_TARGET_OS_OSX */
39236c6e
A
8292static errno_t
8293ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8294 const struct sockaddr *sa, const char *ll, const char *t)
f427ee49 8295#endif /* XNU_TARGET_OS_OSX */
6d2010ae
A
8296{
8297#pragma unused(ifp, m, sa, ll, t)
f427ee49 8298#if !XNU_TARGET_OS_OSX
0a7de745 8299 return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
f427ee49 8300#else /* XNU_TARGET_OS_OSX */
0a7de745 8301 return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
f427ee49 8302#endif /* XNU_TARGET_OS_OSX */
39236c6e
A
8303}
8304
8305static errno_t
8306ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
8307 const struct sockaddr *sa, const char *ll, const char *t,
8308 u_int32_t *pre, u_int32_t *post)
8309{
8310#pragma unused(ifp, sa, ll, t)
6d2010ae
A
8311 m_freem(*m);
8312 *m = NULL;
39236c6e 8313
0a7de745 8314 if (pre != NULL) {
39236c6e 8315 *pre = 0;
0a7de745
A
8316 }
8317 if (post != NULL) {
39236c6e 8318 *post = 0;
0a7de745 8319 }
39236c6e 8320
0a7de745 8321 return EJUSTRETURN;
6d2010ae
A
8322}
8323
316670eb 8324errno_t
6d2010ae
A
8325ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
8326{
8327#pragma unused(ifp, cmd, arg)
0a7de745 8328 return EOPNOTSUPP;
6d2010ae
A
8329}
8330
8331static errno_t
8332ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
8333{
8334#pragma unused(ifp, tm, f)
8335 /* XXX not sure what to do here */
0a7de745 8336 return 0;
6d2010ae
A
8337}
8338
8339static void
8340ifp_if_free(struct ifnet *ifp)
8341{
8342#pragma unused(ifp)
8343}
8344
8345static void
8346ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
8347{
8348#pragma unused(ifp, e)
9bccf70c
A
8349}
8350
0a7de745
A
8351int
8352dlil_if_acquire(u_int32_t family, const void *uniqueid,
a39ff7e2 8353 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
6d2010ae
A
8354{
8355 struct ifnet *ifp1 = NULL;
8356 struct dlil_ifnet *dlifp1 = NULL;
cb323159 8357 struct dlil_ifnet *dlifp1_saved = NULL;
6d2010ae
A
8358 void *buf, *base, **pbuf;
8359 int ret = 0;
8360
a39ff7e2 8361 VERIFY(*ifp == NULL);
7ddcb079 8362 dlil_if_lock();
a39ff7e2
A
8363 /*
8364 * We absolutely can't have an interface with the same name
8365 * in the in-use state.
8366 * To make sure of that, the list has to be traversed completely.
8367 */
6d2010ae
A
8368 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
8369 ifp1 = (struct ifnet *)dlifp1;
8370
0a7de745 8371 if (ifp1->if_family != family) {
6d2010ae 8372 continue;
0a7de745 8373 }
6d2010ae 8374
a39ff7e2
A
8375 /*
8376 * If the interface is in use, return EBUSY if either the unique id
8377 * or the interface extended name is the same.
8378 */
6d2010ae 8379 lck_mtx_lock(&dlifp1->dl_if_lock);
a39ff7e2 8380 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
6d2010ae 8381 if (dlifp1->dl_if_flags & DLIF_INUSE) {
a39ff7e2
A
8382 lck_mtx_unlock(&dlifp1->dl_if_lock);
8383 ret = EBUSY;
8384 goto end;
8385 }
8386 }
8387
8388 if (uniqueid_len) {
8389 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
8390 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
8391 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6d2010ae 8392 lck_mtx_unlock(&dlifp1->dl_if_lock);
a39ff7e2 8393 ret = EBUSY;
9bccf70c 8394 goto end;
a39ff7e2 8395 } else {
a39ff7e2 8396 /* Cache the first interface that can be recycled */
0a7de745 8397 if (*ifp == NULL) {
a39ff7e2 8398 *ifp = ifp1;
cb323159 8399 dlifp1_saved = dlifp1;
0a7de745 8400 }
a39ff7e2
A
8401 /*
8402 * XXX Do not break or jump to end as we have to traverse
8403 * the whole list to ensure there are no name collisions
8404 */
6d2010ae 8405 }
6d2010ae
A
8406 }
8407 }
8408 lck_mtx_unlock(&dlifp1->dl_if_lock);
8409 }
8410
a39ff7e2 8411 /* If there's an interface that can be recycled, use that */
0a7de745 8412 if (*ifp != NULL) {
cb323159
A
8413 if (dlifp1_saved != NULL) {
8414 lck_mtx_lock(&dlifp1_saved->dl_if_lock);
8415 dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
8416 lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
8417 dlifp1_saved = NULL;
8418 }
a39ff7e2 8419 goto end;
0a7de745 8420 }
a39ff7e2 8421
6d2010ae 8422 /* no interface found, allocate a new one */
f427ee49 8423 buf = zalloc_flags(dlif_zone, Z_WAITOK | Z_ZERO);
6d2010ae
A
8424 if (buf == NULL) {
8425 ret = ENOMEM;
8426 goto end;
8427 }
6d2010ae
A
8428
8429 /* Get the 64-bit aligned base address for this object */
0a7de745
A
8430 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
8431 sizeof(u_int64_t));
6d2010ae
A
8432 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
8433
8434 /*
8435 * Wind back a pointer size from the aligned base and
8436 * save the original address so we can free it later.
8437 */
0a7de745 8438 pbuf = (void **)((intptr_t)base - sizeof(void *));
6d2010ae
A
8439 *pbuf = buf;
8440 dlifp1 = base;
8441
8442 if (uniqueid_len) {
8443 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
8444 M_NKE, M_WAITOK);
8445 if (dlifp1->dl_if_uniqueid == NULL) {
5ba3f43e 8446 zfree(dlif_zone, buf);
6d2010ae
A
8447 ret = ENOMEM;
8448 goto end;
8449 }
8450 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
8451 dlifp1->dl_if_uniqueid_len = uniqueid_len;
8452 }
8453
8454 ifp1 = (struct ifnet *)dlifp1;
8455 dlifp1->dl_if_flags = DLIF_INUSE;
8456 if (ifnet_debug) {
8457 dlifp1->dl_if_flags |= DLIF_DEBUG;
8458 dlifp1->dl_if_trace = dlil_if_trace;
8459 }
8460 ifp1->if_name = dlifp1->dl_if_namestorage;
39236c6e 8461 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
316670eb
A
8462
8463 /* initialize interface description */
8464 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
8465 ifp1->if_desc.ifd_len = 0;
8466 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
8467
5ba3f43e 8468
316670eb
A
8469 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
8470 DLIL_PRINTF("%s: failed to allocate if local stats, "
8471 "error: %d\n", __func__, ret);
8472 /* This probably shouldn't be fatal */
8473 ret = 0;
8474 }
8475
6d2010ae
A
8476 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
8477 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
8478 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
8479 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6d2010ae
A
8480 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
8481 ifnet_lock_attr);
8482 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
3e170ce0
A
8483#if INET
8484 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
8485 ifnet_lock_attr);
8486 ifp1->if_inetdata = NULL;
8487#endif
3e170ce0
A
8488 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
8489 ifnet_lock_attr);
39236c6e 8490 ifp1->if_inet6data = NULL;
3e170ce0
A
8491 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
8492 ifnet_lock_attr);
8493 ifp1->if_link_status = NULL;
6d2010ae 8494
316670eb
A
8495 /* for send data paths */
8496 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
8497 ifnet_lock_attr);
8498 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
8499 ifnet_lock_attr);
8500 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
8501 ifnet_lock_attr);
8502
8503 /* for receive data paths */
8504 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
8505 ifnet_lock_attr);
8506
5ba3f43e
A
8507 /* thread call allocation is done with sleeping zalloc */
8508 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
8509 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
8510 if (ifp1->if_dt_tcall == NULL) {
8511 panic_plain("%s: couldn't create if_dt_tcall", __func__);
8512 /* NOTREACHED */
8513 }
8514
6d2010ae
A
8515 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
8516
8517 *ifp = ifp1;
9bccf70c
A
8518
8519end:
7ddcb079 8520 dlil_if_unlock();
9bccf70c 8521
0a7de745
A
8522 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
8523 IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
6d2010ae 8524
0a7de745 8525 return ret;
9bccf70c
A
8526}
8527
2d21ac55 8528__private_extern__ void
0a7de745 8529dlil_if_release(ifnet_t ifp)
6d2010ae
A
8530{
8531 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
8532
5ba3f43e
A
8533 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
8534 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
8535 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
8536 }
8537
6d2010ae
A
8538 ifnet_lock_exclusive(ifp);
8539 lck_mtx_lock(&dlifp->dl_if_lock);
8540 dlifp->dl_if_flags &= ~DLIF_INUSE;
fe8ab488 8541 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6d2010ae 8542 ifp->if_name = dlifp->dl_if_namestorage;
39236c6e
A
8543 /* Reset external name (name + unit) */
8544 ifp->if_xname = dlifp->dl_if_xnamestorage;
39037602 8545 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
39236c6e 8546 "%s?", ifp->if_name);
6d2010ae 8547 lck_mtx_unlock(&dlifp->dl_if_lock);
6d2010ae 8548 ifnet_lock_done(ifp);
9bccf70c 8549}
4a3eedf9 8550
7ddcb079
A
8551__private_extern__ void
8552dlil_if_lock(void)
8553{
8554 lck_mtx_lock(&dlil_ifnet_lock);
8555}
8556
8557__private_extern__ void
8558dlil_if_unlock(void)
8559{
8560 lck_mtx_unlock(&dlil_ifnet_lock);
8561}
8562
8563__private_extern__ void
8564dlil_if_lock_assert(void)
8565{
5ba3f43e 8566 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
7ddcb079
A
8567}
8568
4a3eedf9
A
8569__private_extern__ void
8570dlil_proto_unplumb_all(struct ifnet *ifp)
8571{
8572 /*
39236c6e
A
8573 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
8574 * each bucket contains exactly one entry; PF_VLAN does not need an
8575 * explicit unplumb.
4a3eedf9 8576 *
39236c6e 8577 * if_proto_hash[3] is for other protocols; we expect anything
4a3eedf9
A
8578 * in this bucket to respond to the DETACHING event (which would
8579 * have happened by now) and do the unplumb then.
8580 */
8581 (void) proto_unplumb(PF_INET, ifp);
4a3eedf9 8582 (void) proto_unplumb(PF_INET6, ifp);
4a3eedf9 8583}
6d2010ae
A
8584
8585static void
8586ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
8587{
8588 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8589 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8590
0a7de745 8591 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
6d2010ae
A
8592
8593 lck_mtx_unlock(&ifp->if_cached_route_lock);
8594}
8595
8596static void
8597ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
8598{
8599 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8600 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8601
8602 if (ifp->if_fwd_cacheok) {
0a7de745 8603 route_copyin(src, &ifp->if_src_route, sizeof(*src));
6d2010ae 8604 } else {
39236c6e 8605 ROUTE_RELEASE(src);
6d2010ae
A
8606 }
8607 lck_mtx_unlock(&ifp->if_cached_route_lock);
8608}
8609
6d2010ae
A
8610static void
8611ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
8612{
8613 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8614 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8615
8616 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
0a7de745 8617 sizeof(*dst));
6d2010ae
A
8618
8619 lck_mtx_unlock(&ifp->if_cached_route_lock);
8620}
8621
8622static void
8623ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
8624{
8625 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8626 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8627
8628 if (ifp->if_fwd_cacheok) {
8629 route_copyin((struct route *)src,
0a7de745 8630 (struct route *)&ifp->if_src_route6, sizeof(*src));
6d2010ae 8631 } else {
39236c6e 8632 ROUTE_RELEASE(src);
6d2010ae
A
8633 }
8634 lck_mtx_unlock(&ifp->if_cached_route_lock);
8635}
6d2010ae
A
8636
8637struct rtentry *
0a7de745 8638ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6d2010ae 8639{
0a7de745
A
8640 struct route src_rt;
8641 struct sockaddr_in *dst;
316670eb
A
8642
8643 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6d2010ae
A
8644
8645 ifp_src_route_copyout(ifp, &src_rt);
8646
39236c6e
A
8647 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
8648 ROUTE_RELEASE(&src_rt);
8649 if (dst->sin_family != AF_INET) {
0a7de745
A
8650 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8651 dst->sin_len = sizeof(src_rt.ro_dst);
6d2010ae
A
8652 dst->sin_family = AF_INET;
8653 }
8654 dst->sin_addr = src_ip;
8655
5ba3f43e
A
8656 VERIFY(src_rt.ro_rt == NULL);
8657 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
8658 0, 0, ifp->if_index);
6d2010ae 8659
5ba3f43e
A
8660 if (src_rt.ro_rt != NULL) {
8661 /* retain a ref, copyin consumes one */
0a7de745 8662 struct rtentry *rte = src_rt.ro_rt;
5ba3f43e
A
8663 RT_ADDREF(rte);
8664 ifp_src_route_copyin(ifp, &src_rt);
8665 src_rt.ro_rt = rte;
6d2010ae
A
8666 }
8667 }
8668
0a7de745 8669 return src_rt.ro_rt;
6d2010ae
A
8670}
8671
39037602 8672struct rtentry *
6d2010ae
A
8673ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
8674{
8675 struct route_in6 src_rt;
8676
8677 ifp_src_route6_copyout(ifp, &src_rt);
8678
39236c6e
A
8679 if (ROUTE_UNUSABLE(&src_rt) ||
8680 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
8681 ROUTE_RELEASE(&src_rt);
8682 if (src_rt.ro_dst.sin6_family != AF_INET6) {
0a7de745
A
8683 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8684 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
6d2010ae
A
8685 src_rt.ro_dst.sin6_family = AF_INET6;
8686 }
8687 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
316670eb 8688 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
0a7de745 8689 sizeof(src_rt.ro_dst.sin6_addr));
6d2010ae
A
8690
8691 if (src_rt.ro_rt == NULL) {
8692 src_rt.ro_rt = rtalloc1_scoped(
0a7de745
A
8693 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
8694 ifp->if_index);
6d2010ae
A
8695
8696 if (src_rt.ro_rt != NULL) {
8697 /* retain a ref, copyin consumes one */
0a7de745 8698 struct rtentry *rte = src_rt.ro_rt;
6d2010ae
A
8699 RT_ADDREF(rte);
8700 ifp_src_route6_copyin(ifp, &src_rt);
8701 src_rt.ro_rt = rte;
8702 }
8703 }
8704 }
8705
0a7de745 8706 return src_rt.ro_rt;
6d2010ae 8707}
316670eb
A
8708
8709void
3e170ce0 8710if_lqm_update(struct ifnet *ifp, int lqm, int locked)
316670eb
A
8711{
8712 struct kev_dl_link_quality_metric_data ev_lqm_data;
8713
8714 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
8715
8716 /* Normalize to edge */
5ba3f43e
A
8717 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
8718 lqm = IFNET_LQM_THRESH_ABORT;
8719 atomic_bitset_32(&tcbinfo.ipi_flags,
8720 INPCBINFO_HANDLE_LQM_ABORT);
8721 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
8722 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
8723 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
8724 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
8725 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
8726 lqm <= IFNET_LQM_THRESH_POOR) {
316670eb 8727 lqm = IFNET_LQM_THRESH_POOR;
5ba3f43e
A
8728 } else if (lqm > IFNET_LQM_THRESH_POOR &&
8729 lqm <= IFNET_LQM_THRESH_GOOD) {
316670eb 8730 lqm = IFNET_LQM_THRESH_GOOD;
5ba3f43e 8731 }
316670eb 8732
3e170ce0
A
8733 /*
8734 * Take the lock if needed
8735 */
0a7de745 8736 if (!locked) {
3e170ce0 8737 ifnet_lock_exclusive(ifp);
0a7de745 8738 }
3e170ce0
A
8739
8740 if (lqm == ifp->if_interface_state.lqm_state &&
39037602 8741 (ifp->if_interface_state.valid_bitmask &
3e170ce0
A
8742 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
8743 /*
8744 * Release the lock if was not held by the caller
8745 */
0a7de745 8746 if (!locked) {
3e170ce0 8747 ifnet_lock_done(ifp);
0a7de745
A
8748 }
8749 return; /* nothing to update */
316670eb 8750 }
3e170ce0 8751 ifp->if_interface_state.valid_bitmask |=
0a7de745 8752 IF_INTERFACE_STATE_LQM_STATE_VALID;
f427ee49 8753 ifp->if_interface_state.lqm_state = (int8_t)lqm;
3e170ce0
A
8754
8755 /*
8756 * Don't want to hold the lock when issuing kernel events
8757 */
316670eb
A
8758 ifnet_lock_done(ifp);
8759
0a7de745 8760 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
316670eb
A
8761 ev_lqm_data.link_quality_metric = lqm;
8762
8763 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
0a7de745 8764 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
3e170ce0
A
8765
8766 /*
8767 * Reacquire the lock for the caller
8768 */
0a7de745 8769 if (locked) {
3e170ce0 8770 ifnet_lock_exclusive(ifp);
0a7de745 8771 }
3e170ce0
A
8772}
8773
8774static void
8775if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
8776{
8777 struct kev_dl_rrc_state kev;
39037602 8778
3e170ce0
A
8779 if (rrc_state == ifp->if_interface_state.rrc_state &&
8780 (ifp->if_interface_state.valid_bitmask &
0a7de745 8781 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
3e170ce0 8782 return;
0a7de745 8783 }
3e170ce0
A
8784
8785 ifp->if_interface_state.valid_bitmask |=
8786 IF_INTERFACE_STATE_RRC_STATE_VALID;
8787
f427ee49 8788 ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;
3e170ce0
A
8789
8790 /*
8791 * Don't want to hold the lock when issuing kernel events
8792 */
8793 ifnet_lock_done(ifp);
8794
8795 bzero(&kev, sizeof(struct kev_dl_rrc_state));
8796 kev.rrc_state = rrc_state;
8797
8798 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
8799 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
8800
8801 ifnet_lock_exclusive(ifp);
8802}
8803
8804errno_t
8805if_state_update(struct ifnet *ifp,
39037602 8806 struct if_interface_state *if_interface_state)
3e170ce0
A
8807{
8808 u_short if_index_available = 0;
8809
8810 ifnet_lock_exclusive(ifp);
8811
8812 if ((ifp->if_type != IFT_CELLULAR) &&
8813 (if_interface_state->valid_bitmask &
8814 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8815 ifnet_lock_done(ifp);
0a7de745 8816 return ENOTSUP;
3e170ce0
A
8817 }
8818 if ((if_interface_state->valid_bitmask &
8819 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
8820 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
8821 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
8822 ifnet_lock_done(ifp);
0a7de745 8823 return EINVAL;
3e170ce0
A
8824 }
8825 if ((if_interface_state->valid_bitmask &
8826 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
8827 if_interface_state->rrc_state !=
8828 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
8829 if_interface_state->rrc_state !=
8830 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
8831 ifnet_lock_done(ifp);
0a7de745 8832 return EINVAL;
3e170ce0
A
8833 }
8834
8835 if (if_interface_state->valid_bitmask &
8836 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8837 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
8838 }
8839 if (if_interface_state->valid_bitmask &
8840 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8841 if_rrc_state_update(ifp, if_interface_state->rrc_state);
8842 }
8843 if (if_interface_state->valid_bitmask &
8844 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8845 ifp->if_interface_state.valid_bitmask |=
8846 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8847 ifp->if_interface_state.interface_availability =
8848 if_interface_state->interface_availability;
8849
8850 if (ifp->if_interface_state.interface_availability ==
8851 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
cb323159
A
8852 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
8853 __func__, if_name(ifp), ifp->if_index);
3e170ce0 8854 if_index_available = ifp->if_index;
cb323159
A
8855 } else {
8856 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
8857 __func__, if_name(ifp), ifp->if_index);
3e170ce0
A
8858 }
8859 }
8860 ifnet_lock_done(ifp);
8861
8862 /*
8863 * Check if the TCP connections going on this interface should be
8864 * forced to send probe packets instead of waiting for TCP timers
cb323159
A
8865 * to fire. This is done on an explicit notification such as
8866 * SIOCSIFINTERFACESTATE which marks the interface as available.
3e170ce0 8867 */
0a7de745 8868 if (if_index_available > 0) {
3e170ce0 8869 tcp_interface_send_probe(if_index_available);
0a7de745 8870 }
3e170ce0 8871
0a7de745 8872 return 0;
3e170ce0
A
8873}
8874
8875void
8876if_get_state(struct ifnet *ifp,
39037602 8877 struct if_interface_state *if_interface_state)
3e170ce0
A
8878{
8879 ifnet_lock_shared(ifp);
8880
8881 if_interface_state->valid_bitmask = 0;
8882
8883 if (ifp->if_interface_state.valid_bitmask &
8884 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8885 if_interface_state->valid_bitmask |=
8886 IF_INTERFACE_STATE_RRC_STATE_VALID;
8887 if_interface_state->rrc_state =
8888 ifp->if_interface_state.rrc_state;
8889 }
8890 if (ifp->if_interface_state.valid_bitmask &
8891 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8892 if_interface_state->valid_bitmask |=
8893 IF_INTERFACE_STATE_LQM_STATE_VALID;
8894 if_interface_state->lqm_state =
8895 ifp->if_interface_state.lqm_state;
8896 }
8897 if (ifp->if_interface_state.valid_bitmask &
8898 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8899 if_interface_state->valid_bitmask |=
8900 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8901 if_interface_state->interface_availability =
8902 ifp->if_interface_state.interface_availability;
8903 }
8904
8905 ifnet_lock_done(ifp);
8906}
8907
8908errno_t
8909if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8910{
3e170ce0 8911 if (conn_probe > 1) {
0a7de745 8912 return EINVAL;
3e170ce0 8913 }
0a7de745 8914 if (conn_probe == 0) {
f427ee49 8915 if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
0a7de745 8916 } else {
f427ee49 8917 if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
0a7de745 8918 }
3e170ce0 8919
5ba3f43e
A
8920#if NECP
8921 necp_update_all_clients();
8922#endif /* NECP */
8923
3e170ce0 8924 tcp_probe_connectivity(ifp, conn_probe);
0a7de745 8925 return 0;
316670eb
A
8926}
8927
8928/* for uuid.c */
cb323159
A
8929static int
8930get_ether_index(int * ret_other_index)
316670eb
A
8931{
8932 struct ifnet *ifp;
cb323159
A
8933 int en0_index = 0;
8934 int other_en_index = 0;
8935 int any_ether_index = 0;
8936 short best_unit = 0;
316670eb 8937
cb323159 8938 *ret_other_index = 0;
316670eb 8939 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
cb323159
A
8940 /*
8941 * Find en0; failing that, the lowest-unit en*; failing that,
8942 * any Ethernet interface.
8943 */
316670eb 8944 ifnet_lock_shared(ifp);
cb323159
A
8945 if (strcmp(ifp->if_name, "en") == 0) {
8946 if (ifp->if_unit == 0) {
8947 /* found en0, we're done */
8948 en0_index = ifp->if_index;
8949 ifnet_lock_done(ifp);
8950 break;
8951 }
8952 if (other_en_index == 0 || ifp->if_unit < best_unit) {
8953 other_en_index = ifp->if_index;
8954 best_unit = ifp->if_unit;
8955 }
8956 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
8957 any_ether_index = ifp->if_index;
316670eb 8958 }
316670eb
A
8959 ifnet_lock_done(ifp);
8960 }
cb323159
A
8961 if (en0_index == 0) {
8962 if (other_en_index != 0) {
8963 *ret_other_index = other_en_index;
8964 } else if (any_ether_index != 0) {
8965 *ret_other_index = any_ether_index;
8966 }
8967 }
8968 return en0_index;
8969}
8970
8971int
8972uuid_get_ethernet(u_int8_t *node)
8973{
8974 static int en0_index;
8975 struct ifnet *ifp;
8976 int other_index = 0;
8977 int the_index = 0;
8978 int ret;
316670eb 8979
cb323159
A
8980 ifnet_head_lock_shared();
8981 if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
8982 en0_index = get_ether_index(&other_index);
8983 }
8984 if (en0_index != 0) {
8985 the_index = en0_index;
8986 } else if (other_index != 0) {
8987 the_index = other_index;
8988 }
8989 if (the_index != 0) {
f427ee49
A
8990 struct dlil_ifnet *dl_if;
8991
cb323159
A
8992 ifp = ifindex2ifnet[the_index];
8993 VERIFY(ifp != NULL);
f427ee49
A
8994 dl_if = (struct dlil_ifnet *)ifp;
8995 if (dl_if->dl_if_permanent_ether_is_set != 0) {
8996 /*
8997 * Use the permanent ethernet address if it is
8998 * available because it will never change.
8999 */
9000 memcpy(node, dl_if->dl_if_permanent_ether,
9001 ETHER_ADDR_LEN);
9002 } else {
9003 memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
9004 }
cb323159
A
9005 ret = 0;
9006 } else {
9007 ret = -1;
9008 }
9009 ifnet_head_done();
9010 return ret;
316670eb
A
9011}
9012
9013static int
9014sysctl_rxpoll SYSCTL_HANDLER_ARGS
9015{
9016#pragma unused(arg1, arg2)
39236c6e
A
9017 uint32_t i;
9018 int err;
316670eb
A
9019
9020 i = if_rxpoll;
9021
9022 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
9023 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9024 return err;
9025 }
316670eb 9026
0a7de745
A
9027 if (net_rxpoll == 0) {
9028 return ENXIO;
9029 }
316670eb
A
9030
9031 if_rxpoll = i;
0a7de745 9032 return err;
316670eb
A
9033}
9034
9035static int
39236c6e 9036sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
316670eb
A
9037{
9038#pragma unused(arg1, arg2)
39236c6e
A
9039 uint64_t q;
9040 int err;
316670eb 9041
39236c6e 9042 q = if_rxpoll_mode_holdtime;
316670eb 9043
39236c6e 9044 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745
A
9045 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9046 return err;
9047 }
316670eb 9048
0a7de745 9049 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
39236c6e 9050 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
0a7de745 9051 }
39236c6e
A
9052
9053 if_rxpoll_mode_holdtime = q;
316670eb 9054
0a7de745 9055 return err;
316670eb
A
9056}
9057
9058static int
39236c6e 9059sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
316670eb
A
9060{
9061#pragma unused(arg1, arg2)
39236c6e
A
9062 uint64_t q;
9063 int err;
316670eb 9064
39236c6e 9065 q = if_rxpoll_sample_holdtime;
316670eb 9066
39236c6e 9067 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745
A
9068 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9069 return err;
9070 }
316670eb 9071
0a7de745 9072 if (q < IF_RXPOLL_SAMPLETIME_MIN) {
39236c6e 9073 q = IF_RXPOLL_SAMPLETIME_MIN;
0a7de745 9074 }
39236c6e
A
9075
9076 if_rxpoll_sample_holdtime = q;
316670eb 9077
0a7de745 9078 return err;
316670eb
A
9079}
9080
39236c6e
A
9081static int
9082sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
316670eb 9083{
39236c6e
A
9084#pragma unused(arg1, arg2)
9085 uint64_t q;
9086 int err;
316670eb 9087
39236c6e 9088 q = if_rxpoll_interval_time;
316670eb 9089
39236c6e 9090 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745
A
9091 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9092 return err;
9093 }
39236c6e 9094
0a7de745 9095 if (q < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 9096 q = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 9097 }
316670eb 9098
39236c6e 9099 if_rxpoll_interval_time = q;
316670eb 9100
0a7de745 9101 return err;
316670eb
A
9102}
9103
39236c6e
A
9104static int
9105sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
316670eb 9106{
39236c6e
A
9107#pragma unused(arg1, arg2)
9108 uint32_t i;
9109 int err;
316670eb 9110
cb323159 9111 i = if_sysctl_rxpoll_wlowat;
316670eb 9112
39236c6e 9113 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
9114 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9115 return err;
9116 }
316670eb 9117
cb323159 9118 if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
0a7de745
A
9119 return EINVAL;
9120 }
39236c6e 9121
cb323159 9122 if_sysctl_rxpoll_wlowat = i;
0a7de745 9123 return err;
316670eb
A
9124}
9125
39236c6e
A
9126static int
9127sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
316670eb 9128{
39236c6e
A
9129#pragma unused(arg1, arg2)
9130 uint32_t i;
9131 int err;
316670eb 9132
cb323159 9133 i = if_sysctl_rxpoll_whiwat;
316670eb 9134
39236c6e 9135 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
9136 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9137 return err;
9138 }
316670eb 9139
cb323159 9140 if (i <= if_sysctl_rxpoll_wlowat) {
0a7de745
A
9141 return EINVAL;
9142 }
39236c6e 9143
cb323159 9144 if_sysctl_rxpoll_whiwat = i;
0a7de745 9145 return err;
316670eb
A
9146}
9147
9148static int
39236c6e 9149sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
316670eb 9150{
39236c6e
A
9151#pragma unused(arg1, arg2)
9152 int i, err;
316670eb 9153
39236c6e 9154 i = if_sndq_maxlen;
316670eb 9155
39236c6e 9156 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
9157 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9158 return err;
9159 }
316670eb 9160
0a7de745 9161 if (i < IF_SNDQ_MINLEN) {
39236c6e 9162 i = IF_SNDQ_MINLEN;
0a7de745 9163 }
316670eb 9164
39236c6e 9165 if_sndq_maxlen = i;
0a7de745 9166 return err;
316670eb
A
9167}
9168
39236c6e
A
9169static int
9170sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
316670eb 9171{
39236c6e
A
9172#pragma unused(arg1, arg2)
9173 int i, err;
9174
9175 i = if_rcvq_maxlen;
9176
9177 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
9178 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9179 return err;
9180 }
39236c6e 9181
0a7de745 9182 if (i < IF_RCVQ_MINLEN) {
39236c6e 9183 i = IF_RCVQ_MINLEN;
0a7de745 9184 }
39236c6e
A
9185
9186 if_rcvq_maxlen = i;
0a7de745 9187 return err;
316670eb
A
9188}
9189
cb323159 9190int
316670eb
A
9191dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
9192 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9193{
9194 struct kev_dl_node_presence kev;
9195 struct sockaddr_dl *sdl;
9196 struct sockaddr_in6 *sin6;
cb323159 9197 int ret = 0;
316670eb
A
9198
9199 VERIFY(ifp);
9200 VERIFY(sa);
9201 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9202
0a7de745 9203 bzero(&kev, sizeof(kev));
316670eb
A
9204 sin6 = &kev.sin6_node_address;
9205 sdl = &kev.sdl_node_address;
9206 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
9207 kev.rssi = rssi;
9208 kev.link_quality_metric = lqm;
9209 kev.node_proximity_metric = npm;
0a7de745 9210 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
316670eb 9211
cb323159
A
9212 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
9213 if (ret == 0) {
9214 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9215 &kev.link_data, sizeof(kev));
9216 if (err != 0) {
9217 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
9218 "error %d\n", __func__, err);
9219 }
9220 }
9221 return ret;
316670eb
A
9222}
9223
9224void
9225dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
9226{
cb323159
A
9227 struct kev_dl_node_absence kev = {};
9228 struct sockaddr_in6 *kev_sin6 = NULL;
9229 struct sockaddr_dl *kev_sdl = NULL;
316670eb 9230
cb323159
A
9231 VERIFY(ifp != NULL);
9232 VERIFY(sa != NULL);
316670eb
A
9233 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9234
cb323159
A
9235 kev_sin6 = &kev.sin6_node_address;
9236 kev_sdl = &kev.sdl_node_address;
9237
9238 if (sa->sa_family == AF_INET6) {
9239 /*
9240 * If an IPv6 address is given, get the link-layer
9241 * address that was cached in the neighbor cache
9242 */
9243 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9244 bcopy(sa, kev_sin6, sa->sa_len);
9245 nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
9246 } else {
9247 /*
9248 * If the passed address is of AF_LINK type, derive the
9249 * IPv6 address based on the link-layer address.
9250 */
9251 nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
9252 nd6_alt_node_absent(ifp, kev_sin6, NULL);
9253 }
9254
9255 kev_sdl->sdl_type = ifp->if_type;
9256 kev_sdl->sdl_index = ifp->if_index;
316670eb 9257
316670eb 9258 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
0a7de745 9259 &kev.link_data, sizeof(kev));
316670eb
A
9260}
9261
cb323159
A
9262int
9263dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
9264 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9265{
9266 struct kev_dl_node_presence kev = {};
9267 struct sockaddr_dl *kev_sdl = NULL;
9268 struct sockaddr_in6 *kev_sin6 = NULL;
9269 int ret = 0;
9270
9271 VERIFY(ifp != NULL);
9272 VERIFY(sa != NULL && sdl != NULL);
9273 VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
9274
9275 kev_sin6 = &kev.sin6_node_address;
9276 kev_sdl = &kev.sdl_node_address;
9277
9278 VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
9279 bcopy(sdl, kev_sdl, sdl->sdl_len);
9280 kev_sdl->sdl_type = ifp->if_type;
9281 kev_sdl->sdl_index = ifp->if_index;
9282
9283 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9284 bcopy(sa, kev_sin6, sa->sa_len);
9285
9286 kev.rssi = rssi;
9287 kev.link_quality_metric = lqm;
9288 kev.node_proximity_metric = npm;
9289 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9290
9291 ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
9292 if (ret == 0) {
9293 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9294 &kev.link_data, sizeof(kev));
9295 if (err != 0) {
f427ee49 9296 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
cb323159
A
9297 }
9298 }
9299 return ret;
9300}
9301
39236c6e
A
9302const void *
9303dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
0a7de745 9304 kauth_cred_t *credp)
39236c6e
A
9305{
9306 const u_int8_t *bytes;
9307 size_t size;
9308
9309 bytes = CONST_LLADDR(sdl);
9310 size = sdl->sdl_alen;
9311
9312#if CONFIG_MACF
9313 if (dlil_lladdr_ckreq) {
9314 switch (sdl->sdl_type) {
9315 case IFT_ETHER:
39236c6e 9316 case IFT_IEEE1394:
39236c6e
A
9317 break;
9318 default:
9319 credp = NULL;
9320 break;
0a7de745
A
9321 }
9322
39236c6e
A
9323
9324 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
9325 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
0a7de745 9326 [0] = 2
39236c6e
A
9327 };
9328
5ba3f43e 9329 bytes = unspec;
39236c6e
A
9330 }
9331 }
9332#else
9333#pragma unused(credp)
9334#endif
9335
0a7de745
A
9336 if (sizep != NULL) {
9337 *sizep = size;
9338 }
9339 return bytes;
39236c6e
A
9340}
9341
9342void
9343dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
9344 u_int8_t info[DLIL_MODARGLEN])
9345{
9346 struct kev_dl_issues kev;
9347 struct timeval tv;
9348
9349 VERIFY(ifp != NULL);
9350 VERIFY(modid != NULL);
0a7de745
A
9351 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
9352 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
39236c6e 9353
0a7de745 9354 bzero(&kev, sizeof(kev));
39236c6e
A
9355
9356 microtime(&tv);
9357 kev.timestamp = tv.tv_sec;
9358 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
0a7de745 9359 if (info != NULL) {
39236c6e 9360 bcopy(info, &kev.info, DLIL_MODARGLEN);
0a7de745 9361 }
39236c6e
A
9362
9363 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
0a7de745 9364 &kev.link_data, sizeof(kev));
39236c6e
A
9365}
9366
316670eb
A
9367errno_t
9368ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9369 struct proc *p)
9370{
9371 u_int32_t level = IFNET_THROTTLE_OFF;
9372 errno_t result = 0;
9373
9374 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
9375
9376 if (cmd == SIOCSIFOPPORTUNISTIC) {
9377 /*
9378 * XXX: Use priv_check_cred() instead of root check?
9379 */
0a7de745
A
9380 if ((result = proc_suser(p)) != 0) {
9381 return result;
9382 }
316670eb
A
9383
9384 if (ifr->ifr_opportunistic.ifo_flags ==
0a7de745 9385 IFRIFOF_BLOCK_OPPORTUNISTIC) {
316670eb 9386 level = IFNET_THROTTLE_OPPORTUNISTIC;
0a7de745 9387 } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
316670eb 9388 level = IFNET_THROTTLE_OFF;
0a7de745 9389 } else {
316670eb 9390 result = EINVAL;
0a7de745 9391 }
316670eb 9392
0a7de745 9393 if (result == 0) {
316670eb 9394 result = ifnet_set_throttle(ifp, level);
0a7de745 9395 }
316670eb
A
9396 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
9397 ifr->ifr_opportunistic.ifo_flags = 0;
9398 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
9399 ifr->ifr_opportunistic.ifo_flags |=
9400 IFRIFOF_BLOCK_OPPORTUNISTIC;
9401 }
9402 }
9403
9404 /*
9405 * Return the count of current opportunistic connections
9406 * over the interface.
9407 */
9408 if (result == 0) {
9409 uint32_t flags = 0;
9410 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
0a7de745 9411 INPCB_OPPORTUNISTIC_SETCMD : 0;
39037602 9412 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
0a7de745 9413 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
316670eb
A
9414 ifr->ifr_opportunistic.ifo_inuse =
9415 udp_count_opportunistic(ifp->if_index, flags) +
9416 tcp_count_opportunistic(ifp->if_index, flags);
9417 }
9418
0a7de745 9419 if (result == EALREADY) {
316670eb 9420 result = 0;
0a7de745 9421 }
316670eb 9422
0a7de745 9423 return result;
316670eb
A
9424}
9425
9426int
9427ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
9428{
9429 struct ifclassq *ifq;
9430 int err = 0;
9431
0a7de745
A
9432 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9433 return ENXIO;
9434 }
316670eb
A
9435
9436 *level = IFNET_THROTTLE_OFF;
9437
9438 ifq = &ifp->if_snd;
9439 IFCQ_LOCK(ifq);
9440 /* Throttling works only for IFCQ, not ALTQ instances */
0a7de745 9441 if (IFCQ_IS_ENABLED(ifq)) {
f427ee49
A
9442 cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
9443
9444 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
9445 *level = req.level;
0a7de745 9446 }
316670eb
A
9447 IFCQ_UNLOCK(ifq);
9448
0a7de745 9449 return err;
316670eb
A
9450}
9451
9452int
9453ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
9454{
9455 struct ifclassq *ifq;
9456 int err = 0;
9457
0a7de745
A
9458 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9459 return ENXIO;
9460 }
316670eb 9461
39236c6e
A
9462 ifq = &ifp->if_snd;
9463
316670eb
A
9464 switch (level) {
9465 case IFNET_THROTTLE_OFF:
9466 case IFNET_THROTTLE_OPPORTUNISTIC:
316670eb
A
9467 break;
9468 default:
0a7de745 9469 return EINVAL;
316670eb
A
9470 }
9471
316670eb 9472 IFCQ_LOCK(ifq);
0a7de745 9473 if (IFCQ_IS_ENABLED(ifq)) {
f427ee49
A
9474 cqrq_throttle_t req = { 1, level };
9475
9476 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
0a7de745 9477 }
316670eb
A
9478 IFCQ_UNLOCK(ifq);
9479
9480 if (err == 0) {
cb323159 9481 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
39236c6e 9482 level);
cb323159
A
9483#if NECP
9484 necp_update_all_clients();
9485#endif /* NECP */
0a7de745 9486 if (level == IFNET_THROTTLE_OFF) {
316670eb 9487 ifnet_start(ifp);
0a7de745 9488 }
316670eb
A
9489 }
9490
0a7de745 9491 return err;
316670eb 9492}
39236c6e
A
9493
9494errno_t
9495ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9496 struct proc *p)
9497{
9498#pragma unused(p)
9499 errno_t result = 0;
9500 uint32_t flags;
9501 int level, category, subcategory;
9502
9503 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
9504
9505 if (cmd == SIOCSIFLOG) {
9506 if ((result = priv_check_cred(kauth_cred_get(),
0a7de745
A
9507 PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
9508 return result;
9509 }
39236c6e
A
9510
9511 level = ifr->ifr_log.ifl_level;
0a7de745 9512 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
39236c6e 9513 result = EINVAL;
0a7de745 9514 }
39236c6e
A
9515
9516 flags = ifr->ifr_log.ifl_flags;
0a7de745 9517 if ((flags &= IFNET_LOGF_MASK) == 0) {
39236c6e 9518 result = EINVAL;
0a7de745 9519 }
39236c6e
A
9520
9521 category = ifr->ifr_log.ifl_category;
9522 subcategory = ifr->ifr_log.ifl_subcategory;
9523
0a7de745 9524 if (result == 0) {
39236c6e
A
9525 result = ifnet_set_log(ifp, level, flags,
9526 category, subcategory);
0a7de745 9527 }
39236c6e
A
9528 } else {
9529 result = ifnet_get_log(ifp, &level, &flags, &category,
9530 &subcategory);
9531 if (result == 0) {
9532 ifr->ifr_log.ifl_level = level;
9533 ifr->ifr_log.ifl_flags = flags;
9534 ifr->ifr_log.ifl_category = category;
9535 ifr->ifr_log.ifl_subcategory = subcategory;
9536 }
9537 }
9538
0a7de745 9539 return result;
39236c6e
A
9540}
9541
9542int
9543ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
9544 int32_t category, int32_t subcategory)
9545{
9546 int err = 0;
9547
9548 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
9549 VERIFY(flags & IFNET_LOGF_MASK);
9550
9551 /*
9552 * The logging level applies to all facilities; make sure to
9553 * update them all with the most current level.
9554 */
9555 flags |= ifp->if_log.flags;
9556
9557 if (ifp->if_output_ctl != NULL) {
9558 struct ifnet_log_params l;
9559
0a7de745 9560 bzero(&l, sizeof(l));
39236c6e
A
9561 l.level = level;
9562 l.flags = flags;
9563 l.flags &= ~IFNET_LOGF_DLIL;
9564 l.category = category;
9565 l.subcategory = subcategory;
9566
9567 /* Send this request to lower layers */
9568 if (l.flags != 0) {
9569 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
0a7de745 9570 sizeof(l), &l);
39236c6e
A
9571 }
9572 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
9573 /*
9574 * If targeted to the lower layers without an output
9575 * control callback registered on the interface, just
9576 * silently ignore facilities other than ours.
9577 */
9578 flags &= IFNET_LOGF_DLIL;
0a7de745 9579 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
39236c6e 9580 level = 0;
0a7de745 9581 }
39236c6e
A
9582 }
9583
9584 if (err == 0) {
0a7de745 9585 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
39236c6e 9586 ifp->if_log.flags = 0;
0a7de745 9587 } else {
39236c6e 9588 ifp->if_log.flags |= flags;
0a7de745 9589 }
39236c6e
A
9590
9591 log(LOG_INFO, "%s: logging level set to %d flags=%b "
9592 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
9593 ifp->if_log.level, ifp->if_log.flags,
9594 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
9595 category, subcategory);
9596 }
9597
0a7de745 9598 return err;
39236c6e
A
9599}
9600
9601int
9602ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
9603 int32_t *category, int32_t *subcategory)
9604{
0a7de745 9605 if (level != NULL) {
39236c6e 9606 *level = ifp->if_log.level;
0a7de745
A
9607 }
9608 if (flags != NULL) {
39236c6e 9609 *flags = ifp->if_log.flags;
0a7de745
A
9610 }
9611 if (category != NULL) {
39236c6e 9612 *category = ifp->if_log.category;
0a7de745
A
9613 }
9614 if (subcategory != NULL) {
39236c6e 9615 *subcategory = ifp->if_log.subcategory;
0a7de745 9616 }
39236c6e 9617
0a7de745 9618 return 0;
39236c6e
A
9619}
9620
9621int
9622ifnet_notify_address(struct ifnet *ifp, int af)
9623{
9624 struct ifnet_notify_address_params na;
9625
9626#if PF
9627 (void) pf_ifaddr_hook(ifp);
9628#endif /* PF */
9629
0a7de745
A
9630 if (ifp->if_output_ctl == NULL) {
9631 return EOPNOTSUPP;
9632 }
39236c6e 9633
0a7de745 9634 bzero(&na, sizeof(na));
f427ee49 9635 na.address_family = (sa_family_t)af;
39236c6e 9636
0a7de745
A
9637 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
9638 sizeof(na), &na);
39236c6e
A
9639}
9640
9641errno_t
9642ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
9643{
9644 if (ifp == NULL || flowid == NULL) {
0a7de745 9645 return EINVAL;
39236c6e 9646 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5ba3f43e 9647 !IF_FULLY_ATTACHED(ifp)) {
0a7de745 9648 return ENXIO;
39236c6e
A
9649 }
9650
9651 *flowid = ifp->if_flowhash;
9652
0a7de745 9653 return 0;
39236c6e
A
9654}
9655
9656errno_t
9657ifnet_disable_output(struct ifnet *ifp)
9658{
9659 int err;
9660
9661 if (ifp == NULL) {
0a7de745 9662 return EINVAL;
39236c6e 9663 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5ba3f43e 9664 !IF_FULLY_ATTACHED(ifp)) {
0a7de745 9665 return ENXIO;
39236c6e
A
9666 }
9667
9668 if ((err = ifnet_fc_add(ifp)) == 0) {
9669 lck_mtx_lock_spin(&ifp->if_start_lock);
9670 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
9671 lck_mtx_unlock(&ifp->if_start_lock);
9672 }
0a7de745 9673 return err;
39236c6e
A
9674}
9675
9676errno_t
9677ifnet_enable_output(struct ifnet *ifp)
9678{
9679 if (ifp == NULL) {
0a7de745 9680 return EINVAL;
39236c6e 9681 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5ba3f43e 9682 !IF_FULLY_ATTACHED(ifp)) {
0a7de745 9683 return ENXIO;
39236c6e
A
9684 }
9685
5c9f4661 9686 ifnet_start_common(ifp, TRUE);
0a7de745 9687 return 0;
39236c6e
A
9688}
9689
9690void
9691ifnet_flowadv(uint32_t flowhash)
9692{
9693 struct ifnet_fc_entry *ifce;
9694 struct ifnet *ifp;
9695
9696 ifce = ifnet_fc_get(flowhash);
0a7de745 9697 if (ifce == NULL) {
39236c6e 9698 return;
0a7de745 9699 }
39236c6e
A
9700
9701 VERIFY(ifce->ifce_ifp != NULL);
9702 ifp = ifce->ifce_ifp;
9703
9704 /* flow hash gets recalculated per attach, so check */
9705 if (ifnet_is_attached(ifp, 1)) {
0a7de745 9706 if (ifp->if_flowhash == flowhash) {
39236c6e 9707 (void) ifnet_enable_output(ifp);
0a7de745 9708 }
39236c6e
A
9709 ifnet_decr_iorefcnt(ifp);
9710 }
9711 ifnet_fc_entry_free(ifce);
9712}
9713
9714/*
9715 * Function to compare ifnet_fc_entries in ifnet flow control tree
9716 */
9717static inline int
9718ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
9719{
0a7de745 9720 return fc1->ifce_flowhash - fc2->ifce_flowhash;
39236c6e
A
9721}
9722
9723static int
9724ifnet_fc_add(struct ifnet *ifp)
9725{
9726 struct ifnet_fc_entry keyfc, *ifce;
9727 uint32_t flowhash;
9728
9729 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
9730 VERIFY(ifp->if_flowhash != 0);
9731 flowhash = ifp->if_flowhash;
9732
0a7de745 9733 bzero(&keyfc, sizeof(keyfc));
39236c6e
A
9734 keyfc.ifce_flowhash = flowhash;
9735
9736 lck_mtx_lock_spin(&ifnet_fc_lock);
9737 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9738 if (ifce != NULL && ifce->ifce_ifp == ifp) {
9739 /* Entry is already in ifnet_fc_tree, return */
9740 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9741 return 0;
39236c6e
A
9742 }
9743
9744 if (ifce != NULL) {
9745 /*
9746 * There is a different fc entry with the same flow hash
9747 * but different ifp pointer. There can be a collision
9748 * on flow hash but the probability is low. Let's just
9749 * avoid adding a second one when there is a collision.
9750 */
9751 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9752 return EAGAIN;
39236c6e
A
9753 }
9754
9755 /* become regular mutex */
9756 lck_mtx_convert_spin(&ifnet_fc_lock);
9757
f427ee49 9758 ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
39236c6e
A
9759 ifce->ifce_flowhash = flowhash;
9760 ifce->ifce_ifp = ifp;
9761
9762 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9763 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9764 return 0;
39236c6e
A
9765}
9766
9767static struct ifnet_fc_entry *
9768ifnet_fc_get(uint32_t flowhash)
9769{
9770 struct ifnet_fc_entry keyfc, *ifce;
9771 struct ifnet *ifp;
9772
0a7de745 9773 bzero(&keyfc, sizeof(keyfc));
39236c6e
A
9774 keyfc.ifce_flowhash = flowhash;
9775
9776 lck_mtx_lock_spin(&ifnet_fc_lock);
9777 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9778 if (ifce == NULL) {
9779 /* Entry is not present in ifnet_fc_tree, return */
9780 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9781 return NULL;
39236c6e
A
9782 }
9783
9784 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9785
9786 VERIFY(ifce->ifce_ifp != NULL);
9787 ifp = ifce->ifce_ifp;
9788
9789 /* become regular mutex */
9790 lck_mtx_convert_spin(&ifnet_fc_lock);
9791
9792 if (!ifnet_is_attached(ifp, 0)) {
9793 /*
9794 * This ifp is not attached or in the process of being
9795 * detached; just don't process it.
9796 */
9797 ifnet_fc_entry_free(ifce);
9798 ifce = NULL;
9799 }
9800 lck_mtx_unlock(&ifnet_fc_lock);
9801
0a7de745 9802 return ifce;
39236c6e
A
9803}
9804
9805static void
9806ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
9807{
9808 zfree(ifnet_fc_zone, ifce);
9809}
9810
9811static uint32_t
9812ifnet_calc_flowhash(struct ifnet *ifp)
9813{
9814 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
9815 uint32_t flowhash = 0;
9816
0a7de745 9817 if (ifnet_flowhash_seed == 0) {
39236c6e 9818 ifnet_flowhash_seed = RandomULong();
0a7de745 9819 }
39236c6e 9820
0a7de745 9821 bzero(&fh, sizeof(fh));
39236c6e 9822
0a7de745 9823 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
39236c6e
A
9824 fh.ifk_unit = ifp->if_unit;
9825 fh.ifk_flags = ifp->if_flags;
9826 fh.ifk_eflags = ifp->if_eflags;
9827 fh.ifk_capabilities = ifp->if_capabilities;
9828 fh.ifk_capenable = ifp->if_capenable;
9829 fh.ifk_output_sched_model = ifp->if_output_sched_model;
9830 fh.ifk_rand1 = RandomULong();
9831 fh.ifk_rand2 = RandomULong();
9832
9833try_again:
0a7de745 9834 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
39236c6e
A
9835 if (flowhash == 0) {
9836 /* try to get a non-zero flowhash */
9837 ifnet_flowhash_seed = RandomULong();
9838 goto try_again;
9839 }
9840
0a7de745 9841 return flowhash;
39236c6e
A
9842}
9843
3e170ce0
A
9844int
9845ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
9846 uint16_t flags, uint8_t *data)
9847{
9848#pragma unused(flags)
9849 int error = 0;
9850
9851 switch (family) {
9852 case AF_INET:
9853 if_inetdata_lock_exclusive(ifp);
9854 if (IN_IFEXTRA(ifp) != NULL) {
9855 if (len == 0) {
9856 /* Allow clearing the signature */
9857 IN_IFEXTRA(ifp)->netsig_len = 0;
9858 bzero(IN_IFEXTRA(ifp)->netsig,
0a7de745 9859 sizeof(IN_IFEXTRA(ifp)->netsig));
3e170ce0
A
9860 if_inetdata_lock_done(ifp);
9861 break;
0a7de745 9862 } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
3e170ce0
A
9863 error = EINVAL;
9864 if_inetdata_lock_done(ifp);
9865 break;
9866 }
9867 IN_IFEXTRA(ifp)->netsig_len = len;
9868 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
9869 } else {
9870 error = ENOMEM;
9871 }
9872 if_inetdata_lock_done(ifp);
9873 break;
9874
9875 case AF_INET6:
9876 if_inet6data_lock_exclusive(ifp);
9877 if (IN6_IFEXTRA(ifp) != NULL) {
9878 if (len == 0) {
9879 /* Allow clearing the signature */
9880 IN6_IFEXTRA(ifp)->netsig_len = 0;
9881 bzero(IN6_IFEXTRA(ifp)->netsig,
0a7de745 9882 sizeof(IN6_IFEXTRA(ifp)->netsig));
3e170ce0
A
9883 if_inet6data_lock_done(ifp);
9884 break;
0a7de745 9885 } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
3e170ce0
A
9886 error = EINVAL;
9887 if_inet6data_lock_done(ifp);
9888 break;
9889 }
9890 IN6_IFEXTRA(ifp)->netsig_len = len;
9891 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
9892 } else {
9893 error = ENOMEM;
9894 }
9895 if_inet6data_lock_done(ifp);
9896 break;
9897
9898 default:
9899 error = EINVAL;
9900 break;
9901 }
9902
0a7de745 9903 return error;
3e170ce0
A
9904}
9905
9906int
9907ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
9908 uint16_t *flags, uint8_t *data)
9909{
9910 int error = 0;
9911
0a7de745
A
9912 if (ifp == NULL || len == NULL || data == NULL) {
9913 return EINVAL;
9914 }
3e170ce0
A
9915
9916 switch (family) {
9917 case AF_INET:
9918 if_inetdata_lock_shared(ifp);
9919 if (IN_IFEXTRA(ifp) != NULL) {
9920 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
9921 error = EINVAL;
9922 if_inetdata_lock_done(ifp);
9923 break;
9924 }
f427ee49 9925 if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
3e170ce0 9926 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
0a7de745 9927 } else {
3e170ce0 9928 error = ENOENT;
0a7de745 9929 }
3e170ce0
A
9930 } else {
9931 error = ENOMEM;
9932 }
9933 if_inetdata_lock_done(ifp);
9934 break;
9935
9936 case AF_INET6:
9937 if_inet6data_lock_shared(ifp);
9938 if (IN6_IFEXTRA(ifp) != NULL) {
9939 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
9940 error = EINVAL;
9941 if_inet6data_lock_done(ifp);
9942 break;
9943 }
f427ee49 9944 if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
3e170ce0 9945 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
0a7de745 9946 } else {
3e170ce0 9947 error = ENOENT;
0a7de745 9948 }
3e170ce0
A
9949 } else {
9950 error = ENOMEM;
9951 }
9952 if_inet6data_lock_done(ifp);
9953 break;
9954
9955 default:
9956 error = EINVAL;
9957 break;
9958 }
9959
0a7de745 9960 if (error == 0 && flags != NULL) {
3e170ce0 9961 *flags = 0;
0a7de745 9962 }
3e170ce0 9963
0a7de745 9964 return error;
3e170ce0
A
9965}

int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len = prefixes[i].prefix_len;
		struct in6_addr *prefix = &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the prefix */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));
			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen %d is invalid\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
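
/*
 * Usage sketch (illustrative only): installing a single /96 NAT64
 * prefix.  The setter always takes a NAT64_MAX_NUM_PREFIXES-sized
 * array; slots left with prefix_len == 0 are cleared.
 */
#if 0
static int
example_set_nat64(struct ifnet *ifp, const struct in6_addr *pfx)
{
	struct ipv6_prefix prefixes[NAT64_MAX_NUM_PREFIXES];

	bzero(prefixes, sizeof(prefixes)); /* unused slots get cleared */
	prefixes[0].ipv6_prefix = *pfx;
	prefixes[0].prefix_len = NAT64_PREFIX_LEN_96;

	return ifnet_set_nat64prefix(ifp, prefixes);
}
#endif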

int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
			found_one = 1;
		}
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes != NULL) {
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
	}

out:
	if_inet6data_lock_done(ifp);

	return error;
}
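
/*
 * Usage sketch (illustrative only): since the copy-out is skipped
 * when prefixes == NULL, a caller can probe for the presence of any
 * NAT64 prefix without supplying a buffer.
 */
#if 0
static boolean_t
example_has_nat64(struct ifnet *ifp)
{
	return ifnet_get_nat64prefix(ifp, NULL) == 0;
}
#endif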

static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
		return;
	}

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP) {
			hwcksum_dbg_finalized_hdr++;
		}
		if (did_sw & CSUM_DELAY_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;

	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by passing optlen as -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;

	default:
		return;
	}
}
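
/*
 * Sketch (illustrative only): the flags resolved above are set by an
 * output path that defers checksumming to hardware.  A hypothetical
 * protocol output marks the packet instead of computing any sums:
 */
#if 0
	m->m_pkthdr.csum_flags |= (CSUM_DELAY_IP | CSUM_DELAY_DATA);
	/* in_finalize_cksum() later computes both sums in software
	 * when the force-finalize debug mode is enabled */
#endif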

static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (uint32_t)(m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
	case PF_INET6:
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len) {
			return;
		}

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (uint16_t)(foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				DLIL_PRINTF("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					DLIL_PRINTF("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware that
		 * performs the 16-bit 1's complement sum beginning at
		 * various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
				return;
			}

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (uint16_t)(aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
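
/*
 * Driver-side sketch (illustrative only): a receive path supporting
 * partial checksum offload reports a raw 16-bit 1's complement sum
 * plus the offset it started summing from; the verifier above then
 * recomputes and compares exactly these fields.  hw_sum and hw_start
 * are hypothetical values read from a receive descriptor.
 */
#if 0
	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
	m->m_pkthdr.csum_rx_val = hw_sum;     /* sum over [hw_start, end) */
	m->m_pkthdr.csum_rx_start = hw_start; /* offset from frame start */
#endif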

static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (hwcksum_dbg == 0) {
		return ENODEV;
	}

	if ((i & ~HWCKSUM_DBG_MASK) != 0) {
		return EINVAL;
	}

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return err;
}
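
/*
 * User-space sketch (illustrative only; assumes the handler above is
 * published as net.link.generic.system.hwcksum_dbg_mode, matching the
 * hwcksum_dbg family of OIDs in this file):
 */
#if 0
#include <sys/sysctl.h>

static int
example_set_hwcksum_dbg_mode(uint32_t mode)
{
	return sysctlbyname("net.link.generic.system.hwcksum_dbg_mode",
	    NULL, NULL, &mode, sizeof(mode));
}
#endif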

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_forced = i;

	return err;
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_adj = i;

	return err;
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {
		/* Nothing to do: SYSCTL_OUT handles a size-probe request */
	}
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return err;
}
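
/*
 * User-space sketch (illustrative only; the OID string below is an
 * assumption following the net.link.generic.system naming used in
 * this file):
 */
#if 0
	struct chain_len_stats tls;
	size_t len = sizeof(tls);

	if (sysctlbyname("net.link.generic.system.tx_chain_len_stats",
	    &tls, &len, NULL, 0) == 0) {
		/* tls now holds the TX chain length histogram */
	}
#endif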


#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;
	uint16_t len;
	uint16_t sumr;  /* reference */
	uint16_t sumrp; /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))

static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *); /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sumr, sumtbl[n].sumrp);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */
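
/*
 * Reference sketch (illustrative only) of the arithmetic the self-test
 * exercises: a 16-bit 1's complement sum accumulates into 32 bits and
 * folds the carries back in ("end-around carry").  Byte-order handling
 * in the real b_sum16()/m_sum16() differs per architecture; this
 * sketch treats the data as big-endian 16-bit words.
 */
#if 0
static uint16_t
example_sum16(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)((buf[0] << 8) | buf[1]);
		buf += 2;
		len -= 2;
	}
	if (len == 1) {
		sum += (uint32_t)(buf[0] << 8); /* pad the odd byte */
	}
	while (sum > 0xffff) {
		sum = (sum & 0xffff) + (sum >> 16); /* end-around carry */
	}
	return (uint16_t)sum;
}
#endif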

#define CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
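
/*
 * Usage sketch (illustrative only): the stringifier is meant for
 * logging, e.g. when a data-link kernel event is posted:
 */
#if 0
	DLIL_PRINTF("%s: posting %s (%u)\n", if_name(ifp),
	    dlil_kev_dl_code_str(event_code), event_code);
#endif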

static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}

void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
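
/*
 * Sketch (illustrative only) of the rate limiting above: the callout
 * deadline is snapped to the next periodic boundary, so repeated
 * threshold crossings within one interval coalesce into a single
 * nstat notification.
 */
#if 0
	uint64_t ival, now, deadline;

	nanoseconds_to_absolutetime(threshold_interval * NSEC_PER_SEC, &ival);
	now = mach_absolute_time();
	deadline = now;
	clock_deadline_for_periodic_event(ival, now, &deadline);
	/* deadline is now the next multiple of ival past `now` */
#endif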

#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name carries the input parameters of
 * ifnet_get_keepalive_offload_frames():
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The returned length gives used_frames_count.
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root may look at other processes' TCP frames.
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		goto done;
	}
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL) {
		_FREE(frames_array, M_TEMP);
	}
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
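
/*
 * User-space sketch (illustrative only; the OID name below is an
 * assumption, and the call requires root).  The three trailing MIB
 * entries carry the input parameters documented above.
 */
#if 0
#include <sys/sysctl.h>

static int
example_get_kao_frames(int ifindex, void *buf, size_t *buflen)
{
	int mib[CTL_MAXNAME];
	size_t miblen = CTL_MAXNAME - 3;

	if (sysctlnametomib("net.link.generic.system.get_kao_frames",
	    mib, &miblen) != 0) {
		return -1;
	}
	mib[miblen + 0] = ifindex;  /* name[0]: interface index */
	mib[miblen + 1] = (int)(*buflen /
	    sizeof(struct ifnet_keepalive_offload_frame)); /* name[1] */
	mib[miblen + 2] = 0;        /* name[2]: frame_data_offset */

	return sysctl(mib, (u_int)(miblen + 3), buf, buflen, NULL, 0);
}
#endif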

void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}

static inline u_int32_t
_set_flags(u_int32_t *flags_p, u_int32_t set_flags)
{
	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
}

static inline void
_clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
{
	OSBitAndAtomic(~clear_flags, flags_p);
}

__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}

__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}

__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}

__private_extern__ void
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_xflags, clear_flags);
}
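
/*
 * Usage sketch (illustrative only): because if_set_eflags() returns
 * the flags word as it was before the atomic OR, a caller can set a
 * bit and learn whether it was already set in one step.
 */
#if 0
	if ((if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED) &
	    IFEF_QOSMARKING_ENABLED) == 0) {
		/* this caller performed the 0 -> 1 transition */
	}
#endif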