apple/xnu (xnu-6153.101.6): bsd/net/dlil.c
/*
 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <stddef.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#endif /* INET */

#if INET6
#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#endif /* INET6 */
#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_netem.h>

#if NECP
#include <net/necp.h>
#endif /* NECP */


#include <os/log.h>

#define DBG_LAYER_BEG           DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END           DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE     4 /* LONGWORDS */
#define MAX_LINKADDR            4 /* LONGWORDS */
#define M_NKE                   M_IFADDR

#if 1
#define DLIL_PRINTF     printf
#else
#define DLIL_PRINTF     kprintf
#endif

#define IF_DATA_REQUIRE_ALIGNED_64(f) \
    _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
    _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))

enum {
    kProtoKPI_v1 = 1,
    kProtoKPI_v2 = 2
};

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock. The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
    SLIST_ENTRY(if_proto) next_hash;
    u_int32_t refcount;
    u_int32_t detached;
    struct ifnet *ifp;
    protocol_family_t protocol_family;
    int proto_kpi;
    union {
        struct {
            proto_media_input input;
            proto_media_preout pre_output;
            proto_media_event event;
            proto_media_ioctl ioctl;
            proto_media_detached detached;
            proto_media_resolve_multi resolve_multi;
            proto_media_send_arp send_arp;
        } v1;
        struct {
            proto_media_input_v2 input;
            proto_media_preout pre_output;
            proto_media_event event;
            proto_media_ioctl ioctl;
            proto_media_detached detached;
            proto_media_resolve_multi resolve_multi;
            proto_media_send_arp send_arp;
        } v2;
    } kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);

#define DLIL_SDLDATALEN \
    (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
    struct ifnet dl_if;                     /* public ifnet */
    /*
     * DLIL private fields, protected by dl_if_lock
     */
    decl_lck_mtx_data(, dl_if_lock);
    TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnet link */
    u_int32_t dl_if_flags;                  /* flags (below) */
    u_int32_t dl_if_refcnt;                 /* refcnt */
    void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
    void *dl_if_uniqueid;                   /* unique interface id */
    size_t dl_if_uniqueid_len;              /* length of the unique id */
    char dl_if_namestorage[IFNAMSIZ];       /* interface name storage */
    char dl_if_xnamestorage[IFXNAMSIZ];     /* external name storage */
    struct {
        struct ifaddr ifa;                  /* lladdr ifa */
        u_int8_t asdl[DLIL_SDLMAXLEN];      /* addr storage */
        u_int8_t msdl[DLIL_SDLMAXLEN];      /* mask storage */
    } dl_if_lladdr;
    u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
    struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
    ctrace_t dl_if_attach;                  /* attach PC stacktrace */
    ctrace_t dl_if_detach;                  /* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE      0x1     /* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE      0x2     /* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG      0x4     /* has debugging info */

#define IF_REF_TRACE_HIST_SIZE  8 /* size of ref trace history */

/* For gdb */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
    struct dlil_ifnet dldbg_dlif;           /* dlil_ifnet */
    u_int16_t dldbg_if_refhold_cnt;         /* # ifnet references */
    u_int16_t dldbg_if_refrele_cnt;         /* # ifnet releases */
    /*
     * Circular lists of ifnet_{reference,release} callers.
     */
    ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
    ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)  (&s->dl_if)
#define IFP_TO_DLIL(s)  ((struct dlil_ifnet *)s)

struct ifnet_filter {
    TAILQ_ENTRY(ifnet_filter) filt_next;
    u_int32_t filt_skip;
    u_int32_t filt_flags;
    ifnet_t filt_ifp;
    const char *filt_name;
    void *filt_cookie;
    protocol_family_t filt_protocol;
    iff_input_func filt_input;
    iff_output_func filt_output;
    iff_event_func filt_event;
    iff_ioctl_func filt_ioctl;
    iff_detached_func filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;    /* debugging (enabled) */
#else
static unsigned int ifnet_debug;        /* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;          /* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;       /* size of dlif_size + headroom */
static struct zone *dlif_zone;          /* zone for dlil_ifnet */

#define DLIF_ZONE_MAX   IFNETS_MAX      /* maximum elements in zone */
#define DLIF_ZONE_NAME  "ifnet"         /* zone name */

static unsigned int dlif_filt_size;     /* size of ifnet_filter */
static struct zone *dlif_filt_zone;     /* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX      8               /* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME     "ifnet_filter"  /* zone name */

static unsigned int dlif_phash_size;    /* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;    /* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX     DLIF_ZONE_MAX      /* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME    "ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;    /* size of if_proto */
static struct zone *dlif_proto_zone;    /* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX     (DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME    "ifnet_proto"     /* zone name */

static unsigned int dlif_tcpstat_size;    /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;    /* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX   1                /* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME  "ifnet_tcpstat"  /* zone name */

static unsigned int dlif_udpstat_size;    /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;    /* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX   1                /* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME  "ifnet_udpstat"  /* zone name */

static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;

static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_main_input_thread_cont(void *, wait_result_t);

static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_cont(void *, wait_result_t);

static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);

static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, struct ifnet *, boolean_t);
static boolean_t dlil_input_stats_sync(struct ifnet *,
    struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void dlil_incr_pending_thread_count(void);
static void dlil_decr_pending_thread_count(void);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_func(void *, wait_result_t);
static void ifnet_start_thread_cont(void *, wait_result_t);

static void ifnet_poll_thread_func(void *, wait_result_t);
static void ifnet_poll_thread_cont(void *, wait_result_t);

static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
    boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */

/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run; /* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
    char ifk_name[IFNAMSIZ];
    uint32_t ifk_unit;
    uint32_t ifk_flags;
    uint32_t ifk_eflags;
    uint32_t ifk_capabilities;
    uint32_t ifk_capenable;
    uint32_t ifk_output_sched_model;
    uint32_t ifk_rand1;
    uint32_t ifk_rand2;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
    RB_ENTRY(ifnet_fc_entry) ifce_entry;
    u_int32_t ifce_flowhash;
    struct ifnet *ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;      /* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME      "ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX       32

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };

SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define IF_SNDQ_MINLEN  32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN  32
#define IF_RCVQ_MAXLEN  256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define IF_RXPOLL_DECAY 2       /* ilog2 of EWMA decay rate (4) */
u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN     (10ULL * 1000 * 1000)   /* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
#define IF_RXPOLL_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT        10
static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT        100
static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");

#define HWCKSUM_DBG_PARTIAL_FORCED      0x1  /* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ   0x2  /* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED     0x10 /* forced finalize */
#define HWCKSUM_DBG_MASK \
    (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
    HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;   /* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2; /* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");


unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

void
ifnet_filter_update_tso(boolean_t filter_enable)
{
    /*
     * Update the filter count and route generation ID to let TCP
     * know it should reevaluate doing TSO or not.
     */
    OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
    routegenid_update();
}
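/*
 * Note: interface filters need to see individual packets, so while at
 * least one filter is attached (dlil_filter_disable_tso_count > 0) TCP
 * is expected to refrain from TSO; bumping the route generation ID
 * forces TCP to re-query that state on its next route lookup.
 */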

#define DLIL_INPUT_CHECK(m, ifp) {                                      \
    struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                        \
    if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||           \
        !(mbuf_flags(m) & MBUF_PKTHDR)) {                               \
        panic_plain("%s: invalid mbuf %p\n", __func__, m);              \
        /* NOTREACHED */                                                \
    }                                                                   \
}
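/*
 * DLIL_INPUT_CHECK() asserts the basic invariants of an inbound mbuf:
 * it must carry a packet header, and its recorded receive interface
 * must match the ifp it is being processed on. The loopback interface
 * is exempt from the rcvif match, presumably because packets queued to
 * lo_ifp's input thread retain their originating interface as rcvif.
 */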

#define DLIL_EWMA(old, new, decay) do {                                 \
    u_int32_t _avg;                                                     \
    if ((_avg = (old)) > 0)                                             \
        _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);         \
    else                                                                \
        _avg = (new);                                                   \
    (old) = _avg;                                                       \
} while (0)
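/*
 * Example: with decay = 2 the EWMA weighs the new sample by 1/4.
 * For old = 100 and new = 20:
 *   ((100 << 2) - 100 + 20) >> 2 = 320 >> 2 = 80,
 * i.e. 3/4 * 100 + 1/4 * 20.
 */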

#define MBPS    (1ULL * 1000 * 1000)
#define GBPS    (MBPS * 1000)

struct rxpoll_time_tbl {
    u_int64_t speed;    /* downlink speed */
    u_int32_t plowat;   /* packets low watermark */
    u_int32_t phiwat;   /* packets high watermark */
    u_int32_t blowat;   /* bytes low watermark */
    u_int32_t bhiwat;   /* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
    { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
    { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
    { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
    { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
    { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
    { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
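/*
 * The table above is presumably matched against the interface's downlink
 * rate to pick the packet/byte watermarks used when switching between
 * interrupt and poll mode; the all-zero row terminates the list.
 */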

decl_lck_mtx_data(static, dlil_thread_sync_lock);
static uint32_t dlil_pending_thread_cnt = 0;
static void
dlil_incr_pending_thread_count(void)
{
    LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
    lck_mtx_lock(&dlil_thread_sync_lock);
    dlil_pending_thread_cnt++;
    lck_mtx_unlock(&dlil_thread_sync_lock);
}

static void
dlil_decr_pending_thread_count(void)
{
    LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
    lck_mtx_lock(&dlil_thread_sync_lock);
    VERIFY(dlil_pending_thread_cnt > 0);
    dlil_pending_thread_cnt--;
    if (dlil_pending_thread_cnt == 0) {
        wakeup(&dlil_pending_thread_cnt);
    }
    lck_mtx_unlock(&dlil_thread_sync_lock);
}
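/*
 * The counter above tracks DLIL worker threads that have been started
 * but not yet finished initializing; a waiter can block on
 * &dlil_pending_thread_cnt until the count drains to zero, which is
 * what the wakeup() above signals.
 */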

int
proto_hash_value(u_int32_t protocol_family)
{
    /*
     * dlil_proto_unplumb_all() depends on the mapping between
     * the hash bucket index and the protocol family defined
     * here; future changes must be applied there as well.
     */
    switch (protocol_family) {
    case PF_INET:
        return 0;
    case PF_INET6:
        return 1;
    case PF_VLAN:
        return 2;
    case PF_802154:
        return 3;
    case PF_UNSPEC:
    default:
        return 4;
    }
}
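/*
 * Bucket indices range over 0..4, so the per-ifnet if_proto_hash[] table
 * (sized by PROTO_HASH_SLOTS) must hold at least five slots; every family
 * other than PF_INET, PF_INET6, PF_VLAN and PF_802154 shares the last
 * bucket.
 */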

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
    struct if_proto *proto = NULL;
    u_int32_t i = proto_hash_value(protocol_family);

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

    if (ifp->if_proto_hash != NULL) {
        proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
    }

    while (proto != NULL && proto->protocol_family != protocol_family) {
        proto = SLIST_NEXT(proto, next_hash);
    }

    if (proto != NULL) {
        if_proto_ref(proto);
    }

    return proto;
}
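/*
 * On success the entry is returned with an extra reference taken via
 * if_proto_ref(); the caller is expected to balance it by dropping the
 * reference with if_proto_free() once done.
 */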

static void
if_proto_ref(struct if_proto *proto)
{
    atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static void
if_proto_free(struct if_proto *proto)
{
    u_int32_t oldval;
    struct ifnet *ifp = proto->ifp;
    u_int32_t proto_family = proto->protocol_family;
    struct kev_dl_proto_data ev_pr_data;

    oldval = atomic_add_32_ov(&proto->refcount, -1);
    if (oldval > 1) {
        return;
    }

    /* No more reference on this, protocol must have been detached */
    VERIFY(proto->detached);

    if (proto->proto_kpi == kProtoKPI_v1) {
        if (proto->kpi.v1.detached) {
            proto->kpi.v1.detached(ifp, proto->protocol_family);
        }
    }
    if (proto->proto_kpi == kProtoKPI_v2) {
        if (proto->kpi.v2.detached) {
            proto->kpi.v2.detached(ifp, proto->protocol_family);
        }
    }

    /*
     * Cleanup routes that may still be in the routing table for that
     * interface/protocol pair.
     */
    if_rtproto_del(ifp, proto_family);

    /*
     * The reserved field carries the number of protocols still attached
     * (subject to change).
     */
    ifnet_lock_shared(ifp);
    ev_pr_data.proto_family = proto_family;
    ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
    ifnet_lock_done(ifp);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
        (struct net_event_data *)&ev_pr_data,
        sizeof(struct kev_dl_proto_data));

    if (ev_pr_data.proto_remaining_count == 0) {
        /*
         * The protocol count has gone to zero, mark the interface down.
         * This used to be done by configd.KernelEventMonitor, but that
         * is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, 0, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    }

    zfree(dlif_proto_zone, proto);
}

__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
    unsigned int type = 0;
    int ass = 1;

    switch (what) {
    case IFNET_LCK_ASSERT_EXCLUSIVE:
        type = LCK_RW_ASSERT_EXCLUSIVE;
        break;

    case IFNET_LCK_ASSERT_SHARED:
        type = LCK_RW_ASSERT_SHARED;
        break;

    case IFNET_LCK_ASSERT_OWNED:
        type = LCK_RW_ASSERT_HELD;
        break;

    case IFNET_LCK_ASSERT_NOTOWNED:
        /* nothing to do here for RW lock; bypass assert */
        ass = 0;
        break;

    default:
        panic("bad ifnet assert type: %d", what);
        /* NOTREACHED */
    }
    if (ass) {
        LCK_RW_ASSERT(&ifp->if_lock, type);
    }
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_lock);
}

#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_inetdata_lock);
}
#endif

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_inet6data_lock);
}
#endif

__private_extern__ void
ifnet_head_lock_shared(void)
{
    lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
    lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
    lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
    LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}

/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 *
 * Note:
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
    u_int32_t count = 0;
    int i;

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

    if (ifp->if_proto_hash == NULL) {
        goto done;
    }

    for (i = 0; i < PROTO_HASH_SLOTS; i++) {
        struct if_proto *proto;
        SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
            if (list != NULL && count < list_count) {
                list[count] = proto->protocol_family;
            }
            count++;
        }
    }
done:
    return count;
}

__private_extern__ u_int32_t
if_get_protolist(struct ifnet *ifp, u_int32_t *protolist, u_int32_t count)
{
    ifnet_lock_shared(ifp);
    count = dlil_ifp_protolist(ifp, protolist, count);
    ifnet_lock_done(ifp);
    return count;
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
    _FREE(list, M_TEMP);
}
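/*
 * if_free_protolist() releases the list with _FREE(..., M_TEMP), which
 * implies callers of if_get_protolist() are expected to have allocated
 * the protolist array from the M_TEMP malloc type.
 */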

__private_extern__ int
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
    struct net_event_data ev_data;
    struct kev_msg ev_msg;

    bzero(&ev_msg, sizeof(ev_msg));
    bzero(&ev_data, sizeof(ev_data));
    /*
     * A net event always starts with a net_event_data structure,
     * but the caller can generate a simple net event or
     * provide a longer event structure to post.
     */
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = event_subclass;
    ev_msg.event_code = event_code;

    if (event_data == NULL) {
        event_data = &ev_data;
        event_data_len = sizeof(struct net_event_data);
    }

    strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
    event_data->if_family = ifp->if_family;
    event_data->if_unit = (u_int32_t)ifp->if_unit;

    ev_msg.dv[0].data_length = event_data_len;
    ev_msg.dv[0].data_ptr = event_data;
    ev_msg.dv[1].data_length = 0;

    bool update_generation = true;
    if (event_subclass == KEV_DL_SUBCLASS) {
        /* Don't update interface generation for frequent link quality and state changes */
        switch (event_code) {
        case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
        case KEV_DL_RRC_STATE_CHANGED:
        case KEV_DL_NODE_PRESENCE:
        case KEV_DL_NODE_ABSENCE:
        case KEV_DL_MASTER_ELECTED:
            update_generation = false;
            break;
        default:
            break;
        }
    }

    return dlil_event_internal(ifp, &ev_msg, update_generation);
}

__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
    int ret = EINVAL;
    void *buf, *base, **pbuf;

    if (ifp == NULL) {
        goto end;
    }

    if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
        /* allocate tcpstat_local structure */
        buf = zalloc(dlif_tcpstat_zone);
        if (buf == NULL) {
            ret = ENOMEM;
            goto end;
        }
        bzero(buf, dlif_tcpstat_bufsize);

        /* Get the 64-bit aligned base address for this object */
        base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
            sizeof(u_int64_t));
        VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
            ((intptr_t)buf + dlif_tcpstat_bufsize));

        /*
         * Wind back a pointer size from the aligned base and
         * save the original address so we can free it later.
         */
        pbuf = (void **)((intptr_t)base - sizeof(void *));
        *pbuf = buf;
        ifp->if_tcp_stat = base;

        /* allocate udpstat_local structure */
        buf = zalloc(dlif_udpstat_zone);
        if (buf == NULL) {
            ret = ENOMEM;
            goto end;
        }
        bzero(buf, dlif_udpstat_bufsize);

        /* Get the 64-bit aligned base address for this object */
        base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
            sizeof(u_int64_t));
        VERIFY(((intptr_t)base + dlif_udpstat_size) <=
            ((intptr_t)buf + dlif_udpstat_bufsize));

        /*
         * Wind back a pointer size from the aligned base and
         * save the original address so we can free it later.
         */
        pbuf = (void **)((intptr_t)base - sizeof(void *));
        *pbuf = buf;
        ifp->if_udp_stat = base;

        VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
            IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

        ret = 0;
    }
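    /*
     * Resulting layout of each stats buffer (tcp and udp alike):
     *
     *   buf              pbuf = base - sizeof(void *)
     *   |                |      base = P2ROUNDUP(buf + 8, 8)
     *   v                v      v
     *   +----------------+------+--------------------------+
     *   | headroom       | buf  | 64-bit aligned stats     |
     *   +----------------+------+--------------------------+
     *
     * The word just below the aligned base stores the original
     * zalloc() pointer so it can be recovered at zfree() time.
     */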

    if (ifp->if_ipv4_stat == NULL) {
        MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
            sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
        if (ifp->if_ipv4_stat == NULL) {
            ret = ENOMEM;
            goto end;
        }
    }

    if (ifp->if_ipv6_stat == NULL) {
        MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
            sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
        if (ifp->if_ipv6_stat == NULL) {
            ret = ENOMEM;
            goto end;
        }
    }
end:
    if (ifp != NULL && ret != 0) {
        if (ifp->if_tcp_stat != NULL) {
            pbuf = (void **)
                ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
            zfree(dlif_tcpstat_zone, *pbuf);
            ifp->if_tcp_stat = NULL;
        }
        if (ifp->if_udp_stat != NULL) {
            pbuf = (void **)
                ((intptr_t)ifp->if_udp_stat - sizeof(void *));
            zfree(dlif_udpstat_zone, *pbuf);
            ifp->if_udp_stat = NULL;
        }
        if (ifp->if_ipv4_stat != NULL) {
            FREE(ifp->if_ipv4_stat, M_TEMP);
            ifp->if_ipv4_stat = NULL;
        }
        if (ifp->if_ipv6_stat != NULL) {
            FREE(ifp->if_ipv6_stat, M_TEMP);
            ifp->if_ipv6_stat = NULL;
        }
    }

    return ret;
}

static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
    ASSERT(ifp != NULL);
    ifnet_set_poll_cycle(ifp, NULL);
    ifp->if_poll_update = 0;
    ifp->if_poll_flags = 0;
    ifp->if_poll_req = 0;
    ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
    bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
    bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
    bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
    net_timerclear(&ifp->if_poll_mode_holdtime);
    net_timerclear(&ifp->if_poll_mode_lasttime);
    net_timerclear(&ifp->if_poll_sample_holdtime);
    net_timerclear(&ifp->if_poll_sample_lasttime);
    net_timerclear(&ifp->if_poll_dbg_lasttime);
}

static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
    boolean_t dlil_rxpoll_input;
    thread_continue_t func;
    u_int32_t limit;
    int error;

    dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
        (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));

    /* NULL ifp indicates the main input thread, called at dlil_init time */
    if (ifp == NULL) {
        func = dlil_main_input_thread_func;
        VERIFY(inp == dlil_main_input_thread);
        (void) strlcat(inp->input_name,
            "main_input", DLIL_THREADNAME_LEN);
    } else if (dlil_rxpoll_input) {
        func = dlil_rxpoll_input_thread_func;
        VERIFY(inp != dlil_main_input_thread);
        (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
            "%s_input_poll", if_name(ifp));
    } else {
        func = dlil_input_thread_func;
        VERIFY(inp != dlil_main_input_thread);
        (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
            "%s_input", if_name(ifp));
    }
    VERIFY(inp->input_thr == THREAD_NULL);

    inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
    lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

    inp->ifp = ifp; /* NULL for main input thread */
    /*
     * For interfaces that support opportunistic polling, set the
     * low and high watermarks for outstanding inbound packets/bytes.
     * Also define freeze times for transitioning between modes
     * and updating the average.
     */
    if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
        limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
        if (ifp->if_xflags & IFXF_LEGACY) {
            (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
        }
    } else {
        limit = (u_int32_t)-1;
    }

    _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
    if (inp == dlil_main_input_thread) {
        struct dlil_main_threading_info *inpm =
            (struct dlil_main_threading_info *)inp;
        _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
    }

    error = kernel_thread_start(func, inp, &inp->input_thr);
    if (error == KERN_SUCCESS) {
        ml_thread_policy(inp->input_thr, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
        /*
         * We create an affinity set so that the matching workloop
         * thread or the starter thread (for loopback) can be
         * scheduled on the same processor set as the input thread.
         */
        if (net_affinity) {
            struct thread *tp = inp->input_thr;
            u_int32_t tag;
            /*
             * Randomize to reduce the probability
             * of affinity tag namespace collision.
             */
            read_frandom(&tag, sizeof(tag));
            if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
                thread_reference(tp);
                inp->tag = tag;
                inp->net_affinity = TRUE;
            }
        }
    } else if (inp == dlil_main_input_thread) {
        panic_plain("%s: couldn't create main input thread", __func__);
        /* NOTREACHED */
    } else {
        panic_plain("%s: couldn't create %s input thread", __func__,
            if_name(ifp));
        /* NOTREACHED */
    }
    OSAddAtomic(1, &cur_dlil_input_threads);

    return error;
}
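/*
 * Each interface thus gets a dedicated input thread, named "<if>_input"
 * (or "<if>_input_poll" when legacy opportunistic polling is in effect),
 * while the main input thread created at dlil_init time also drains
 * loopback traffic via its separate lo_rcvq_pkts queue.
 */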
1414
5ba3f43e
A
1415#if TEST_INPUT_THREAD_TERMINATION
1416static int
1417sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
316670eb 1418{
5ba3f43e
A
1419#pragma unused(arg1, arg2)
1420 uint32_t i;
1421 int err;
316670eb 1422
5ba3f43e 1423 i = if_input_thread_termination_spin;
316670eb 1424
5ba3f43e 1425 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
1426 if (err != 0 || req->newptr == USER_ADDR_NULL) {
1427 return err;
1428 }
5ba3f43e 1429
0a7de745
A
1430 if (net_rxpoll == 0) {
1431 return ENXIO;
1432 }
316670eb 1433
5ba3f43e 1434 if_input_thread_termination_spin = i;
0a7de745 1435 return err;
5ba3f43e
A
1436}
1437#endif /* TEST_INPUT_THREAD_TERMINATION */
1438
1439static void
1440dlil_clean_threading_info(struct dlil_threading_info *inp)
1441{
316670eb
A
1442 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1443 lck_grp_free(inp->lck_grp);
1444
1445 inp->input_waiting = 0;
1446 inp->wtot = 0;
0a7de745 1447 bzero(inp->input_name, sizeof(inp->input_name));
316670eb
A
1448 inp->ifp = NULL;
1449 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1450 qlimit(&inp->rcvq_pkts) = 0;
0a7de745 1451 bzero(&inp->stats, sizeof(inp->stats));
316670eb
A
1452
1453 VERIFY(!inp->net_affinity);
1454 inp->input_thr = THREAD_NULL;
1455 VERIFY(inp->wloop_thr == THREAD_NULL);
1456 VERIFY(inp->poll_thr == THREAD_NULL);
1457 VERIFY(inp->tag == 0);
316670eb
A
1458#if IFNET_INPUT_SANITY_CHK
1459 inp->input_mbuf_cnt = 0;
1460#endif /* IFNET_INPUT_SANITY_CHK */
5ba3f43e 1461}
316670eb 1462
5ba3f43e
A
1463static void
1464dlil_terminate_input_thread(struct dlil_threading_info *inp)
1465{
1466 struct ifnet *ifp = inp->ifp;
cb323159 1467 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
5ba3f43e
A
1468
1469 VERIFY(current_thread() == inp->input_thr);
1470 VERIFY(inp != dlil_main_input_thread);
1471
1472 OSAddAtomic(-1, &cur_dlil_input_threads);
1473
1474#if TEST_INPUT_THREAD_TERMINATION
1475 { /* do something useless that won't get optimized away */
0a7de745 1476 uint32_t v = 1;
5ba3f43e 1477 for (uint32_t i = 0;
0a7de745
A
1478 i < if_input_thread_termination_spin;
1479 i++) {
5ba3f43e
A
1480 v = (i + 1) * v;
1481 }
cb323159 1482 DLIL_PRINTF("the value is %d\n", v);
316670eb 1483 }
5ba3f43e
A
1484#endif /* TEST_INPUT_THREAD_TERMINATION */
1485
1486 lck_mtx_lock_spin(&inp->input_lck);
cb323159 1487 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
5ba3f43e
A
1488 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1489 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1490 wakeup_one((caddr_t)&inp->input_waiting);
1491 lck_mtx_unlock(&inp->input_lck);
316670eb 1492
cb323159
A
1493 /* free up pending packets */
1494 if (pkt.cp_mbuf != NULL) {
1495 mbuf_freem_list(pkt.cp_mbuf);
1496 }
1497
316670eb
A
1498 /* for the extra refcnt from kernel_thread_start() */
1499 thread_deallocate(current_thread());
1500
5ba3f43e 1501 if (dlil_verbose) {
cb323159 1502 DLIL_PRINTF("%s: input thread terminated\n",
5ba3f43e
A
1503 if_name(ifp));
1504 }
1505
316670eb
A
1506 /* this is the end */
1507 thread_terminate(current_thread());
1508 /* NOTREACHED */
1509}
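/*
 * Hedged sketch of the requester side of the termination handshake
 * (the real caller lives elsewhere in dlil.c and may differ): set
 * DLIL_INPUT_TERMINATE, wake the input thread, then sleep until it
 * acknowledges with DLIL_INPUT_TERMINATE_COMPLETE as done above.
 * Illustrative only.
 */
static void
example_request_input_thread_exit(struct dlil_threading_info *inp)
{
	lck_mtx_lock(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_TERMINATE;
	wakeup_one((caddr_t)&inp->input_waiting);
	while (!(inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)) {
		(void) msleep(&inp->input_waiting, &inp->input_lck,
		    (PZERO - 1), "dlil_term", NULL);
	}
	lck_mtx_unlock(&inp->input_lck);
}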
1510
2d21ac55
A
1511static kern_return_t
1512dlil_affinity_set(struct thread *tp, u_int32_t tag)
1513{
1514 thread_affinity_policy_data_t policy;
1515
0a7de745 1516 bzero(&policy, sizeof(policy));
2d21ac55 1517 policy.affinity_tag = tag;
0a7de745
A
1518 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1519 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
2d21ac55
A
1520}
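/*
 * Minimal usage sketch for the helper above: co-locate two threads by
 * applying the same randomized tag to both, mirroring what
 * dlil_create_input_thread() and dlil_input_handler() do. The function
 * name is hypothetical.
 */
static void
example_colocate_threads(struct thread *a, struct thread *b)
{
	u_int32_t tag;

	/* randomize to reduce affinity tag namespace collisions */
	read_frandom(&tag, sizeof(tag));
	if (dlil_affinity_set(a, tag) == KERN_SUCCESS) {
		(void) dlil_affinity_set(b, tag);
	}
}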
1521
91447636
A
1522void
1523dlil_init(void)
1524{
6d2010ae
A
1525 thread_t thread = THREAD_NULL;
1526
1527 /*
1528 * The following fields must be 64-bit aligned for atomic operations.
1529 */
1530 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1531 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1532 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1533 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1534 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1535 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1536 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1537 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1538 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1539 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1540 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1541 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1542 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1543 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1544 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1545
1546 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1547 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1548 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1549 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1550 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1551 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1552 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1553 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1554 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1555 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1556 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1557 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1558 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1559 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1560 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1561
1562 /*
1563 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1564 */
1565 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1566 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1567 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1568 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1569 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
39236c6e
A
1570 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1571 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1572 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1573 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
5ba3f43e 1574 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
6d2010ae
A
1575 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1576 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1577 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1578 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1579
39236c6e
A
1580 /*
1581 * ... as well as the mbuf checksum flags counterparts.
1582 */
1583 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1584 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1585 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1586 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1587 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1588 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1589 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1590 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1591 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
5ba3f43e 1592 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
39236c6e
A
1593 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1594
6d2010ae
A
1595 /*
1596 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1597 */
1598 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
316670eb 1599 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
6d2010ae 1600
39236c6e
A
1601 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1602 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1603 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1604 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1605
1606 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1607 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1608 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1609
1610 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1611 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1612 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1613 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1614 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1615 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1616 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1617 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1618 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1619 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1620 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1621 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1622 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1623 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1624 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1625 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
cb323159
A
1626 _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
1627 _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
1628 _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
39236c6e
A
1629
1630 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1631 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1632 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1633 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1634 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
fe8ab488 1635 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
39037602 1636 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
cb323159
A
1637 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
1638 _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);
39236c6e
A
1639
1640 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1641 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1642
6d2010ae 1643 PE_parse_boot_argn("net_affinity", &net_affinity,
0a7de745 1644 sizeof(net_affinity));
b0d623f7 1645
0a7de745 1646 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
316670eb 1647
0a7de745 1648 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
6d2010ae 1649
0a7de745 1650 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
6d2010ae 1651
cb323159 1652 VERIFY(dlil_pending_thread_cnt == 0);
0a7de745
A
1653 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1654 sizeof(struct dlil_ifnet_dbg);
6d2010ae 1655 /* Enforce 64-bit alignment for dlil_ifnet structure */
0a7de745
A
1656 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
1657 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
6d2010ae
A
1658 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1659 0, DLIF_ZONE_NAME);
1660 if (dlif_zone == NULL) {
316670eb
A
1661 panic_plain("%s: failed allocating %s", __func__,
1662 DLIF_ZONE_NAME);
6d2010ae
A
1663 /* NOTREACHED */
1664 }
1665 zone_change(dlif_zone, Z_EXPAND, TRUE);
1666 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1667
0a7de745 1668 dlif_filt_size = sizeof(struct ifnet_filter);
6d2010ae
A
1669 dlif_filt_zone = zinit(dlif_filt_size,
1670 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1671 if (dlif_filt_zone == NULL) {
316670eb 1672 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1673 DLIF_FILT_ZONE_NAME);
1674 /* NOTREACHED */
1675 }
1676 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1677 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1678
0a7de745 1679 dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS;
6d2010ae
A
1680 dlif_phash_zone = zinit(dlif_phash_size,
1681 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1682 if (dlif_phash_zone == NULL) {
316670eb 1683 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1684 DLIF_PHASH_ZONE_NAME);
1685 /* NOTREACHED */
1686 }
1687 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1688 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1689
0a7de745 1690 dlif_proto_size = sizeof(struct if_proto);
6d2010ae
A
1691 dlif_proto_zone = zinit(dlif_proto_size,
1692 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1693 if (dlif_proto_zone == NULL) {
316670eb 1694 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1695 DLIF_PROTO_ZONE_NAME);
1696 /* NOTREACHED */
1697 }
1698 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1699 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1700
0a7de745 1701 dlif_tcpstat_size = sizeof(struct tcpstat_local);
316670eb
A
1702 /* Enforce 64-bit alignment for tcpstat_local structure */
1703 dlif_tcpstat_bufsize =
0a7de745 1704 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
316670eb 1705 dlif_tcpstat_bufsize =
0a7de745 1706 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
316670eb
A
1707 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1708 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1709 DLIF_TCPSTAT_ZONE_NAME);
1710 if (dlif_tcpstat_zone == NULL) {
1711 panic_plain("%s: failed allocating %s", __func__,
1712 DLIF_TCPSTAT_ZONE_NAME);
1713 /* NOTREACHED */
1714 }
1715 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1716 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1717
0a7de745 1718 dlif_udpstat_size = sizeof(struct udpstat_local);
316670eb
A
1719 /* Enforce 64-bit alignment for udpstat_local structure */
1720 dlif_udpstat_bufsize =
0a7de745 1721 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
316670eb 1722 dlif_udpstat_bufsize =
0a7de745 1723 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
316670eb
A
1724 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1725 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1726 DLIF_UDPSTAT_ZONE_NAME);
1727 if (dlif_udpstat_zone == NULL) {
1728 panic_plain("%s: failed allocating %s", __func__,
1729 DLIF_UDPSTAT_ZONE_NAME);
1730 /* NOTREACHED */
1731 }
1732 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1733 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1734
6d2010ae 1735 ifnet_llreach_init();
5ba3f43e 1736 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
d1ecb069 1737
91447636 1738 TAILQ_INIT(&dlil_ifnet_head);
91447636 1739 TAILQ_INIT(&ifnet_head);
6d2010ae 1740 TAILQ_INIT(&ifnet_detaching_head);
39037602 1741 TAILQ_INIT(&ifnet_ordered_head);
6d2010ae 1742
91447636 1743	/* Set up the lock groups we will use */
2d21ac55 1744 dlil_grp_attributes = lck_grp_attr_alloc_init();
91447636 1745
316670eb 1746 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
6d2010ae
A
1747 dlil_grp_attributes);
1748 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1749 dlil_grp_attributes);
1750 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1751 dlil_grp_attributes);
316670eb
A
1752 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1753 dlil_grp_attributes);
1754 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
6d2010ae
A
1755 dlil_grp_attributes);
1756
91447636 1757	/* Set up the lock attributes we will use */
2d21ac55 1758 dlil_lck_attributes = lck_attr_alloc_init();
6d2010ae 1759
91447636 1760 ifnet_lock_attr = lck_attr_alloc_init();
6d2010ae
A
1761
1762 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1763 dlil_lck_attributes);
1764 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
cb323159 1765 lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);
6d2010ae 1766
39236c6e
A
1767	/* Set up interface flow control related items */
1768 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
316670eb 1769
0a7de745 1770 ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry);
39236c6e
A
1771 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1772 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1773 if (ifnet_fc_zone == NULL) {
1774 panic_plain("%s: failed allocating %s", __func__,
1775 IFNET_FC_ZONE_NAME);
1776 /* NOTREACHED */
1777 }
1778 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1779 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
6d2010ae 1780
39236c6e 1781 /* Initialize interface address subsystem */
6d2010ae 1782 ifa_init();
39236c6e
A
1783
1784#if PF
1785 /* Initialize the packet filter */
1786 pfinit();
1787#endif /* PF */
1788
1789 /* Initialize queue algorithms */
1790 classq_init();
1791
1792 /* Initialize packet schedulers */
1793 pktsched_init();
1794
1795 /* Initialize flow advisory subsystem */
1796 flowadv_init();
1797
1798 /* Initialize the pktap virtual interface */
1799 pktap_init();
1800
39037602
A
1801 /* Initialize the service class to dscp map */
1802 net_qos_map_init();
1803
a39ff7e2
A
1804 /* Initialize the interface port list */
1805 if_ports_used_init();
1806
d9a64523
A
1807 /* Initialize the interface low power mode event handler */
1808 if_low_power_evhdlr_init();
1809
5ba3f43e 1810#if DEBUG || DEVELOPMENT
39236c6e
A
1811 /* Run self-tests */
1812 dlil_verify_sum16();
5ba3f43e
A
1813#endif /* DEBUG || DEVELOPMENT */
1814
1815 /* Initialize link layer table */
1816 lltable_glbl_init();
39236c6e 1817
91447636 1818 /*
316670eb
A
1819 * Create and start up the main DLIL input thread and the interface
1820	 * detacher thread once everything is initialized.
91447636 1821 */
cb323159 1822 dlil_incr_pending_thread_count();
316670eb 1823 dlil_create_input_thread(NULL, dlil_main_input_thread);
2d21ac55 1824
cb323159
A
1825 /*
1826 * Create ifnet detacher thread.
1827 * When an interface gets detached, part of the detach processing
1828	 * is delayed. The interface is added to the delayed detach list
1829 * and this thread is woken up to call ifnet_detach_final
1830 * on these interfaces.
1831 */
1832 dlil_incr_pending_thread_count();
316670eb
A
1833 if (kernel_thread_start(ifnet_detacher_thread_func,
1834 NULL, &thread) != KERN_SUCCESS) {
1835 panic_plain("%s: couldn't create detacher thread", __func__);
6d2010ae
A
1836 /* NOTREACHED */
1837 }
b0d623f7 1838 thread_deallocate(thread);
cb323159
A
1839
1840 /*
1841 * Wait for the created kernel threads for dlil to get
1842 * scheduled and run at least once before we proceed
1843 */
1844 lck_mtx_lock(&dlil_thread_sync_lock);
1845 while (dlil_pending_thread_cnt != 0) {
1846 DLIL_PRINTF("%s: Waiting for all the create dlil kernel threads "
1847 "to get scheduled at least once.\n", __func__);
1848 (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock, (PZERO - 1),
1849 __func__, NULL);
1850 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
1851 }
1852 lck_mtx_unlock(&dlil_thread_sync_lock);
1853 DLIL_PRINTF("%s: All the created dlil kernel threads have been scheduled "
1854 "at least once. Proceeding.\n", __func__);
91447636 1855}
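/*
 * For exposition, a sketch of the decrement side of the startup
 * barrier above (the actual dlil_decr_pending_thread_count() is
 * defined elsewhere in dlil.c and may differ): each newly created
 * thread decrements the count once it has run, waking dlil_init().
 */
static void
example_decr_pending_thread_count(void)
{
	lck_mtx_lock(&dlil_thread_sync_lock);
	VERIFY(dlil_pending_thread_cnt > 0);
	if (--dlil_pending_thread_cnt == 0) {
		wakeup(&dlil_pending_thread_cnt);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
}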
1c79356b 1856
6d2010ae
A
1857static void
1858if_flt_monitor_busy(struct ifnet *ifp)
1859{
5ba3f43e 1860 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1861
1862 ++ifp->if_flt_busy;
1863 VERIFY(ifp->if_flt_busy != 0);
1864}
1865
1866static void
1867if_flt_monitor_unbusy(struct ifnet *ifp)
1868{
1869 if_flt_monitor_leave(ifp);
1870}
1871
1872static void
1873if_flt_monitor_enter(struct ifnet *ifp)
1874{
5ba3f43e 1875 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1876
1877 while (ifp->if_flt_busy) {
1878 ++ifp->if_flt_waiters;
1879 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1880 (PZERO - 1), "if_flt_monitor", NULL);
1881 }
1882 if_flt_monitor_busy(ifp);
1883}
1884
1885static void
1886if_flt_monitor_leave(struct ifnet *ifp)
1887{
5ba3f43e 1888 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1889
1890 VERIFY(ifp->if_flt_busy != 0);
1891 --ifp->if_flt_busy;
1892
1893 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1894 ifp->if_flt_waiters = 0;
1895 wakeup(&ifp->if_flt_head);
1896 }
1897}
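/*
 * The pair above implements a busy/waiters monitor: mutators bracket
 * filter-list changes with enter/leave while holding if_flt_lock, as
 * dlil_attach_filter() below demonstrates. A minimal usage sketch:
 */
static void
example_filter_list_mutation(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);	/* may sleep while busy */
	/* ... safely mutate ifp->if_flt_head here ... */
	if_flt_monitor_leave(ifp);	/* wake any waiters */
	lck_mtx_unlock(&ifp->if_flt_lock);
}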
1898
2d21ac55 1899__private_extern__ int
0a7de745 1900dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
39236c6e 1901 interface_filter_t *filter_ref, u_int32_t flags)
6d2010ae
A
1902{
1903 int retval = 0;
1904 struct ifnet_filter *filter = NULL;
9bccf70c 1905
6d2010ae
A
1906 ifnet_head_lock_shared();
1907 /* Check that the interface is in the global list */
1908 if (!ifnet_lookup(ifp)) {
1909 retval = ENXIO;
1910 goto done;
1911 }
1912
1913 filter = zalloc(dlif_filt_zone);
1914 if (filter == NULL) {
1915 retval = ENOMEM;
1916 goto done;
1917 }
1918 bzero(filter, dlif_filt_size);
1919
1920 /* refcnt held above during lookup */
39236c6e 1921 filter->filt_flags = flags;
91447636
A
1922 filter->filt_ifp = ifp;
1923 filter->filt_cookie = if_filter->iff_cookie;
1924 filter->filt_name = if_filter->iff_name;
1925 filter->filt_protocol = if_filter->iff_protocol;
743345f9
A
1926 /*
1927 * Do not install filter callbacks for internal coproc interface
1928 */
1929 if (!IFNET_IS_INTCOPROC(ifp)) {
1930 filter->filt_input = if_filter->iff_input;
1931 filter->filt_output = if_filter->iff_output;
1932 filter->filt_event = if_filter->iff_event;
1933 filter->filt_ioctl = if_filter->iff_ioctl;
1934 }
91447636 1935 filter->filt_detached = if_filter->iff_detached;
6d2010ae
A
1936
1937 lck_mtx_lock(&ifp->if_flt_lock);
1938 if_flt_monitor_enter(ifp);
1939
5ba3f43e 1940 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 1941 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
6d2010ae
A
1942
1943 if_flt_monitor_leave(ifp);
1944 lck_mtx_unlock(&ifp->if_flt_lock);
1945
91447636 1946 *filter_ref = filter;
b0d623f7
A
1947
1948 /*
1949 * Bump filter count and route_generation ID to let TCP
1950 * know it shouldn't do TSO on this connection
1951 */
39236c6e 1952 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
cb323159 1953 ifnet_filter_update_tso(TRUE);
39236c6e 1954 }
5ba3f43e
A
1955 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1956 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1957 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1958 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1959 }
6d2010ae 1960 if (dlil_verbose) {
cb323159 1961 DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
39236c6e 1962 if_filter->iff_name);
6d2010ae
A
1963 }
1964done:
1965 ifnet_head_done();
1966 if (retval != 0 && ifp != NULL) {
39236c6e
A
1967 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1968 if_name(ifp), if_filter->iff_name, retval);
6d2010ae 1969 }
0a7de745 1970 if (retval != 0 && filter != NULL) {
6d2010ae 1971 zfree(dlif_filt_zone, filter);
0a7de745 1972 }
6d2010ae 1973
0a7de745 1974 return retval;
1c79356b
A
1975}
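/*
 * Hedged example of the public KPI path that ultimately lands in
 * dlil_attach_filter(): a client fills in an iff_filter and calls
 * iflt_attach() (declared in net/kpi_interfacefilter.h). The callback
 * body and all "example_" names are illustrative.
 */
static errno_t
example_iff_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, proto, data, frame_ptr)
	return 0;	/* returning 0 lets the packet continue up */
}

static errno_t
example_attach_filter(ifnet_t ifp, interface_filter_t *ref)
{
	struct iff_filter filt;

	bzero(&filt, sizeof(filt));
	filt.iff_name = "com.example.filter";
	filt.iff_input = example_iff_input;
	return iflt_attach(ifp, &filt, ref);
}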
1976
91447636 1977static int
0a7de745 1978dlil_detach_filter_internal(interface_filter_t filter, int detached)
1c79356b 1979{
91447636 1980 int retval = 0;
6d2010ae 1981
3a60a9f5 1982 if (detached == 0) {
6d2010ae
A
1983 ifnet_t ifp = NULL;
1984
3a60a9f5
A
1985 ifnet_head_lock_shared();
1986 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6d2010ae
A
1987 interface_filter_t entry = NULL;
1988
1989 lck_mtx_lock(&ifp->if_flt_lock);
3a60a9f5 1990 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
0a7de745 1991 if (entry != filter || entry->filt_skip) {
6d2010ae 1992 continue;
0a7de745 1993 }
6d2010ae
A
1994 /*
1995 * We've found a match; since it's possible
1996 * that the thread gets blocked in the monitor,
1997				 * we do the lock dance. The interface cannot
1998				 * be detached in the meantime, since we still
1999				 * hold a use count taken during filter attach.
2000 */
0a7de745 2001 entry->filt_skip = 1; /* skip input/output */
6d2010ae
A
2002 lck_mtx_unlock(&ifp->if_flt_lock);
2003 ifnet_head_done();
2004
2005 lck_mtx_lock(&ifp->if_flt_lock);
2006 if_flt_monitor_enter(ifp);
5ba3f43e 2007 LCK_MTX_ASSERT(&ifp->if_flt_lock,
6d2010ae
A
2008 LCK_MTX_ASSERT_OWNED);
2009
2010 /* Remove the filter from the list */
2011 TAILQ_REMOVE(&ifp->if_flt_head, filter,
2012 filt_next);
2013
2014 if_flt_monitor_leave(ifp);
2015 lck_mtx_unlock(&ifp->if_flt_lock);
2016 if (dlil_verbose) {
cb323159 2017 DLIL_PRINTF("%s: %s filter detached\n",
39236c6e 2018 if_name(ifp), filter->filt_name);
6d2010ae
A
2019 }
2020 goto destroy;
3a60a9f5 2021 }
6d2010ae 2022 lck_mtx_unlock(&ifp->if_flt_lock);
3a60a9f5
A
2023 }
2024 ifnet_head_done();
6d2010ae
A
2025
2026 /* filter parameter is not a valid filter ref */
2027 retval = EINVAL;
2028 goto done;
3a60a9f5 2029 }
6d2010ae 2030
0a7de745 2031 if (dlil_verbose) {
cb323159 2032 DLIL_PRINTF("%s filter detached\n", filter->filt_name);
0a7de745 2033 }
6d2010ae
A
2034
2035destroy:
2036
2037 /* Call the detached function if there is one */
0a7de745 2038 if (filter->filt_detached) {
91447636 2039 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
0a7de745 2040 }
9bccf70c 2041
b0d623f7
A
2042 /*
2043 * Decrease filter count and route_generation ID to let TCP
2044	 * know it should reevaluate doing TSO or not
2045 */
39236c6e 2046 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
cb323159 2047 ifnet_filter_update_tso(FALSE);
39236c6e 2048 }
39037602 2049
5ba3f43e
A
2050 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
2051
39037602
A
2052 /* Free the filter */
2053 zfree(dlif_filt_zone, filter);
2054 filter = NULL;
6d2010ae 2055done:
39037602 2056 if (retval != 0 && filter != NULL) {
6d2010ae
A
2057 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
2058 filter->filt_name, retval);
2059 }
39037602 2060
0a7de745 2061 return retval;
1c79356b
A
2062}
2063
2d21ac55 2064__private_extern__ void
91447636
A
2065dlil_detach_filter(interface_filter_t filter)
2066{
0a7de745 2067 if (filter == NULL) {
3a60a9f5 2068 return;
0a7de745 2069 }
91447636
A
2070 dlil_detach_filter_internal(filter, 0);
2071}
1c79356b 2072
cb323159
A
2073__attribute__((noreturn))
2074static void
2075dlil_main_input_thread_func(void *v, wait_result_t w)
2076{
2077#pragma unused(w)
2078 struct dlil_threading_info *inp = v;
2079
2080 VERIFY(inp == dlil_main_input_thread);
2081 VERIFY(inp->ifp == NULL);
2082 VERIFY(current_thread() == inp->input_thr);
2083
2084 dlil_decr_pending_thread_count();
2085 lck_mtx_lock(&inp->input_lck);
2086 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2087 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2088 lck_mtx_unlock(&inp->input_lck);
2089 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2090 /* NOTREACHED */
2091 __builtin_unreachable();
2092}
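/*
 * The func/cont split above is the standard XNU continuation idiom:
 * the thread parks with assert_wait() + thread_block_parameter(),
 * giving up its kernel stack, and later resumes in the continuation
 * with its state handed back as the parameter. A generic sketch with
 * hypothetical names:
 */
static void example_cont(void *param, wait_result_t wres);

static void
example_park(void *param)
{
	(void) assert_wait(param, THREAD_UNINT);	/* event = param */
	(void) thread_block_parameter(example_cont, param);
	/* NOTREACHED */
}

static void
example_cont(void *param, wait_result_t wres)
{
#pragma unused(wres)
	/* ... handle pending work, then park again ... */
	example_park(param);
}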
2093
316670eb
A
2094/*
2095 * Main input thread:
2096 *
2097 * a) handles all inbound packets for lo0
2098 * b) handles all inbound packets for interfaces with no dedicated
2099 * input thread (e.g. anything but Ethernet/PDP or those that support
2100 * opportunistic polling.)
2101 * c) protocol registrations
2102 * d) packet injections
2103 */
39037602 2104__attribute__((noreturn))
91447636 2105static void
cb323159 2106dlil_main_input_thread_cont(void *v, wait_result_t wres)
91447636 2107{
316670eb
A
2108 struct dlil_main_threading_info *inpm = v;
2109 struct dlil_threading_info *inp = v;
2110
cb323159
A
2111 /* main input thread is uninterruptible */
2112 VERIFY(wres != THREAD_INTERRUPTED);
2113 lck_mtx_lock_spin(&inp->input_lck);
2114 VERIFY(!(inp->input_waiting & (DLIL_INPUT_TERMINATE |
2115 DLIL_INPUT_RUNNING)));
2116 inp->input_waiting |= DLIL_INPUT_RUNNING;
316670eb 2117
91447636 2118 while (1) {
2d21ac55 2119 struct mbuf *m = NULL, *m_loop = NULL;
316670eb 2120 u_int32_t m_cnt, m_cnt_loop;
cb323159 2121 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
316670eb 2122 boolean_t proto_req;
6d2010ae 2123
316670eb 2124 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55 2125
316670eb
A
2126 proto_req = (inp->input_waiting &
2127 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
6d2010ae 2128
316670eb
A
2129 /* Packets for non-dedicated interfaces other than lo0 */
2130 m_cnt = qlen(&inp->rcvq_pkts);
cb323159
A
2131 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2132 m = pkt.cp_mbuf;
6d2010ae 2133
39236c6e 2134 /* Packets exclusive to lo0 */
316670eb 2135 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
cb323159
A
2136 _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
2137 m_loop = pkt.cp_mbuf;
6d2010ae 2138
316670eb 2139 inp->wtot = 0;
6d2010ae 2140
316670eb 2141 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2142
316670eb 2143 /*
39037602
A
2144		 * NOTE: thread starvation is a real concern here;
2145		 * we should consider adding safeguards when processing
2146		 * long chains of packets.
2147 */
0a7de745 2148 if (m_loop != NULL) {
316670eb 2149 dlil_input_packet_list_extended(lo_ifp, m_loop,
cb323159 2150 m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
0a7de745 2151 }
6d2010ae 2152
0a7de745 2153 if (m != NULL) {
316670eb 2154 dlil_input_packet_list_extended(NULL, m,
cb323159 2155 m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
0a7de745 2156 }
316670eb 2157
0a7de745 2158 if (proto_req) {
316670eb 2159 proto_input_run();
0a7de745 2160 }
cb323159
A
2161
2162 lck_mtx_lock_spin(&inp->input_lck);
2163 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
2164 /* main input thread cannot be terminated */
2165 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2166 if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2167 break;
2168 }
316670eb
A
2169 }
2170
cb323159
A
2171 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2172 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2173 lck_mtx_unlock(&inp->input_lck);
2174 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2175
0a7de745 2176 VERIFY(0); /* we should never get here */
cb323159
A
2177 /* NOTREACHED */
2178 __builtin_unreachable();
316670eb
A
2179}
2180
2181/*
2182 * Input thread for interfaces with the legacy input model.
2183 */
cb323159 2184__attribute__((noreturn))
316670eb
A
2185static void
2186dlil_input_thread_func(void *v, wait_result_t w)
2187{
2188#pragma unused(w)
39037602 2189 char thread_name[MAXTHREADNAMESIZE];
316670eb
A
2190 struct dlil_threading_info *inp = v;
2191 struct ifnet *ifp = inp->ifp;
2192
cb323159
A
2193 VERIFY(inp != dlil_main_input_thread);
2194 VERIFY(ifp != NULL);
2195 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
2196 !(ifp->if_xflags & IFXF_LEGACY));
2197 VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
2198 !(ifp->if_xflags & IFXF_LEGACY));
2199 VERIFY(current_thread() == inp->input_thr);
2200
2201 /* construct the name for this thread, and then apply it */
39037602 2202 bzero(thread_name, sizeof(thread_name));
cb323159
A
2203 (void) snprintf(thread_name, sizeof(thread_name),
2204 "dlil_input_%s", ifp->if_xname);
39037602 2205 thread_set_thread_name(inp->input_thr, thread_name);
cb323159 2206 ifnet_decr_pending_thread_count(ifp);
39037602 2207
cb323159
A
2208 lck_mtx_lock(&inp->input_lck);
2209 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2210 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2211 lck_mtx_unlock(&inp->input_lck);
2212 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2213 /* NOTREACHED */
2214 __builtin_unreachable();
2215}
2216
2217__attribute__((noreturn))
2218static void
2219dlil_input_thread_cont(void *v, wait_result_t wres)
2220{
2221 struct dlil_threading_info *inp = v;
2222 struct ifnet *ifp = inp->ifp;
2223
2224 lck_mtx_lock_spin(&inp->input_lck);
2225 if (__improbable(wres == THREAD_INTERRUPTED ||
2226 (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
2227 goto terminate;
2228 }
2229
2230 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2231 inp->input_waiting |= DLIL_INPUT_RUNNING;
2d21ac55 2232
316670eb
A
2233 while (1) {
2234 struct mbuf *m = NULL;
cb323159
A
2235 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2236 boolean_t notify = FALSE;
316670eb
A
2237 u_int32_t m_cnt;
2238
316670eb 2239 inp->input_waiting &= ~DLIL_INPUT_WAITING;
6d2010ae 2240
316670eb
A
2241 /*
2242 * Protocol registration and injection must always use
2243		 * the main input thread; in theory the latter could use
2244		 * the input thread corresponding to the interface the
2245		 * packet arrived on, but that requires knowing the interface
2246		 * in advance (and the benefits might not be worth the trouble.)
2247 */
2248 VERIFY(!(inp->input_waiting &
0a7de745 2249 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
6d2010ae 2250
316670eb
A
2251 /* Packets for this interface */
2252 m_cnt = qlen(&inp->rcvq_pkts);
cb323159
A
2253 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2254 m = pkt.cp_mbuf;
2d21ac55 2255
316670eb
A
2256 inp->wtot = 0;
2257
cb323159 2258 notify = dlil_input_stats_sync(ifp, inp);
316670eb
A
2259
2260 lck_mtx_unlock(&inp->input_lck);
2d21ac55 2261
cb323159
A
2262 if (notify) {
2263 ifnet_notify_data_threshold(ifp);
2264 }
2265
91447636 2266 /*
39037602
A
2267		 * NOTE: thread starvation is a real concern here;
2268		 * we should consider adding safeguards when processing
2269		 * long chains of packets.
2270 */
0a7de745 2271 if (m != NULL) {
316670eb 2272 dlil_input_packet_list_extended(NULL, m,
cb323159
A
2273 m_cnt, ifp->if_poll_mode);
2274 }
2275
2276 lck_mtx_lock_spin(&inp->input_lck);
2277 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
ea3f0419
A
2278 if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
2279 DLIL_INPUT_TERMINATE))) {
cb323159 2280 break;
0a7de745 2281 }
2d21ac55 2282 }
316670eb 2283
cb323159
A
2284 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2285
2286 if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
2287terminate:
2288 lck_mtx_unlock(&inp->input_lck);
2289 dlil_terminate_input_thread(inp);
2290 /* NOTREACHED */
2291 } else {
2292 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2293 lck_mtx_unlock(&inp->input_lck);
2294 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2295 /* NOTREACHED */
2296 }
2297
0a7de745 2298 VERIFY(0); /* we should never get here */
cb323159
A
2299 /* NOTREACHED */
2300 __builtin_unreachable();
2d21ac55
A
2301}
2302
316670eb
A
2303/*
2304 * Input thread for interfaces with the opportunistic polling input model.
2305 */
cb323159 2306__attribute__((noreturn))
316670eb
A
2307static void
2308dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2d21ac55 2309{
316670eb 2310#pragma unused(w)
cb323159 2311 char thread_name[MAXTHREADNAMESIZE];
316670eb
A
2312 struct dlil_threading_info *inp = v;
2313 struct ifnet *ifp = inp->ifp;
2d21ac55 2314
316670eb 2315 VERIFY(inp != dlil_main_input_thread);
cb323159
A
2316 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
2317 (ifp->if_xflags & IFXF_LEGACY));
2318 VERIFY(current_thread() == inp->input_thr);
2319
2320 /* construct the name for this thread, and then apply it */
2321 bzero(thread_name, sizeof(thread_name));
2322 (void) snprintf(thread_name, sizeof(thread_name),
2323 "dlil_input_poll_%s", ifp->if_xname);
2324 thread_set_thread_name(inp->input_thr, thread_name);
2325 ifnet_decr_pending_thread_count(ifp);
2326
2327 lck_mtx_lock(&inp->input_lck);
2328 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2329 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2330 lck_mtx_unlock(&inp->input_lck);
2331 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
2332 /* NOTREACHED */
2333 __builtin_unreachable();
2334}
2335
2336__attribute__((noreturn))
2337static void
2338dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
2339{
2340 struct dlil_threading_info *inp = v;
2341 struct ifnet *ifp = inp->ifp;
2342 struct timespec ts;
2343
2344 lck_mtx_lock_spin(&inp->input_lck);
2345 if (__improbable(wres == THREAD_INTERRUPTED ||
2346 (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
2347 goto terminate;
2348 }
2349
2350 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2351 inp->input_waiting |= DLIL_INPUT_RUNNING;
2d21ac55 2352
2d21ac55 2353 while (1) {
316670eb
A
2354 struct mbuf *m = NULL;
2355 u_int32_t m_cnt, m_size, poll_req = 0;
2356 ifnet_model_t mode;
2357 struct timespec now, delta;
cb323159
A
2358 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2359 boolean_t notify;
39236c6e 2360 u_int64_t ival;
6d2010ae 2361
cb323159 2362 inp->input_waiting &= ~DLIL_INPUT_WAITING;
6d2010ae 2363
cb323159 2364 if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 2365 ival = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 2366 }
39236c6e 2367
316670eb
A
2368 /* Link parameters changed? */
2369 if (ifp->if_poll_update != 0) {
2370 ifp->if_poll_update = 0;
39236c6e 2371 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
91447636 2372 }
1c79356b 2373
316670eb 2374 /* Current operating mode */
cb323159 2375 mode = ifp->if_poll_mode;
2d21ac55
A
2376
2377 /*
316670eb
A
2378 * Protocol registration and injection must always use
2379		 * the main input thread; in theory the latter could use
2380		 * the input thread corresponding to the interface the
2381		 * packet arrived on, but that requires knowing the interface
2382		 * in advance (and the benefits might not be worth the trouble.)
2d21ac55 2383 */
316670eb 2384 VERIFY(!(inp->input_waiting &
0a7de745 2385 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2d21ac55 2386
316670eb
A
2387 /* Total count of all packets */
2388 m_cnt = qlen(&inp->rcvq_pkts);
2389
2390 /* Total bytes of all packets */
2391 m_size = qsize(&inp->rcvq_pkts);
2392
2393 /* Packets for this interface */
cb323159
A
2394 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2395 m = pkt.cp_mbuf;
316670eb
A
2396 VERIFY(m != NULL || m_cnt == 0);
2397
2398 nanouptime(&now);
cb323159
A
2399 if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
2400 *(&ifp->if_poll_sample_lasttime) = *(&now);
0a7de745 2401 }
316670eb 2402
cb323159
A
2403 net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
2404 if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
316670eb
A
2405 u_int32_t ptot, btot;
2406
2407 /* Accumulate statistics for current sampling */
cb323159 2408 PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
316670eb 2409
cb323159 2410 if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
316670eb 2411 goto skip;
0a7de745 2412 }
316670eb 2413
cb323159 2414 *(&ifp->if_poll_sample_lasttime) = *(&now);
316670eb
A
2415
2416 /* Calculate min/max of inbound bytes */
cb323159
A
2417 btot = (u_int32_t)ifp->if_poll_sstats.bytes;
2418 if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
2419 ifp->if_rxpoll_bmin = btot;
0a7de745 2420 }
cb323159
A
2421 if (btot > ifp->if_rxpoll_bmax) {
2422 ifp->if_rxpoll_bmax = btot;
0a7de745 2423 }
316670eb
A
2424
2425 /* Calculate EWMA of inbound bytes */
cb323159 2426 DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
316670eb
A
2427
2428 /* Calculate min/max of inbound packets */
cb323159
A
2429 ptot = (u_int32_t)ifp->if_poll_sstats.packets;
2430 if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
2431 ifp->if_rxpoll_pmin = ptot;
0a7de745 2432 }
cb323159
A
2433 if (ptot > ifp->if_rxpoll_pmax) {
2434 ifp->if_rxpoll_pmax = ptot;
0a7de745 2435 }
316670eb
A
2436
2437 /* Calculate EWMA of inbound packets */
cb323159 2438 DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
316670eb
A
2439
2440 /* Reset sampling statistics */
cb323159 2441 PKTCNTR_CLEAR(&ifp->if_poll_sstats);
316670eb
A
2442
2443 /* Calculate EWMA of wakeup requests */
cb323159 2444 DLIL_EWMA(ifp->if_rxpoll_wavg, inp->wtot, if_rxpoll_decay);
316670eb
A
2445 inp->wtot = 0;
2446
2447 if (dlil_verbose) {
cb323159
A
2448 if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
2449 *(&ifp->if_poll_dbg_lasttime) = *(&now);
0a7de745 2450 }
cb323159 2451 net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
316670eb 2452 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
cb323159
A
2453 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2454 DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
316670eb
A
2455 "limits [%d/%d], wreq avg %d "
2456 "limits [%d/%d], bytes avg %d "
39236c6e 2457 "limits [%d/%d]\n", if_name(ifp),
cb323159 2458 (ifp->if_poll_mode ==
316670eb 2459 IFNET_MODEL_INPUT_POLL_ON) ?
cb323159
A
2460 "ON" : "OFF", ifp->if_rxpoll_pavg,
2461 ifp->if_rxpoll_pmax,
2462 ifp->if_rxpoll_plowat,
2463 ifp->if_rxpoll_phiwat,
2464 ifp->if_rxpoll_wavg,
2465 ifp->if_rxpoll_wlowat,
2466 ifp->if_rxpoll_whiwat,
2467 ifp->if_rxpoll_bavg,
2468 ifp->if_rxpoll_blowat,
2469 ifp->if_rxpoll_bhiwat);
316670eb
A
2470 }
2471 }
2d21ac55 2472
316670eb 2473 /* Perform mode transition, if necessary */
cb323159
A
2474 if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
2475 *(&ifp->if_poll_mode_lasttime) = *(&now);
0a7de745 2476 }
316670eb 2477
cb323159
A
2478 net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
2479 if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
316670eb 2480 goto skip;
0a7de745 2481 }
316670eb 2482
cb323159
A
2483 if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
2484 ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
2485 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
316670eb 2486 mode = IFNET_MODEL_INPUT_POLL_OFF;
cb323159
A
2487 } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
2488 (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
2489 ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
2490 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
316670eb
A
2491 mode = IFNET_MODEL_INPUT_POLL_ON;
2492 }
6d2010ae 2493
cb323159
A
2494 if (mode != ifp->if_poll_mode) {
2495 ifp->if_poll_mode = mode;
2496 *(&ifp->if_poll_mode_lasttime) = *(&now);
316670eb
A
2497 poll_req++;
2498 }
2499 }
2500skip:
cb323159 2501 notify = dlil_input_stats_sync(ifp, inp);
6d2010ae 2502
316670eb 2503 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2504
cb323159
A
2505 if (notify) {
2506 ifnet_notify_data_threshold(ifp);
2507 }
2508
316670eb
A
2509 /*
2510		 * If there's a mode change and the interface is still attached,
2511		 * perform a downcall to the driver for the new mode. Also
2512		 * hold an IO refcnt on the interface to prevent it from
2513		 * being detached (will be released below.)
2514 */
2515 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
cb323159
A
2516 struct ifnet_model_params p = {
2517 .model = mode, .reserved = { 0 }
2518 };
316670eb
A
2519 errno_t err;
2520
2521 if (dlil_verbose) {
cb323159 2522 DLIL_PRINTF("%s: polling is now %s, "
316670eb
A
2523 "pkts avg %d max %d limits [%d/%d], "
2524 "wreq avg %d limits [%d/%d], "
2525 "bytes avg %d limits [%d/%d]\n",
39236c6e 2526 if_name(ifp),
316670eb 2527 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
cb323159
A
2528 "ON" : "OFF", ifp->if_rxpoll_pavg,
2529 ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
2530 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
2531 ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
2532 ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
2533 ifp->if_rxpoll_bhiwat);
316670eb 2534 }
2d21ac55 2535
316670eb 2536 if ((err = ((*ifp->if_input_ctl)(ifp,
0a7de745 2537 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
cb323159 2538 DLIL_PRINTF("%s: error setting polling mode "
39236c6e 2539 "to %s (%d)\n", if_name(ifp),
316670eb
A
2540 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2541 "ON" : "OFF", err);
2542 }
1c79356b 2543
316670eb
A
2544 switch (mode) {
2545 case IFNET_MODEL_INPUT_POLL_OFF:
2546 ifnet_set_poll_cycle(ifp, NULL);
cb323159 2547 ifp->if_rxpoll_offreq++;
0a7de745 2548 if (err != 0) {
cb323159 2549 ifp->if_rxpoll_offerr++;
0a7de745 2550 }
316670eb 2551 break;
2d21ac55 2552
316670eb 2553 case IFNET_MODEL_INPUT_POLL_ON:
39236c6e 2554 net_nsectimer(&ival, &ts);
316670eb
A
2555 ifnet_set_poll_cycle(ifp, &ts);
2556 ifnet_poll(ifp);
cb323159 2557 ifp->if_rxpoll_onreq++;
0a7de745 2558 if (err != 0) {
cb323159 2559 ifp->if_rxpoll_onerr++;
0a7de745 2560 }
316670eb
A
2561 break;
2562
2563 default:
2564 VERIFY(0);
2565 /* NOTREACHED */
2566 }
2567
2568 /* Release the IO refcnt */
2569 ifnet_decr_iorefcnt(ifp);
2570 }
2571
2572 /*
39037602
A
2573		 * NOTE: thread starvation is a real concern here;
2574		 * we should consider adding safeguards when processing
2575		 * long chains of packets.
2576 */
0a7de745 2577 if (m != NULL) {
316670eb 2578 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
0a7de745 2579 }
cb323159
A
2580
2581 lck_mtx_lock_spin(&inp->input_lck);
2582 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
ea3f0419
A
2583 if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
2584 DLIL_INPUT_TERMINATE))) {
cb323159
A
2585 break;
2586 }
2587 }
2588
2589 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2590
2591 if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
2592terminate:
2593 lck_mtx_unlock(&inp->input_lck);
2594 dlil_terminate_input_thread(inp);
2595 /* NOTREACHED */
2596 } else {
2597 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2598 lck_mtx_unlock(&inp->input_lck);
2599 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
2600 inp);
2601 /* NOTREACHED */
316670eb
A
2602 }
2603
0a7de745 2604 VERIFY(0); /* we should never get here */
cb323159
A
2605 /* NOTREACHED */
2606 __builtin_unreachable();
316670eb
A
2607}
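/*
 * The averages driving the mode transitions above are exponentially
 * weighted moving averages. Conceptually (the DLIL_EWMA macro may
 * differ in exact arithmetic), with decay d:
 *
 *	avg = ((avg << d) - avg + sample) >> d
 *	    ~= avg * (1 - 2^-d) + sample * 2^-d
 *
 * Combined with the hold times, this gives hysteresis so polling does
 * not flap between ON and OFF. An illustrative helper:
 */
static u_int32_t
example_ewma(u_int32_t avg, u_int32_t sample, u_int32_t decay)
{
	if (avg == 0) {
		return sample;	/* seed with the first sample */
	}
	/* assumes (avg << decay) does not overflow 32 bits */
	return ((avg << decay) - avg + sample) >> decay;
}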
2608
39236c6e 2609errno_t
cb323159 2610dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
316670eb 2611{
39236c6e
A
2612 if (p != NULL) {
2613 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
0a7de745
A
2614 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2615 return EINVAL;
2616 }
2617 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2618 p->packets_lowat >= p->packets_hiwat) {
2619 return EINVAL;
2620 }
39236c6e 2621 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
0a7de745
A
2622 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2623 return EINVAL;
2624 }
2625 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2626 p->bytes_lowat >= p->bytes_hiwat) {
2627 return EINVAL;
2628 }
39236c6e 2629 if (p->interval_time != 0 &&
0a7de745 2630 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 2631 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 2632 }
39236c6e 2633 }
cb323159
A
2634 return 0;
2635}
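/*
 * Illustrative caller of the validator above: a low watermark must be
 * paired with (and stay strictly below) its high watermark, and a
 * non-zero interval is clamped up to IF_RXPOLL_INTERVALTIME_MIN. The
 * values below are arbitrary examples.
 */
static errno_t
example_make_poll_params(struct ifnet_poll_params *p)
{
	bzero(p, sizeof(*p));
	p->packets_lowat = 8;
	p->packets_hiwat = 64;		/* must exceed lowat */
	p->bytes_lowat = 2 * 1024;
	p->bytes_hiwat = 64 * 1024;
	return dlil_rxpoll_validate_params(p);	/* 0 on success */
}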
39236c6e 2636
cb323159
A
2637void
2638dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2639{
2640 u_int64_t sample_holdtime, inbw;
316670eb 2641
39236c6e 2642 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
0a7de745 2643 sample_holdtime = 0; /* polling is disabled */
cb323159
A
2644 ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
2645 ifp->if_rxpoll_blowat = 0;
2646 ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
2647 ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
2648 ifp->if_rxpoll_plim = 0;
2649 ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
316670eb 2650 } else {
39236c6e
A
2651 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2652 u_int64_t ival;
316670eb
A
2653 unsigned int n, i;
2654
39236c6e 2655 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
0a7de745 2656 if (inbw < rxpoll_tbl[i].speed) {
316670eb 2657 break;
0a7de745 2658 }
316670eb
A
2659 n = i;
2660 }
39236c6e
A
2661 /* auto-tune if caller didn't specify a value */
2662 plowat = ((p == NULL || p->packets_lowat == 0) ?
2663 rxpoll_tbl[n].plowat : p->packets_lowat);
2664 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2665 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2666 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2667 rxpoll_tbl[n].blowat : p->bytes_lowat);
2668 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2669 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2670 plim = ((p == NULL || p->packets_limit == 0) ?
2671 if_rxpoll_max : p->packets_limit);
2672 ival = ((p == NULL || p->interval_time == 0) ?
2673 if_rxpoll_interval_time : p->interval_time);
2674
2675 VERIFY(plowat != 0 && phiwat != 0);
2676 VERIFY(blowat != 0 && bhiwat != 0);
2677 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2678
316670eb 2679 sample_holdtime = if_rxpoll_sample_holdtime;
cb323159
A
2680 ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
2681 ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
2682 ifp->if_rxpoll_plowat = plowat;
2683 ifp->if_rxpoll_phiwat = phiwat;
2684 ifp->if_rxpoll_blowat = blowat;
2685 ifp->if_rxpoll_bhiwat = bhiwat;
2686 ifp->if_rxpoll_plim = plim;
2687 ifp->if_rxpoll_ival = ival;
316670eb
A
2688 }
2689
cb323159
A
2690 net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
2691 net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
316670eb
A
2692
2693 if (dlil_verbose) {
cb323159 2694 DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
39236c6e
A
2695 "poll interval %llu nsec, pkts per poll %u, "
2696 "pkt limits [%u/%u], wreq limits [%u/%u], "
2697 "bytes limits [%u/%u]\n", if_name(ifp),
cb323159
A
2698 inbw, sample_holdtime, ifp->if_rxpoll_ival,
2699 ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
2700 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
2701 ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
2702 ifp->if_rxpoll_bhiwat);
2703 }
2704}
2705
2706/*
2707 * Must be called on an attached ifnet (caller is expected to check.)
2708 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2709 */
2710errno_t
2711dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2712 boolean_t locked)
2713{
2714 errno_t err;
2715 struct dlil_threading_info *inp;
2716
2717 VERIFY(ifp != NULL);
2718 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2719 return ENXIO;
2720 }
2721 err = dlil_rxpoll_validate_params(p);
2722 if (err != 0) {
2723 return err;
316670eb 2724 }
39236c6e 2725
cb323159
A
2726 if (!locked) {
2727 lck_mtx_lock(&inp->input_lck);
2728 }
2729 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2730 /*
2731 * Normally, we'd reset the parameters to the auto-tuned values
2732	 * if the input thread detects a change in link rate. If the
2733	 * driver provides its own parameters right after the link rate
2734	 * changes, but before the input thread gets to run, we want to
2735 * make sure to keep the driver's values. Clearing if_poll_update
2736 * will achieve that.
2737 */
2738 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2739 ifp->if_poll_update = 0;
2740 }
2741 dlil_rxpoll_update_params(ifp, p);
0a7de745 2742 if (!locked) {
39236c6e 2743 lck_mtx_unlock(&inp->input_lck);
0a7de745 2744 }
0a7de745 2745 return 0;
39236c6e
A
2746}
2747
2748/*
2749 * Must be called on an attached ifnet (caller is expected to check.)
2750 */
2751errno_t
2752dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2753{
2754 struct dlil_threading_info *inp;
2755
2756 VERIFY(ifp != NULL && p != NULL);
0a7de745
A
2757 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2758 return ENXIO;
2759 }
39236c6e 2760
0a7de745 2761 bzero(p, sizeof(*p));
39236c6e
A
2762
2763 lck_mtx_lock(&inp->input_lck);
cb323159
A
2764 p->packets_limit = ifp->if_rxpoll_plim;
2765 p->packets_lowat = ifp->if_rxpoll_plowat;
2766 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2767 p->bytes_lowat = ifp->if_rxpoll_blowat;
2768 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2769 p->interval_time = ifp->if_rxpoll_ival;
39236c6e
A
2770 lck_mtx_unlock(&inp->input_lck);
2771
0a7de745 2772 return 0;
316670eb
A
2773}
2774
2775errno_t
2776ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2777 const struct ifnet_stat_increment_param *s)
2778{
0a7de745 2779 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
316670eb
A
2780}
2781
2782errno_t
2783ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2784 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2785{
0a7de745 2786 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
316670eb
A
2787}
2788
cb323159
A
2789errno_t
2790ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2791 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2792{
2793 return ifnet_input_common(ifp, m_head, m_tail, s,
2794 (m_head != NULL), TRUE);
2795}
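/*
 * Hedged driver-side sketch: a typical RX completion path hands a
 * chain of packets to ifnet_input_extended() along with accurate stat
 * increments; packets_in must match the chain exactly (see the
 * assertion in ifnet_input_common() below). Names are illustrative.
 */
static void
example_driver_rx(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t pkts, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof(s));
	s.packets_in = pkts;
	s.bytes_in = bytes;
	(void) ifnet_input_extended(ifp, head, tail, &s);
}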
2796
316670eb
A
2797static errno_t
2798ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2799 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2800{
5ba3f43e 2801 dlil_input_func input_func;
39037602 2802 struct ifnet_stat_increment_param _s;
316670eb 2803 u_int32_t m_cnt = 0, m_size = 0;
39037602
A
2804 struct mbuf *last;
2805 errno_t err = 0;
316670eb 2806
39236c6e 2807 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
0a7de745 2808 if (m_head != NULL) {
39236c6e 2809 mbuf_freem_list(m_head);
0a7de745
A
2810 }
2811 return EINVAL;
39236c6e
A
2812 }
2813
2814 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2815 VERIFY(m_tail == NULL || ext);
2816 VERIFY(s != NULL || !ext);
2817
316670eb
A
2818 /*
2819 * Drop the packet(s) if the parameters are invalid, or if the
2820 * interface is no longer attached; else hold an IO refcnt to
2821 * prevent it from being detached (will be released below.)
2822 */
cb323159 2823 if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
0a7de745 2824 if (m_head != NULL) {
316670eb 2825 mbuf_freem_list(m_head);
0a7de745
A
2826 }
2827 return EINVAL;
316670eb
A
2828 }
2829
5ba3f43e
A
2830 input_func = ifp->if_input_dlil;
2831 VERIFY(input_func != NULL);
39037602 2832
316670eb
A
2833 if (m_tail == NULL) {
2834 last = m_head;
39236c6e 2835 while (m_head != NULL) {
316670eb 2836#if IFNET_INPUT_SANITY_CHK
0a7de745 2837 if (dlil_input_sanity_check != 0) {
316670eb 2838 DLIL_INPUT_CHECK(last, ifp);
0a7de745 2839 }
316670eb
A
2840#endif /* IFNET_INPUT_SANITY_CHK */
2841 m_cnt++;
2842 m_size += m_length(last);
0a7de745 2843 if (mbuf_nextpkt(last) == NULL) {
316670eb 2844 break;
0a7de745 2845 }
316670eb
A
2846 last = mbuf_nextpkt(last);
2847 }
2848 m_tail = last;
2849 } else {
2850#if IFNET_INPUT_SANITY_CHK
2851 if (dlil_input_sanity_check != 0) {
2852 last = m_head;
2853 while (1) {
2854 DLIL_INPUT_CHECK(last, ifp);
2855 m_cnt++;
2856 m_size += m_length(last);
0a7de745 2857 if (mbuf_nextpkt(last) == NULL) {
316670eb 2858 break;
0a7de745 2859 }
316670eb
A
2860 last = mbuf_nextpkt(last);
2861 }
2862 } else {
2863 m_cnt = s->packets_in;
2864 m_size = s->bytes_in;
2865 last = m_tail;
2866 }
2867#else
2868 m_cnt = s->packets_in;
2869 m_size = s->bytes_in;
2870 last = m_tail;
2871#endif /* IFNET_INPUT_SANITY_CHK */
2872 }
2873
2874 if (last != m_tail) {
39236c6e
A
2875 panic_plain("%s: invalid input packet chain for %s, "
2876 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2877 m_tail, last);
316670eb
A
2878 }
2879
2880 /*
2881 * Assert packet count only for the extended variant, for backwards
2882 * compatibility, since this came directly from the device driver.
2883 * Relax this assertion for input bytes, as the driver may have
2884 * included the link-layer headers in the computation; hence
2885 * m_size is just an approximation.
2886 */
2887 if (ext && s->packets_in != m_cnt) {
39236c6e
A
2888 panic_plain("%s: input packet count mismatch for %s, "
2889 "%d instead of %d\n", __func__, if_name(ifp),
2890 s->packets_in, m_cnt);
316670eb
A
2891 }
2892
39037602 2893 if (s == NULL) {
0a7de745 2894 bzero(&_s, sizeof(_s));
39037602
A
2895 s = &_s;
2896 } else {
2897 _s = *s;
2898 }
2899 _s.packets_in = m_cnt;
2900 _s.bytes_in = m_size;
2901
5ba3f43e 2902 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
39037602
A
2903
2904 if (ifp != lo_ifp) {
2905 /* Release the IO refcnt */
cb323159 2906 ifnet_datamov_end(ifp);
39037602
A
2907 }
2908
0a7de745 2909 return err;
39037602
A
2910}
2911
39037602
A
2912
2913errno_t
2914dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2915{
0a7de745 2916 return ifp->if_output(ifp, m);
39037602
A
2917}
2918
2919errno_t
2920dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2921 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2922 boolean_t poll, struct thread *tp)
2923{
2924 struct dlil_threading_info *inp;
2925 u_int32_t m_cnt = s->packets_in;
2926 u_int32_t m_size = s->bytes_in;
cb323159 2927 boolean_t notify = FALSE;
39037602 2928
0a7de745 2929 if ((inp = ifp->if_inp) == NULL) {
316670eb 2930 inp = dlil_main_input_thread;
0a7de745 2931 }
316670eb
A
2932
2933 /*
2934 * If there is a matching DLIL input thread associated with an
2935 * affinity set, associate this thread with the same set. We
2936 * will only do this once.
2937 */
2938 lck_mtx_lock_spin(&inp->input_lck);
39037602 2939 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
316670eb
A
2940 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2941 (poll && inp->poll_thr == THREAD_NULL))) {
2942 u_int32_t tag = inp->tag;
2943
2944 if (poll) {
2945 VERIFY(inp->poll_thr == THREAD_NULL);
2946 inp->poll_thr = tp;
2947 } else {
2948 VERIFY(inp->wloop_thr == THREAD_NULL);
2949 inp->wloop_thr = tp;
2950 }
2951 lck_mtx_unlock(&inp->input_lck);
2952
2953 /* Associate the current thread with the new affinity tag */
2954 (void) dlil_affinity_set(tp, tag);
2955
2956 /*
2957 * Take a reference on the current thread; during detach,
5ba3f43e 2958 * we will need to refer to it in order to tear down its
316670eb
A
2959 * affinity.
2960 */
2961 thread_reference(tp);
2962 lck_mtx_lock_spin(&inp->input_lck);
2963 }
2964
39236c6e
A
2965 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2966
39037602 2967 /*
316670eb
A
2968	 * Because of looped-back multicast we cannot stuff the ifp in
2969 * the rcvif of the packet header: loopback (lo0) packets use a
2970 * dedicated list so that we can later associate them with lo_ifp
2971 * on their way up the stack. Packets for other interfaces without
2972 * dedicated input threads go to the regular list.
2973 */
39236c6e 2974 if (m_head != NULL) {
cb323159
A
2975 classq_pkt_t head, tail;
2976 CLASSQ_PKT_INIT_MBUF(&head, m_head);
2977 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
39236c6e
A
2978 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2979 struct dlil_main_threading_info *inpm =
2980 (struct dlil_main_threading_info *)inp;
cb323159 2981 _addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
39236c6e
A
2982 m_cnt, m_size);
2983 } else {
cb323159 2984 _addq_multi(&inp->rcvq_pkts, &head, &tail,
39236c6e
A
2985 m_cnt, m_size);
2986 }
316670eb
A
2987 }
2988
2989#if IFNET_INPUT_SANITY_CHK
2990 if (dlil_input_sanity_check != 0) {
2991 u_int32_t count;
2992 struct mbuf *m0;
2993
0a7de745 2994 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
316670eb 2995 count++;
0a7de745 2996 }
316670eb
A
2997
2998 if (count != m_cnt) {
39236c6e
A
2999 panic_plain("%s: invalid packet count %d "
3000 "(expected %d)\n", if_name(ifp),
316670eb
A
3001 count, m_cnt);
3002 /* NOTREACHED */
3003 }
3004
3005 inp->input_mbuf_cnt += m_cnt;
3006 }
3007#endif /* IFNET_INPUT_SANITY_CHK */
3008
cb323159 3009 dlil_input_stats_add(s, inp, ifp, poll);
39037602
A
3010 /*
3011 * If we're using the main input thread, synchronize the
3012 * stats now since we have the interface context. All
3013 * other cases involving dedicated input threads will
3014 * have their stats synchronized there.
3015 */
0a7de745 3016 if (inp == dlil_main_input_thread) {
cb323159
A
3017 notify = dlil_input_stats_sync(ifp, inp);
3018 }
3019
3020 inp->input_waiting |= DLIL_INPUT_WAITING;
3021 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
3022 inp->wtot++;
3023 wakeup_one((caddr_t)&inp->input_waiting);
316670eb
A
3024 }
3025 lck_mtx_unlock(&inp->input_lck);
3026
cb323159
A
3027 if (notify) {
3028 ifnet_notify_data_threshold(ifp);
3029 }
3030
0a7de745 3031 return 0;
316670eb
A
3032}
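/*
 * Illustrative sketch (not part of this file): dlil_input_handler()
 * above is normally reached through the ifnet_input() and
 * ifnet_input_extended() KPIs.  A hypothetical driver receive path
 * that has drained its ring into an mbuf chain might hand it off as
 * follows; my_rx_drain() and the sc softc are assumptions made for
 * the example only.
 *
 *	struct ifnet_stat_increment_param s;
 *	mbuf_t head, tail;
 *	u_int32_t cnt, len;
 *
 *	my_rx_drain(sc, &head, &tail, &cnt, &len);
 *	bzero(&s, sizeof (s));
 *	s.packets_in = cnt;
 *	s.bytes_in = len;
 *	(void) ifnet_input_extended(sc->sc_ifp, head, tail, &s);
 */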
3033
5ba3f43e 3034
39236c6e 3035static void
5c9f4661 3036ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
316670eb 3037{
0a7de745 3038 if (!(ifp->if_eflags & IFEF_TXSTART)) {
39236c6e 3039 return;
0a7de745 3040 }
316670eb 3041 /*
39236c6e
A
3042 * If the starter thread is inactive, signal it to do work,
3043 * unless the interface is being flow controlled from below,
3044 * e.g. a virtual interface being flow controlled by a real
5c9f4661
A
3045 * network interface beneath it, or it's been disabled via
3046 * a call to ifnet_disable_output().
316670eb
A
3047 */
3048 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e
A
3049 if (resetfc) {
3050 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
3051 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
3052 lck_mtx_unlock(&ifp->if_start_lock);
3053 return;
3054 }
316670eb 3055 ifp->if_start_req++;
3e170ce0
A
3056 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
3057 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
39037602
A
3058 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
3059 ifp->if_start_delayed == 0)) {
5ba3f43e
A
3060 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
3061 ifp->if_start_thread);
316670eb
A
3062 }
3063 lck_mtx_unlock(&ifp->if_start_lock);
3064}
3065
39236c6e
A
3066void
3067ifnet_start(struct ifnet *ifp)
3068{
5c9f4661 3069 ifnet_start_common(ifp, FALSE);
39236c6e
A
3070}
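/*
 * Illustrative sketch: ifnet_start() is also exported as a KPI, so a
 * driver can nudge the starter thread itself, e.g. from a (purely
 * hypothetical) transmit-completion handler once hardware descriptors
 * have been reclaimed:
 *
 *	static void
 *	my_tx_complete(ifnet_t ifp)
 *	{
 *		my_reclaim_descriptors(ifp);
 *		ifnet_start(ifp);
 *	}
 *
 * The call is cheap when there is nothing to do: ifnet_start_common()
 * returns immediately for interfaces without IFEF_TXSTART, or while
 * output is flow-controlled from below.
 */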
3071
cb323159 3072__attribute__((noreturn))
316670eb 3073static void
cb323159 3074ifnet_start_thread_func(void *v, wait_result_t w)
316670eb
A
3075{
3076#pragma unused(w)
3077 struct ifnet *ifp = v;
39037602 3078 char thread_name[MAXTHREADNAMESIZE];
316670eb 3079
39037602
A
3080 /* Construct the name for this thread, and then apply it. */
3081 bzero(thread_name, sizeof(thread_name));
0a7de745 3082 (void) snprintf(thread_name, sizeof(thread_name),
5ba3f43e 3083 "ifnet_start_%s", ifp->if_xname);
cb323159
A
3084 ASSERT(ifp->if_start_thread == current_thread());
3085 thread_set_thread_name(current_thread(), thread_name);
39037602 3086
316670eb
A
3087 /*
3088 * Treat the dedicated starter thread for lo0 as equivalent to
3089 * the driver workloop thread; if net_affinity is enabled for
3090 * the main input thread, associate this starter thread to it
3091 * by binding them with the same affinity tag. This is done
3092 * only once (as we only have one lo_ifp which never goes away.)
3093 */
3094 if (ifp == lo_ifp) {
3095 struct dlil_threading_info *inp = dlil_main_input_thread;
3096 struct thread *tp = current_thread();
3097
3098 lck_mtx_lock(&inp->input_lck);
3099 if (inp->net_affinity) {
3100 u_int32_t tag = inp->tag;
3101
3102 VERIFY(inp->wloop_thr == THREAD_NULL);
3103 VERIFY(inp->poll_thr == THREAD_NULL);
3104 inp->wloop_thr = tp;
3105 lck_mtx_unlock(&inp->input_lck);
3106
3107 /* Associate this thread with the affinity tag */
3108 (void) dlil_affinity_set(tp, tag);
3109 } else {
3110 lck_mtx_unlock(&inp->input_lck);
3111 }
3112 }
cb323159 3113 ifnet_decr_pending_thread_count(ifp);
316670eb 3114
cb323159
A
3115 lck_mtx_lock(&ifp->if_start_lock);
3116 VERIFY(!ifp->if_start_active);
3117 (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
3118 lck_mtx_unlock(&ifp->if_start_lock);
3119 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3120 /* NOTREACHED */
3121 __builtin_unreachable();
3122}
316670eb 3123
cb323159
A
3124__attribute__((noreturn))
3125static void
3126ifnet_start_thread_cont(void *v, wait_result_t wres)
3127{
3128 struct ifnet *ifp = v;
3129 struct ifclassq *ifq = &ifp->if_snd;
316670eb 3130
cb323159
A
3131 lck_mtx_lock(&ifp->if_start_lock);
3132 if (__improbable(wres == THREAD_INTERRUPTED ||
3133 ifp->if_start_thread == THREAD_NULL)) {
3134 goto terminate;
3135 }
316670eb 3136
cb323159 3137 ifp->if_start_active = 1;
316670eb 3138
cb323159
A
3139 /*
3140 * Keep on servicing until there are no more requests.
3141 */
3142 for (;;) {
3143 u_int32_t req = ifp->if_start_req;
3144 if (!IFCQ_IS_EMPTY(ifq) &&
3145 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3146 ifp->if_start_delayed == 0 &&
3147 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
3148 (ifp->if_eflags & IFEF_DELAY_START)) {
3149 ifp->if_start_delayed = 1;
3150 ifnet_start_delayed++;
3151 break;
3152 } else {
3153 ifp->if_start_delayed = 0;
316670eb 3154 }
cb323159 3155 lck_mtx_unlock(&ifp->if_start_lock);
316670eb 3156
cb323159
A
3157 /*
3158 * If no longer attached, don't call start because ifp
3159 * is being destroyed; else hold an IO refcnt to
3160 * prevent the interface from being detached (will be
3161 * released below.)
3162 */
3163 if (!ifnet_datamov_begin(ifp)) {
3164 lck_mtx_lock_spin(&ifp->if_start_lock);
3165 break;
3166 }
3e170ce0 3167
cb323159
A
3168 /* invoke the driver's start routine */
3169 ((*ifp->if_start)(ifp));
3e170ce0 3170
cb323159
A
3171 /*
3172 * Release the io ref count taken above.
3173 */
3174 ifnet_datamov_end(ifp);
3e170ce0 3175
cb323159 3176 lck_mtx_lock_spin(&ifp->if_start_lock);
3e170ce0 3177
cb323159
A
3178 /*
3179 * If there's no new pending request, or if the
3180 * interface output has been disabled, we're done.
3181 */
3182 if (req == ifp->if_start_req ||
3183 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
3184 break;
3185 }
3186 }
3e170ce0 3187
cb323159
A
3188 ifp->if_start_req = 0;
3189 ifp->if_start_active = 0;
316670eb 3190
3e170ce0 3191
cb323159
A
3192 if (__probable(ifp->if_start_thread != THREAD_NULL)) {
3193 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3194 struct timespec delay_start_ts;
3195 struct timespec *ts;
3e170ce0 3196
316670eb
A
3197 /*
3198 * Wake up N ns from now if rate-controlled by TBR, and if
3199 * there are still packets in the send queue which haven't
3200 * been dequeued so far; else sleep indefinitely (ts = NULL)
3201 * until ifnet_start() is called again.
3202 */
3203 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
3204 &ifp->if_start_cycle : NULL);
3205
3e170ce0
A
3206 if (ts == NULL && ifp->if_start_delayed == 1) {
3207 delay_start_ts.tv_sec = 0;
3208 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
3209 ts = &delay_start_ts;
3210 }
3211
0a7de745 3212 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
316670eb 3213 ts = NULL;
0a7de745 3214 }
cb323159
A
3215
3216 if (__improbable(ts != NULL)) {
3217 clock_interval_to_deadline((ts->tv_nsec +
3218 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3219 }
3220
3221 (void) assert_wait_deadline(&ifp->if_start_thread,
3222 THREAD_UNINT, deadline);
3223 lck_mtx_unlock(&ifp->if_start_lock);
3224 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3225 /* NOTREACHED */
3226 } else {
3227terminate:
3228 /* interface is detached? */
3229 ifnet_set_start_cycle(ifp, NULL);
3230 lck_mtx_unlock(&ifp->if_start_lock);
3231 ifnet_purge(ifp);
3232
3233 if (dlil_verbose) {
3234 DLIL_PRINTF("%s: starter thread terminated\n",
3235 if_name(ifp));
3236 }
3237
3238 /* for the extra refcnt from kernel_thread_start() */
3239 thread_deallocate(current_thread());
3240 /* this is the end */
3241 thread_terminate(current_thread());
3242 /* NOTREACHED */
316670eb
A
3243 }
3244
cb323159
A
3245 /* must never get here */
3246 VERIFY(0);
316670eb 3247 /* NOTREACHED */
cb323159 3248 __builtin_unreachable();
316670eb
A
3249}
3250
3251void
3252ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
3253{
0a7de745
A
3254 if (ts == NULL) {
3255 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
3256 } else {
316670eb 3257 *(&ifp->if_start_cycle) = *ts;
0a7de745 3258 }
316670eb 3259
0a7de745 3260 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
cb323159 3261 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
39236c6e 3262 if_name(ifp), ts->tv_nsec);
0a7de745 3263 }
316670eb
A
3264}
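/*
 * Illustrative sketch: a hypothetical caller that wants the starter
 * thread to re-arm every 2 ms while a token bucket regulator is
 * draining if_snd could set the cycle like this; passing NULL clears
 * the interval, reverting to indefinite sleep until the next
 * ifnet_start().
 *
 *	struct timespec ts = { .tv_sec = 0, .tv_nsec = 2 * NSEC_PER_MSEC };
 *	ifnet_set_start_cycle(ifp, &ts);
 */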
3265
cb323159 3266void
316670eb
A
3267ifnet_poll(struct ifnet *ifp)
3268{
3269 /*
3270 * If the poller thread is inactive, signal it to do work.
3271 */
3272 lck_mtx_lock_spin(&ifp->if_poll_lock);
3273 ifp->if_poll_req++;
cb323159
A
3274 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
3275 ifp->if_poll_thread != THREAD_NULL) {
316670eb
A
3276 wakeup_one((caddr_t)&ifp->if_poll_thread);
3277 }
3278 lck_mtx_unlock(&ifp->if_poll_lock);
3279}
3280
cb323159 3281__attribute__((noreturn))
316670eb 3282static void
cb323159 3283ifnet_poll_thread_func(void *v, wait_result_t w)
316670eb
A
3284{
3285#pragma unused(w)
cb323159
A
3286 char thread_name[MAXTHREADNAMESIZE];
3287 struct ifnet *ifp = v;
3288
3289 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3290 VERIFY(current_thread() == ifp->if_poll_thread);
3291
3292 /* construct the name for this thread, and then apply it */
3293 bzero(thread_name, sizeof(thread_name));
3294 (void) snprintf(thread_name, sizeof(thread_name),
3295 "ifnet_poller_%s", ifp->if_xname);
3296 thread_set_thread_name(ifp->if_poll_thread, thread_name);
3297 ifnet_decr_pending_thread_count(ifp);
3298
3299 lck_mtx_lock(&ifp->if_poll_lock);
3300 (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
3301 lck_mtx_unlock(&ifp->if_poll_lock);
3302 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3303 /* NOTREACHED */
3304 __builtin_unreachable();
3305}
3306
3307__attribute__((noreturn))
3308static void
3309ifnet_poll_thread_cont(void *v, wait_result_t wres)
3310{
316670eb
A
3311 struct dlil_threading_info *inp;
3312 struct ifnet *ifp = v;
316670eb 3313 struct ifnet_stat_increment_param s;
cb323159
A
3314 struct timespec start_time;
3315
3316 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
316670eb 3317
0a7de745 3318 bzero(&s, sizeof(s));
cb323159 3319 net_timerclear(&start_time);
316670eb
A
3320
3321 lck_mtx_lock_spin(&ifp->if_poll_lock);
cb323159
A
3322 if (__improbable(wres == THREAD_INTERRUPTED ||
3323 ifp->if_poll_thread == THREAD_NULL)) {
3324 goto terminate;
3325 }
316670eb
A
3326
3327 inp = ifp->if_inp;
3328 VERIFY(inp != NULL);
3329
cb323159
A
3330 ifp->if_poll_flags |= IF_POLLF_RUNNING;
3331
3332 /*
3333 * Keep on servicing until there are no more requests.
3334 */
316670eb 3335 for (;;) {
cb323159
A
3336 struct mbuf *m_head, *m_tail;
3337 u_int32_t m_lim, m_cnt, m_totlen;
3338 u_int16_t req = ifp->if_poll_req;
316670eb 3339
cb323159
A
3340 m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
3341 MAX((qlimit(&inp->rcvq_pkts)), (ifp->if_rxpoll_phiwat << 2));
3342 lck_mtx_unlock(&ifp->if_poll_lock);
316670eb 3343
cb323159
A
3344 /*
3345 * If no longer attached, there's nothing to do;
3346 * else hold an IO refcnt to prevent the interface
3347 * from being detached (will be released below.)
3348 */
3349 if (!ifnet_is_attached(ifp, 1)) {
3350 lck_mtx_lock_spin(&ifp->if_poll_lock);
3351 break;
3352 }
316670eb 3353
cb323159
A
3354 if (dlil_verbose > 1) {
3355 DLIL_PRINTF("%s: polling up to %d pkts, "
3356 "pkts avg %d max %d, wreq avg %d, "
3357 "bytes avg %d\n",
3358 if_name(ifp), m_lim,
3359 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3360 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
316670eb
A
3361 }
3362
cb323159
A
3363 /* invoke the driver's input poll routine */
3364 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3365 &m_cnt, &m_totlen));
316670eb 3366
cb323159
A
3367 if (m_head != NULL) {
3368 VERIFY(m_tail != NULL && m_cnt > 0);
316670eb 3369
cb323159
A
3370 if (dlil_verbose > 1) {
3371 DLIL_PRINTF("%s: polled %d pkts, "
3372 "pkts avg %d max %d, wreq avg %d, "
3373 "bytes avg %d\n",
3374 if_name(ifp), m_cnt,
3375 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3376 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
db609669 3377 }
316670eb 3378
cb323159
A
3379 /* stats are required for extended variant */
3380 s.packets_in = m_cnt;
3381 s.bytes_in = m_totlen;
316670eb 3382
cb323159
A
3383 (void) ifnet_input_common(ifp, m_head, m_tail,
3384 &s, TRUE, TRUE);
3385 } else {
316670eb 3386 if (dlil_verbose > 1) {
cb323159 3387 DLIL_PRINTF("%s: no packets, "
316670eb
A
3388 "pkts avg %d max %d, wreq avg %d, "
3389 "bytes avg %d\n",
cb323159
A
3390 if_name(ifp), ifp->if_rxpoll_pavg,
3391 ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
3392 ifp->if_rxpoll_bavg);
316670eb
A
3393 }
3394
cb323159
A
3395 (void) ifnet_input_common(ifp, NULL, NULL,
3396 NULL, FALSE, TRUE);
3397 }
316670eb 3398
cb323159
A
3399 /* Release the io ref count */
3400 ifnet_decr_iorefcnt(ifp);
39236c6e 3401
cb323159 3402 lck_mtx_lock_spin(&ifp->if_poll_lock);
316670eb 3403
cb323159
A
3404 /* if there's no pending request, we're done */
3405 if (req == ifp->if_poll_req ||
3406 ifp->if_poll_thread == THREAD_NULL) {
3407 break;
3408 }
3409 }
316670eb 3410
cb323159
A
3411 ifp->if_poll_req = 0;
3412 ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
316670eb 3413
cb323159
A
3414 if (ifp->if_poll_thread != THREAD_NULL) {
3415 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3416 struct timespec *ts;
316670eb
A
3417
3418 /*
3419 * Wake up N ns from now, else sleep indefinitely (ts = NULL)
3420 * until ifnet_poll() is called again.
3421 */
3422 ts = &ifp->if_poll_cycle;
0a7de745 3423 if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
316670eb 3424 ts = NULL;
0a7de745 3425 }
cb323159
A
3426
3427 if (ts != NULL) {
3428 clock_interval_to_deadline((ts->tv_nsec +
3429 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3430 }
3431
3432 (void) assert_wait_deadline(&ifp->if_poll_thread,
3433 THREAD_UNINT, deadline);
3434 lck_mtx_unlock(&ifp->if_poll_lock);
3435 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3436 /* NOTREACHED */
3437 } else {
3438terminate:
3439 /* interface is detached (maybe while asleep)? */
3440 ifnet_set_poll_cycle(ifp, NULL);
3441 lck_mtx_unlock(&ifp->if_poll_lock);
3442
3443 if (dlil_verbose) {
3444 DLIL_PRINTF("%s: poller thread terminated\n",
3445 if_name(ifp));
3446 }
3447
3448 /* for the extra refcnt from kernel_thread_start() */
3449 thread_deallocate(current_thread());
3450 /* this is the end */
3451 thread_terminate(current_thread());
3452 /* NOTREACHED */
316670eb
A
3453 }
3454
cb323159
A
3455 /* must never get here */
3456 VERIFY(0);
316670eb 3457 /* NOTREACHED */
cb323159 3458 __builtin_unreachable();
316670eb
A
3459}
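/*
 * Illustrative sketch: the if_input_poll callback invoked by the
 * poller loop above is supplied by an RXPOLL-capable driver at
 * attach time.  A minimal, hypothetical implementation would drain
 * up to max_cnt packets from its receive ring and report the chain
 * and its totals back to the poller:
 *
 *	static void
 *	my_input_poll(ifnet_t ifp, u_int32_t flags, u_int32_t max_cnt,
 *	    mbuf_t *m_head, mbuf_t *m_tail, u_int32_t *cnt, u_int32_t *len)
 *	{
 *		(pull up to max_cnt mbufs off the ring, chain them
 *		 via m_nextpkt, then store the head, tail, packet
 *		 count and total byte count through the out params;
 *		 set *m_head to NULL when the ring is empty)
 *	}
 */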
3460
3461void
3462ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3463{
0a7de745
A
3464 if (ts == NULL) {
3465 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
3466 } else {
316670eb 3467 *(&ifp->if_poll_cycle) = *ts;
0a7de745 3468 }
316670eb 3469
0a7de745 3470 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
cb323159 3471 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
39236c6e 3472 if_name(ifp), ts->tv_nsec);
0a7de745 3473 }
316670eb
A
3474}
3475
3476void
3477ifnet_purge(struct ifnet *ifp)
3478{
0a7de745 3479 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
316670eb 3480 if_qflush(ifp, 0);
0a7de745 3481 }
316670eb
A
3482}
3483
3484void
3485ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3486{
3487 IFCQ_LOCK_ASSERT_HELD(ifq);
3488
0a7de745 3489 if (!(IFCQ_IS_READY(ifq))) {
316670eb 3490 return;
0a7de745 3491 }
316670eb
A
3492
3493 if (IFCQ_TBR_IS_ENABLED(ifq)) {
cb323159
A
3494 struct tb_profile tb = {
3495 .rate = ifq->ifcq_tbr.tbr_rate_raw,
3496 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
3497 };
316670eb
A
3498 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3499 }
3500
3501 ifclassq_update(ifq, ev);
3502}
3503
3504void
3505ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3506{
3507 switch (ev) {
39236c6e 3508 case CLASSQ_EV_LINK_BANDWIDTH:
0a7de745 3509 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
316670eb 3510 ifp->if_poll_update++;
0a7de745 3511 }
316670eb
A
3512 break;
3513
3514 default:
3515 break;
3516 }
3517}
3518
3519errno_t
3520ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3521{
3522 struct ifclassq *ifq;
3523 u_int32_t omodel;
3524 errno_t err;
3525
0a7de745
A
3526 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
3527 return EINVAL;
3528 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3529 return ENXIO;
3530 }
316670eb
A
3531
3532 ifq = &ifp->if_snd;
3533 IFCQ_LOCK(ifq);
3534 omodel = ifp->if_output_sched_model;
3535 ifp->if_output_sched_model = model;
0a7de745 3536 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
316670eb 3537 ifp->if_output_sched_model = omodel;
0a7de745 3538 }
316670eb
A
3539 IFCQ_UNLOCK(ifq);
3540
0a7de745 3541 return err;
316670eb
A
3542}
3543
3544errno_t
3545ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3546{
0a7de745
A
3547 if (ifp == NULL) {
3548 return EINVAL;
3549 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3550 return ENXIO;
3551 }
316670eb
A
3552
3553 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3554
0a7de745 3555 return 0;
316670eb
A
3556}
3557
3558errno_t
3559ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3560{
0a7de745
A
3561 if (ifp == NULL || maxqlen == NULL) {
3562 return EINVAL;
3563 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3564 return ENXIO;
3565 }
316670eb
A
3566
3567 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3568
0a7de745 3569 return 0;
316670eb
A
3570}
3571
3572errno_t
39236c6e 3573ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
316670eb 3574{
39236c6e
A
3575 errno_t err;
3576
0a7de745 3577 if (ifp == NULL || pkts == NULL) {
39236c6e 3578 err = EINVAL;
0a7de745 3579 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
39236c6e 3580 err = ENXIO;
0a7de745 3581 } else {
39236c6e
A
3582 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3583 pkts, NULL);
0a7de745 3584 }
316670eb 3585
0a7de745 3586 return err;
39236c6e 3587}
316670eb 3588
39236c6e
A
3589errno_t
3590ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3591 u_int32_t *pkts, u_int32_t *bytes)
3592{
3593 errno_t err;
3594
3595 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
0a7de745 3596 (pkts == NULL && bytes == NULL)) {
39236c6e 3597 err = EINVAL;
0a7de745 3598 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
39236c6e 3599 err = ENXIO;
0a7de745 3600 } else {
39236c6e 3601 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
0a7de745 3602 }
39236c6e 3603
0a7de745 3604 return err;
316670eb
A
3605}
3606
3607errno_t
3608ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3609{
3610 struct dlil_threading_info *inp;
3611
0a7de745
A
3612 if (ifp == NULL) {
3613 return EINVAL;
3614 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3615 return ENXIO;
3616 }
316670eb 3617
0a7de745 3618 if (maxqlen == 0) {
316670eb 3619 maxqlen = if_rcvq_maxlen;
0a7de745 3620 } else if (maxqlen < IF_RCVQ_MINLEN) {
316670eb 3621 maxqlen = IF_RCVQ_MINLEN;
0a7de745 3622 }
316670eb
A
3623
3624 inp = ifp->if_inp;
3625 lck_mtx_lock(&inp->input_lck);
3626 qlimit(&inp->rcvq_pkts) = maxqlen;
3627 lck_mtx_unlock(&inp->input_lck);
3628
0a7de745 3629 return 0;
316670eb
A
3630}
3631
3632errno_t
3633ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3634{
3635 struct dlil_threading_info *inp;
3636
0a7de745
A
3637 if (ifp == NULL || maxqlen == NULL) {
3638 return EINVAL;
3639 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3640 return ENXIO;
3641 }
316670eb
A
3642
3643 inp = ifp->if_inp;
3644 lck_mtx_lock(&inp->input_lck);
3645 *maxqlen = qlimit(&inp->rcvq_pkts);
3646 lck_mtx_unlock(&inp->input_lck);
0a7de745 3647 return 0;
316670eb
A
3648}
3649
5ba3f43e
A
3650void
3651ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3652 uint16_t delay_timeout)
3653{
3654 if (delay_qlen > 0 && delay_timeout > 0) {
3655 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3656 ifp->if_start_delay_qlen = min(100, delay_qlen);
3657 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3658 /* convert timeout to nanoseconds */
3659 ifp->if_start_delay_timeout *= 1000;
3660 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3661 ifp->if_xname, (uint32_t)delay_qlen,
3662 (uint32_t)delay_timeout);
3663 } else {
3664 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3665 }
3666}
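/*
 * Worked example (illustrative): ifnet_enqueue_multi_setup(ifp, 150,
 * 40000) clamps the queue length to 100 packets and the timeout to
 * 20000 usec, storing 20000 * 1000 = 20,000,000 ns in
 * if_start_delay_timeout; passing 0 for either argument clears
 * IFEF_ENQUEUE_MULTI instead.
 */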
3667
cb323159
A
3668/*
3669 * This function clears the DSCP bits in the IPv4/v6 header pointed to by buf.
3670 * While it's ok for buf not to be 32-bit aligned, the caller must ensure that
3671 * buf holds the full header.
3672 */
3673static __attribute__((noinline)) void
3674ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
3675{
3676 struct ip *ip;
3677 struct ip6_hdr *ip6;
3678 uint8_t lbuf[64] __attribute__((aligned(8)));
3679 uint8_t *p = buf;
3680
3681 if (ip_ver == IPVERSION) {
3682 uint8_t old_tos;
3683 uint32_t sum;
3684
3685 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3686 DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
3687 bcopy(buf, lbuf, sizeof(struct ip));
3688 p = lbuf;
3689 }
3690 ip = (struct ip *)(void *)p;
3691 if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
3692 return;
3693 }
3694
3695 DTRACE_IP1(clear__v4, struct ip *, ip);
3696 old_tos = ip->ip_tos;
3697 ip->ip_tos &= IPTOS_ECN_MASK;
3698 sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
3699 sum = (sum >> 16) + (sum & 0xffff);
3700 ip->ip_sum = (uint16_t)(sum & 0xffff);
3701
3702 if (__improbable(p == lbuf)) {
3703 bcopy(lbuf, buf, sizeof(struct ip));
3704 }
3705 } else {
3706 uint32_t flow;
3707 ASSERT(ip_ver == IPV6_VERSION);
3708
3709 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3710 DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
3711 bcopy(buf, lbuf, sizeof(struct ip6_hdr));
3712 p = lbuf;
3713 }
3714 ip6 = (struct ip6_hdr *)(void *)p;
3715 flow = ntohl(ip6->ip6_flow);
3716 if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
3717 return;
3718 }
3719
3720 DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
3721 ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
3722
3723 if (__improbable(p == lbuf)) {
3724 bcopy(lbuf, buf, sizeof(struct ip6_hdr));
3725 }
3726 }
3727}
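/*
 * Worked example (illustrative, little-endian host) of the
 * incremental IPv4 checksum update above, in the spirit of RFC 1624:
 * clearing DSCP EF (ip_tos 0xb8 -> 0x00) with a stored ip_sum of
 * 0x613c gives
 *
 *	sum  = 0x613c + htons(0xb8) - htons(0x00)
 *	     = 0x613c + 0xb800 = 0x1193c
 *	fold = (0x1193c >> 16) + (0x1193c & 0xffff) = 0x193d
 *
 * so ip_sum becomes 0x193d without recomputing the checksum over the
 * whole header.
 */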
3728
5ba3f43e 3729static inline errno_t
cb323159
A
3730ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
3731 boolean_t *pdrop)
316670eb 3732{
5ba3f43e
A
3733 volatile uint64_t *fg_ts = NULL;
3734 volatile uint64_t *rt_ts = NULL;
3e170ce0 3735 struct timespec now;
5ba3f43e
A
3736 u_int64_t now_nsec = 0;
3737 int error = 0;
cb323159
A
3738 uint8_t *mcast_buf = NULL;
3739 uint8_t ip_ver;
316670eb 3740
5ba3f43e
A
3741 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3742
3743 /*
3744 * If packet already carries a timestamp, either from dlil_output()
3745 * or from flowswitch, use it here. Otherwise, record timestamp.
3746 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3747 * the timestamp value is used internally there.
3748 */
cb323159 3749 switch (p->cp_ptype) {
5ba3f43e 3750 case QP_MBUF:
cb323159
A
3751 ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
3752 ASSERT(p->cp_mbuf->m_nextpkt == NULL);
5ba3f43e 3753
cb323159
A
3754 if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3755 p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
5ba3f43e
A
3756 nanouptime(&now);
3757 net_timernsec(&now, &now_nsec);
cb323159 3758 p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
5ba3f43e 3759 }
cb323159 3760 p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
5ba3f43e
A
3761 /*
3762 * If the packet service class is not background,
3763 * update the timestamp to indicate recent activity
3764 * on a foreground socket.
3765 */
cb323159
A
3766 if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3767 p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3768 if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
3769 PKTF_SO_BACKGROUND)) {
5ba3f43e 3770 ifp->if_fg_sendts = _net_uptime;
0a7de745 3771 if (fg_ts != NULL) {
5ba3f43e 3772 *fg_ts = _net_uptime;
0a7de745 3773 }
5ba3f43e 3774 }
cb323159 3775 if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
5ba3f43e 3776 ifp->if_rt_sendts = _net_uptime;
0a7de745 3777 if (rt_ts != NULL) {
5ba3f43e 3778 *rt_ts = _net_uptime;
0a7de745 3779 }
5ba3f43e
A
3780 }
3781 }
cb323159
A
3782
3783 /*
3784 * Some Wi-Fi AP implementations do not correctly handle
3785 * multicast IP packets with DSCP bits set (radr://9331522).
3786 * As a workaround we clear the DSCP bits and set the service
3787 * class to BE.
3788 */
3789 if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
3790 IFNET_IS_WIFI_INFRA(ifp)) {
3791 size_t len = mbuf_len(p->cp_mbuf), hlen;
3792 struct ether_header *eh;
3793 boolean_t pullup = FALSE;
3794 uint16_t etype;
3795
3796 if (__improbable(len < sizeof(struct ether_header))) {
3797 DTRACE_IP1(small__ether, size_t, len);
3798 if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
3799 sizeof(struct ether_header))) == NULL) {
3800 return ENOMEM;
3801 }
3802 }
3803 eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
3804 etype = ntohs(eh->ether_type);
3805 if (etype == ETHERTYPE_IP) {
3806 hlen = sizeof(struct ether_header) +
3807 sizeof(struct ip);
3808 if (len < hlen) {
3809 DTRACE_IP1(small__v4, size_t, len);
3810 pullup = TRUE;
3811 }
3812 ip_ver = IPVERSION;
3813 } else if (etype == ETHERTYPE_IPV6) {
3814 hlen = sizeof(struct ether_header) +
3815 sizeof(struct ip6_hdr);
3816 if (len < hlen) {
3817 DTRACE_IP1(small__v6, size_t, len);
3818 pullup = TRUE;
3819 }
3820 ip_ver = IPV6_VERSION;
3821 } else {
3822 DTRACE_IP1(invalid__etype, uint16_t, etype);
3823 break;
3824 }
3825 if (pullup) {
3826 if ((p->cp_mbuf = m_pullup(p->cp_mbuf, hlen)) ==
3827 NULL) {
3828 return ENOMEM;
3829 }
3830
3831 eh = (struct ether_header *)mbuf_data(
3832 p->cp_mbuf);
3833 }
3834 mbuf_set_service_class(p->cp_mbuf, MBUF_SC_BE);
3835 mcast_buf = (uint8_t *)(eh + 1);
3836 /*
3837 * ifnet_mcast_clear_dscp() will finish the work below.
3838 * Note that the pullups above ensure that mcast_buf
3839 * points to a full IP header.
3840 */
3841 }
5ba3f43e 3842 break;
316670eb 3843
5ba3f43e
A
3844
3845 default:
3846 VERIFY(0);
3847 /* NOTREACHED */
cb323159
A
3848 __builtin_unreachable();
3849 }
3850
3851 if (mcast_buf != NULL) {
3852 ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
5ba3f43e 3853 }
3e170ce0
A
3854
3855 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
5ba3f43e
A
3856 if (now_nsec == 0) {
3857 nanouptime(&now);
3858 net_timernsec(&now, &now_nsec);
3859 }
3e170ce0
A
3860 /*
3861 * If the driver chose to delay the start callback for
3862 * coalescing multiple packets, then use the following
3863 * heuristics to make sure that the start callback will
3864 * be delayed only when bulk data transfer is detected:
3865 * 1. The number of packets enqueued in (delay_win * 2) is
3866 * greater than or equal to the delay qlen.
3867 * 2. If delay_start is enabled it will stay enabled for
3868 * another 10 idle windows. This is to take into account
3869 * variable RTT and burst traffic.
3870 * 3. If the time elapsed since the last enqueue is more
3871 * than 200ms we disable delaying the start callback. This
3872 * is to take idle time into account.
39037602 3873 */
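 /*
  * Worked example (illustrative): with if_start_delay_timeout at
  * 10 ms, dwin below is 20 ms.  Enqueues landing inside the same
  * 20 ms window only bump if_start_delay_cnt; once the count
  * reaches if_start_delay_qlen, IFEF_DELAY_START is set and start
  * callbacks are deferred.  A gap of 200 ms or more since the
  * window began resets the state and clears IFEF_DELAY_START.
  */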
3e170ce0
A
3874 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3875 if (ifp->if_start_delay_swin > 0) {
3876 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3877 ifp->if_start_delay_cnt++;
3878 } else if ((now_nsec - ifp->if_start_delay_swin)
3879 >= (200 * 1000 * 1000)) {
3880 ifp->if_start_delay_swin = now_nsec;
3881 ifp->if_start_delay_cnt = 1;
3882 ifp->if_start_delay_idle = 0;
3883 if (ifp->if_eflags & IFEF_DELAY_START) {
3884 ifp->if_eflags &=
3885 ~(IFEF_DELAY_START);
3886 ifnet_delay_start_disabled++;
3887 }
3888 } else {
3889 if (ifp->if_start_delay_cnt >=
3890 ifp->if_start_delay_qlen) {
3891 ifp->if_eflags |= IFEF_DELAY_START;
3892 ifp->if_start_delay_idle = 0;
3893 } else {
3894 if (ifp->if_start_delay_idle >= 10) {
cb323159
A
3895 ifp->if_eflags &=
3896 ~(IFEF_DELAY_START);
3e170ce0
A
3897 ifnet_delay_start_disabled++;
3898 } else {
3899 ifp->if_start_delay_idle++;
3900 }
39037602 3901 }
3e170ce0
A
3902 ifp->if_start_delay_swin = now_nsec;
3903 ifp->if_start_delay_cnt = 1;
3904 }
3905 } else {
3906 ifp->if_start_delay_swin = now_nsec;
3907 ifp->if_start_delay_cnt = 1;
3908 ifp->if_start_delay_idle = 0;
3909 ifp->if_eflags &= ~(IFEF_DELAY_START);
3910 }
3911 } else {
3912 ifp->if_eflags &= ~(IFEF_DELAY_START);
3913 }
3914
cb323159
A
3915 /* enqueue the packet (caller consumes object) */
3916 error = ifclassq_enqueue(&ifp->if_snd, p, pdrop);
316670eb
A
3917
3918 /*
3919 * Tell the driver to start dequeueing; do this even when the queue
3920 * for the packet is suspended (EQSUSPENDED), as the driver could still
3921 * be dequeueing from other unsuspended queues.
3922 */
3e170ce0 3923 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
0a7de745 3924 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
316670eb 3925 ifnet_start(ifp);
0a7de745 3926 }
316670eb 3927
cb323159
A
3928 return error;
3929}
3930
3931int
3932ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
3933{
3934 struct ifnet *ifp = handle;
3935 boolean_t pdrop; /* dummy */
3936 uint32_t i;
3937
3938 ASSERT(n_pkts >= 1);
3939 for (i = 0; i < n_pkts - 1; i++) {
3940 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
3941 FALSE, &pdrop);
3942 }
3943 /* flush with the last packet */
3944 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);
3945
3946 return 0;
3947}
3948
3949static inline errno_t
3950ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
3951 boolean_t *pdrop)
3952{
3953 if (ifp->if_output_netem != NULL) {
3954 return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
3955 } else {
3956 return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
3957 }
316670eb
A
3958}
3959
5ba3f43e
A
3960errno_t
3961ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3962{
3963 boolean_t pdrop;
0a7de745 3964 return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
5ba3f43e
A
3965}
3966
3967errno_t
3968ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3969 boolean_t *pdrop)
3970{
cb323159
A
3971 classq_pkt_t pkt;
3972
5ba3f43e
A
3973 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3974 m->m_nextpkt != NULL) {
3975 if (m != NULL) {
3976 m_freem_list(m);
3977 *pdrop = TRUE;
3978 }
0a7de745 3979 return EINVAL;
5ba3f43e
A
3980 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3981 !IF_FULLY_ATTACHED(ifp)) {
3982 /* flag tested without lock for performance */
3983 m_freem(m);
3984 *pdrop = TRUE;
0a7de745 3985 return ENXIO;
5ba3f43e
A
3986 } else if (!(ifp->if_flags & IFF_UP)) {
3987 m_freem(m);
3988 *pdrop = TRUE;
0a7de745 3989 return ENETDOWN;
5ba3f43e
A
3990 }
3991
cb323159
A
3992 CLASSQ_PKT_INIT_MBUF(&pkt, m);
3993 return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
5ba3f43e
A
3994}
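/*
 * Illustrative sketch: a driver or virtual interface output path
 * hands one framed packet at a time to the transmit scheduler.  The
 * mbuf is consumed even on error, so it must not be touched after
 * the call; callers that need to know whether a failure freed the
 * packet can use ifnet_enqueue_mbuf() and inspect *pdrop.
 *
 *	errno_t err = ifnet_enqueue(ifp, m);
 *	if (err != 0 && err != EQFULL && err != EQSUSPENDED) {
 *		(count the drop; m is gone)
 *	}
 */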
3995
3996
316670eb
A
3997errno_t
3998ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3999{
fe8ab488 4000 errno_t rc;
cb323159
A
4001 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4002
0a7de745
A
4003 if (ifp == NULL || mp == NULL) {
4004 return EINVAL;
4005 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4006 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4007 return ENXIO;
4008 }
4009 if (!ifnet_is_attached(ifp, 1)) {
4010 return ENXIO;
4011 }
5ba3f43e 4012
39037602 4013 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
cb323159
A
4014 &pkt, NULL, NULL, NULL);
4015 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
fe8ab488 4016 ifnet_decr_iorefcnt(ifp);
cb323159 4017 *mp = pkt.cp_mbuf;
0a7de745 4018 return rc;
316670eb
A
4019}
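/*
 * Illustrative sketch: a driver's if_start callback, running on the
 * starter thread above, conventionally drains if_snd with
 * ifnet_dequeue() until the scheduler runs dry; my_tx_submit() is a
 * hypothetical hardware submit routine.
 *
 *	static void
 *	my_if_start(ifnet_t ifp)
 *	{
 *		mbuf_t m;
 *
 *		while (ifnet_dequeue(ifp, &m) == 0) {
 *			if (my_tx_submit(ifp, m) != 0) {
 *				m_freem(m);
 *				break;
 *			}
 *		}
 *	}
 */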
4020
4021errno_t
4022ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
4023 struct mbuf **mp)
4024{
fe8ab488 4025 errno_t rc;
cb323159
A
4026 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4027
0a7de745
A
4028 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
4029 return EINVAL;
4030 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4031 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4032 return ENXIO;
4033 }
4034 if (!ifnet_is_attached(ifp, 1)) {
4035 return ENXIO;
4036 }
39037602 4037
5ba3f43e 4038 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
cb323159
A
4039 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
4040 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
fe8ab488 4041 ifnet_decr_iorefcnt(ifp);
cb323159 4042 *mp = pkt.cp_mbuf;
0a7de745 4043 return rc;
316670eb
A
4044}
4045
4046errno_t
39037602
A
4047ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
4048 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
316670eb 4049{
fe8ab488 4050 errno_t rc;
cb323159
A
4051 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4052 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4053
0a7de745
A
4054 if (ifp == NULL || head == NULL || pkt_limit < 1) {
4055 return EINVAL;
4056 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4057 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4058 return ENXIO;
4059 }
4060 if (!ifnet_is_attached(ifp, 1)) {
4061 return ENXIO;
4062 }
39037602
A
4063
4064 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
cb323159
A
4065 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
4066 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
39037602 4067 ifnet_decr_iorefcnt(ifp);
cb323159
A
4068 *head = pkt_head.cp_mbuf;
4069 if (tail != NULL) {
4070 *tail = pkt_tail.cp_mbuf;
4071 }
0a7de745 4072 return rc;
39037602
A
4073}
4074
4075errno_t
4076ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
4077 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4078{
4079 errno_t rc;
cb323159
A
4080 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4081 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4082
0a7de745
A
4083 if (ifp == NULL || head == NULL || byte_limit < 1) {
4084 return EINVAL;
4085 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4086 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4087 return ENXIO;
4088 }
4089 if (!ifnet_is_attached(ifp, 1)) {
4090 return ENXIO;
4091 }
39037602
A
4092
4093 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
cb323159
A
4094 byte_limit, &pkt_head, &pkt_tail, cnt, len);
4095 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
fe8ab488 4096 ifnet_decr_iorefcnt(ifp);
cb323159
A
4097 *head = pkt_head.cp_mbuf;
4098 if (tail != NULL) {
4099 *tail = pkt_tail.cp_mbuf;
4100 }
0a7de745 4101 return rc;
316670eb
A
4102}
4103
4104errno_t
4105ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
39037602 4106 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
316670eb
A
4107 u_int32_t *len)
4108{
fe8ab488 4109 errno_t rc;
cb323159
A
4110 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4111 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4112
39037602 4113 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
0a7de745
A
4114 !MBUF_VALID_SC(sc)) {
4115 return EINVAL;
4116 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4117 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4118 return ENXIO;
4119 }
4120 if (!ifnet_is_attached(ifp, 1)) {
4121 return ENXIO;
4122 }
5ba3f43e
A
4123
4124 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
cb323159
A
4125 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
4126 cnt, len);
4127 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
fe8ab488 4128 ifnet_decr_iorefcnt(ifp);
cb323159
A
4129 *head = pkt_head.cp_mbuf;
4130 if (tail != NULL) {
4131 *tail = pkt_tail.cp_mbuf;
4132 }
0a7de745 4133 return rc;
316670eb
A
4134}
4135
5ba3f43e 4136#if !CONFIG_EMBEDDED
39236c6e
A
4137errno_t
4138ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
4139 const struct sockaddr *dest, const char *dest_linkaddr,
4140 const char *frame_type, u_int32_t *pre, u_int32_t *post)
4141{
0a7de745 4142 if (pre != NULL) {
39236c6e 4143 *pre = 0;
0a7de745
A
4144 }
4145 if (post != NULL) {
39236c6e 4146 *post = 0;
0a7de745 4147 }
39236c6e 4148
0a7de745 4149 return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
39236c6e 4150}
5ba3f43e 4151#endif /* !CONFIG_EMBEDDED */
39236c6e 4152
cb323159
A
4153static boolean_t
4154packet_has_vlan_tag(struct mbuf * m)
4155{
4156 u_int tag = 0;
4157
4158 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
4159 tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
4160 if (tag == 0) {
4161 /* the packet is just priority-tagged, clear the bit */
4162 m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
4163 }
4164 }
4165 return tag != 0;
4166}
4167
316670eb
A
4168static int
4169dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
4170 char **frame_header_p, protocol_family_t protocol_family)
4171{
cb323159
A
4172 boolean_t is_vlan_packet = FALSE;
4173 struct ifnet_filter *filter;
4174 struct mbuf *m = *m_p;
4175
4176 is_vlan_packet = packet_has_vlan_tag(m);
316670eb
A
4177
4178 /*
4179 * Pass the inbound packet to the interface filters
6d2010ae
A
4180 */
4181 lck_mtx_lock_spin(&ifp->if_flt_lock);
4182 /* prevent filter list from changing in case we drop the lock */
4183 if_flt_monitor_busy(ifp);
2d21ac55
A
4184 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4185 int result;
4186
cb323159
A
4187 /* exclude VLAN packets from external filters PR-3586856 */
4188 if (is_vlan_packet &&
4189 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4190 continue;
4191 }
4192
6d2010ae
A
4193 if (!filter->filt_skip && filter->filt_input != NULL &&
4194 (filter->filt_protocol == 0 ||
4195 filter->filt_protocol == protocol_family)) {
4196 lck_mtx_unlock(&ifp->if_flt_lock);
4197
2d21ac55 4198 result = (*filter->filt_input)(filter->filt_cookie,
6d2010ae
A
4199 ifp, protocol_family, m_p, frame_header_p);
4200
4201 lck_mtx_lock_spin(&ifp->if_flt_lock);
2d21ac55 4202 if (result != 0) {
6d2010ae
A
4203 /* we're done with the filter list */
4204 if_flt_monitor_unbusy(ifp);
4205 lck_mtx_unlock(&ifp->if_flt_lock);
0a7de745 4206 return result;
2d21ac55
A
4207 }
4208 }
4209 }
6d2010ae
A
4210 /* we're done with the filter list */
4211 if_flt_monitor_unbusy(ifp);
4212 lck_mtx_unlock(&ifp->if_flt_lock);
b7266188
A
4213
4214 /*
6d2010ae 4215 * Strip away the M_PROTO1 bit prior to sending the packet up the stack, as
b7266188
A
4216 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
4217 */
0a7de745 4218 if (*m_p != NULL) {
b7266188 4219 (*m_p)->m_flags &= ~M_PROTO1;
0a7de745 4220 }
b7266188 4221
0a7de745 4222 return 0;
1c79356b
A
4223}
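/*
 * Illustrative sketch: the filters walked above are registered via
 * the kpi_interfacefilter.h KPI.  A hypothetical filter that only
 * watches inbound IPv4 traffic could attach like this; returning 0
 * from the input callback passes the packet along, EJUSTRETURN means
 * the filter consumed it, and any other error makes the caller free
 * it.
 *
 *	static errno_t
 *	my_filt_input(void *cookie, ifnet_t ifp, protocol_family_t pf,
 *	    mbuf_t *data, char **frame_ptr)
 *	{
 *		return (0);
 *	}
 *
 *	struct iff_filter filt = {
 *		.iff_name = "com.example.filter",
 *		.iff_protocol = PF_INET,
 *		.iff_input = my_filt_input,
 *	};
 *	interface_filter_t filt_ref;
 *	errno_t err = iflt_attach(ifp, &filt, &filt_ref);
 *
 * Note that, per the VLAN exclusion above, externally attached
 * filters are skipped for tagged packets unless DLIL_IFF_INTERNAL
 * is set.
 */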
4224
6d2010ae
A
4225static int
4226dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
4227 protocol_family_t protocol_family)
4228{
cb323159
A
4229 boolean_t is_vlan_packet;
4230 struct ifnet_filter *filter;
4231 struct mbuf *m = *m_p;
4232
4233 is_vlan_packet = packet_has_vlan_tag(m);
6d2010ae
A
4234
4235 /*
4236 * Pass the outbound packet to the interface filters
4237 */
4238 lck_mtx_lock_spin(&ifp->if_flt_lock);
4239 /* prevent filter list from changing in case we drop the lock */
4240 if_flt_monitor_busy(ifp);
4241 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4242 int result;
4243
cb323159
A
4244 /* exclude VLAN packets from external filters PR-3586856 */
4245 if (is_vlan_packet &&
4246 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4247 continue;
4248 }
4249
6d2010ae
A
4250 if (!filter->filt_skip && filter->filt_output != NULL &&
4251 (filter->filt_protocol == 0 ||
4252 filter->filt_protocol == protocol_family)) {
4253 lck_mtx_unlock(&ifp->if_flt_lock);
4254
4255 result = filter->filt_output(filter->filt_cookie, ifp,
4256 protocol_family, m_p);
4257
4258 lck_mtx_lock_spin(&ifp->if_flt_lock);
4259 if (result != 0) {
4260 /* we're done with the filter list */
4261 if_flt_monitor_unbusy(ifp);
4262 lck_mtx_unlock(&ifp->if_flt_lock);
0a7de745 4263 return result;
6d2010ae
A
4264 }
4265 }
4266 }
4267 /* we're done with the filter list */
4268 if_flt_monitor_unbusy(ifp);
4269 lck_mtx_unlock(&ifp->if_flt_lock);
4270
0a7de745 4271 return 0;
6d2010ae
A
4272}
4273
2d21ac55
A
4274static void
4275dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
1c79356b 4276{
2d21ac55 4277 int error;
1c79356b 4278
2d21ac55
A
4279 if (ifproto->proto_kpi == kProtoKPI_v1) {
4280 /* Version 1 protocols get one packet at a time */
4281 while (m != NULL) {
0a7de745
A
4282 char * frame_header;
4283 mbuf_t next_packet;
6d2010ae 4284
2d21ac55
A
4285 next_packet = m->m_nextpkt;
4286 m->m_nextpkt = NULL;
39236c6e
A
4287 frame_header = m->m_pkthdr.pkt_hdr;
4288 m->m_pkthdr.pkt_hdr = NULL;
6d2010ae
A
4289 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
4290 ifproto->protocol_family, m, frame_header);
0a7de745 4291 if (error != 0 && error != EJUSTRETURN) {
2d21ac55 4292 m_freem(m);
0a7de745 4293 }
2d21ac55
A
4294 m = next_packet;
4295 }
6d2010ae 4296 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
2d21ac55
A
4297 /* Version 2 protocols support packet lists */
4298 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
6d2010ae 4299 ifproto->protocol_family, m);
0a7de745 4300 if (error != 0 && error != EJUSTRETURN) {
2d21ac55 4301 m_freem_list(m);
0a7de745 4302 }
91447636 4303 }
2d21ac55 4304}
1c79356b 4305
316670eb
A
4306static void
4307dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
cb323159 4308 struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
316670eb
A
4309{
4310 struct ifnet_stat_increment_param *d = &inp->stats;
4311
0a7de745 4312 if (s->packets_in != 0) {
316670eb 4313 d->packets_in += s->packets_in;
0a7de745
A
4314 }
4315 if (s->bytes_in != 0) {
316670eb 4316 d->bytes_in += s->bytes_in;
0a7de745
A
4317 }
4318 if (s->errors_in != 0) {
316670eb 4319 d->errors_in += s->errors_in;
0a7de745 4320 }
316670eb 4321
0a7de745 4322 if (s->packets_out != 0) {
316670eb 4323 d->packets_out += s->packets_out;
0a7de745
A
4324 }
4325 if (s->bytes_out != 0) {
316670eb 4326 d->bytes_out += s->bytes_out;
0a7de745
A
4327 }
4328 if (s->errors_out != 0) {
316670eb 4329 d->errors_out += s->errors_out;
0a7de745 4330 }
316670eb 4331
0a7de745 4332 if (s->collisions != 0) {
316670eb 4333 d->collisions += s->collisions;
0a7de745
A
4334 }
4335 if (s->dropped != 0) {
316670eb 4336 d->dropped += s->dropped;
0a7de745 4337 }
316670eb 4338
0a7de745 4339 if (poll) {
cb323159 4340 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
0a7de745 4341 }
316670eb
A
4342}
4343
cb323159 4344static boolean_t
316670eb
A
4345dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
4346{
4347 struct ifnet_stat_increment_param *s = &inp->stats;
4348
4349 /*
4350 * Use of atomic operations is unavoidable here because
4351 * these stats may also be incremented elsewhere via KPIs.
4352 */
4353 if (s->packets_in != 0) {
4354 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
4355 s->packets_in = 0;
4356 }
4357 if (s->bytes_in != 0) {
4358 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
4359 s->bytes_in = 0;
4360 }
4361 if (s->errors_in != 0) {
4362 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
4363 s->errors_in = 0;
4364 }
4365
4366 if (s->packets_out != 0) {
4367 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
4368 s->packets_out = 0;
4369 }
4370 if (s->bytes_out != 0) {
4371 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
4372 s->bytes_out = 0;
4373 }
4374 if (s->errors_out != 0) {
4375 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
4376 s->errors_out = 0;
4377 }
4378
4379 if (s->collisions != 0) {
4380 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
4381 s->collisions = 0;
4382 }
4383 if (s->dropped != 0) {
4384 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
4385 s->dropped = 0;
4386 }
39037602 4387
316670eb
A
4388 /*
4389 * No need for atomic operations as they are modified here
4390 * only from within the DLIL input thread context.
4391 */
cb323159
A
4392 if (ifp->if_poll_tstats.packets != 0) {
4393 ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
4394 ifp->if_poll_tstats.packets = 0;
316670eb 4395 }
cb323159
A
4396 if (ifp->if_poll_tstats.bytes != 0) {
4397 ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
4398 ifp->if_poll_tstats.bytes = 0;
316670eb 4399 }
cb323159
A
4400
4401 return ifp->if_data_threshold != 0;
316670eb
A
4402}
4403
4404__private_extern__ void
4405dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
4406{
0a7de745
A
4407 return dlil_input_packet_list_common(ifp, m, 0,
4408 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
316670eb
A
4409}
4410
2d21ac55 4411__private_extern__ void
316670eb
A
4412dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
4413 u_int32_t cnt, ifnet_model_t mode)
4414{
0a7de745 4415 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
316670eb
A
4416}
4417
4418static void
4419dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
4420 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
2d21ac55 4421{
d9a64523
A
4422 int error = 0;
4423 protocol_family_t protocol_family;
4424 mbuf_t next_packet;
0a7de745 4425 ifnet_t ifp = ifp_param;
d9a64523 4426 char *frame_header = NULL;
0a7de745 4427 struct if_proto *last_ifproto = NULL;
d9a64523
A
4428 mbuf_t pkt_first = NULL;
4429 mbuf_t *pkt_next = NULL;
4430 u_int32_t poll_thresh = 0, poll_ival = 0;
2d21ac55 4431
39037602 4432 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2d21ac55 4433
316670eb 4434 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
0a7de745 4435 (poll_ival = if_rxpoll_interval_pkts) > 0) {
316670eb 4436 poll_thresh = cnt;
0a7de745 4437 }
6d2010ae 4438
2d21ac55 4439 while (m != NULL) {
6d2010ae
A
4440 struct if_proto *ifproto = NULL;
4441 int iorefcnt = 0;
0a7de745 4442 uint32_t pktf_mask; /* pkt flags to preserve */
2d21ac55 4443
0a7de745 4444 if (ifp_param == NULL) {
2d21ac55 4445 ifp = m->m_pkthdr.rcvif;
0a7de745 4446 }
6d2010ae 4447
cb323159
A
4448 if ((ifp->if_eflags & IFEF_RXPOLL) &&
4449 (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
0a7de745 4450 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
316670eb 4451 ifnet_poll(ifp);
0a7de745 4452 }
316670eb 4453
6d2010ae 4454 /* Check if this mbuf looks valid */
316670eb 4455 MBUF_INPUT_CHECK(m, ifp);
6d2010ae
A
4456
4457 next_packet = m->m_nextpkt;
4458 m->m_nextpkt = NULL;
39236c6e
A
4459 frame_header = m->m_pkthdr.pkt_hdr;
4460 m->m_pkthdr.pkt_hdr = NULL;
2d21ac55 4461
316670eb
A
4462 /*
4463 * Get an IO reference count if the interface is not
4464 * loopback (lo0) and it is attached; lo0 never goes
4465 * away, so optimize for that.
6d2010ae
A
4466 */
4467 if (ifp != lo_ifp) {
cb323159 4468 if (!ifnet_datamov_begin(ifp)) {
6d2010ae
A
4469 m_freem(m);
4470 goto next;
4471 }
4472 iorefcnt = 1;
5ba3f43e
A
4473 /*
4474 * Preserve the time stamp if it was set.
4475 */
4476 pktf_mask = PKTF_TS_VALID;
39236c6e
A
4477 } else {
4478 /*
4479 * If this arrived on lo0, preserve interface addr
4480 * info to allow for connectivity between loopback
4481 * and local interface addresses.
4482 */
0a7de745 4483 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
2d21ac55 4484 }
d41d1dae 4485
39236c6e
A
4486 /* make sure packet comes in clean */
4487 m_classifier_init(m, pktf_mask);
4488
316670eb 4489 ifp_inc_traffic_class_in(ifp, m);
d41d1dae 4490
2d21ac55 4491 /* find which protocol family this packet is for */
6d2010ae 4492 ifnet_lock_shared(ifp);
2d21ac55 4493 error = (*ifp->if_demux)(ifp, m, frame_header,
6d2010ae
A
4494 &protocol_family);
4495 ifnet_lock_done(ifp);
2d21ac55 4496 if (error != 0) {
0a7de745 4497 if (error == EJUSTRETURN) {
2d21ac55 4498 goto next;
0a7de745 4499 }
2d21ac55
A
4500 protocol_family = 0;
4501 }
6d2010ae 4502
d9a64523
A
4503 pktap_input(ifp, protocol_family, m, frame_header);
4504
4505 /* Drop v4 packets received on CLAT46 enabled interface */
4506 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4507 m_freem(m);
4508 ip6stat.ip6s_clat464_in_v4_drop++;
4509 goto next;
4510 }
4511
4512 /* Translate the packet if it is received on CLAT interface */
4513 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4514 && dlil_is_clat_needed(protocol_family, m)) {
4515 char *data = NULL;
4516 struct ether_header eh;
4517 struct ether_header *ehp = NULL;
4518
4519 if (ifp->if_type == IFT_ETHER) {
4520 ehp = (struct ether_header *)(void *)frame_header;
4521 /* Skip RX Ethernet packets if they are not IPV6 */
0a7de745 4522 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
d9a64523 4523 goto skip_clat;
0a7de745 4524 }
d9a64523
A
4525
4526 /* Keep a copy of frame_header for Ethernet packets */
4527 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4528 }
4529 error = dlil_clat64(ifp, &protocol_family, &m);
4530 data = (char *) mbuf_data(m);
4531 if (error != 0) {
4532 m_freem(m);
4533 ip6stat.ip6s_clat464_in_drop++;
4534 goto next;
4535 }
4536 /* Native v6 should be No-op */
0a7de745 4537 if (protocol_family != PF_INET) {
d9a64523 4538 goto skip_clat;
0a7de745 4539 }
d9a64523
A
4540
4541 /* Do this only for translated v4 packets. */
4542 switch (ifp->if_type) {
4543 case IFT_CELLULAR:
4544 frame_header = data;
4545 break;
4546 case IFT_ETHER:
4547 /*
4548 * Drop if the mbuf doesn't have enough
4549 * space for Ethernet header
4550 */
4551 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4552 m_free(m);
4553 ip6stat.ip6s_clat464_in_drop++;
4554 goto next;
4555 }
4556 /*
4557 * Set frame_header to the ETHER_HDR_LEN bytes
4558 * preceding the data pointer, and change
4559 * the ether_type too.
4560 */
4561 frame_header = data - ETHER_HDR_LEN;
4562 eh.ether_type = htons(ETHERTYPE_IP);
4563 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4564 break;
4565 }
4566 }
4567skip_clat:
39236c6e 4568 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
0a7de745 4569 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
39236c6e
A
4570 dlil_input_cksum_dbg(ifp, m, frame_header,
4571 protocol_family);
0a7de745 4572 }
39236c6e
A
4573 /*
4574 * For partial checksum offload, we expect the driver to
4575 * set the start offset indicating the start of the span
4576 * that is covered by the hardware-computed checksum;
4577 * adjust this start offset accordingly because the data
4578 * pointer has been advanced beyond the link-layer header.
4579 *
cb323159
A
4580 * Virtual lan types (bridge, vlan, bond) can call
4581 * dlil_input_packet_list() with the same packet with the
4582 * checksum flags set. Set a flag indicating that the
4583 * adjustment has already been done.
39236c6e 4584 */
cb323159
A
4585 if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
4586 /* adjustment has already been done */
4587 } else if ((m->m_pkthdr.csum_flags &
39236c6e
A
4588 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4589 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4590 int adj;
39236c6e
A
4591 if (frame_header == NULL ||
4592 frame_header < (char *)mbuf_datastart(m) ||
4593 frame_header > (char *)m->m_data ||
4594 (adj = (m->m_data - frame_header)) >
4595 m->m_pkthdr.csum_rx_start) {
4596 m->m_pkthdr.csum_data = 0;
4597 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4598 hwcksum_in_invalidated++;
4599 } else {
4600 m->m_pkthdr.csum_rx_start -= adj;
4601 }
cb323159
A
4602 /* make sure we don't adjust more than once */
4603 m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
39236c6e 4604 }
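		/*
		 * Worked example (illustrative): for an untagged Ethernet
		 * frame whose driver computed the partial sum starting at
		 * the IP header, csum_rx_start is 14 relative to
		 * frame_header.  By this point m_data has been advanced
		 * past the 14-byte link-layer header, so adj is 14 and
		 * csum_rx_start becomes 0, i.e. the span now starts
		 * exactly at m_data.
		 */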
0a7de745 4605 if (clat_debug) {
d9a64523 4606 pktap_input(ifp, protocol_family, m, frame_header);
0a7de745 4607 }
316670eb 4608
0a7de745 4609 if (m->m_flags & (M_BCAST | M_MCAST)) {
6d2010ae 4610 atomic_add_64(&ifp->if_imcasts, 1);
0a7de745 4611 }
1c79356b 4612
cb323159
A
4613 /* run interface filters */
4614 error = dlil_interface_filters_input(ifp, &m,
4615 &frame_header, protocol_family);
4616 if (error != 0) {
4617 if (error != EJUSTRETURN) {
4618 m_freem(m);
91447636 4619 }
cb323159 4620 goto next;
91447636 4621 }
cb323159 4622 if ((m->m_flags & M_PROMISC) != 0) {
91447636 4623 m_freem(m);
2d21ac55 4624 goto next;
91447636 4625 }
6d2010ae 4626
2d21ac55
A
4627 /* Lookup the protocol attachment to this interface */
4628 if (protocol_family == 0) {
4629 ifproto = NULL;
6d2010ae
A
4630 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4631 (last_ifproto->protocol_family == protocol_family)) {
4632 VERIFY(ifproto == NULL);
2d21ac55 4633 ifproto = last_ifproto;
6d2010ae
A
4634 if_proto_ref(last_ifproto);
4635 } else {
4636 VERIFY(ifproto == NULL);
4637 ifnet_lock_shared(ifp);
4638 /* callee holds a proto refcnt upon success */
0a7de745 4639 ifproto = find_attached_proto(ifp, protocol_family);
6d2010ae 4640 ifnet_lock_done(ifp);
2d21ac55
A
4641 }
4642 if (ifproto == NULL) {
4643 /* no protocol for this packet, discard */
4644 m_freem(m);
4645 goto next;
4646 }
4647 if (ifproto != last_ifproto) {
2d21ac55
A
4648 if (last_ifproto != NULL) {
4649 /* pass up the list for the previous protocol */
2d21ac55
A
4650 dlil_ifproto_input(last_ifproto, pkt_first);
4651 pkt_first = NULL;
4652 if_proto_free(last_ifproto);
2d21ac55
A
4653 }
4654 last_ifproto = ifproto;
6d2010ae 4655 if_proto_ref(ifproto);
2d21ac55
A
4656 }
4657 /* extend the list */
39236c6e 4658 m->m_pkthdr.pkt_hdr = frame_header;
2d21ac55
A
4659 if (pkt_first == NULL) {
4660 pkt_first = m;
4661 } else {
4662 *pkt_next = m;
4663 }
4664 pkt_next = &m->m_nextpkt;
1c79356b 4665
6d2010ae 4666next:
2d21ac55
A
4667 if (next_packet == NULL && last_ifproto != NULL) {
4668 /* pass up the last list of packets */
2d21ac55
A
4669 dlil_ifproto_input(last_ifproto, pkt_first);
4670 if_proto_free(last_ifproto);
6d2010ae
A
4671 last_ifproto = NULL;
4672 }
4673 if (ifproto != NULL) {
4674 if_proto_free(ifproto);
4675 ifproto = NULL;
2d21ac55 4676 }
316670eb 4677
2d21ac55 4678 m = next_packet;
1c79356b 4679
6d2010ae 4680 /* update the driver's multicast filter, if needed */
0a7de745 4681 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
6d2010ae 4682 ifp->if_updatemcasts = 0;
0a7de745
A
4683 }
4684 if (iorefcnt == 1) {
cb323159 4685 ifnet_datamov_end(ifp);
0a7de745 4686 }
91447636 4687 }
6d2010ae 4688
39037602 4689 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b
A
4690}
4691
6d2010ae
A
4692errno_t
4693if_mcasts_update(struct ifnet *ifp)
4694{
4695 errno_t err;
4696
4697 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
0a7de745 4698 if (err == EAFNOSUPPORT) {
6d2010ae 4699 err = 0;
0a7de745 4700 }
cb323159 4701 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
39236c6e 4702 "(err=%d)\n", if_name(ifp),
6d2010ae
A
4703 (err == 0 ? "successfully restored" : "failed to restore"),
4704 ifp->if_updatemcasts, err);
4705
4706 /* just return success */
0a7de745 4707 return 0;
6d2010ae
A
4708}
4709
39037602
A
4710/* If ifp is set, we will increment the generation for the interface */
4711int
4712dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4713{
4714 if (ifp != NULL) {
4715 ifnet_increment_generation(ifp);
4716 }
4717
4718#if NECP
4719 necp_update_all_clients();
4720#endif /* NECP */
4721
0a7de745 4722 return kev_post_msg(event);
39037602
A
4723}
4724
a39ff7e2
A
4725__private_extern__ void
4726dlil_post_sifflags_msg(struct ifnet * ifp)
4727{
4728 struct kev_msg ev_msg;
4729 struct net_event_data ev_data;
4730
0a7de745
A
4731 bzero(&ev_data, sizeof(ev_data));
4732 bzero(&ev_msg, sizeof(ev_msg));
a39ff7e2
A
4733 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4734 ev_msg.kev_class = KEV_NETWORK_CLASS;
4735 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4736 ev_msg.event_code = KEV_DL_SIFFLAGS;
4737 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4738 ev_data.if_family = ifp->if_family;
4739 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4740 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4741 ev_msg.dv[0].data_ptr = &ev_data;
4742 ev_msg.dv[1].data_length = 0;
4743 dlil_post_complete_msg(ifp, &ev_msg);
4744}
4745
0a7de745 4746#define TMP_IF_PROTO_ARR_SIZE 10
91447636 4747static int
39037602 4748dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
1c79356b 4749{
a1c7dba1
A
4750 struct ifnet_filter *filter = NULL;
4751 struct if_proto *proto = NULL;
4752 int if_proto_count = 0;
4753 struct if_proto **tmp_ifproto_arr = NULL;
4754 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4755 int tmp_ifproto_arr_idx = 0;
4756 bool tmp_malloc = false;
6d2010ae 4757
6d2010ae
A
4758 /*
4759 * Pass the event to the interface filters
4760 */
4761 lck_mtx_lock_spin(&ifp->if_flt_lock);
4762 /* prevent filter list from changing in case we drop the lock */
4763 if_flt_monitor_busy(ifp);
4764 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4765 if (filter->filt_event != NULL) {
4766 lck_mtx_unlock(&ifp->if_flt_lock);
4767
4768 filter->filt_event(filter->filt_cookie, ifp,
4769 filter->filt_protocol, event);
4770
4771 lck_mtx_lock_spin(&ifp->if_flt_lock);
91447636 4772 }
6d2010ae
A
4773 }
4774 /* we're done with the filter list */
4775 if_flt_monitor_unbusy(ifp);
4776 lck_mtx_unlock(&ifp->if_flt_lock);
4777
3e170ce0 4778 /* Get an io ref count if the interface is attached */
0a7de745 4779 if (!ifnet_is_attached(ifp, 1)) {
3e170ce0 4780 goto done;
0a7de745 4781 }
3e170ce0 4782
a1c7dba1
A
4783 /*
4784 * An embedded tmp_list_entry in if_proto may still get
4785 * overwritten by another thread after the ifnet lock is
4786 * given up, therefore we avoid embedded pointers here.
4787 */
6d2010ae 4788 ifnet_lock_shared(ifp);
a39ff7e2 4789 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
a1c7dba1 4790 if (if_proto_count) {
6d2010ae 4791 int i;
a1c7dba1
A
4792 VERIFY(ifp->if_proto_hash != NULL);
4793 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4794 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4795 } else {
4796 MALLOC(tmp_ifproto_arr, struct if_proto **,
0a7de745 4797 sizeof(*tmp_ifproto_arr) * if_proto_count,
4798 M_TEMP, M_ZERO);
4799 if (tmp_ifproto_arr == NULL) {
4800 ifnet_lock_done(ifp);
4801 goto cleanup;
4802 }
4803 tmp_malloc = true;
4804 }
4805
4806 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4807 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4808 next_hash) {
4809 if_proto_ref(proto);
4810 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4811 tmp_ifproto_arr_idx++;
4812 }
4813 }
a1c7dba1 4814 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
91447636 4815 }
4816 ifnet_lock_done(ifp);
4817
4818 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4819 tmp_ifproto_arr_idx++) {
4820 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4821 VERIFY(proto != NULL);
4822 proto_media_event eventp =
4823 (proto->proto_kpi == kProtoKPI_v1 ?
4824 proto->kpi.v1.event :
4825 proto->kpi.v2.event);
4826
4827 if (eventp != NULL) {
4828 eventp(ifp, proto->protocol_family,
4829 event);
4830 }
4831 if_proto_free(proto);
4832 }
4833
39037602 4834cleanup:
4835 if (tmp_malloc) {
4836 FREE(tmp_ifproto_arr, M_TEMP);
4837 }
4838
6d2010ae 4839 /* Pass the event to the interface */
0a7de745 4840 if (ifp->if_event != NULL) {
6d2010ae 4841 ifp->if_event(ifp, event);
0a7de745 4842 }
4843
4844 /* Release the io ref count */
4845 ifnet_decr_iorefcnt(ifp);
6d2010ae 4846done:
0a7de745 4847 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
4848}
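
/*
 * Example (sketch): the shape of a filt_event callback as invoked by
 * dlil_event_internal() above. The names my_filt_event/cookie are
 * illustrative only; the signature follows iff_event_func in
 * <net/kpi_interfacefilter.h>. Note it runs without if_flt_lock held.
 */
static void
my_filt_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(cookie, ifp, protocol)
        if (event->kev_class == KEV_NETWORK_CLASS &&
            event->kev_subclass == KEV_DL_SUBCLASS &&
            event->event_code == KEV_DL_LINK_ON) {
                /* react to link-up on the filtered interface */
        }
}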
4849
2d21ac55 4850errno_t
6d2010ae 4851ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
1c79356b 4852{
39037602 4853 struct kev_msg kev_msg;
4854 int result = 0;
4855
4856 if (ifp == NULL || event == NULL) {
4857 return EINVAL;
4858 }
1c79356b 4859
0a7de745 4860 bzero(&kev_msg, sizeof(kev_msg));
4861 kev_msg.vendor_code = event->vendor_code;
4862 kev_msg.kev_class = event->kev_class;
4863 kev_msg.kev_subclass = event->kev_subclass;
4864 kev_msg.event_code = event->event_code;
4865 kev_msg.dv[0].data_ptr = &event->event_data[0];
4866 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4867 kev_msg.dv[1].data_length = 0;
6d2010ae 4868
39037602 4869 result = dlil_event_internal(ifp, &kev_msg, TRUE);
1c79356b 4870
0a7de745 4871 return result;
91447636 4872}
1c79356b 4873
4874#if CONFIG_MACF_NET
4875#include <netinet/ip6.h>
4876#include <netinet/ip.h>
4877static int
4878dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4879{
4880 struct mbuf *m;
4881 struct ip *ip;
4882 struct ip6_hdr *ip6;
4883 int type = SOCK_RAW;
4884
4885 if (!raw) {
4886 switch (family) {
4887 case PF_INET:
4888 m = m_pullup(*mp, sizeof(struct ip));
0a7de745 4889 if (m == NULL) {
2d21ac55 4890 break;
0a7de745 4891 }
4892 *mp = m;
4893 ip = mtod(m, struct ip *);
0a7de745 4894 if (ip->ip_p == IPPROTO_TCP) {
2d21ac55 4895 type = SOCK_STREAM;
0a7de745 4896 } else if (ip->ip_p == IPPROTO_UDP) {
2d21ac55 4897 type = SOCK_DGRAM;
0a7de745 4898 }
4899 break;
4900 case PF_INET6:
4901 m = m_pullup(*mp, sizeof(struct ip6_hdr));
0a7de745 4902 if (m == NULL) {
2d21ac55 4903 break;
0a7de745 4904 }
4905 *mp = m;
4906 ip6 = mtod(m, struct ip6_hdr *);
0a7de745 4907 if (ip6->ip6_nxt == IPPROTO_TCP) {
2d21ac55 4908 type = SOCK_STREAM;
0a7de745 4909 } else if (ip6->ip6_nxt == IPPROTO_UDP) {
2d21ac55 4910 type = SOCK_DGRAM;
0a7de745 4911 }
4912 break;
4913 }
4914 }
4915
0a7de745 4916 return type;
4917}
4918#endif
4919
4920static void
4921dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4922{
0a7de745 4923 mbuf_t n = m;
4924 int chainlen = 0;
4925
4926 while (n != NULL) {
4927 chainlen++;
4928 n = n->m_next;
4929 }
4930 switch (chainlen) {
4931 case 0:
4932 break;
4933 case 1:
4934 atomic_add_64(&cls->cls_one, 1);
4935 break;
4936 case 2:
4937 atomic_add_64(&cls->cls_two, 1);
4938 break;
4939 case 3:
4940 atomic_add_64(&cls->cls_three, 1);
4941 break;
4942 case 4:
4943 atomic_add_64(&cls->cls_four, 1);
4944 break;
4945 case 5:
4946 default:
4947 atomic_add_64(&cls->cls_five_or_more, 1);
4948 break;
4949 }
4950}
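
/*
 * Example (sketch): the histogram filled in above is exported to user
 * space; assuming the sysctl is registered as
 * "net.link.generic.system.tx_chain_len_stats" (as elsewhere in this
 * file), it can be read from a user-space tool with:
 *
 *        struct chain_len_stats cls;
 *        size_t len = sizeof (cls);
 *        sysctlbyname("net.link.generic.system.tx_chain_len_stats",
 *            &cls, &len, NULL, 0);
 */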
4951
1c79356b 4952/*
4953 * dlil_output
4954 *
4955 * Caller should have a lock on the protocol domain if the protocol
4956 * doesn't support finer grained locking. In most cases, the lock
4957 * will be held from the socket layer and won't be released until
4958 * we return back to the socket layer.
4959 *
4960 * This does mean that we must take a protocol lock before we take
4961 * an interface lock if we're going to take both. This makes sense
4962 * because a protocol is likely to interact with an ifp while it
4963 * is under the protocol lock.
4964 *
4965 * An advisory code will be returned if adv is not null. This
39236c6e 4966 * can be used to provide feedback about interface queues to the
316670eb 4967 * application.
1c79356b 4968 */
4969errno_t
4970dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
316670eb 4971 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4972{
4973 char *frame_type = NULL;
4974 char *dst_linkaddr = NULL;
4975 int retval = 0;
4976 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4977 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4978 struct if_proto *proto = NULL;
4979 mbuf_t m = NULL;
4980 mbuf_t send_head = NULL;
4981 mbuf_t *send_tail = &send_head;
6d2010ae 4982 int iorefcnt = 0;
316670eb 4983 u_int32_t pre = 0, post = 0;
4984 u_int32_t fpkts = 0, fbytes = 0;
4985 int32_t flen = 0;
4986 struct timespec now;
4987 u_int64_t now_nsec;
4988 boolean_t did_clat46 = FALSE;
4989 protocol_family_t old_proto_family = proto_family;
cb323159 4990 struct sockaddr_in6 dest6;
d9a64523 4991 struct rtentry *rt = NULL;
cb323159 4992 u_int32_t m_loop_set = 0;
6d2010ae 4993
39236c6e 4994 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
6d2010ae 4995
4996 /*
4997 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4998 * from happening while this operation is in progress
4999 */
cb323159 5000 if (!ifnet_datamov_begin(ifp)) {
5001 retval = ENXIO;
5002 goto cleanup;
5003 }
5004 iorefcnt = 1;
5005
5ba3f43e 5006 VERIFY(ifp->if_output_dlil != NULL);
39037602 5007
6d2010ae 5008 /* update the driver's multicast filter, if needed */
0a7de745 5009 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
6d2010ae 5010 ifp->if_updatemcasts = 0;
0a7de745 5011 }
5012
5013 frame_type = frame_type_buffer;
5014 dst_linkaddr = dst_linkaddr_buffer;
5015
91447636 5016 if (raw == 0) {
5017 ifnet_lock_shared(ifp);
5018 /* callee holds a proto refcnt upon success */
5019 proto = find_attached_proto(ifp, proto_family);
5020 if (proto == NULL) {
6d2010ae 5021 ifnet_lock_done(ifp);
5022 retval = ENXIO;
5023 goto cleanup;
5024 }
6d2010ae 5025 ifnet_lock_done(ifp);
2d21ac55 5026 }
6d2010ae 5027
2d21ac55 5028preout_again:
0a7de745 5029 if (packetlist == NULL) {
2d21ac55 5030 goto cleanup;
0a7de745 5031 }
6d2010ae 5032
5033 m = packetlist;
5034 packetlist = packetlist->m_nextpkt;
5035 m->m_nextpkt = NULL;
6d2010ae 5036
5037 /*
5038	 * Perform address family translation for the first
5039	 * packet outside the loop so that the address lookup
5040	 * can be done for the translated proto family.
5041 */
5042 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5043 (ifp->if_type == IFT_CELLULAR ||
0a7de745 5044 dlil_is_clat_needed(proto_family, m))) {
5045 retval = dlil_clat46(ifp, &proto_family, &m);
5046 /*
5047 * Go to the next packet if translation fails
5048 */
5049 if (retval != 0) {
5050 m_freem(m);
5051 m = NULL;
5052 ip6stat.ip6s_clat464_out_drop++;
5053 /* Make sure that the proto family is PF_INET */
5054 ASSERT(proto_family == PF_INET);
5055 goto preout_again;
5056 }
5057 /*
5058	 * Free the old proto and point it at the IPv6 proto structure.
5059	 *
5060	 * This is done only the first time we have successfully
5061	 * performed address family translation.
5062 */
5063 if (!did_clat46 && proto_family == PF_INET6) {
5064 did_clat46 = TRUE;
5065
0a7de745 5066 if (proto != NULL) {
d9a64523 5067 if_proto_free(proto);
0a7de745 5068 }
5069 ifnet_lock_shared(ifp);
5070 /* callee holds a proto refcnt upon success */
5071 proto = find_attached_proto(ifp, proto_family);
5072 if (proto == NULL) {
5073 ifnet_lock_done(ifp);
5074 retval = ENXIO;
5075 m_freem(m);
5076 m = NULL;
5077 goto cleanup;
5078 }
5079 ifnet_lock_done(ifp);
5080 if (ifp->if_type == IFT_ETHER) {
5081 /* Update the dest to translated v6 address */
5082 dest6.sin6_len = sizeof(struct sockaddr_in6);
5083 dest6.sin6_family = AF_INET6;
5084 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
5085 dest = (const struct sockaddr *)&dest6;
5086
5087 /*
5088 * Lookup route to the translated destination
5089 * Free this route ref during cleanup
5090 */
5091 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
5092 0, 0, ifp->if_index);
5093
5094 route = rt;
5095 }
5096 }
5097 }
5098
5099 /*
5100	 * This path handles a packet chain going to the same destination.
5101	 * The pre-output routine is used to either trigger resolution of
5102	 * the next hop or retrieve the next hop's link-layer addressing;
5103	 * see, e.g., the ether_inet(6)_pre_output routines.
5104	 *
5105	 * If the routine returns EJUSTRETURN, it implies that the packet has
5106	 * been queued, and therefore we have to call preout_again for the
5107	 * following packet in the chain.
5108	 *
5109	 * For errors other than EJUSTRETURN, the current packet is freed
5110	 * and the rest of the chain (pointed to by packetlist) is freed as
5111	 * part of cleanup.
5112	 *
5113	 * Otherwise, the retrieved information is used for all the packets
5114	 * in the chain.
5115 */
2d21ac55 5116 if (raw == 0) {
5117 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
5118 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
91447636 5119 retval = 0;
5120 if (preoutp != NULL) {
5121 retval = preoutp(ifp, proto_family, &m, dest, route,
5122 frame_type, dst_linkaddr);
5123
5124 if (retval != 0) {
0a7de745 5125 if (retval == EJUSTRETURN) {
6d2010ae 5126 goto preout_again;
0a7de745 5127 }
6d2010ae 5128 m_freem(m);
d9a64523 5129 m = NULL;
6d2010ae 5130 goto cleanup;
91447636 5131 }
1c79356b 5132 }
1c79356b 5133 }
5134
5135#if CONFIG_MACF_NET
5136 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
5137 dlil_get_socket_type(&m, proto_family, raw));
39236c6e 5138 if (retval != 0) {
5139 m_freem(m);
5140 goto cleanup;
5141 }
5142#endif
5143
5144 do {
5145 /*
5146 * Perform address family translation if needed.
5147 * For now we only support stateless 4 to 6 translation
5148 * on the out path.
5149 *
5150 * The routine below translates IP header, updates protocol
5151 * checksum and also translates ICMP.
5152 *
5153 * We skip the first packet as it is already translated and
5154 * the proto family is set to PF_INET6.
5155 */
5156 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5157 (ifp->if_type == IFT_CELLULAR ||
0a7de745 5158 dlil_is_clat_needed(proto_family, m))) {
d9a64523 5159 retval = dlil_clat46(ifp, &proto_family, &m);
0a7de745 5160			/* Go to the next packet if the translation fails */
5161 if (retval != 0) {
5162 m_freem(m);
5163 m = NULL;
5164 ip6stat.ip6s_clat464_out_drop++;
5165 goto next;
5166 }
5167 }
5168
6d2010ae 5169#if CONFIG_DTRACE
316670eb 5170 if (!raw && proto_family == PF_INET) {
5171 struct ip *ip = mtod(m, struct ip *);
5172 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5173 struct ip *, ip, struct ifnet *, ifp,
5174 struct ip *, ip, struct ip6_hdr *, NULL);
316670eb 5175 } else if (!raw && proto_family == PF_INET6) {
5176 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
5177 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5178 struct ip6_hdr *, ip6, struct ifnet *, ifp,
5179 struct ip *, NULL, struct ip6_hdr *, ip6);
5180 }
5181#endif /* CONFIG_DTRACE */
5182
39236c6e 5183 if (raw == 0 && ifp->if_framer != NULL) {
5184 int rcvif_set = 0;
5185
5186 /*
5187 * If this is a broadcast packet that needs to be
5188 * looped back into the system, set the inbound ifp
5189 * to that of the outbound ifp. This will allow
5190 * us to determine that it is a legitimate packet
5191 * for the system. Only set the ifp if it's not
5192 * already set, just to be safe.
5193 */
5194 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
5195 m->m_pkthdr.rcvif == NULL) {
5196 m->m_pkthdr.rcvif = ifp;
5197 rcvif_set = 1;
5198 }
cb323159 5199 m_loop_set = m->m_flags & M_LOOP;
6d2010ae 5200 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
5201 frame_type, &pre, &post);
5202 if (retval != 0) {
0a7de745 5203 if (retval != EJUSTRETURN) {
2d21ac55 5204 m_freem(m);
0a7de745 5205 }
2d21ac55 5206 goto next;
91447636 5207 }
7e4a7d39 5208
5209 /*
5210 * For partial checksum offload, adjust the start
5211 * and stuff offsets based on the prepended header.
5212 */
5213 if ((m->m_pkthdr.csum_flags &
5214 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5215 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5216 m->m_pkthdr.csum_tx_stuff += pre;
5217 m->m_pkthdr.csum_tx_start += pre;
5218 }
5219
0a7de745 5220 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
5221 dlil_output_cksum_dbg(ifp, m, pre,
5222 proto_family);
0a7de745 5223 }
39236c6e 5224
5225 /*
5226 * Clear the ifp if it was set above, and to be
5227 * safe, only if it is still the same as the
5228 * outbound ifp we have in context. If it was
5229 * looped back, then a copy of it was sent to the
5230 * loopback interface with the rcvif set, and we
5231 * are clearing the one that will go down to the
5232 * layer below.
5233 */
0a7de745 5234 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
7e4a7d39 5235 m->m_pkthdr.rcvif = NULL;
0a7de745 5236 }
91447636 5237 }
5238
5239 /*
5240 * Let interface filters (if any) do their thing ...
5241 */
5242 retval = dlil_interface_filters_output(ifp, &m, proto_family);
5243 if (retval != 0) {
5244 if (retval != EJUSTRETURN) {
5245 m_freem(m);
1c79356b 5246 }
cb323159 5247 goto next;
1c79356b 5248 }
b7266188 5249 /*
5250		 * Strip away the M_PROTO1 bit prior to sending the packet
5251		 * to the driver, as this field may be used by the driver.
5252 */
5253 m->m_flags &= ~M_PROTO1;
5254
5255 /*
5256 * If the underlying interface is not capable of handling a
5257 * packet whose data portion spans across physically disjoint
5258 * pages, we need to "normalize" the packet so that we pass
5259		 * down a chain of mbufs where each mbuf points to a span that
5260		 * resides within a single system page. If the packet does
5261		 * not cross a page boundary, the following is a no-op.
5262 */
5263 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
0a7de745 5264 if ((m = m_normalize(m)) == NULL) {
2d21ac55 5265 goto next;
0a7de745 5266 }
5267 }
5268
5269 /*
5270 * If this is a TSO packet, make sure the interface still
5271		 * advertises TSO capability.
b0d623f7 5272 */
39236c6e 5273 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
5274 retval = EMSGSIZE;
5275 m_freem(m);
5276 goto cleanup;
5277 }
5278
5279 ifp_inc_traffic_class_out(ifp, m);
5280 pktap_output(ifp, proto_family, m, pre, post);
6d2010ae 5281
5282 /*
5283 * Count the number of elements in the mbuf chain
5284 */
5285 if (tx_chain_len_count) {
5286 dlil_count_chain_len(m, &tx_chain_len_stats);
5287 }
5288
5289 /*
5290 * Record timestamp; ifnet_enqueue() will use this info
5291 * rather than redoing the work. An optimization could
5292 * involve doing this just once at the top, if there are
5293 * no interface filters attached, but that's probably
5294 * not a big deal.
5295 */
5296 nanouptime(&now);
5297 net_timernsec(&now, &now_nsec);
5298 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
5299
5300 /*
5301 * Discard partial sum information if this packet originated
5302 * from another interface; the packet would already have the
5303 * final checksum and we shouldn't recompute it.
5304 */
5305 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
5306 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5307 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5ba3f43e
A
5308 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5309 m->m_pkthdr.csum_data = 0;
5310 }
5311
5312 /*
5313 * Finally, call the driver.
5314 */
3e170ce0 5315 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
5316 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5317 flen += (m_pktlen(m) - (pre + post));
5318 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5319 }
5320 *send_tail = m;
5321 send_tail = &m->m_nextpkt;
6d2010ae 5322 } else {
5323 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5324 flen = (m_pktlen(m) - (pre + post));
5325 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5326 } else {
5327 flen = 0;
5328 }
6d2010ae 5329 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
39236c6e 5330 0, 0, 0, 0, 0);
5ba3f43e 5331 retval = (*ifp->if_output_dlil)(ifp, m);
5332 if (retval == EQFULL || retval == EQSUSPENDED) {
5333 if (adv != NULL && adv->code == FADV_SUCCESS) {
5334 adv->code = (retval == EQFULL ?
5335 FADV_FLOW_CONTROLLED :
5336 FADV_SUSPENDED);
5337 }
5338 retval = 0;
5339 }
5340 if (retval == 0 && flen > 0) {
5341 fbytes += flen;
5342 fpkts++;
5343 }
5344 if (retval != 0 && dlil_verbose) {
cb323159 5345 DLIL_PRINTF("%s: output error on %s retval = %d\n",
39236c6e 5346 __func__, if_name(ifp),
6d2010ae 5347 retval);
2d21ac55 5348 }
6d2010ae 5349 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
39236c6e 5350 0, 0, 0, 0, 0);
2d21ac55 5351 }
39236c6e 5352 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5353
5354next:
5355 m = packetlist;
39236c6e 5356 if (m != NULL) {
cb323159 5357 m->m_flags |= m_loop_set;
5358 packetlist = packetlist->m_nextpkt;
5359 m->m_nextpkt = NULL;
5360 }
d9a64523 5361 /* Reset the proto family to old proto family for CLAT */
0a7de745 5362 if (did_clat46) {
d9a64523 5363 proto_family = old_proto_family;
0a7de745 5364 }
39236c6e 5365 } while (m != NULL);
d41d1dae 5366
39236c6e 5367 if (send_head != NULL) {
5368 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5369 0, 0, 0, 0, 0);
3e170ce0 5370 if (ifp->if_eflags & IFEF_SENDLIST) {
5ba3f43e 5371 retval = (*ifp->if_output_dlil)(ifp, send_head);
5372 if (retval == EQFULL || retval == EQSUSPENDED) {
5373 if (adv != NULL) {
5374 adv->code = (retval == EQFULL ?
5375 FADV_FLOW_CONTROLLED :
5376 FADV_SUSPENDED);
5377 }
5378 retval = 0;
5379 }
5380 if (retval == 0 && flen > 0) {
5381 fbytes += flen;
5382 fpkts++;
5383 }
5384 if (retval != 0 && dlil_verbose) {
cb323159 5385 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5386 __func__, if_name(ifp), retval);
5387 }
5388 } else {
5389 struct mbuf *send_m;
5390 int enq_cnt = 0;
5391 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
5392 while (send_head != NULL) {
5393 send_m = send_head;
5394 send_head = send_m->m_nextpkt;
5395 send_m->m_nextpkt = NULL;
5ba3f43e 5396 retval = (*ifp->if_output_dlil)(ifp, send_m);
5397 if (retval == EQFULL || retval == EQSUSPENDED) {
5398 if (adv != NULL) {
5399 adv->code = (retval == EQFULL ?
5400 FADV_FLOW_CONTROLLED :
5401 FADV_SUSPENDED);
5402 }
5403 retval = 0;
5404 }
5405 if (retval == 0) {
5406 enq_cnt++;
0a7de745 5407 if (flen > 0) {
3e170ce0 5408 fpkts++;
0a7de745 5409 }
5410 }
5411 if (retval != 0 && dlil_verbose) {
cb323159 5412 DLIL_PRINTF("%s: output error on %s "
39037602 5413 "retval = %d\n",
5414 __func__, if_name(ifp), retval);
5415 }
5416 }
5417 if (enq_cnt > 0) {
5418 fbytes += flen;
5419 ifnet_start(ifp);
316670eb 5420 }
5421 }
5422 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b 5423 }
6d2010ae 5424
39236c6e 5425 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b 5426
91447636 5427cleanup:
0a7de745 5428 if (fbytes > 0) {
39236c6e 5429 ifp->if_fbytes += fbytes;
5430 }
5431 if (fpkts > 0) {
39236c6e 5432 ifp->if_fpackets += fpkts;
5433 }
5434 if (proto != NULL) {
6d2010ae 5435 if_proto_free(proto);
5436 }
5437 if (packetlist) { /* if any packets are left, clean up */
2d21ac55 5438 mbuf_freem_list(packetlist);
5439 }
5440 if (retval == EJUSTRETURN) {
91447636 5441 retval = 0;
5442 }
5443 if (iorefcnt == 1) {
cb323159 5444 ifnet_datamov_end(ifp);
0a7de745 5445 }
5446 if (rt != NULL) {
5447 rtfree(rt);
5448 rt = NULL;
5449 }
6d2010ae 5450
0a7de745 5451 return retval;
5452}
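
/*
 * Example (sketch): consuming the advisory code that dlil_output() fills
 * in above. FADV_FLOW_CONTROLLED and FADV_SUSPENDED indicate that the
 * interface queue is applying back-pressure; a transport would typically
 * pause the flow until advised to resume. The function name and the
 * PF_INET choice are illustrative only.
 */
static errno_t
example_dlil_send(ifnet_t ifp, mbuf_t m, const struct sockaddr *dest)
{
        struct flowadv adv;
        errno_t err;

        adv.code = FADV_SUCCESS;
        err = dlil_output(ifp, PF_INET, m, NULL, dest, 0, &adv);
        if (err == 0 && adv.code != FADV_SUCCESS) {
                /* queue is full (flow-controlled) or suspended; back off */
        }
        return err;
}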
5453
5454/*
5455 * This routine checks that the destination address is not a loopback,
5456 * link-local, multicast or broadcast address.
5457 */
5458static int
5459dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
5460{
5461 int ret = 0;
0a7de745 5462 switch (proto_family) {
5463 case PF_INET: {
5464 struct ip *iph = mtod(m, struct ip *);
0a7de745 5465 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
d9a64523 5466 ret = 1;
0a7de745 5467 }
5468 break;
5469 }
5470 case PF_INET6: {
5471 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
5472 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
0a7de745 5473 CLAT64_NEEDED(&ip6h->ip6_dst)) {
d9a64523 5474 ret = 1;
0a7de745 5475 }
5476 break;
5477 }
5478 }
5479
0a7de745 5480 return ret;
5481}
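
/*
 * Example (sketch): per the CLAT46_NEEDED() test above, a PF_INET packet
 * destined to a global address such as 203.0.113.7 reports 1 (translate),
 * while destinations like 127.0.0.1 (loopback), 169.254.0.0/16
 * (link-local), 224.0.0.0/4 (multicast) or 255.255.255.255 (broadcast)
 * report 0 and are left untranslated.
 */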
5482/*
5483 * @brief This routine translates an IPv4 packet to an IPv6 packet,
5484 * updates the protocol checksum and also translates ICMP,
5485 * including translation of the inner header.
5486 *
5487 * @param ifp Pointer to the interface
5488 * @param proto_family Pointer to the protocol family. It is updated if
5489 * the function performs the translation successfully.
5490 * @param m Pointer to the pointer to the packet. Needed because this
5491 * routine can end up changing the mbuf to a different one.
5492 *
5493 * @return 0 on success or else a negative value.
5494 */
5495static errno_t
5496dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5497{
5498 VERIFY(*proto_family == PF_INET);
5499 VERIFY(IS_INTF_CLAT46(ifp));
5500
5501 pbuf_t pbuf_store, *pbuf = NULL;
5502 struct ip *iph = NULL;
5503 struct in_addr osrc, odst;
5504 uint8_t proto = 0;
5505 struct in6_ifaddr *ia6_clat_src = NULL;
5506 struct in6_addr *src = NULL;
5507 struct in6_addr dst;
5508 int error = 0;
5509 uint32_t off = 0;
5510 uint64_t tot_len = 0;
5511 uint16_t ip_id_val = 0;
5512 uint16_t ip_frag_off = 0;
5513
5514 boolean_t is_frag = FALSE;
5515 boolean_t is_first_frag = TRUE;
5516 boolean_t is_last_frag = TRUE;
5517
5518 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5519 pbuf = &pbuf_store;
5520 iph = pbuf->pb_data;
5521
5522 osrc = iph->ip_src;
5523 odst = iph->ip_dst;
5524 proto = iph->ip_p;
5525 off = iph->ip_hl << 2;
5526 ip_id_val = iph->ip_id;
5527 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5528
5529 tot_len = ntohs(iph->ip_len);
5530
5531 /*
5532	 * For packets that are not first fragments
5533	 * we only need to adjust the checksum.
5534	 * For 4-to-6 translation, the fragment header gets
5535	 * appended after protocol translation.
5536 */
5537 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5538 is_frag = TRUE;
5539
5540 /* If the offset is not zero, it is not first frag */
0a7de745 5541 if (ip_frag_off != 0) {
d9a64523 5542 is_first_frag = FALSE;
0a7de745 5543 }
5544
5545 /* If IP_MF is set, then it is not last frag */
0a7de745 5546 if (ntohs(iph->ip_off) & IP_MF) {
d9a64523 5547 is_last_frag = FALSE;
0a7de745 5548 }
5549 }
5550
5551 /*
5552	 * Retrieve the local IPv6 CLAT46 address reserved for stateless
5553 * translation.
5554 */
5555 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5556 if (ia6_clat_src == NULL) {
5557 ip6stat.ip6s_clat464_out_nov6addr_drop++;
5558 error = -1;
5559 goto cleanup;
5560 }
5561
5562 src = &ia6_clat_src->ia_addr.sin6_addr;
5563
5564 /*
5565 * Translate IPv4 destination to IPv6 destination by using the
5566 * prefixes learned through prior PLAT discovery.
5567 */
5568 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5569 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5570 goto cleanup;
5571 }
5572
5573 /* Translate the IP header part first */
5574 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5575 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5576
0a7de745 5577 iph = NULL; /* Invalidate iph as pbuf has been modified */
5578
5579 if (error != 0) {
5580 ip6stat.ip6s_clat464_out_46transfail_drop++;
5581 goto cleanup;
5582 }
5583
5584 /*
5585 * Translate protocol header, update checksum, checksum flags
5586 * and related fields.
5587 */
5588 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5589 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5590
5591 if (error != 0) {
5592 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5593 goto cleanup;
5594 }
5595
5596 /* Now insert the IPv6 fragment header */
5597 if (is_frag) {
5598 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5599
5600 if (error != 0) {
5601 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5602 goto cleanup;
5603 }
5604 }
5605
5606cleanup:
0a7de745 5607 if (ia6_clat_src != NULL) {
d9a64523 5608 IFA_REMREF(&ia6_clat_src->ia_ifa);
0a7de745 5609 }
5610
5611 if (pbuf_is_valid(pbuf)) {
5612 *m = pbuf->pb_mbuf;
5613 pbuf->pb_mbuf = NULL;
5614 pbuf_destroy(pbuf);
5615 } else {
5616 error = -1;
5617 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5618 }
5619
5620 if (error == 0) {
5621 *proto_family = PF_INET6;
5622 ip6stat.ip6s_clat464_out_success++;
5623 }
5624
0a7de745 5625 return error;
5626}
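
/*
 * Example (sketch): the pbuf round trip used by dlil_clat46() above and
 * dlil_clat64() below. A stack pbuf wraps the mbuf for in-place header
 * edits; on success the (possibly replaced) mbuf is reclaimed before the
 * pbuf is destroyed. The function name is illustrative.
 */
static errno_t
example_pbuf_edit(ifnet_t ifp, mbuf_t *m)
{
        pbuf_t pbuf_store, *pbuf;
        errno_t error = 0;

        pbuf_init_mbuf(&pbuf_store, *m, ifp);
        pbuf = &pbuf_store;

        /* ... edit headers through pbuf->pb_data ... */

        if (pbuf_is_valid(pbuf)) {
                *m = pbuf->pb_mbuf;     /* take the mbuf back */
                pbuf->pb_mbuf = NULL;
                pbuf_destroy(pbuf);
        } else {
                error = -1;             /* mbuf was lost during the edits */
        }
        return error;
}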
5627
5628/*
5629 * @brief This routine translates an incoming IPv6 packet to IPv4,
5630 * updates the protocol checksum and also translates the ICMPv6
5631 * outer and inner headers.
5632 *
5633 * @return 0 on success or else a negative value.
5634 */
5635static errno_t
5636dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5637{
5638 VERIFY(*proto_family == PF_INET6);
5639 VERIFY(IS_INTF_CLAT46(ifp));
5640
5641 struct ip6_hdr *ip6h = NULL;
5642 struct in6_addr osrc, odst;
5643 uint8_t proto = 0;
5644 struct in6_ifaddr *ia6_clat_dst = NULL;
5645 struct in_ifaddr *ia4_clat_dst = NULL;
5646 struct in_addr *dst = NULL;
5647 struct in_addr src;
5648 int error = 0;
5649 uint32_t off = 0;
5650 u_int64_t tot_len = 0;
5651 uint8_t tos = 0;
5652 boolean_t is_first_frag = TRUE;
5653
5654 /* Incoming mbuf does not contain valid IP6 header */
5655 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5656 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5657 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5658 ip6stat.ip6s_clat464_in_tooshort_drop++;
0a7de745 5659 return -1;
5660 }
5661
5662 ip6h = mtod(*m, struct ip6_hdr *);
5663 /* Validate that mbuf contains IP payload equal to ip6_plen */
5664 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5665 ip6stat.ip6s_clat464_in_tooshort_drop++;
0a7de745 5666 return -1;
5667 }
5668
5669 osrc = ip6h->ip6_src;
5670 odst = ip6h->ip6_dst;
5671
5672 /*
5673 * Retrieve the local CLAT46 reserved IPv6 address.
5674 * Let the packet pass if we don't find one, as the flag
5675 * may get set before IPv6 configuration has taken place.
5676 */
5677 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
0a7de745 5678 if (ia6_clat_dst == NULL) {
d9a64523 5679 goto done;
0a7de745 5680 }
5681
5682 /*
5683 * Check if the original dest in the packet is same as the reserved
5684 * CLAT46 IPv6 address
5685 */
5686 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5687 pbuf_t pbuf_store, *pbuf = NULL;
5688 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5689 pbuf = &pbuf_store;
5690
5691 /*
5692		 * Retrieve the local CLAT46 IPv4 address reserved for stateless
5693 * translation.
5694 */
5695 ia4_clat_dst = inifa_ifpclatv4(ifp);
5696 if (ia4_clat_dst == NULL) {
5697 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5698 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5699 error = -1;
5700 goto cleanup;
5701 }
5702 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5703
5704 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5705 dst = &ia4_clat_dst->ia_addr.sin_addr;
5706 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5707 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5708 error = -1;
5709 goto cleanup;
5710 }
5711
5712 ip6h = pbuf->pb_data;
5713 off = sizeof(struct ip6_hdr);
5714 proto = ip6h->ip6_nxt;
5715 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5716 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5717
5718 /*
5719 * Translate the IP header and update the fragmentation
5720 * header if needed
5721 */
5722 error = (nat464_translate_64(pbuf, off, tos, &proto,
5723 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5724 0 : -1;
5725
5726 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5727
5728 if (error != 0) {
5729 ip6stat.ip6s_clat464_in_64transfail_drop++;
5730 goto cleanup;
5731 }
5732
5733 /*
5734 * Translate protocol header, update checksum, checksum flags
5735 * and related fields.
5736 */
5737 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5738 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5739 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5740
5741 if (error != 0) {
5742 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5743 goto cleanup;
5744 }
5745
5746cleanup:
0a7de745 5747 if (ia4_clat_dst != NULL) {
d9a64523 5748 IFA_REMREF(&ia4_clat_dst->ia_ifa);
0a7de745 5749 }
5750
5751 if (pbuf_is_valid(pbuf)) {
5752 *m = pbuf->pb_mbuf;
5753 pbuf->pb_mbuf = NULL;
5754 pbuf_destroy(pbuf);
5755 } else {
5756 error = -1;
5757 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5758 }
5759
5760 if (error == 0) {
5761 *proto_family = PF_INET;
5762 ip6stat.ip6s_clat464_in_success++;
5763 }
5764 } /* CLAT traffic */
5765
5766done:
0a7de745 5767 return error;
5768}
5769
2d21ac55 5770errno_t
5771ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5772 void *ioctl_arg)
5773{
5774 struct ifnet_filter *filter;
5775 int retval = EOPNOTSUPP;
5776 int result = 0;
5777
5778 if (ifp == NULL || ioctl_code == 0) {
5779 return EINVAL;
5780 }
5781
5782 /* Get an io ref count if the interface is attached */
5783 if (!ifnet_is_attached(ifp, 1)) {
5784 return EOPNOTSUPP;
5785 }
6d2010ae 5786
5787 /*
5788 * Run the interface filters first.
5789 * We want to run all filters before calling the protocol,
5790 * interface family, or interface.
5791 */
5792 lck_mtx_lock_spin(&ifp->if_flt_lock);
5793 /* prevent filter list from changing in case we drop the lock */
5794 if_flt_monitor_busy(ifp);
91447636 5795 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5796 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5797 filter->filt_protocol == proto_fam)) {
5798 lck_mtx_unlock(&ifp->if_flt_lock);
5799
5800 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5801 proto_fam, ioctl_code, ioctl_arg);
5802
5803 lck_mtx_lock_spin(&ifp->if_flt_lock);
5804
5805 /* Only update retval if no one has handled the ioctl */
5806 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 5807 if (result == ENOTSUP) {
91447636 5808 result = EOPNOTSUPP;
0a7de745 5809 }
91447636 5810 retval = result;
5811 if (retval != 0 && retval != EOPNOTSUPP) {
5812 /* we're done with the filter list */
5813 if_flt_monitor_unbusy(ifp);
5814 lck_mtx_unlock(&ifp->if_flt_lock);
5815 goto cleanup;
5816 }
5817 }
5818 }
5819 }
5820 /* we're done with the filter list */
5821 if_flt_monitor_unbusy(ifp);
5822 lck_mtx_unlock(&ifp->if_flt_lock);
5823
91447636 5824 /* Allow the protocol to handle the ioctl */
6d2010ae 5825 if (proto_fam != 0) {
0a7de745 5826 struct if_proto *proto;
6d2010ae
A
5827
5828 /* callee holds a proto refcnt upon success */
5829 ifnet_lock_shared(ifp);
5830 proto = find_attached_proto(ifp, proto_fam);
5831 ifnet_lock_done(ifp);
5832 if (proto != NULL) {
5833 proto_media_ioctl ioctlp =
5834 (proto->proto_kpi == kProtoKPI_v1 ?
5835 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
91447636 5836 result = EOPNOTSUPP;
0a7de745 5837 if (ioctlp != NULL) {
5838 result = ioctlp(ifp, proto_fam, ioctl_code,
5839 ioctl_arg);
0a7de745 5840 }
5841 if_proto_free(proto);
5842
5843 /* Only update retval if no one has handled the ioctl */
5844 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 5845 if (result == ENOTSUP) {
91447636 5846 result = EOPNOTSUPP;
0a7de745 5847 }
91447636 5848 retval = result;
0a7de745 5849 if (retval && retval != EOPNOTSUPP) {
91447636 5850 goto cleanup;
0a7de745 5851 }
5852 }
5853 }
5854 }
6d2010ae 5855
91447636 5856 /* retval is either 0 or EOPNOTSUPP */
6d2010ae 5857
5858 /*
5859 * Let the interface handle this ioctl.
5860 * If it returns EOPNOTSUPP, ignore that, we may have
5861 * already handled this in the protocol or family.
5862 */
0a7de745 5863 if (ifp->if_ioctl) {
91447636 5864 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
0a7de745 5865 }
6d2010ae 5866
5867 /* Only update retval if no one has handled the ioctl */
5868 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 5869 if (result == ENOTSUP) {
91447636 5870 result = EOPNOTSUPP;
0a7de745 5871 }
5872 retval = result;
5873 if (retval && retval != EOPNOTSUPP) {
5874 goto cleanup;
5875 }
5876 }
1c79356b 5877
6d2010ae 5878cleanup:
0a7de745 5879 if (retval == EJUSTRETURN) {
91447636 5880 retval = 0;
0a7de745 5881 }
5882
5883 ifnet_decr_iorefcnt(ifp);
5884
0a7de745 5885 return retval;
91447636 5886}
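
/*
 * Example (sketch): the result-merging rule repeated above, expressed as
 * a helper. A filter/protocol/driver result replaces the running retval
 * only if nothing has claimed the ioctl yet (retval == EOPNOTSUPP) or the
 * handler demands termination (EJUSTRETURN); ENOTSUP is normalized.
 * The helper name is illustrative.
 */
static int
ioctl_merge_result(int retval, int result)
{
        if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP) {
                        result = EOPNOTSUPP;
                }
                retval = result;
        }
        return retval;
}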
1c79356b 5887
91447636 5888__private_extern__ errno_t
6d2010ae 5889dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
91447636 5890{
0a7de745 5891 errno_t error = 0;
5892
5893
5894 if (ifp->if_set_bpf_tap) {
5895 /* Get an io reference on the interface if it is attached */
5896 if (!ifnet_is_attached(ifp, 1)) {
5897 return ENXIO;
5898 }
91447636 5899 error = ifp->if_set_bpf_tap(ifp, mode, callback);
5900 ifnet_decr_iorefcnt(ifp);
5901 }
0a7de745 5902 return error;
5903}
5904
2d21ac55 5905errno_t
5906dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
5907 struct sockaddr *ll_addr, size_t ll_len)
1c79356b 5908{
0a7de745 5909 errno_t result = EOPNOTSUPP;
5910 struct if_proto *proto;
5911 const struct sockaddr *verify;
2d21ac55 5912 proto_media_resolve_multi resolvep;
6d2010ae 5913
5914 if (!ifnet_is_attached(ifp, 1)) {
5915 return result;
5916 }
6d2010ae 5917
91447636 5918 bzero(ll_addr, ll_len);
5919
5920 /* Call the protocol first; callee holds a proto refcnt upon success */
5921 ifnet_lock_shared(ifp);
91447636 5922 proto = find_attached_proto(ifp, proto_addr->sa_family);
6d2010ae 5923 ifnet_lock_done(ifp);
2d21ac55 5924 if (proto != NULL) {
5925 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
5926 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
0a7de745 5927 if (resolvep != NULL) {
6d2010ae 5928 result = resolvep(ifp, proto_addr,
39037602 5929 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
0a7de745 5930 }
6d2010ae 5931 if_proto_free(proto);
91447636 5932 }
6d2010ae 5933
5934 /* Let the interface verify the multicast address */
5935 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
0a7de745 5936 if (result == 0) {
91447636 5937 verify = ll_addr;
0a7de745 5938 } else {
91447636 5939 verify = proto_addr;
0a7de745 5940 }
91447636
A
5941 result = ifp->if_check_multi(ifp, verify);
5942 }
5943
5944 ifnet_decr_iorefcnt(ifp);
0a7de745 5945 return result;
91447636 5946}
1c79356b 5947
91447636 5948__private_extern__ errno_t
6d2010ae 5949dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
5950 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5951 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5952{
5953 struct if_proto *proto;
0a7de745 5954 errno_t result = 0;
5955
5956 /* callee holds a proto refcnt upon success */
5957 ifnet_lock_shared(ifp);
91447636 5958 proto = find_attached_proto(ifp, target_proto->sa_family);
6d2010ae 5959 ifnet_lock_done(ifp);
2d21ac55 5960 if (proto == NULL) {
91447636 5961 result = ENOTSUP;
6d2010ae 5962 } else {
0a7de745 5963 proto_media_send_arp arpp;
5964 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
5965 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
39236c6e 5966 if (arpp == NULL) {
2d21ac55 5967 result = ENOTSUP;
5968 } else {
5969 switch (arpop) {
5970 case ARPOP_REQUEST:
5971 arpstat.txrequests++;
0a7de745 5972 if (target_hw != NULL) {
39236c6e 5973 arpstat.txurequests++;
0a7de745 5974 }
5975 break;
5976 case ARPOP_REPLY:
5977 arpstat.txreplies++;
5978 break;
5979 }
5980 result = arpp(ifp, arpop, sender_hw, sender_proto,
5981 target_hw, target_proto);
39236c6e 5982 }
6d2010ae 5983 if_proto_free(proto);
91447636 5984 }
6d2010ae 5985
0a7de745 5986 return result;
91447636 5987}
1c79356b 5988
5989struct net_thread_marks { };
5990static const struct net_thread_marks net_thread_marks_base = { };
5991
5992__private_extern__ const net_thread_marks_t net_thread_marks_none =
0a7de745 5993 &net_thread_marks_base;
5994
5995__private_extern__ net_thread_marks_t
5996net_thread_marks_push(u_int32_t push)
316670eb 5997{
5998 static const char *const base = (const void*)&net_thread_marks_base;
5999 u_int32_t pop = 0;
6000
6001 if (push != 0) {
6002 struct uthread *uth = get_bsdthread_info(current_thread());
6003
6004 pop = push & ~uth->uu_network_marks;
0a7de745 6005 if (pop != 0) {
39236c6e 6006 uth->uu_network_marks |= pop;
0a7de745 6007 }
6008 }
6009
0a7de745 6010 return (net_thread_marks_t)&base[pop];
6011}
6012
6013__private_extern__ net_thread_marks_t
6014net_thread_unmarks_push(u_int32_t unpush)
316670eb 6015{
6016 static const char *const base = (const void*)&net_thread_marks_base;
6017 u_int32_t unpop = 0;
6018
6019 if (unpush != 0) {
6020 struct uthread *uth = get_bsdthread_info(current_thread());
316670eb 6021
39236c6e 6022 unpop = unpush & uth->uu_network_marks;
0a7de745 6023 if (unpop != 0) {
39236c6e 6024 uth->uu_network_marks &= ~unpop;
0a7de745 6025 }
6026 }
6027
0a7de745 6028 return (net_thread_marks_t)&base[unpop];
6029}
6030
6031__private_extern__ void
39236c6e 6032net_thread_marks_pop(net_thread_marks_t popx)
316670eb 6033{
39236c6e 6034 static const char *const base = (const void*)&net_thread_marks_base;
3e170ce0 6035 const ptrdiff_t pop = (const char *)popx - (const char *)base;
316670eb 6036
6037 if (pop != 0) {
6038 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6039 struct uthread *uth = get_bsdthread_info(current_thread());
6040
6041 VERIFY((pop & ones) == pop);
6042 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
6043 uth->uu_network_marks &= ~pop;
6044 }
6045}
6046
6047__private_extern__ void
6048net_thread_unmarks_pop(net_thread_marks_t unpopx)
6049{
6050 static const char *const base = (const void*)&net_thread_marks_base;
3e170ce0 6051 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
6052
6053 if (unpop != 0) {
6054 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6055 struct uthread *uth = get_bsdthread_info(current_thread());
6056
6057 VERIFY((unpop & ones) == unpop);
6058 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
6059 uth->uu_network_marks |= unpop;
6060 }
6061}
6062
6063__private_extern__ u_int32_t
6064net_thread_is_marked(u_int32_t check)
6065{
6066 if (check != 0) {
6067 struct uthread *uth = get_bsdthread_info(current_thread());
6068 return uth->uu_network_marks & check;
6069 } else {
6070 return 0;
39236c6e 6071 }
6072}
6073
6074__private_extern__ u_int32_t
6075net_thread_is_unmarked(u_int32_t check)
6076{
6077 if (check != 0) {
6078 struct uthread *uth = get_bsdthread_info(current_thread());
6079 return ~uth->uu_network_marks & check;
6080 } else {
6081 return 0;
39236c6e 6082 }
6083}
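
/*
 * Example (sketch): typical use of the thread-mark stack above. The
 * cookie returned by net_thread_marks_push() encodes exactly the bits
 * this frame newly set (as an offset from net_thread_marks_base), so
 * nested push/pop pairs restore uu_network_marks precisely.
 * NET_THREAD_HELD_PF is assumed here purely for illustration.
 */
static void
example_marked_section(void)
{
        net_thread_marks_t marks;

        marks = net_thread_marks_push(NET_THREAD_HELD_PF);
        /* ... code that net_thread_is_marked(NET_THREAD_HELD_PF) can see ... */
        net_thread_marks_pop(marks);
}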
6084
6085static __inline__ int
6086_is_announcement(const struct sockaddr_in * sender_sin,
6d2010ae 6087 const struct sockaddr_in * target_sin)
2d21ac55 6088{
cb323159 6089 if (target_sin == NULL || sender_sin == NULL) {
0a7de745 6090 return FALSE;
2d21ac55 6091 }
cb323159 6092
0a7de745 6093 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
6094}
6095
91447636 6096__private_extern__ errno_t
6097dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
6098 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
6099 const struct sockaddr *target_proto0, u_int32_t rtflags)
91447636 6100{
0a7de745 6101 errno_t result = 0;
6102 const struct sockaddr_in * sender_sin;
6103 const struct sockaddr_in * target_sin;
6104 struct sockaddr_inarp target_proto_sinarp;
6105 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6d2010ae 6106
6107 if (target_proto == NULL || sender_proto == NULL) {
6108 return EINVAL;
6109 }
6110
6111 if (sender_proto->sa_family != target_proto->sa_family) {
6112 return EINVAL;
6113 }
6d2010ae 6114
6115 /*
6116 * If the target is a (default) router, provide that
6117 * information to the send_arp callback routine.
6118 */
6119 if (rtflags & RTF_ROUTER) {
6120 bcopy(target_proto, &target_proto_sinarp,
0a7de745 6121 sizeof(struct sockaddr_in));
6122 target_proto_sinarp.sin_other |= SIN_ROUTER;
6123 target_proto = (struct sockaddr *)&target_proto_sinarp;
6124 }
6125
6126 /*
6127 * If this is an ARP request and the target IP is IPv4LL,
6128 * send the request on all interfaces. The exception is
6129 * an announcement, which must only appear on the specific
6130 * interface.
91447636 6131 */
6132 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
6133 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6134 if (target_proto->sa_family == AF_INET &&
6135 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
6136 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
cb323159 6137 !_is_announcement(sender_sin, target_sin)) {
6138 ifnet_t *ifp_list;
6139 u_int32_t count;
6140 u_int32_t ifp_on;
6d2010ae 6141
6142 result = ENOTSUP;
6143
6144 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
6145 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6146 errno_t new_result;
6147 ifaddr_t source_hw = NULL;
6148 ifaddr_t source_ip = NULL;
6149 struct sockaddr_in source_ip_copy;
6150 struct ifnet *cur_ifp = ifp_list[ifp_on];
6151
91447636 6152 /*
6153 * Only arp on interfaces marked for IPv4LL
6154 * ARPing. This may mean that we don't ARP on
6155 * the interface the subnet route points to.
91447636 6156 */
0a7de745 6157 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
91447636 6158 continue;
0a7de745 6159 }
b0d623f7 6160
91447636 6161 /* Find the source IP address */
6162 ifnet_lock_shared(cur_ifp);
6163 source_hw = cur_ifp->if_lladdr;
6164 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
6165 ifa_link) {
6166 IFA_LOCK(source_ip);
6167 if (source_ip->ifa_addr != NULL &&
6168 source_ip->ifa_addr->sa_family ==
6169 AF_INET) {
6170 /* Copy the source IP address */
6171 source_ip_copy =
6172 *(struct sockaddr_in *)
316670eb 6173 (void *)source_ip->ifa_addr;
6d2010ae 6174 IFA_UNLOCK(source_ip);
6175 break;
6176 }
6d2010ae 6177 IFA_UNLOCK(source_ip);
91447636 6178 }
6d2010ae 6179
6180 /* No IP Source, don't arp */
6181 if (source_ip == NULL) {
6d2010ae 6182 ifnet_lock_done(cur_ifp);
91447636
A
6183 continue;
6184 }
6185
6186 IFA_ADDREF(source_hw);
6187 ifnet_lock_done(cur_ifp);
6188
91447636 6189 /* Send the ARP */
6d2010ae 6190 new_result = dlil_send_arp_internal(cur_ifp,
6191 arpop, (struct sockaddr_dl *)(void *)
6192 source_hw->ifa_addr,
6193 (struct sockaddr *)&source_ip_copy, NULL,
6194 target_proto);
b0d623f7 6195
6d2010ae 6196 IFA_REMREF(source_hw);
6197 if (result == ENOTSUP) {
6198 result = new_result;
6199 }
6200 }
6d2010ae 6201 ifnet_list_free(ifp_list);
91447636 6202 }
6203 } else {
6204 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
6205 sender_proto, target_hw, target_proto);
91447636 6206 }
6d2010ae 6207
0a7de745 6208 return result;
91447636 6209}
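
/*
 * Example (sketch): with the logic above, an ARPOP_REQUEST for an IPv4LL
 * target such as 169.254.10.5 fans out to every IFEF_ARPLL interface
 * that has an IPv4 source address, while an announcement (sender equal
 * to target) is sent only on the originating interface.
 */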
1c79356b 6210
6211/*
6212 * Caller must hold ifnet head lock.
6213 */
6214static int
6215ifnet_lookup(struct ifnet *ifp)
91447636 6216{
6217 struct ifnet *_ifp;
6218
5ba3f43e 6219 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
6d2010ae 6220 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
0a7de745 6221 if (_ifp == ifp) {
91447636 6222 break;
0a7de745 6223 }
6d2010ae 6224 }
0a7de745 6225 return _ifp != NULL;
91447636 6226}
39037602 6227
6228/*
6229 * Caller has to pass a non-zero refio argument to get an
6230 * IO reference count. This will prevent ifnet_detach from
39037602 6231 * being called when there are outstanding io reference counts.
91447636 6232 */
6233int
6234ifnet_is_attached(struct ifnet *ifp, int refio)
6235{
6236 int ret;
6237
6238 lck_mtx_lock_spin(&ifp->if_ref_lock);
5ba3f43e 6239 if ((ret = IF_FULLY_ATTACHED(ifp))) {
0a7de745 6240 if (refio > 0) {
6d2010ae 6241 ifp->if_refio++;
0a7de745 6242 }
6243 }
6244 lck_mtx_unlock(&ifp->if_ref_lock);
6245
0a7de745 6246 return ret;
6247}
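
/*
 * Example (sketch): the canonical io-refcount bracket built on
 * ifnet_is_attached() above; the reference taken with refio != 0 keeps
 * ifnet_detach() from completing until it is dropped again. The
 * function name is illustrative.
 */
static errno_t
example_with_ioref(struct ifnet *ifp)
{
        if (!ifnet_is_attached(ifp, 1)) {
                return ENXIO;           /* interface is gone or detaching */
        }
        /* ... it is now safe to use ifp ... */
        ifnet_decr_iorefcnt(ifp);
        return 0;
}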
6248
6249void
6250ifnet_incr_pending_thread_count(struct ifnet *ifp)
6251{
6252 lck_mtx_lock_spin(&ifp->if_ref_lock);
6253 ifp->if_threads_pending++;
6254 lck_mtx_unlock(&ifp->if_ref_lock);
6255}
6256
6257void
6258ifnet_decr_pending_thread_count(struct ifnet *ifp)
6259{
6260 lck_mtx_lock_spin(&ifp->if_ref_lock);
6261 VERIFY(ifp->if_threads_pending > 0);
6262 ifp->if_threads_pending--;
6263 if (ifp->if_threads_pending == 0) {
6264 wakeup(&ifp->if_threads_pending);
6265 }
6266 lck_mtx_unlock(&ifp->if_ref_lock);
6267}
6268
6269/*
6270 * Caller must ensure the interface is attached; the assumption is that
6271 * there is at least an outstanding IO reference count held already.
cb323159 6272 * Most callers would call ifnet_is_{attached,data_ready}() instead.
6273 */
6274void
6275ifnet_incr_iorefcnt(struct ifnet *ifp)
6276{
6277 lck_mtx_lock_spin(&ifp->if_ref_lock);
5ba3f43e 6278 VERIFY(IF_FULLY_ATTACHED(ifp));
6279 VERIFY(ifp->if_refio > 0);
6280 ifp->if_refio++;
6281 lck_mtx_unlock(&ifp->if_ref_lock);
6282}
6283
6284__attribute__((always_inline))
6285static void
6286ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
6d2010ae 6287{
6288 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
6289
6d2010ae 6290 VERIFY(ifp->if_refio > 0);
5ba3f43e 6291 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
cb323159 6292
6d2010ae 6293 ifp->if_refio--;
cb323159 6294 VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);
6d2010ae 6295
6296	/*
6297	 * If there are no more outstanding io references, wake up the
6298	 * ifnet_detach thread if the detaching flag is set.
6299	 */
0a7de745 6300 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
6d2010ae 6301 wakeup(&(ifp->if_refio));
0a7de745 6302 }
6303}
6304
6305void
6306ifnet_decr_iorefcnt(struct ifnet *ifp)
6307{
6308 lck_mtx_lock_spin(&ifp->if_ref_lock);
6309 ifnet_decr_iorefcnt_locked(ifp);
6310 lck_mtx_unlock(&ifp->if_ref_lock);
6311}
6312
6313boolean_t
6314ifnet_datamov_begin(struct ifnet *ifp)
6315{
6316 boolean_t ret;
6317
6318 lck_mtx_lock_spin(&ifp->if_ref_lock);
6319 if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
6320 ifp->if_refio++;
6321 ifp->if_datamov++;
6322 }
6323 lck_mtx_unlock(&ifp->if_ref_lock);
6324
6325 return ret;
6326}
6327
6328void
6329ifnet_datamov_end(struct ifnet *ifp)
6330{
6331 lck_mtx_lock_spin(&ifp->if_ref_lock);
6332 VERIFY(ifp->if_datamov > 0);
6333 /*
6334	 * If no more threads are moving data, wake up any
6335	 * drainers that are blocked waiting for this.
6336 */
6337 if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
6338 wakeup(&(ifp->if_datamov));
6339 }
6340 ifnet_decr_iorefcnt_locked(ifp);
6341 lck_mtx_unlock(&ifp->if_ref_lock);
6342}
6343
6344void
6345ifnet_datamov_suspend(struct ifnet *ifp)
6346{
6347 lck_mtx_lock_spin(&ifp->if_ref_lock);
6348 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6349 ifp->if_refio++;
6350 if (ifp->if_suspend++ == 0) {
6351 VERIFY(ifp->if_refflags & IFRF_READY);
6352 ifp->if_refflags &= ~IFRF_READY;
6353 }
6354 lck_mtx_unlock(&ifp->if_ref_lock);
6355}
6356
6357void
6358ifnet_datamov_drain(struct ifnet *ifp)
6359{
6360 lck_mtx_lock(&ifp->if_ref_lock);
6361 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6362 /* data movement must already be suspended */
6363 VERIFY(ifp->if_suspend > 0);
6364 VERIFY(!(ifp->if_refflags & IFRF_READY));
6365 ifp->if_drainers++;
6366 while (ifp->if_datamov != 0) {
6367 (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
6368 (PZERO - 1), __func__, NULL);
6369 }
6370 VERIFY(!(ifp->if_refflags & IFRF_READY));
6371 VERIFY(ifp->if_drainers > 0);
6372 ifp->if_drainers--;
6373 lck_mtx_unlock(&ifp->if_ref_lock);
6374
6375 /* purge the interface queues */
6376 if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
6377 if_qflush(ifp, 0);
6378 }
6379}
5ba3f43e 6380
6381void
6382ifnet_datamov_resume(struct ifnet *ifp)
6383{
6384 lck_mtx_lock(&ifp->if_ref_lock);
6385 /* data movement must already be suspended */
6386 VERIFY(ifp->if_suspend > 0);
6387 if (--ifp->if_suspend == 0) {
6388 VERIFY(!(ifp->if_refflags & IFRF_READY));
6389 ifp->if_refflags |= IFRF_READY;
6390 }
6391 ifnet_decr_iorefcnt_locked(ifp);
6392 lck_mtx_unlock(&ifp->if_ref_lock);
6393}
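
/*
 * Example (sketch): quiescing the data path with the primitives above.
 * Suspend makes ifnet_datamov_begin() fail for new callers, drain waits
 * for the threads already moving data, and resume reopens the gate.
 * The function name is illustrative.
 */
static void
example_datamov_quiesce(struct ifnet *ifp)
{
        ifnet_datamov_suspend(ifp);     /* clears IFRF_READY */
        ifnet_datamov_drain(ifp);       /* waits until if_datamov drops to 0 */
        /* ... reconfigure while no thread is moving data ... */
        ifnet_datamov_resume(ifp);      /* sets IFRF_READY again */
}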
b0d623f7 6394
6395static void
6396dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
6397{
6398 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
6399 ctrace_t *tr;
6400 u_int32_t idx;
6401 u_int16_t *cnt;
1c79356b 6402
6403 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
6404 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
6405 /* NOTREACHED */
6406 }
6407
6408 if (refhold) {
6409 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
6410 tr = dl_if_dbg->dldbg_if_refhold;
6411 } else {
6412 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
6413 tr = dl_if_dbg->dldbg_if_refrele;
6414 }
6415
6416 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
6417 ctrace_record(&tr[idx]);
91447636 6418}
1c79356b 6419
6420errno_t
6421dlil_if_ref(struct ifnet *ifp)
6422{
6423 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6424
6425 if (dl_if == NULL) {
6426 return EINVAL;
6427 }
6428
6429 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6430 ++dl_if->dl_if_refcnt;
6431 if (dl_if->dl_if_refcnt == 0) {
6432 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
6433 /* NOTREACHED */
6434 }
0a7de745 6435 if (dl_if->dl_if_trace != NULL) {
6d2010ae 6436 (*dl_if->dl_if_trace)(dl_if, TRUE);
0a7de745 6437 }
6438 lck_mtx_unlock(&dl_if->dl_if_lock);
6439
0a7de745 6440 return 0;
91447636 6441}
1c79356b 6442
6443errno_t
6444dlil_if_free(struct ifnet *ifp)
6445{
6446 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5ba3f43e 6447 bool need_release = FALSE;
6d2010ae 6448
6449 if (dl_if == NULL) {
6450 return EINVAL;
6451 }
6452
6453 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6454 switch (dl_if->dl_if_refcnt) {
6455 case 0:
6456 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
6457 /* NOTREACHED */
6458 break;
6459 case 1:
6460 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
6461 need_release = TRUE;
6462 }
6463 break;
6464 default:
6465 break;
6466 }
6467 --dl_if->dl_if_refcnt;
0a7de745 6468 if (dl_if->dl_if_trace != NULL) {
6d2010ae 6469 (*dl_if->dl_if_trace)(dl_if, FALSE);
0a7de745 6470 }
6d2010ae 6471 lck_mtx_unlock(&dl_if->dl_if_lock);
6472 if (need_release) {
6473 dlil_if_release(ifp);
6474 }
0a7de745 6475 return 0;
6d2010ae 6476}
1c79356b 6477
2d21ac55 6478static errno_t
6d2010ae 6479dlil_attach_protocol_internal(struct if_proto *proto,
6480 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
6481 uint32_t * proto_count)
91447636 6482{
6d2010ae 6483 struct kev_dl_proto_data ev_pr_data;
6484 struct ifnet *ifp = proto->ifp;
6485 int retval = 0;
b0d623f7 6486 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6487 struct if_proto *prev_proto;
6488 struct if_proto *_proto;
6489
6490 /* callee holds a proto refcnt upon success */
6491 ifnet_lock_exclusive(ifp);
6492 _proto = find_attached_proto(ifp, proto->protocol_family);
6493 if (_proto != NULL) {
91447636 6494 ifnet_lock_done(ifp);
6d2010ae 6495 if_proto_free(_proto);
0a7de745 6496 return EEXIST;
91447636 6497 }
6d2010ae 6498
6499 /*
6500 * Call family module add_proto routine so it can refine the
6501 * demux descriptors as it wishes.
6502 */
6503 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
6504 demux_count);
91447636 6505 if (retval) {
6d2010ae 6506 ifnet_lock_done(ifp);
0a7de745 6507 return retval;
91447636 6508 }
6d2010ae 6509
6510 /*
6511 * Insert the protocol in the hash
6512 */
6d2010ae 6513 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
0a7de745 6514 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
6d2010ae 6515 prev_proto = SLIST_NEXT(prev_proto, next_hash);
6516 }
6517 if (prev_proto) {
6d2010ae 6518 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
0a7de745 6519 } else {
6520 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
6521 proto, next_hash);
0a7de745 6522 }
6523
6524 /* hold a proto refcnt for attach */
6525 if_proto_ref(proto);
1c79356b 6526
91447636 6527 /*
6528	 * The reserved field carries the number of protocols still attached
6529	 * (subject to change).
91447636 6530 */
91447636 6531 ev_pr_data.proto_family = proto->protocol_family;
6532 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
6533
6534 ifnet_lock_done(ifp);
6535
6536 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
6537 (struct net_event_data *)&ev_pr_data,
0a7de745 6538 sizeof(struct kev_dl_proto_data));
6539 if (proto_count != NULL) {
6540 *proto_count = ev_pr_data.proto_remaining_count;
6541 }
0a7de745 6542 return retval;
91447636 6543}
0b4e3aa0 6544
6545errno_t
6546ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 6547 const struct ifnet_attach_proto_param *proto_details)
6548{
6549 int retval = 0;
6550 struct if_proto *ifproto = NULL;
5ba3f43e 6551 uint32_t proto_count = 0;
6552
6553 ifnet_head_lock_shared();
6554 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6555 retval = EINVAL;
6556 goto end;
6557 }
6558 /* Check that the interface is in the global list */
6559 if (!ifnet_lookup(ifp)) {
6560 retval = ENXIO;
6561 goto end;
6562 }
6563
6564 ifproto = zalloc(dlif_proto_zone);
6565 if (ifproto == NULL) {
6566 retval = ENOMEM;
6567 goto end;
6568 }
6569 bzero(ifproto, dlif_proto_size);
6570
6571 /* refcnt held above during lookup */
6572 ifproto->ifp = ifp;
6573 ifproto->protocol_family = protocol;
6574 ifproto->proto_kpi = kProtoKPI_v1;
6575 ifproto->kpi.v1.input = proto_details->input;
6576 ifproto->kpi.v1.pre_output = proto_details->pre_output;
6577 ifproto->kpi.v1.event = proto_details->event;
6578 ifproto->kpi.v1.ioctl = proto_details->ioctl;
6579 ifproto->kpi.v1.detached = proto_details->detached;
6580 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
6581 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6d2010ae 6582
2d21ac55 6583 retval = dlil_attach_protocol_internal(ifproto,
6584 proto_details->demux_list, proto_details->demux_count,
6585 &proto_count);
6d2010ae 6586
9bccf70c 6587end:
cb323159 6588 if (retval != 0 && retval != EEXIST) {
39236c6e 6589 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
cb323159 6590 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
5ba3f43e
A
6591 } else {
6592 if (dlil_verbose) {
cb323159
A
6593 DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
6594 ifp != NULL ? if_name(ifp) : "N/A",
0a7de745 6595 protocol, proto_count);
5ba3f43e 6596 }
6d2010ae
A
6597 }
6598 ifnet_head_done();
5ba3f43e 6599 if (retval == 0) {
a39ff7e2
A
6600 /*
6601 * A protocol has been attached, mark the interface up.
6602 * This used to be done by configd.KernelEventMonitor, but that
6603 * is inherently prone to races (rdar://problem/30810208).
6604 */
6605 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6606 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6607 dlil_post_sifflags_msg(ifp);
5ba3f43e 6608 } else if (ifproto != NULL) {
6d2010ae 6609 zfree(dlif_proto_zone, ifproto);
5ba3f43e 6610 }
0a7de745 6611 return retval;
1c79356b
A
6612}
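
/*
 * Illustrative sketch (not part of the original dlil.c): how a
 * hypothetical kernel extension might attach a v1 protocol handler
 * using the KPI above. The family value, Ethertype, and handler names
 * are assumptions made for the example; the demux descriptor matches
 * an Ethertype via DLIL_DESC_ETYPE2.
 */
#if 0 /* example only */
#define EXAMPLE_PROTO_FAMILY 1234 /* made-up protocol family value */

static errno_t
example_proto_input(ifnet_t ifp, protocol_family_t protocol,
    mbuf_t packet, char *header)
{
#pragma unused(ifp, protocol, header)
    /* consume the packet; a real handler would hand it up the stack */
    mbuf_freem(packet);
    return 0;
}

static errno_t
example_attach(ifnet_t ifp)
{
    u_int16_t etype = htons(0x88b5); /* IEEE local experimental */
    struct ifnet_demux_desc desc = {
        .type = DLIL_DESC_ETYPE2,
        .data = &etype,
        .datalen = sizeof(etype)
    };
    struct ifnet_attach_proto_param param = {
        .demux_list = &desc,
        .demux_count = 1,
        .input = example_proto_input
    };

    return ifnet_attach_protocol(ifp, EXAMPLE_PROTO_FAMILY, &param);
}
#endif /* example only */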

errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
    int retval = 0;
    struct if_proto *ifproto = NULL;
    uint32_t proto_count = 0;

    ifnet_head_lock_shared();
    if (ifp == NULL || protocol == 0 || proto_details == NULL) {
        retval = EINVAL;
        goto end;
    }
    /* Check that the interface is in the global list */
    if (!ifnet_lookup(ifp)) {
        retval = ENXIO;
        goto end;
    }

    ifproto = zalloc(dlif_proto_zone);
    if (ifproto == NULL) {
        retval = ENOMEM;
        goto end;
    }
    bzero(ifproto, sizeof(*ifproto));

    /* refcnt held above during lookup */
    ifproto->ifp = ifp;
    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v2;
    ifproto->kpi.v2.input = proto_details->input;
    ifproto->kpi.v2.pre_output = proto_details->pre_output;
    ifproto->kpi.v2.event = proto_details->event;
    ifproto->kpi.v2.ioctl = proto_details->ioctl;
    ifproto->kpi.v2.detached = proto_details->detached;
    ifproto->kpi.v2.resolve_multi = proto_details->resolve;
    ifproto->kpi.v2.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count,
        &proto_count);

end:
    if (retval != 0 && retval != EEXIST) {
        DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
            ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
    } else {
        if (dlil_verbose) {
            DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
                ifp != NULL ? if_name(ifp) : "N/A",
                protocol, proto_count);
        }
    }
    ifnet_head_done();
    if (retval == 0) {
        /*
         * A protocol has been attached, mark the interface up.
         * This used to be done by configd.KernelEventMonitor, but that
         * is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    } else if (ifproto != NULL) {
        zfree(dlif_proto_zone, ifproto);
    }
    return retval;
}
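
/*
 * Illustrative note: from a caller's perspective the v2 variant differs
 * from v1 only in the input handler signature; v2 drops the separate
 * frame-header pointer (compare ifproto_media_input_v1/_v2 below):
 *
 *	errno_t input_v1(ifnet_t, protocol_family_t, mbuf_t, char *header);
 *	errno_t input_v2(ifnet_t, protocol_family_t, mbuf_t);
 */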

errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
    struct if_proto *proto = NULL;
    int retval = 0;

    if (ifp == NULL || proto_family == 0) {
        retval = EINVAL;
        goto end;
    }

    ifnet_lock_exclusive(ifp);
    /* callee holds a proto refcnt upon success */
    proto = find_attached_proto(ifp, proto_family);
    if (proto == NULL) {
        retval = ENXIO;
        ifnet_lock_done(ifp);
        goto end;
    }

    /* call family module del_proto */
    if (ifp->if_del_proto) {
        ifp->if_del_proto(ifp, proto->protocol_family);
    }

    SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
        proto, if_proto, next_hash);

    if (proto->proto_kpi == kProtoKPI_v1) {
        proto->kpi.v1.input = ifproto_media_input_v1;
        proto->kpi.v1.pre_output = ifproto_media_preout;
        proto->kpi.v1.event = ifproto_media_event;
        proto->kpi.v1.ioctl = ifproto_media_ioctl;
        proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
        proto->kpi.v1.send_arp = ifproto_media_send_arp;
    } else {
        proto->kpi.v2.input = ifproto_media_input_v2;
        proto->kpi.v2.pre_output = ifproto_media_preout;
        proto->kpi.v2.event = ifproto_media_event;
        proto->kpi.v2.ioctl = ifproto_media_ioctl;
        proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
        proto->kpi.v2.send_arp = ifproto_media_send_arp;
    }
    proto->detached = 1;
    ifnet_lock_done(ifp);

    if (dlil_verbose) {
        DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
            (proto->proto_kpi == kProtoKPI_v1) ?
            "v1" : "v2", proto_family);
    }

    /* release proto refcnt held during protocol attach */
    if_proto_free(proto);

    /*
     * Release proto refcnt held during lookup; the rest of
     * protocol detach steps will happen when the last proto
     * reference is released.
     */
    if_proto_free(proto);

end:
    return retval;
}
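
/*
 * Illustrative sketch (not part of the original dlil.c): a protocol
 * module would typically call the KPI above from its unload path.
 * EXAMPLE_PROTO_FAMILY matches the hypothetical attach sketch earlier;
 * final teardown happens later, from the 'detached' callback, once the
 * last protocol reference is dropped.
 */
#if 0 /* example only */
static void
example_detach(ifnet_t ifp)
{
    errno_t err = ifnet_detach_protocol(ifp, EXAMPLE_PROTO_FAMILY);

    if (err != 0 && err != ENXIO) {
        printf("example: ifnet_detach_protocol failed: %d\n", err);
    }
}
#endif /* example only */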

static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
    return ENXIO;
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
    return ENXIO;
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
    return ENXIO;
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
    return ENXIO;
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
    return ENXIO;
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
    return ENXIO;
}
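
/*
 * Illustrative note: the ifproto_media_* stubs above are what
 * ifnet_detach_protocol() swaps into a detached if_proto, so any
 * straggling callers harmlessly get ENXIO (or a no-op, for events)
 * instead of jumping through a stale function pointer.
 */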

extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
    struct ifnet *tmp_if;
    struct ifaddr *ifa;
    struct if_data_internal if_data_saved;
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
    struct dlil_threading_info *dl_inp;
    u_int32_t sflags = 0;
    int err;

    if (ifp == NULL) {
        return EINVAL;
    }

    /*
     * Serialize ifnet attach using dlil_ifnet_lock, in order to
     * prevent the interface from being configured while it is
     * embryonic, as ifnet_head_lock is dropped and reacquired
     * below prior to marking the ifnet with IFRF_ATTACHED.
     */
    dlil_if_lock();
    ifnet_head_lock_exclusive();
    /* Verify we aren't already on the list */
    TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
        if (tmp_if == ifp) {
            ifnet_head_done();
            dlil_if_unlock();
            return EEXIST;
        }
    }

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
        panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
            __func__, ifp);
        /* NOTREACHED */
    }
    lck_mtx_unlock(&ifp->if_ref_lock);

    ifnet_lock_exclusive(ifp);

    /* Sanity check */
    VERIFY(ifp->if_detaching_link.tqe_next == NULL);
    VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
    VERIFY(ifp->if_threads_pending == 0);

    if (ll_addr != NULL) {
        if (ifp->if_addrlen == 0) {
            ifp->if_addrlen = ll_addr->sdl_alen;
        } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
            ifnet_lock_done(ifp);
            ifnet_head_done();
            dlil_if_unlock();
            return EINVAL;
        }
    }

    /*
     * Allow interfaces without protocol families to attach
     * only if they have the necessary fields filled out.
     */
    if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
        DLIL_PRINTF("%s: Attempt to attach interface without "
            "family module - %d\n", __func__, ifp->if_family);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return ENODEV;
    }

    /* Allocate protocol hash table */
    VERIFY(ifp->if_proto_hash == NULL);
    ifp->if_proto_hash = zalloc(dlif_phash_zone);
    if (ifp->if_proto_hash == NULL) {
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return ENOBUFS;
    }
    bzero(ifp->if_proto_hash, dlif_phash_size);

    lck_mtx_lock_spin(&ifp->if_flt_lock);
    VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
    TAILQ_INIT(&ifp->if_flt_head);
    VERIFY(ifp->if_flt_busy == 0);
    VERIFY(ifp->if_flt_waiters == 0);
    lck_mtx_unlock(&ifp->if_flt_lock);

    if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
        VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
        LIST_INIT(&ifp->if_multiaddrs);
    }

    VERIFY(ifp->if_allhostsinm == NULL);
    VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
    TAILQ_INIT(&ifp->if_addrhead);

    if (ifp->if_index == 0) {
        int idx = if_next_index();

        if (idx == -1) {
            ifp->if_index = 0;
            ifnet_lock_done(ifp);
            ifnet_head_done();
            dlil_if_unlock();
            return ENOBUFS;
        }
        ifp->if_index = idx;
    }
    /* There should not be anything occupying this slot */
    VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

    /* allocate (if needed) and initialize a link address */
    ifa = dlil_alloc_lladdr(ifp, ll_addr);
    if (ifa == NULL) {
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return ENOBUFS;
    }

    VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
    ifnet_addrs[ifp->if_index - 1] = ifa;

    /* make this address the first on the list */
    IFA_LOCK(ifa);
    /* hold a reference for ifnet_addrs[] */
    IFA_ADDREF_LOCKED(ifa);
    /* if_attach_link_ifa() holds a reference for ifa_link */
    if_attach_link_ifa(ifp, ifa);
    IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
    mac_ifnet_label_associate(ifp);
#endif

    TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
    ifindex2ifnet[ifp->if_index] = ifp;

    /* Hold a reference to the underlying dlil_ifnet */
    ifnet_reference(ifp);

    /* Clear stats (save and restore the other fields that we care about) */
    if_data_saved = ifp->if_data;
    bzero(&ifp->if_data, sizeof(ifp->if_data));
    ifp->if_data.ifi_type = if_data_saved.ifi_type;
    ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
    ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
    ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
    ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
    ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
    ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
    ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
    ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
    ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
    ifnet_touch_lastchange(ifp);

    VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
        ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
        ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

    /* By default, use SFB and enable flow advisory */
    sflags = PKTSCHEDF_QALG_SFB;
    if (if_flowadv) {
        sflags |= PKTSCHEDF_QALG_FLOWCTL;
    }

    if (if_delaybased_queue) {
        sflags |= PKTSCHEDF_QALG_DELAYBASED;
    }

    if (ifp->if_output_sched_model ==
        IFNET_SCHED_MODEL_DRIVER_MANAGED) {
        sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
    }

    /* Initialize transmit queue(s) */
    err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
    if (err != 0) {
        panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
            "err=%d", __func__, ifp, err);
        /* NOTREACHED */
    }

    /* Sanity checks on the input thread storage */
    dl_inp = &dl_if->dl_if_inpstorage;
    bzero(&dl_inp->stats, sizeof(dl_inp->stats));
    VERIFY(dl_inp->input_waiting == 0);
    VERIFY(dl_inp->wtot == 0);
    VERIFY(dl_inp->ifp == NULL);
    VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
    VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
    VERIFY(!dl_inp->net_affinity);
    VERIFY(ifp->if_inp == NULL);
    VERIFY(dl_inp->input_thr == THREAD_NULL);
    VERIFY(dl_inp->wloop_thr == THREAD_NULL);
    VERIFY(dl_inp->poll_thr == THREAD_NULL);
    VERIFY(dl_inp->tag == 0);

#if IFNET_INPUT_SANITY_CHK
    VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

    VERIFY(ifp->if_poll_thread == THREAD_NULL);
    dlil_reset_rxpoll_params(ifp);
    /*
     * A specific DLIL input thread is created per non-loopback interface.
     */
    if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
        ifp->if_inp = dl_inp;
        ifnet_incr_pending_thread_count(ifp);
        err = dlil_create_input_thread(ifp, ifp->if_inp);
        if (err != 0) {
            panic_plain("%s: ifp=%p couldn't get an input thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
    }
    /*
     * If the driver supports the new transmit model, calculate flow hash
     * and create a workloop starter thread to invoke the if_start callback
     * where the packets may be dequeued and transmitted.
     */
    if (ifp->if_eflags & IFEF_TXSTART) {
        ifp->if_flowhash = ifnet_calc_flowhash(ifp);
        VERIFY(ifp->if_flowhash != 0);
        VERIFY(ifp->if_start_thread == THREAD_NULL);

        ifnet_set_start_cycle(ifp, NULL);
        ifp->if_start_active = 0;
        ifp->if_start_req = 0;
        ifp->if_start_flags = 0;
        VERIFY(ifp->if_start != NULL);
        ifnet_incr_pending_thread_count(ifp);
        if ((err = kernel_thread_start(ifnet_start_thread_func,
            ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
            panic_plain("%s: "
                "ifp=%p couldn't get a start thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
        ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
    } else {
        ifp->if_flowhash = 0;
    }

    /* Reset polling parameters */
    ifnet_set_poll_cycle(ifp, NULL);
    ifp->if_poll_update = 0;
    ifp->if_poll_flags = 0;
    ifp->if_poll_req = 0;
    VERIFY(ifp->if_poll_thread == THREAD_NULL);

    /*
     * If the driver supports the new receive model, create a poller
     * thread to invoke the if_input_poll callback, where packets may
     * be dequeued from the driver and processed for reception.
     * If the interface is netif compat, the poller thread is managed
     * by netif instead.
     */
    if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL) &&
        (ifp->if_xflags & IFXF_LEGACY)) {
        VERIFY(ifp->if_input_poll != NULL);
        VERIFY(ifp->if_input_ctl != NULL);
        ifnet_incr_pending_thread_count(ifp);
        if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
            &ifp->if_poll_thread)) != KERN_SUCCESS) {
            panic_plain("%s: ifp=%p couldn't get a poll thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
        ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
    }

    VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
    VERIFY(ifp->if_desc.ifd_len == 0);
    VERIFY(ifp->if_desc.ifd_desc != NULL);

    /* Record attach PC stacktrace */
    ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

    ifp->if_updatemcasts = 0;
    if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
        struct ifmultiaddr *ifma;
        LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
            IFMA_LOCK(ifma);
            if (ifma->ifma_addr->sa_family == AF_LINK ||
                ifma->ifma_addr->sa_family == AF_UNSPEC) {
                ifp->if_updatemcasts++;
            }
            IFMA_UNLOCK(ifma);
        }

        DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
            "membership(s)\n", if_name(ifp),
            ifp->if_updatemcasts);
    }

    /* Clear logging parameters */
    bzero(&ifp->if_log, sizeof(ifp->if_log));

    /* Clear foreground/realtime activity timestamps */
    ifp->if_fg_sendts = 0;
    ifp->if_rt_sendts = 0;

    VERIFY(ifp->if_delegated.ifp == NULL);
    VERIFY(ifp->if_delegated.type == 0);
    VERIFY(ifp->if_delegated.family == 0);
    VERIFY(ifp->if_delegated.subfamily == 0);
    VERIFY(ifp->if_delegated.expensive == 0);
    VERIFY(ifp->if_delegated.constrained == 0);

    VERIFY(ifp->if_agentids == NULL);
    VERIFY(ifp->if_agentcount == 0);

    /* Reset interface state */
    bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
    ifp->if_interface_state.valid_bitmask |=
        IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
    ifp->if_interface_state.interface_availability =
        IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

    /* Initialize Link Quality Metric (loopback [lo0] is always good) */
    if (ifp == lo_ifp) {
        ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
        ifp->if_interface_state.valid_bitmask |=
            IF_INTERFACE_STATE_LQM_STATE_VALID;
    } else {
        ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
    }

    /*
     * Enable ECN capability on this interface depending on the
     * value of the ECN global setting.
     */
    if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
        ifp->if_eflags |= IFEF_ECN_ENABLE;
        ifp->if_eflags &= ~IFEF_ECN_DISABLE;
    }

    /*
     * Built-in Cyclops always-on policy for WiFi infra
     */
    if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
        errno_t error;

        error = if_set_qosmarking_mode(ifp,
            IFRTYPE_QOSMARKING_FASTLANE);
        if (error != 0) {
            DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
                __func__, ifp->if_xname, error);
        } else {
            ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
            DLIL_PRINTF("%s fastlane enabled on %s\n",
                __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
        }
    }
    ifnet_lock_done(ifp);
    ifnet_head_done();

    lck_mtx_lock(&ifp->if_cached_route_lock);
    /* Enable forwarding cached route */
    ifp->if_fwd_cacheok = 1;
    /* Clean up any existing cached routes */
    ROUTE_RELEASE(&ifp->if_fwd_route);
    bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
    ROUTE_RELEASE(&ifp->if_src_route);
    bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
    ROUTE_RELEASE(&ifp->if_src_route6);
    bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
    lck_mtx_unlock(&ifp->if_cached_route_lock);

    ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

    /*
     * Allocate and attach IGMPv3/MLDv2 interface specific variables
     * and trees; do this before the ifnet is marked as attached.
     * The ifnet keeps the reference to the info structures even after
     * the ifnet is detached, since the network-layer records still
     * refer to the info structures even after that. This also
     * makes it possible for them to still function after the ifnet
     * is recycled or reattached.
     */
#if INET
    if (IGMP_IFINFO(ifp) == NULL) {
        IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
        VERIFY(IGMP_IFINFO(ifp) != NULL);
    } else {
        VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
        igmp_domifreattach(IGMP_IFINFO(ifp));
    }
#endif /* INET */
#if INET6
    if (MLD_IFINFO(ifp) == NULL) {
        MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
        VERIFY(MLD_IFINFO(ifp) != NULL);
    } else {
        VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
        mld_domifreattach(MLD_IFINFO(ifp));
    }
#endif /* INET6 */

    VERIFY(ifp->if_data_threshold == 0);
    VERIFY(ifp->if_dt_tcall != NULL);

    /*
     * Wait for the created kernel threads for I/O to get
     * scheduled and run at least once before we proceed
     * to mark the interface as attached.
     */
    lck_mtx_lock(&ifp->if_ref_lock);
    while (ifp->if_threads_pending != 0) {
        DLIL_PRINTF("%s: Waiting for all kernel threads created for "
            "interface %s to get scheduled at least once.\n",
            __func__, ifp->if_xname);
        (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
            __func__, NULL);
        LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
    }
    lck_mtx_unlock(&ifp->if_ref_lock);
    DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
        "at least once. Proceeding.\n", __func__, ifp->if_xname);

    /* Finally, mark this ifnet as attached. */
    lck_mtx_lock(rnh_lock);
    ifnet_lock_exclusive(ifp);
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
    lck_mtx_unlock(&ifp->if_ref_lock);
    if (net_rtref) {
        /* boot-args override; enable idle notification */
        (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
            IFRF_IDLE_NOTIFY);
    } else {
        /* apply previous request(s) to set the idle flags, if any */
        (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
            ifp->if_idle_new_flags_mask);
    }
    ifnet_lock_done(ifp);
    lck_mtx_unlock(rnh_lock);
    dlil_if_unlock();

#if PF
    /*
     * Attach packet filter to this interface, if enabled.
     */
    pf_ifnet_hook(ifp, 1);
#endif /* PF */

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

    if (dlil_verbose) {
        DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
            (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
    }

    return 0;
}
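
/*
 * Illustrative sketch (not part of the original dlil.c): a driver that
 * has allocated its ifnet (e.g. via ifnet_allocate()) would attach it
 * with its link-layer address roughly as follows. The locally
 * administered MAC address and the buffer sizing are assumptions made
 * for the example.
 */
#if 0 /* example only */
static errno_t
example_driver_attach(ifnet_t ifp)
{
    char buf[sizeof(struct sockaddr_dl) + ETHER_ADDR_LEN];
    struct sockaddr_dl *sdl = (struct sockaddr_dl *)buf;
    const u_char mac[ETHER_ADDR_LEN] =
        { 0x02, 0x00, 0x11, 0x22, 0x33, 0x44 };

    bzero(buf, sizeof(buf));
    sdl->sdl_len = sizeof(buf);
    sdl->sdl_family = AF_LINK;
    sdl->sdl_alen = ETHER_ADDR_LEN; /* must match ifp->if_addrlen */
    bcopy(mac, LLADDR(sdl), ETHER_ADDR_LEN);

    return ifnet_attach(ifp, sdl);
}
#endif /* example only */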

/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself. Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
    struct ifaddr *ifa, *oifa;
    struct sockaddr_dl *asdl, *msdl;
    char workbuf[IFNAMSIZ * 2];
    int namelen, masklen, socksize;
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
    VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

    namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
        if_name(ifp));
    masklen = offsetof(struct sockaddr_dl, sdl_data[0])
        + ((namelen > 0) ? namelen : 0);
    socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
    if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
        socksize = sizeof(struct sockaddr_dl);
    }
    socksize = ROUNDUP(socksize);
#undef ROUNDUP

    ifa = ifp->if_lladdr;
    if (socksize > DLIL_SDLMAXLEN ||
        (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
        /*
         * Rare, but in the event that the link address requires
         * more storage space than DLIL_SDLMAXLEN, allocate the
         * largest possible storages for address and mask, such
         * that we can reuse the same space when if_addrlen grows.
         * This same space will be used when if_addrlen shrinks.
         */
        if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
            int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
            ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
            if (ifa == NULL) {
                return NULL;
            }
            ifa_lock_init(ifa);
            /* Don't set IFD_ALLOC, as this is permanent */
            ifa->ifa_debug = IFD_LINK;
        }
        IFA_LOCK(ifa);
        /* address and mask sockaddr_dl locations */
        asdl = (struct sockaddr_dl *)(ifa + 1);
        bzero(asdl, SOCK_MAXADDRLEN);
        msdl = (struct sockaddr_dl *)(void *)
            ((char *)asdl + SOCK_MAXADDRLEN);
        bzero(msdl, SOCK_MAXADDRLEN);
    } else {
        VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
        /*
         * Use the storage areas for address and mask within the
         * dlil_ifnet structure. This is the most common case.
         */
        if (ifa == NULL) {
            ifa = &dl_if->dl_if_lladdr.ifa;
            ifa_lock_init(ifa);
            /* Don't set IFD_ALLOC, as this is permanent */
            ifa->ifa_debug = IFD_LINK;
        }
        IFA_LOCK(ifa);
        /* address and mask sockaddr_dl locations */
        asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
        bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
        msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
        bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
    }

    /* hold a permanent reference for the ifnet itself */
    IFA_ADDREF_LOCKED(ifa);
    oifa = ifp->if_lladdr;
    ifp->if_lladdr = ifa;

    VERIFY(ifa->ifa_debug == IFD_LINK);
    ifa->ifa_ifp = ifp;
    ifa->ifa_rtrequest = link_rtrequest;
    ifa->ifa_addr = (struct sockaddr *)asdl;
    asdl->sdl_len = socksize;
    asdl->sdl_family = AF_LINK;
    if (namelen > 0) {
        bcopy(workbuf, asdl->sdl_data, min(namelen,
            sizeof(asdl->sdl_data)));
        asdl->sdl_nlen = namelen;
    } else {
        asdl->sdl_nlen = 0;
    }
    asdl->sdl_index = ifp->if_index;
    asdl->sdl_type = ifp->if_type;
    if (ll_addr != NULL) {
        asdl->sdl_alen = ll_addr->sdl_alen;
        bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
    } else {
        asdl->sdl_alen = 0;
    }
    ifa->ifa_netmask = (struct sockaddr *)msdl;
    msdl->sdl_len = masklen;
    while (namelen > 0) {
        msdl->sdl_data[--namelen] = 0xff;
    }
    IFA_UNLOCK(ifa);

    if (oifa != NULL) {
        IFA_REMREF(oifa);
    }

    return ifa;
}
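
/*
 * Worked example of the sizing arithmetic above, assuming a
 * hypothetical interface named "en0" (namelen 3) with a 6-byte
 * link-layer address:
 *
 *	masklen  = offsetof(struct sockaddr_dl, sdl_data[0]) + 3 = 11
 *	socksize = 11 + 6 = 17, raised to sizeof (struct sockaddr_dl)
 *		   (20), then ROUNDUP to a 4-byte boundary = 20
 *
 * which fits well within DLIL_SDLMAXLEN, so the common case uses the
 * in-structure dl_if_lladdr storage rather than _MALLOC.
 */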

static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
    in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
    in6_purgeaddrs(ifp);
#endif /* INET6 */
}

errno_t
ifnet_detach(ifnet_t ifp)
{
    struct ifnet *delegated_ifp;
    struct nd_ifinfo *ndi = NULL;

    if (ifp == NULL) {
        return EINVAL;
    }

    ndi = ND_IFINFO(ifp);
    if (NULL != ndi) {
        ndi->cga_initialized = FALSE;
    }

    lck_mtx_lock(rnh_lock);
    ifnet_head_lock_exclusive();
    ifnet_lock_exclusive(ifp);

    if (ifp->if_output_netem != NULL) {
        netem_destroy(ifp->if_output_netem);
        ifp->if_output_netem = NULL;
    }

    /*
     * Check to see if this interface has previously triggered
     * aggressive protocol draining; if so, decrement the global
     * refcnt and clear PR_AGGDRAIN on the route domain if
     * there are no more of such an interface around.
     */
    (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_ATTACHED)) {
        lck_mtx_unlock(&ifp->if_ref_lock);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);
        return EINVAL;
    } else if (ifp->if_refflags & IFRF_DETACHING) {
        /* Interface has already been detached */
        lck_mtx_unlock(&ifp->if_ref_lock);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);
        return ENXIO;
    }
    VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
    /* Indicate this interface is being detached */
    ifp->if_refflags &= ~IFRF_ATTACHED;
    ifp->if_refflags |= IFRF_DETACHING;
    lck_mtx_unlock(&ifp->if_ref_lock);

    if (dlil_verbose) {
        DLIL_PRINTF("%s: detaching\n", if_name(ifp));
    }

    /* clean up flow control entry object if there's any */
    if (ifp->if_eflags & IFEF_TXSTART) {
        ifnet_flowadv(ifp->if_flowhash);
    }

    /* Reset ECN enable/disable flags */
    ifp->if_eflags &= ~IFEF_ECN_DISABLE;
    ifp->if_eflags &= ~IFEF_ECN_ENABLE;

    /* Reset CLAT46 flag */
    ifp->if_eflags &= ~IFEF_CLAT46;

    /*
     * We do not reset the TCP keep-alive counters, in case
     * a TCP connection stays connected after the interface
     * goes down.
     */
    if (ifp->if_tcp_kao_cnt > 0) {
        os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
            __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
    }
    ifp->if_tcp_kao_max = 0;

    /*
     * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
     * no longer be visible during lookups from this point.
     */
    VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
    TAILQ_REMOVE(&ifnet_head, ifp, if_link);
    ifp->if_link.tqe_next = NULL;
    ifp->if_link.tqe_prev = NULL;
    if (ifp->if_ordered_link.tqe_next != NULL ||
        ifp->if_ordered_link.tqe_prev != NULL) {
        ifnet_remove_from_ordered_list(ifp);
    }
    ifindex2ifnet[ifp->if_index] = NULL;

    /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
    ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

    /* Record detach PC stacktrace */
    ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

    /* Clear logging parameters */
    bzero(&ifp->if_log, sizeof(ifp->if_log));

    /* Clear delegated interface info (reference released below) */
    delegated_ifp = ifp->if_delegated.ifp;
    bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

    /* Reset interface state */
    bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

    ifnet_lock_done(ifp);
    ifnet_head_done();
    lck_mtx_unlock(rnh_lock);

    /* Release reference held on the delegated interface */
    if (delegated_ifp != NULL) {
        ifnet_release(delegated_ifp);
    }

    /* Reset Link Quality Metric (unless loopback [lo0]) */
    if (ifp != lo_ifp) {
        if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
    }

    /* Reset TCP local statistics */
    if (ifp->if_tcp_stat != NULL) {
        bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
    }

    /* Reset UDP local statistics */
    if (ifp->if_udp_stat != NULL) {
        bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
    }

    /* Reset ifnet IPv4 stats */
    if (ifp->if_ipv4_stat != NULL) {
        bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
    }

    /* Reset ifnet IPv6 stats */
    if (ifp->if_ipv6_stat != NULL) {
        bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
    }

    /* Release memory held for interface link status report */
    if (ifp->if_link_status != NULL) {
        FREE(ifp->if_link_status, M_TEMP);
        ifp->if_link_status = NULL;
    }

    /* Clear agent IDs */
    if (ifp->if_agentids != NULL) {
        FREE(ifp->if_agentids, M_NETAGENT);
        ifp->if_agentids = NULL;
    }
    ifp->if_agentcount = 0;

    /* Let BPF know we're detaching */
    bpfdetach(ifp);

    /* Mark the interface as DOWN */
    if_down(ifp);

    /* Disable forwarding cached route */
    lck_mtx_lock(&ifp->if_cached_route_lock);
    ifp->if_fwd_cacheok = 0;
    lck_mtx_unlock(&ifp->if_cached_route_lock);

    /* Disable data threshold and wait for any pending event posting */
    ifp->if_data_threshold = 0;
    VERIFY(ifp->if_dt_tcall != NULL);
    (void) thread_call_cancel_wait(ifp->if_dt_tcall);

    /*
     * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
     * references to the info structures and leave them attached to
     * this ifnet.
     */
#if INET
    igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
    mld_domifdetach(ifp);
#endif /* INET6 */

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

    /* Let worker thread take care of the rest, to avoid reentrancy */
    dlil_if_lock();
    ifnet_detaching_enqueue(ifp);
    dlil_if_unlock();

    return 0;
}
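
/*
 * Illustrative note (not part of the original dlil.c): ifnet_detach()
 * only queues the interface for the detacher thread, so teardown is
 * asynchronous. A driver therefore defers releasing its private state
 * until its if_free callback runs from ifnet_detach_final(); a sketch
 * with hypothetical names:
 */
#if 0 /* example only */
static void
example_if_free(ifnet_t ifp)
{
    struct example_softc *sc = ifnet_softc(ifp);

    /* all DLIL references are gone by the time this runs */
    example_softc_destroy(sc);
}
#endif /* example only */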

static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
    dlil_if_lock_assert();

    ++ifnet_detaching_cnt;
    VERIFY(ifnet_detaching_cnt != 0);
    TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
    wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
    struct ifnet *ifp;

    dlil_if_lock_assert();

    ifp = TAILQ_FIRST(&ifnet_detaching_head);
    VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
    if (ifp != NULL) {
        VERIFY(ifnet_detaching_cnt != 0);
        --ifnet_detaching_cnt;
        TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
        ifp->if_detaching_link.tqe_next = NULL;
        ifp->if_detaching_link.tqe_prev = NULL;
    }
    return ifp;
}

static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
    struct ifnet *ifp;

    for (;;) {
        dlil_if_lock_assert();
        while (ifnet_detaching_cnt == 0) {
            (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
                (PZERO - 1), "ifnet_detacher_cont", 0,
                ifnet_detacher_thread_cont);
            /* NOTREACHED */
        }

        net_update_uptime();

        VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

        /* Take care of detaching ifnet */
        ifp = ifnet_detaching_dequeue();
        if (ifp != NULL) {
            dlil_if_unlock();
            ifnet_detach_final(ifp);
            dlil_if_lock();
        }
    }
}

__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
    dlil_decr_pending_thread_count();
    dlil_if_lock();
    (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
        (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
    /*
     * msleep0() shouldn't have returned as PCATCH was not set;
     * therefore assert in this case.
     */
    dlil_if_unlock();
    VERIFY(0);
}
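
/*
 * Illustrative note: this is the kernel continuation pattern. msleep0()
 * with a continuation does not return to its caller on wakeup; the
 * thread instead restarts in ifnet_detacher_thread_cont(), which is why
 * the function above can VERIFY(0) after the sleep, and why the
 * continuation loop re-arms itself with the same continuation pointer.
 */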

static void
ifnet_detach_final(struct ifnet *ifp)
{
    struct ifnet_filter *filter, *filter_next;
    struct ifnet_filter_head fhead;
    struct dlil_threading_info *inp;
    struct ifaddr *ifa;
    ifnet_detached_func if_free;
    int i;

    lck_mtx_lock(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_DETACHING)) {
        panic("%s: flags mismatch (detaching not set) ifp=%p",
            __func__, ifp);
        /* NOTREACHED */
    }

    /*
     * Wait until the existing IO references get released
     * before we proceed with ifnet_detach. This is not a
     * common case, so block without using a continuation.
     */
    while (ifp->if_refio > 0) {
        DLIL_PRINTF("%s: Waiting for IO references on %s interface "
            "to be released\n", __func__, if_name(ifp));
        (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
            (PZERO - 1), "ifnet_ioref_wait", NULL);
    }

    VERIFY(ifp->if_datamov == 0);
    VERIFY(ifp->if_drainers == 0);
    VERIFY(ifp->if_suspend == 0);
    ifp->if_refflags &= ~IFRF_READY;
    lck_mtx_unlock(&ifp->if_ref_lock);

    /* Drain and destroy send queue */
    ifclassq_teardown(ifp);

    /* Detach interface filters */
    lck_mtx_lock(&ifp->if_flt_lock);
    if_flt_monitor_enter(ifp);

    LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
    fhead = ifp->if_flt_head;
    TAILQ_INIT(&ifp->if_flt_head);

    for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
        filter_next = TAILQ_NEXT(filter, filt_next);
        lck_mtx_unlock(&ifp->if_flt_lock);

        dlil_detach_filter_internal(filter, 1);
        lck_mtx_lock(&ifp->if_flt_lock);
    }
    if_flt_monitor_leave(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Tell upper layers to drop their network addresses */
    if_purgeaddrs(ifp);

    ifnet_lock_exclusive(ifp);

    /* Unplumb all protocols */
    for (i = 0; i < PROTO_HASH_SLOTS; i++) {
        struct if_proto *proto;

        proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
        while (proto != NULL) {
            protocol_family_t family = proto->protocol_family;
            ifnet_lock_done(ifp);
            proto_unplumb(family, ifp);
            ifnet_lock_exclusive(ifp);
            proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
        }
        /* There should not be any protocols left */
        VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
    }
    zfree(dlif_phash_zone, ifp->if_proto_hash);
    ifp->if_proto_hash = NULL;

    /* Detach (permanent) link address from if_addrhead */
    ifa = TAILQ_FIRST(&ifp->if_addrhead);
    VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
    IFA_LOCK(ifa);
    if_detach_link_ifa(ifp, ifa);
    IFA_UNLOCK(ifa);

    /* Remove (permanent) link address from ifnet_addrs[] */
    IFA_REMREF(ifa);
    ifnet_addrs[ifp->if_index - 1] = NULL;

    /* This interface should not be on {ifnet_head,detaching} */
    VERIFY(ifp->if_link.tqe_next == NULL);
    VERIFY(ifp->if_link.tqe_prev == NULL);
    VERIFY(ifp->if_detaching_link.tqe_next == NULL);
    VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
    VERIFY(ifp->if_ordered_link.tqe_next == NULL);
    VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

    /* The slot should have been emptied */
    VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

    /* There should not be any addresses left */
    VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

    /*
     * Signal the starter thread to terminate itself.
     */
    if (ifp->if_start_thread != THREAD_NULL) {
        lck_mtx_lock_spin(&ifp->if_start_lock);
        ifp->if_start_flags = 0;
        ifp->if_start_thread = THREAD_NULL;
        wakeup_one((caddr_t)&ifp->if_start_thread);
        lck_mtx_unlock(&ifp->if_start_lock);
    }

    /*
     * Signal the poller thread to terminate itself.
     */
    if (ifp->if_poll_thread != THREAD_NULL) {
        lck_mtx_lock_spin(&ifp->if_poll_lock);
        ifp->if_poll_thread = THREAD_NULL;
        wakeup_one((caddr_t)&ifp->if_poll_thread);
        lck_mtx_unlock(&ifp->if_poll_lock);
    }

    /*
     * If thread affinity was set for the workloop thread, we will need
     * to tear down the affinity and release the extra reference count
     * taken at attach time. Does not apply to lo0 or other interfaces
     * without dedicated input threads.
     */
    if ((inp = ifp->if_inp) != NULL) {
        VERIFY(inp != dlil_main_input_thread);

        if (inp->net_affinity) {
            struct thread *tp, *wtp, *ptp;

            lck_mtx_lock_spin(&inp->input_lck);
            wtp = inp->wloop_thr;
            inp->wloop_thr = THREAD_NULL;
            ptp = inp->poll_thr;
            inp->poll_thr = THREAD_NULL;
            tp = inp->input_thr; /* don't nullify now */
            inp->tag = 0;
            inp->net_affinity = FALSE;
            lck_mtx_unlock(&inp->input_lck);

            /* Tear down poll thread affinity */
            if (ptp != NULL) {
                VERIFY(ifp->if_eflags & IFEF_RXPOLL);
                VERIFY(ifp->if_xflags & IFXF_LEGACY);
                (void) dlil_affinity_set(ptp,
                    THREAD_AFFINITY_TAG_NULL);
                thread_deallocate(ptp);
            }

            /* Tear down workloop thread affinity */
            if (wtp != NULL) {
                (void) dlil_affinity_set(wtp,
                    THREAD_AFFINITY_TAG_NULL);
                thread_deallocate(wtp);
            }

            /* Tear down DLIL input thread affinity */
            (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
            thread_deallocate(tp);
        }

        /* disassociate ifp DLIL input thread */
        ifp->if_inp = NULL;

        /* tell the input thread to terminate */
        lck_mtx_lock_spin(&inp->input_lck);
        inp->input_waiting |= DLIL_INPUT_TERMINATE;
        if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
            wakeup_one((caddr_t)&inp->input_waiting);
        }
        lck_mtx_unlock(&inp->input_lck);
        ifnet_lock_done(ifp);

        /* wait for the input thread to terminate */
        lck_mtx_lock_spin(&inp->input_lck);
        while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
            == 0) {
            (void) msleep(&inp->input_waiting, &inp->input_lck,
                (PZERO - 1) | PSPIN, inp->input_name, NULL);
        }
        lck_mtx_unlock(&inp->input_lck);
        ifnet_lock_exclusive(ifp);

        /* clean-up input thread state */
        dlil_clean_threading_info(inp);
        /* clean-up poll parameters */
        VERIFY(ifp->if_poll_thread == THREAD_NULL);
        dlil_reset_rxpoll_params(ifp);
    }

    /* The driver might unload, so point these to ourselves */
    if_free = ifp->if_free;
    ifp->if_output_dlil = ifp_if_output;
    ifp->if_output = ifp_if_output;
    ifp->if_pre_enqueue = ifp_if_output;
    ifp->if_start = ifp_if_start;
    ifp->if_output_ctl = ifp_if_ctl;
    ifp->if_input_dlil = ifp_if_input;
    ifp->if_input_poll = ifp_if_input_poll;
    ifp->if_input_ctl = ifp_if_ctl;
    ifp->if_ioctl = ifp_if_ioctl;
    ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
    ifp->if_free = ifp_if_free;
    ifp->if_demux = ifp_if_demux;
    ifp->if_event = ifp_if_event;
    ifp->if_framer_legacy = ifp_if_framer;
    ifp->if_framer = ifp_if_framer_extended;
    ifp->if_add_proto = ifp_if_add_proto;
    ifp->if_del_proto = ifp_if_del_proto;
    ifp->if_check_multi = ifp_if_check_multi;

    /* wipe out interface description */
    VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
    ifp->if_desc.ifd_len = 0;
    VERIFY(ifp->if_desc.ifd_desc != NULL);
    bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

    /* there shouldn't be any delegation by now */
    VERIFY(ifp->if_delegated.ifp == NULL);
    VERIFY(ifp->if_delegated.type == 0);
    VERIFY(ifp->if_delegated.family == 0);
    VERIFY(ifp->if_delegated.subfamily == 0);
    VERIFY(ifp->if_delegated.expensive == 0);
    VERIFY(ifp->if_delegated.constrained == 0);

    /* QoS marking gets cleared */
    ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
    if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

    ifnet_lock_done(ifp);

#if PF
    /*
     * Detach this interface from packet filter, if enabled.
     */
    pf_ifnet_hook(ifp, 0);
#endif /* PF */

    /* Filter list should be empty */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
    VERIFY(ifp->if_flt_busy == 0);
    VERIFY(ifp->if_flt_waiters == 0);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Last chance to drain send queue */
    if_qflush(ifp, 0);

    /* Last chance to cleanup any cached route */
    lck_mtx_lock(&ifp->if_cached_route_lock);
    VERIFY(!ifp->if_fwd_cacheok);
    ROUTE_RELEASE(&ifp->if_fwd_route);
    bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
    ROUTE_RELEASE(&ifp->if_src_route);
    bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
    ROUTE_RELEASE(&ifp->if_src_route6);
    bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
    lck_mtx_unlock(&ifp->if_cached_route_lock);

    VERIFY(ifp->if_data_threshold == 0);
    VERIFY(ifp->if_dt_tcall != NULL);
    VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

    ifnet_llreach_ifdetach(ifp);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

    /*
     * Finally, mark this ifnet as detached.
     */
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_DETACHING)) {
        panic("%s: flags mismatch (detaching not set) ifp=%p",
            __func__, ifp);
        /* NOTREACHED */
    }
    ifp->if_refflags &= ~IFRF_DETACHING;
    lck_mtx_unlock(&ifp->if_ref_lock);
    if (if_free != NULL) {
        if_free(ifp);
    }

    if (dlil_verbose) {
        DLIL_PRINTF("%s: detached\n", if_name(ifp));
    }

    /* Release reference held during ifnet attach */
    ifnet_release(ifp);
}

errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
    m_freem_list(m);
    return 0;
}

void
ifp_if_start(struct ifnet *ifp)
{
    ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
    m_freem_list(m_head);
    return ENXIO;
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
    if (m_head != NULL) {
        *m_head = NULL;
    }
    if (m_tail != NULL) {
        *m_tail = NULL;
    }
    if (cnt != NULL) {
        *cnt = 0;
    }
    if (len != NULL) {
        *len = 0;
    }
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
    return EOPNOTSUPP;
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
    m_freem(m);
    return EJUSTRETURN;
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
    return EINVAL;
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
    return EINVAL;
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
    return EOPNOTSUPP;
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
    return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else
    return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
    m_freem(*m);
    *m = NULL;

    if (pre != NULL) {
        *pre = 0;
    }
    if (post != NULL) {
        *post = 0;
    }

    return EJUSTRETURN;
}

errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
    return EOPNOTSUPP;
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
    /* XXX not sure what to do here */
    return 0;
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}

int
dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
    struct ifnet *ifp1 = NULL;
    struct dlil_ifnet *dlifp1 = NULL;
    struct dlil_ifnet *dlifp1_saved = NULL;
    void *buf, *base, **pbuf;
    int ret = 0;

    VERIFY(*ifp == NULL);
    dlil_if_lock();
    /*
     * We absolutely can't have an interface with the same name
     * in an in-use state; to make sure of that, the list has to
     * be traversed completely.
     */
    TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
        ifp1 = (struct ifnet *)dlifp1;

        if (ifp1->if_family != family) {
            continue;
        }

        /*
         * If interface is in use, return EBUSY if either unique id
         * or interface extended names are the same
         */
        lck_mtx_lock(&dlifp1->dl_if_lock);
        if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
            if (dlifp1->dl_if_flags & DLIF_INUSE) {
                lck_mtx_unlock(&dlifp1->dl_if_lock);
                ret = EBUSY;
                goto end;
            }
        }

        if (uniqueid_len) {
            if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
                bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
                if (dlifp1->dl_if_flags & DLIF_INUSE) {
                    lck_mtx_unlock(&dlifp1->dl_if_lock);
                    ret = EBUSY;
                    goto end;
                } else {
                    /* Cache the first interface that can be recycled */
                    if (*ifp == NULL) {
                        *ifp = ifp1;
                        dlifp1_saved = dlifp1;
                    }
                    /*
                     * XXX Do not break or jump to end as we have to traverse
                     * the whole list to ensure there are no name collisions
                     */
                }
            }
        }
        lck_mtx_unlock(&dlifp1->dl_if_lock);
    }

    /* If there's an interface that can be recycled, use that */
    if (*ifp != NULL) {
        if (dlifp1_saved != NULL) {
            lck_mtx_lock(&dlifp1_saved->dl_if_lock);
            dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
            lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
            dlifp1_saved = NULL;
        }
        goto end;
    }

    /* no interface found, allocate a new one */
    buf = zalloc(dlif_zone);
    if (buf == NULL) {
        ret = ENOMEM;
        goto end;
    }
    bzero(buf, dlif_bufsize);

    /* Get the 64-bit aligned base address for this object */
    base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
        sizeof(u_int64_t));
    VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

    /*
     * Wind back a pointer size from the aligned base and
     * save the original address so we can free it later.
     */
    pbuf = (void **)((intptr_t)base - sizeof(void *));
    *pbuf = buf;
    dlifp1 = base;

    if (uniqueid_len) {
        MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
            M_NKE, M_WAITOK);
        if (dlifp1->dl_if_uniqueid == NULL) {
            zfree(dlif_zone, buf);
            ret = ENOMEM;
            goto end;
        }
        bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
        dlifp1->dl_if_uniqueid_len = uniqueid_len;
    }

    ifp1 = (struct ifnet *)dlifp1;
    dlifp1->dl_if_flags = DLIF_INUSE;
    if (ifnet_debug) {
        dlifp1->dl_if_flags |= DLIF_DEBUG;
        dlifp1->dl_if_trace = dlil_if_trace;
    }
    ifp1->if_name = dlifp1->dl_if_namestorage;
    ifp1->if_xname = dlifp1->dl_if_xnamestorage;

    /* initialize interface description */
    ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
    ifp1->if_desc.ifd_len = 0;
    ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
    mac_ifnet_label_init(ifp1);
#endif

    if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
        DLIL_PRINTF("%s: failed to allocate if local stats, "
            "error: %d\n", __func__, ret);
        /* This probably shouldn't be fatal */
        ret = 0;
    }

    lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
    lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
        ifnet_lock_attr);
    lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
    lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
        ifnet_lock_attr);
    ifp1->if_inetdata = NULL;
#endif
#if INET6
    lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
        ifnet_lock_attr);
    ifp1->if_inet6data = NULL;
#endif
    lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
        ifnet_lock_attr);
    ifp1->if_link_status = NULL;

    /* for send data paths */
    lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
        ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
        ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
        ifnet_lock_attr);

    /* for receive data paths */
    lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
        ifnet_lock_attr);

    /* thread call allocation is done with sleeping zalloc */
    ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
        ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
    if (ifp1->if_dt_tcall == NULL) {
        panic_plain("%s: couldn't create if_dt_tcall", __func__);
        /* NOTREACHED */
    }

    TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

    *ifp = ifp1;

end:
    dlil_if_unlock();

    VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
        IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
6d2010ae 8303
0a7de745 8304 return ret;
9bccf70c
A
8305}
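
/*
 * Illustrative userland sketch (not kernel code) of the alignment trick
 * used in dlil_if_acquire() above: over-allocate, round the payload up
 * to an 8-byte boundary, and stash the original allocation pointer one
 * slot below the aligned base so it can be recovered at free time.
 * Function and macro names here are hypothetical.
 */
#include <stdint.h>
#include <stdlib.h>

#define ALIGN_UP(x, a) (((uintptr_t)(x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

static void *
alloc_aligned_with_backptr(size_t size)
{
	/* room for the payload, the back pointer, and worst-case padding */
	void *buf = malloc(size + sizeof(void *) + sizeof(uint64_t));
	void *base;

	if (buf == NULL) {
		return NULL;
	}
	base = (void *)ALIGN_UP((uintptr_t)buf + sizeof(void *),
	    sizeof(uint64_t));
	((void **)base)[-1] = buf;	/* wind back, save the original */
	return base;
}

static void
free_aligned_with_backptr(void *base)
{
	if (base != NULL) {
		free(((void **)base)[-1]);
	}
}
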
8306
2d21ac55 8307__private_extern__ void
0a7de745 8308dlil_if_release(ifnet_t ifp)
6d2010ae
A
8309{
8310 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
8311
5ba3f43e
A
8312 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
8313 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
8314 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
8315 }
8316
6d2010ae
A
8317 ifnet_lock_exclusive(ifp);
8318 lck_mtx_lock(&dlifp->dl_if_lock);
8319 dlifp->dl_if_flags &= ~DLIF_INUSE;
fe8ab488 8320 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6d2010ae 8321 ifp->if_name = dlifp->dl_if_namestorage;
39236c6e
A
8322 /* Reset external name (name + unit) */
8323 ifp->if_xname = dlifp->dl_if_xnamestorage;
39037602 8324 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
39236c6e 8325 "%s?", ifp->if_name);
6d2010ae 8326 lck_mtx_unlock(&dlifp->dl_if_lock);
2d21ac55 8327#if CONFIG_MACF_NET
6d2010ae 8328 /*
39037602
A
8329 * We can either recycle the MAC label here or in dlil_if_acquire().
8330 * It seems logical to do it here but this means that anything that
8331 * still has a handle on ifp will now see it as unlabeled.
8332 * Since the interface is "dead" that may be OK. Revisit later.
8333 */
6d2010ae 8334 mac_ifnet_label_recycle(ifp);
2d21ac55 8335#endif
6d2010ae 8336 ifnet_lock_done(ifp);
9bccf70c 8337}
4a3eedf9 8338
7ddcb079
A
8339__private_extern__ void
8340dlil_if_lock(void)
8341{
8342 lck_mtx_lock(&dlil_ifnet_lock);
8343}
8344
8345__private_extern__ void
8346dlil_if_unlock(void)
8347{
8348 lck_mtx_unlock(&dlil_ifnet_lock);
8349}
8350
8351__private_extern__ void
8352dlil_if_lock_assert(void)
8353{
5ba3f43e 8354 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
7ddcb079
A
8355}
8356
4a3eedf9
A
8357__private_extern__ void
8358dlil_proto_unplumb_all(struct ifnet *ifp)
8359{
8360 /*
39236c6e
A
8361 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
8362 * each bucket contains exactly one entry; PF_VLAN does not need an
8363 * explicit unplumb.
4a3eedf9 8364 *
39236c6e 8365 * if_proto_hash[3] is for other protocols; we expect anything
4a3eedf9
A
8366 * in this bucket to respond to the DETACHING event (which would
8367 * have happened by now) and do the unplumb then.
8368 */
8369 (void) proto_unplumb(PF_INET, ifp);
8370#if INET6
8371 (void) proto_unplumb(PF_INET6, ifp);
8372#endif /* INET6 */
4a3eedf9 8373}
6d2010ae
A
8374
8375static void
8376ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
8377{
8378 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8379 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8380
0a7de745 8381 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
6d2010ae
A
8382
8383 lck_mtx_unlock(&ifp->if_cached_route_lock);
8384}
8385
8386static void
8387ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
8388{
8389 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8390 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8391
8392 if (ifp->if_fwd_cacheok) {
0a7de745 8393 route_copyin(src, &ifp->if_src_route, sizeof(*src));
6d2010ae 8394 } else {
39236c6e 8395 ROUTE_RELEASE(src);
6d2010ae
A
8396 }
8397 lck_mtx_unlock(&ifp->if_cached_route_lock);
8398}
8399
8400#if INET6
8401static void
8402ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
8403{
8404 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8405 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8406
8407 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
0a7de745 8408 sizeof(*dst));
6d2010ae
A
8409
8410 lck_mtx_unlock(&ifp->if_cached_route_lock);
8411}
8412
8413static void
8414ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
8415{
8416 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8417 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8418
8419 if (ifp->if_fwd_cacheok) {
8420 route_copyin((struct route *)src,
0a7de745 8421 (struct route *)&ifp->if_src_route6, sizeof(*src));
6d2010ae 8422 } else {
39236c6e 8423 ROUTE_RELEASE(src);
6d2010ae
A
8424 }
8425 lck_mtx_unlock(&ifp->if_cached_route_lock);
8426}
8427#endif /* INET6 */
8428
8429struct rtentry *
0a7de745 8430ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6d2010ae 8431{
0a7de745
A
8432 struct route src_rt;
8433 struct sockaddr_in *dst;
316670eb
A
8434
8435 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6d2010ae
A
8436
8437 ifp_src_route_copyout(ifp, &src_rt);
8438
39236c6e
A
8439 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
8440 ROUTE_RELEASE(&src_rt);
8441 if (dst->sin_family != AF_INET) {
0a7de745
A
8442 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8443 dst->sin_len = sizeof(src_rt.ro_dst);
6d2010ae
A
8444 dst->sin_family = AF_INET;
8445 }
8446 dst->sin_addr = src_ip;
8447
5ba3f43e
A
8448 VERIFY(src_rt.ro_rt == NULL);
8449 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
8450 0, 0, ifp->if_index);
6d2010ae 8451
5ba3f43e
A
8452 if (src_rt.ro_rt != NULL) {
8453 /* retain a ref, copyin consumes one */
0a7de745 8454 struct rtentry *rte = src_rt.ro_rt;
5ba3f43e
A
8455 RT_ADDREF(rte);
8456 ifp_src_route_copyin(ifp, &src_rt);
8457 src_rt.ro_rt = rte;
6d2010ae
A
8458 }
8459 }
8460
0a7de745 8461 return src_rt.ro_rt;
6d2010ae
A
8462}
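
/*
 * Reference counting in the lookup above, spelled out: route_copyout()
 * hands the caller its own reference on the cached rtentry, and
 * route_copyin() consumes one reference when the route is stashed back
 * into the cache. A freshly looked-up route must both live in the cache
 * and be handed back to the caller, hence the extra RT_ADDREF() taken
 * before the copyin so that one reference survives for the return value.
 */
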
8463
8464#if INET6
39037602 8465struct rtentry *
6d2010ae
A
8466ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
8467{
8468 struct route_in6 src_rt;
8469
8470 ifp_src_route6_copyout(ifp, &src_rt);
8471
39236c6e
A
8472 if (ROUTE_UNUSABLE(&src_rt) ||
8473 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
8474 ROUTE_RELEASE(&src_rt);
8475 if (src_rt.ro_dst.sin6_family != AF_INET6) {
0a7de745
A
8476 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8477 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
6d2010ae
A
8478 src_rt.ro_dst.sin6_family = AF_INET6;
8479 }
8480 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
316670eb 8481 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
0a7de745 8482 sizeof(src_rt.ro_dst.sin6_addr));
6d2010ae
A
8483
8484 if (src_rt.ro_rt == NULL) {
8485 src_rt.ro_rt = rtalloc1_scoped(
0a7de745
A
8486 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
8487 ifp->if_index);
6d2010ae
A
8488
8489 if (src_rt.ro_rt != NULL) {
8490 /* retain a ref, copyin consumes one */
0a7de745 8491 struct rtentry *rte = src_rt.ro_rt;
6d2010ae
A
8492 RT_ADDREF(rte);
8493 ifp_src_route6_copyin(ifp, &src_rt);
8494 src_rt.ro_rt = rte;
8495 }
8496 }
8497 }
8498
0a7de745 8499 return src_rt.ro_rt;
6d2010ae
A
8500}
8501#endif /* INET6 */
316670eb
A
8502
8503void
3e170ce0 8504if_lqm_update(struct ifnet *ifp, int lqm, int locked)
316670eb
A
8505{
8506 struct kev_dl_link_quality_metric_data ev_lqm_data;
8507
8508 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
8509
8510 /* Normalize to edge */
5ba3f43e
A
8511 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
8512 lqm = IFNET_LQM_THRESH_ABORT;
8513 atomic_bitset_32(&tcbinfo.ipi_flags,
8514 INPCBINFO_HANDLE_LQM_ABORT);
8515 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
8516 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
8517 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
8518 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
8519 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
8520 lqm <= IFNET_LQM_THRESH_POOR) {
316670eb 8521 lqm = IFNET_LQM_THRESH_POOR;
5ba3f43e
A
8522 } else if (lqm > IFNET_LQM_THRESH_POOR &&
8523 lqm <= IFNET_LQM_THRESH_GOOD) {
316670eb 8524 lqm = IFNET_LQM_THRESH_GOOD;
5ba3f43e 8525 }
316670eb 8526
3e170ce0
A
8527 /*
8528 * Take the lock if needed
8529 */
0a7de745 8530 if (!locked) {
3e170ce0 8531 ifnet_lock_exclusive(ifp);
0a7de745 8532 }
3e170ce0
A
8533
8534 if (lqm == ifp->if_interface_state.lqm_state &&
39037602 8535 (ifp->if_interface_state.valid_bitmask &
3e170ce0
A
8536 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
8537 /*
8538 * Release the lock if was not held by the caller
8539 */
0a7de745 8540 if (!locked) {
3e170ce0 8541 ifnet_lock_done(ifp);
0a7de745
A
8542 }
8543 return; /* nothing to update */
316670eb 8544 }
3e170ce0 8545 ifp->if_interface_state.valid_bitmask |=
0a7de745 8546 IF_INTERFACE_STATE_LQM_STATE_VALID;
3e170ce0
A
8547 ifp->if_interface_state.lqm_state = lqm;
8548
8549 /*
8550 * Don't want to hold the lock when issuing kernel events
8551 */
316670eb
A
8552 ifnet_lock_done(ifp);
8553
0a7de745 8554 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
316670eb
A
8555 ev_lqm_data.link_quality_metric = lqm;
8556
8557 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
0a7de745 8558 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
3e170ce0
A
8559
8560 /*
8561 * Reacquire the lock for the caller
8562 */
0a7de745 8563 if (locked) {
3e170ce0 8564 ifnet_lock_exclusive(ifp);
0a7de745 8565 }
3e170ce0
A
8566}
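
/*
 * A minimal sketch of the "normalize to edge" mapping above: a raw
 * quality value is snapped to the upper edge of the band it falls
 * into. The edges[] array stands in for the IFNET_LQM_THRESH_*
 * constants, listed in ascending order; the helper name is
 * hypothetical.
 */
static int
snap_to_band_edge(int v, const int *edges, int nedges)
{
	int i;

	for (i = 0; i < nedges; i++) {
		if (v <= edges[i]) {
			return edges[i];	/* upper edge of this band */
		}
	}
	return v;	/* above the last edge: leave unchanged */
}
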
8567
8568static void
8569if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
8570{
8571 struct kev_dl_rrc_state kev;
39037602 8572
3e170ce0
A
8573 if (rrc_state == ifp->if_interface_state.rrc_state &&
8574 (ifp->if_interface_state.valid_bitmask &
0a7de745 8575 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
3e170ce0 8576 return;
0a7de745 8577 }
3e170ce0
A
8578
8579 ifp->if_interface_state.valid_bitmask |=
8580 IF_INTERFACE_STATE_RRC_STATE_VALID;
8581
8582 ifp->if_interface_state.rrc_state = rrc_state;
8583
8584 /*
8585 * Don't want to hold the lock when issuing kernel events
8586 */
8587 ifnet_lock_done(ifp);
8588
8589 bzero(&kev, sizeof(struct kev_dl_rrc_state));
8590 kev.rrc_state = rrc_state;
8591
8592 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
8593 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
8594
8595 ifnet_lock_exclusive(ifp);
8596}
8597
8598errno_t
8599if_state_update(struct ifnet *ifp,
39037602 8600 struct if_interface_state *if_interface_state)
3e170ce0
A
8601{
8602 u_short if_index_available = 0;
8603
8604 ifnet_lock_exclusive(ifp);
8605
8606 if ((ifp->if_type != IFT_CELLULAR) &&
8607 (if_interface_state->valid_bitmask &
8608 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8609 ifnet_lock_done(ifp);
0a7de745 8610 return ENOTSUP;
3e170ce0
A
8611 }
8612 if ((if_interface_state->valid_bitmask &
8613 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
8614 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
8615 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
8616 ifnet_lock_done(ifp);
0a7de745 8617 return EINVAL;
3e170ce0
A
8618 }
8619 if ((if_interface_state->valid_bitmask &
8620 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
8621 if_interface_state->rrc_state !=
8622 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
8623 if_interface_state->rrc_state !=
8624 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
8625 ifnet_lock_done(ifp);
0a7de745 8626 return EINVAL;
3e170ce0
A
8627 }
8628
8629 if (if_interface_state->valid_bitmask &
8630 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8631 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
8632 }
8633 if (if_interface_state->valid_bitmask &
8634 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8635 if_rrc_state_update(ifp, if_interface_state->rrc_state);
8636 }
8637 if (if_interface_state->valid_bitmask &
8638 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8639 ifp->if_interface_state.valid_bitmask |=
8640 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8641 ifp->if_interface_state.interface_availability =
8642 if_interface_state->interface_availability;
8643
8644 if (ifp->if_interface_state.interface_availability ==
8645 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
cb323159
A
8646 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
8647 __func__, if_name(ifp), ifp->if_index);
3e170ce0 8648 if_index_available = ifp->if_index;
cb323159
A
8649 } else {
8650 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
8651 __func__, if_name(ifp), ifp->if_index);
3e170ce0
A
8652 }
8653 }
8654 ifnet_lock_done(ifp);
8655
8656 /*
8657 * Check if the TCP connections going on this interface should be
8658 * forced to send probe packets instead of waiting for TCP timers
cb323159
A
8659 * to fire. This is done on an explicit notification such as
8660 * SIOCSIFINTERFACESTATE which marks the interface as available.
3e170ce0 8661 */
0a7de745 8662 if (if_index_available > 0) {
3e170ce0 8663 tcp_interface_send_probe(if_index_available);
0a7de745 8664 }
3e170ce0 8665
0a7de745 8666 return 0;
3e170ce0
A
8667}
8668
8669void
8670if_get_state(struct ifnet *ifp,
39037602 8671 struct if_interface_state *if_interface_state)
3e170ce0
A
8672{
8673 ifnet_lock_shared(ifp);
8674
8675 if_interface_state->valid_bitmask = 0;
8676
8677 if (ifp->if_interface_state.valid_bitmask &
8678 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8679 if_interface_state->valid_bitmask |=
8680 IF_INTERFACE_STATE_RRC_STATE_VALID;
8681 if_interface_state->rrc_state =
8682 ifp->if_interface_state.rrc_state;
8683 }
8684 if (ifp->if_interface_state.valid_bitmask &
8685 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8686 if_interface_state->valid_bitmask |=
8687 IF_INTERFACE_STATE_LQM_STATE_VALID;
8688 if_interface_state->lqm_state =
8689 ifp->if_interface_state.lqm_state;
8690 }
8691 if (ifp->if_interface_state.valid_bitmask &
8692 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8693 if_interface_state->valid_bitmask |=
8694 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8695 if_interface_state->interface_availability =
8696 ifp->if_interface_state.interface_availability;
8697 }
8698
8699 ifnet_lock_done(ifp);
8700}
8701
8702errno_t
8703if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8704{
8705 ifnet_lock_exclusive(ifp);
8706 if (conn_probe > 1) {
8707 ifnet_lock_done(ifp);
0a7de745 8708 return EINVAL;
3e170ce0 8709 }
0a7de745 8710 if (conn_probe == 0) {
3e170ce0 8711 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
0a7de745 8712 } else {
3e170ce0 8713 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
0a7de745 8714 }
3e170ce0
A
8715 ifnet_lock_done(ifp);
8716
5ba3f43e
A
8717#if NECP
8718 necp_update_all_clients();
8719#endif /* NECP */
8720
3e170ce0 8721 tcp_probe_connectivity(ifp, conn_probe);
0a7de745 8722 return 0;
316670eb
A
8723}
8724
8725/* for uuid.c */
cb323159
A
8726static int
8727get_ether_index(int * ret_other_index)
316670eb
A
8728{
8729 struct ifnet *ifp;
cb323159
A
8730 int en0_index = 0;
8731 int other_en_index = 0;
8732 int any_ether_index = 0;
8733 short best_unit = 0;
316670eb 8734
cb323159 8735 *ret_other_index = 0;
316670eb 8736 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
cb323159
A
8737 /*
8738 * find en0, or if not en0, the lowest unit en*, and if not
8739 * that, any ethernet
8740 */
316670eb 8741 ifnet_lock_shared(ifp);
cb323159
A
8742 if (strcmp(ifp->if_name, "en") == 0) {
8743 if (ifp->if_unit == 0) {
8744 /* found en0, we're done */
8745 en0_index = ifp->if_index;
8746 ifnet_lock_done(ifp);
8747 break;
8748 }
8749 if (other_en_index == 0 || ifp->if_unit < best_unit) {
8750 other_en_index = ifp->if_index;
8751 best_unit = ifp->if_unit;
8752 }
8753 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
8754 any_ether_index = ifp->if_index;
316670eb 8755 }
316670eb
A
8756 ifnet_lock_done(ifp);
8757 }
cb323159
A
8758 if (en0_index == 0) {
8759 if (other_en_index != 0) {
8760 *ret_other_index = other_en_index;
8761 } else if (any_ether_index != 0) {
8762 *ret_other_index = any_ether_index;
8763 }
8764 }
8765 return en0_index;
8766}
8767
8768int
8769uuid_get_ethernet(u_int8_t *node)
8770{
8771 static int en0_index;
8772 struct ifnet *ifp;
8773 int other_index = 0;
8774 int the_index = 0;
8775 int ret;
316670eb 8776
cb323159
A
8777 ifnet_head_lock_shared();
8778 if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
8779 en0_index = get_ether_index(&other_index);
8780 }
8781 if (en0_index != 0) {
8782 the_index = en0_index;
8783 } else if (other_index != 0) {
8784 the_index = other_index;
8785 }
8786 if (the_index != 0) {
8787 ifp = ifindex2ifnet[the_index];
8788 VERIFY(ifp != NULL);
8789 memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
8790 ret = 0;
8791 } else {
8792 ret = -1;
8793 }
8794 ifnet_head_done();
8795 return ret;
316670eb
A
8796}
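
/*
 * The static en0_index above caches the result across calls; the
 * ifindex2ifnet[] check revalidates it in case the interface at that
 * index has since detached.
 */
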
8797
8798static int
8799sysctl_rxpoll SYSCTL_HANDLER_ARGS
8800{
8801#pragma unused(arg1, arg2)
39236c6e
A
8802 uint32_t i;
8803 int err;
316670eb
A
8804
8805 i = if_rxpoll;
8806
8807 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
8808 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8809 return err;
8810 }
316670eb 8811
0a7de745
A
8812 if (net_rxpoll == 0) {
8813 return ENXIO;
8814 }
316670eb
A
8815
8816 if_rxpoll = i;
0a7de745 8817 return err;
316670eb
A
8818}
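
/*
 * The rxpoll/sndq/rcvq handlers that follow all share the same
 * read-modify-clamp shape. A hedged sketch of that shape with a
 * hypothetical tunable (my_tunable and MY_TUNABLE_MIN are not real
 * symbols):
 */
#if 0	/* illustrative only */
static int
sysctl_my_tunable SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = my_tunable;			/* snapshot the current value */

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;		/* error, or read-only access */
	}

	if (i < MY_TUNABLE_MIN) {
		i = MY_TUNABLE_MIN;	/* clamp rather than reject */
	}

	my_tunable = i;
	return err;
}
#endif
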
8819
8820static int
39236c6e 8821sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
316670eb
A
8822{
8823#pragma unused(arg1, arg2)
39236c6e
A
8824 uint64_t q;
8825 int err;
316670eb 8826
39236c6e 8827 q = if_rxpoll_mode_holdtime;
316670eb 8828
39236c6e 8829 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745
A
8830 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8831 return err;
8832 }
316670eb 8833
0a7de745 8834 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
39236c6e 8835 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
0a7de745 8836 }
39236c6e
A
8837
8838 if_rxpoll_mode_holdtime = q;
316670eb 8839
0a7de745 8840 return err;
316670eb
A
8841}
8842
8843static int
39236c6e 8844sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
316670eb
A
8845{
8846#pragma unused(arg1, arg2)
39236c6e
A
8847 uint64_t q;
8848 int err;
316670eb 8849
39236c6e 8850 q = if_rxpoll_sample_holdtime;
316670eb 8851
39236c6e 8852 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745
A
8853 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8854 return err;
8855 }
316670eb 8856
0a7de745 8857 if (q < IF_RXPOLL_SAMPLETIME_MIN) {
39236c6e 8858 q = IF_RXPOLL_SAMPLETIME_MIN;
0a7de745 8859 }
39236c6e
A
8860
8861 if_rxpoll_sample_holdtime = q;
316670eb 8862
0a7de745 8863 return err;
316670eb
A
8864}
8865
39236c6e
A
8866static int
8867sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
316670eb 8868{
39236c6e
A
8869#pragma unused(arg1, arg2)
8870 uint64_t q;
8871 int err;
316670eb 8872
39236c6e 8873 q = if_rxpoll_interval_time;
316670eb 8874
39236c6e 8875 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745
A
8876 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8877 return err;
8878 }
39236c6e 8879
0a7de745 8880 if (q < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 8881 q = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 8882 }
316670eb 8883
39236c6e 8884 if_rxpoll_interval_time = q;
316670eb 8885
0a7de745 8886 return err;
316670eb
A
8887}
8888
39236c6e
A
8889static int
8890sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
316670eb 8891{
39236c6e
A
8892#pragma unused(arg1, arg2)
8893 uint32_t i;
8894 int err;
316670eb 8895
cb323159 8896 i = if_sysctl_rxpoll_wlowat;
316670eb 8897
39236c6e 8898 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
8899 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8900 return err;
8901 }
316670eb 8902
cb323159 8903 if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
0a7de745
A
8904 return EINVAL;
8905 }
39236c6e 8906
cb323159 8907 if_sysctl_rxpoll_wlowat = i;
0a7de745 8908 return err;
316670eb
A
8909}
8910
39236c6e
A
8911static int
8912sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
316670eb 8913{
39236c6e
A
8914#pragma unused(arg1, arg2)
8915 uint32_t i;
8916 int err;
316670eb 8917
cb323159 8918 i = if_sysctl_rxpoll_whiwat;
316670eb 8919
39236c6e 8920 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
8921 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8922 return err;
8923 }
316670eb 8924
cb323159 8925 if (i <= if_sysctl_rxpoll_wlowat) {
0a7de745
A
8926 return EINVAL;
8927 }
39236c6e 8928
cb323159 8929 if_sysctl_rxpoll_whiwat = i;
0a7de745 8930 return err;
316670eb
A
8931}
8932
8933static int
39236c6e 8934sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
316670eb 8935{
39236c6e
A
8936#pragma unused(arg1, arg2)
8937 int i, err;
316670eb 8938
39236c6e 8939 i = if_sndq_maxlen;
316670eb 8940
39236c6e 8941 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
8942 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8943 return err;
8944 }
316670eb 8945
0a7de745 8946 if (i < IF_SNDQ_MINLEN) {
39236c6e 8947 i = IF_SNDQ_MINLEN;
0a7de745 8948 }
316670eb 8949
39236c6e 8950 if_sndq_maxlen = i;
0a7de745 8951 return err;
316670eb
A
8952}
8953
39236c6e
A
8954static int
8955sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
316670eb 8956{
39236c6e
A
8957#pragma unused(arg1, arg2)
8958 int i, err;
8959
8960 i = if_rcvq_maxlen;
8961
8962 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
8963 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8964 return err;
8965 }
39236c6e 8966
0a7de745 8967 if (i < IF_RCVQ_MINLEN) {
39236c6e 8968 i = IF_RCVQ_MINLEN;
0a7de745 8969 }
39236c6e
A
8970
8971 if_rcvq_maxlen = i;
0a7de745 8972 return err;
316670eb
A
8973}
8974
cb323159 8975int
316670eb
A
8976dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
8977 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
8978{
8979 struct kev_dl_node_presence kev;
8980 struct sockaddr_dl *sdl;
8981 struct sockaddr_in6 *sin6;
cb323159 8982 int ret = 0;
316670eb
A
8983
8984 VERIFY(ifp);
8985 VERIFY(sa);
8986 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8987
0a7de745 8988 bzero(&kev, sizeof(kev));
316670eb
A
8989 sin6 = &kev.sin6_node_address;
8990 sdl = &kev.sdl_node_address;
8991 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8992 kev.rssi = rssi;
8993 kev.link_quality_metric = lqm;
8994 kev.node_proximity_metric = npm;
0a7de745 8995 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
316670eb 8996
cb323159
A
8997 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
8998 if (ret == 0) {
8999 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9000 &kev.link_data, sizeof(kev));
9001 if (err != 0) {
9002 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
9003 "error %d\n", __func__, err);
9004 }
9005 }
9006 return ret;
316670eb
A
9007}
9008
9009void
9010dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
9011{
cb323159
A
9012 struct kev_dl_node_absence kev = {};
9013 struct sockaddr_in6 *kev_sin6 = NULL;
9014 struct sockaddr_dl *kev_sdl = NULL;
316670eb 9015
cb323159
A
9016 VERIFY(ifp != NULL);
9017 VERIFY(sa != NULL);
316670eb
A
9018 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9019
cb323159
A
9020 kev_sin6 = &kev.sin6_node_address;
9021 kev_sdl = &kev.sdl_node_address;
9022
9023 if (sa->sa_family == AF_INET6) {
9024 /*
9025 * If IPv6 address is given, get the link layer
9026 * address from what was cached in the neighbor cache
9027 */
9028 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9029 bcopy(sa, kev_sin6, sa->sa_len);
9030 nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
9031 } else {
9032 /*
9033 * If the passed address is of AF_LINK type, derive the
9034 * IPv6 address from the link-layer address.
9035 */
9036 nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
9037 nd6_alt_node_absent(ifp, kev_sin6, NULL);
9038 }
9039
9040 kev_sdl->sdl_type = ifp->if_type;
9041 kev_sdl->sdl_index = ifp->if_index;
316670eb 9042
316670eb 9043 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
0a7de745 9044 &kev.link_data, sizeof(kev));
316670eb
A
9045}
9046
cb323159
A
9047int
9048dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
9049 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9050{
9051 struct kev_dl_node_presence kev = {};
9052 struct sockaddr_dl *kev_sdl = NULL;
9053 struct sockaddr_in6 *kev_sin6 = NULL;
9054 int ret = 0;
9055
9056 VERIFY(ifp != NULL);
9057 VERIFY(sa != NULL && sdl != NULL);
9058 VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
9059
9060 kev_sin6 = &kev.sin6_node_address;
9061 kev_sdl = &kev.sdl_node_address;
9062
9063 VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
9064 bcopy(sdl, kev_sdl, sdl->sdl_len);
9065 kev_sdl->sdl_type = ifp->if_type;
9066 kev_sdl->sdl_index = ifp->if_index;
9067
9068 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9069 bcopy(sa, kev_sin6, sa->sa_len);
9070
9071 kev.rssi = rssi;
9072 kev.link_quality_metric = lqm;
9073 kev.node_proximity_metric = npm;
9074 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9075
9076 ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
9077 if (ret == 0) {
9078 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9079 &kev.link_data, sizeof(kev));
9080 if (err != 0) {
9081 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
9082 "error %d\n", __func__, err);
9083 }
9084 }
9085 return ret;
9086}
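
/*
 * Unlike dlil_node_present(), the _v2 variant above takes the
 * link-layer address directly from the caller-supplied sockaddr_dl
 * instead of deriving it with nd6_alt_node_addr_decompose().
 */
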
9087
39236c6e
A
9088const void *
9089dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
0a7de745 9090 kauth_cred_t *credp)
39236c6e
A
9091{
9092 const u_int8_t *bytes;
9093 size_t size;
9094
9095 bytes = CONST_LLADDR(sdl);
9096 size = sdl->sdl_alen;
9097
9098#if CONFIG_MACF
9099 if (dlil_lladdr_ckreq) {
9100 switch (sdl->sdl_type) {
9101 case IFT_ETHER:
39236c6e 9102 case IFT_IEEE1394:
39236c6e
A
9103 break;
9104 default:
9105 credp = NULL;
9106 break;
0a7de745
A
9107 }
9108 ;
39236c6e
A
9109
9110 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
9111 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
0a7de745 9112 [0] = 2
39236c6e
A
9113 };
9114
5ba3f43e 9115 bytes = unspec;
39236c6e
A
9116 }
9117 }
9118#else
9119#pragma unused(credp)
9120#endif
9121
0a7de745
A
9122 if (sizep != NULL) {
9123 *sizep = size;
9124 }
9125 return bytes;
39236c6e
A
9126}
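
/*
 * Note on unspec[] above: when the MAC policy denies "net.link.addr",
 * the caller is handed a fixed EUI-64-sized blob whose first octet is
 * 2 (read here as the locally-administered bit) instead of the real
 * link-layer address, keeping it hidden from unprivileged readers.
 */
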
9127
9128void
9129dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
9130 u_int8_t info[DLIL_MODARGLEN])
9131{
9132 struct kev_dl_issues kev;
9133 struct timeval tv;
9134
9135 VERIFY(ifp != NULL);
9136 VERIFY(modid != NULL);
0a7de745
A
9137 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
9138 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
39236c6e 9139
0a7de745 9140 bzero(&kev, sizeof(kev));
39236c6e
A
9141
9142 microtime(&tv);
9143 kev.timestamp = tv.tv_sec;
9144 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
0a7de745 9145 if (info != NULL) {
39236c6e 9146 bcopy(info, &kev.info, DLIL_MODARGLEN);
0a7de745 9147 }
39236c6e
A
9148
9149 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
0a7de745 9150 &kev.link_data, sizeof(kev));
39236c6e
A
9151}
9152
316670eb
A
9153errno_t
9154ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9155 struct proc *p)
9156{
9157 u_int32_t level = IFNET_THROTTLE_OFF;
9158 errno_t result = 0;
9159
9160 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
9161
9162 if (cmd == SIOCSIFOPPORTUNISTIC) {
9163 /*
9164 * XXX: Use priv_check_cred() instead of root check?
9165 */
0a7de745
A
9166 if ((result = proc_suser(p)) != 0) {
9167 return result;
9168 }
316670eb
A
9169
9170 if (ifr->ifr_opportunistic.ifo_flags ==
0a7de745 9171 IFRIFOF_BLOCK_OPPORTUNISTIC) {
316670eb 9172 level = IFNET_THROTTLE_OPPORTUNISTIC;
0a7de745 9173 } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
316670eb 9174 level = IFNET_THROTTLE_OFF;
0a7de745 9175 } else {
316670eb 9176 result = EINVAL;
0a7de745 9177 }
316670eb 9178
0a7de745 9179 if (result == 0) {
316670eb 9180 result = ifnet_set_throttle(ifp, level);
0a7de745 9181 }
316670eb
A
9182 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
9183 ifr->ifr_opportunistic.ifo_flags = 0;
9184 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
9185 ifr->ifr_opportunistic.ifo_flags |=
9186 IFRIFOF_BLOCK_OPPORTUNISTIC;
9187 }
9188 }
9189
9190 /*
9191 * Return the count of current opportunistic connections
9192 * over the interface.
9193 */
9194 if (result == 0) {
9195 uint32_t flags = 0;
9196 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
0a7de745 9197 INPCB_OPPORTUNISTIC_SETCMD : 0;
39037602 9198 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
0a7de745 9199 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
316670eb
A
9200 ifr->ifr_opportunistic.ifo_inuse =
9201 udp_count_opportunistic(ifp->if_index, flags) +
9202 tcp_count_opportunistic(ifp->if_index, flags);
9203 }
9204
0a7de745 9205 if (result == EALREADY) {
316670eb 9206 result = 0;
0a7de745 9207 }
316670eb 9208
0a7de745 9209 return result;
316670eb
A
9210}
9211
9212int
9213ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
9214{
9215 struct ifclassq *ifq;
9216 int err = 0;
9217
0a7de745
A
9218 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9219 return ENXIO;
9220 }
316670eb
A
9221
9222 *level = IFNET_THROTTLE_OFF;
9223
9224 ifq = &ifp->if_snd;
9225 IFCQ_LOCK(ifq);
9226 /* Throttling works only for IFCQ, not ALTQ instances */
0a7de745 9227 if (IFCQ_IS_ENABLED(ifq)) {
316670eb 9228 IFCQ_GET_THROTTLE(ifq, *level, err);
0a7de745 9229 }
316670eb
A
9230 IFCQ_UNLOCK(ifq);
9231
0a7de745 9232 return err;
316670eb
A
9233}
9234
9235int
9236ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
9237{
9238 struct ifclassq *ifq;
9239 int err = 0;
9240
0a7de745
A
9241 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9242 return ENXIO;
9243 }
316670eb 9244
39236c6e
A
9245 ifq = &ifp->if_snd;
9246
316670eb
A
9247 switch (level) {
9248 case IFNET_THROTTLE_OFF:
9249 case IFNET_THROTTLE_OPPORTUNISTIC:
316670eb
A
9250 break;
9251 default:
0a7de745 9252 return EINVAL;
316670eb
A
9253 }
9254
316670eb 9255 IFCQ_LOCK(ifq);
0a7de745 9256 if (IFCQ_IS_ENABLED(ifq)) {
316670eb 9257 IFCQ_SET_THROTTLE(ifq, level, err);
0a7de745 9258 }
316670eb
A
9259 IFCQ_UNLOCK(ifq);
9260
9261 if (err == 0) {
cb323159 9262 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
39236c6e 9263 level);
cb323159
A
9264#if NECP
9265 necp_update_all_clients();
9266#endif /* NECP */
0a7de745 9267 if (level == IFNET_THROTTLE_OFF) {
316670eb 9268 ifnet_start(ifp);
0a7de745 9269 }
316670eb
A
9270 }
9271
0a7de745 9272 return err;
316670eb 9273}
39236c6e
A
9274
9275errno_t
9276ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9277 struct proc *p)
9278{
9279#pragma unused(p)
9280 errno_t result = 0;
9281 uint32_t flags;
9282 int level, category, subcategory;
9283
9284 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
9285
9286 if (cmd == SIOCSIFLOG) {
9287 if ((result = priv_check_cred(kauth_cred_get(),
0a7de745
A
9288 PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
9289 return result;
9290 }
39236c6e
A
9291
9292 level = ifr->ifr_log.ifl_level;
0a7de745 9293 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
39236c6e 9294 result = EINVAL;
0a7de745 9295 }
39236c6e
A
9296
9297 flags = ifr->ifr_log.ifl_flags;
0a7de745 9298 if ((flags &= IFNET_LOGF_MASK) == 0) {
39236c6e 9299 result = EINVAL;
0a7de745 9300 }
39236c6e
A
9301
9302 category = ifr->ifr_log.ifl_category;
9303 subcategory = ifr->ifr_log.ifl_subcategory;
9304
0a7de745 9305 if (result == 0) {
39236c6e
A
9306 result = ifnet_set_log(ifp, level, flags,
9307 category, subcategory);
0a7de745 9308 }
39236c6e
A
9309 } else {
9310 result = ifnet_get_log(ifp, &level, &flags, &category,
9311 &subcategory);
9312 if (result == 0) {
9313 ifr->ifr_log.ifl_level = level;
9314 ifr->ifr_log.ifl_flags = flags;
9315 ifr->ifr_log.ifl_category = category;
9316 ifr->ifr_log.ifl_subcategory = subcategory;
9317 }
9318 }
9319
0a7de745 9320 return result;
39236c6e
A
9321}
9322
9323int
9324ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
9325 int32_t category, int32_t subcategory)
9326{
9327 int err = 0;
9328
9329 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
9330 VERIFY(flags & IFNET_LOGF_MASK);
9331
9332 /*
9333 * The logging level applies to all facilities; make sure to
9334 * update them all with the most current level.
9335 */
9336 flags |= ifp->if_log.flags;
9337
9338 if (ifp->if_output_ctl != NULL) {
9339 struct ifnet_log_params l;
9340
0a7de745 9341 bzero(&l, sizeof(l));
39236c6e
A
9342 l.level = level;
9343 l.flags = flags;
9344 l.flags &= ~IFNET_LOGF_DLIL;
9345 l.category = category;
9346 l.subcategory = subcategory;
9347
9348 /* Send this request to lower layers */
9349 if (l.flags != 0) {
9350 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
0a7de745 9351 sizeof(l), &l);
39236c6e
A
9352 }
9353 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
9354 /*
9355 * If targeted to the lower layers without an output
9356 * control callback registered on the interface, just
9357 * silently ignore facilities other than ours.
9358 */
9359 flags &= IFNET_LOGF_DLIL;
0a7de745 9360 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
39236c6e 9361 level = 0;
0a7de745 9362 }
39236c6e
A
9363 }
9364
9365 if (err == 0) {
0a7de745 9366 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
39236c6e 9367 ifp->if_log.flags = 0;
0a7de745 9368 } else {
39236c6e 9369 ifp->if_log.flags |= flags;
0a7de745 9370 }
39236c6e
A
9371
9372 log(LOG_INFO, "%s: logging level set to %d flags=%b "
9373 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
9374 ifp->if_log.level, ifp->if_log.flags,
9375 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
9376 category, subcategory);
9377 }
9378
0a7de745 9379 return err;
39236c6e
A
9380}
9381
9382int
9383ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
9384 int32_t *category, int32_t *subcategory)
9385{
0a7de745 9386 if (level != NULL) {
39236c6e 9387 *level = ifp->if_log.level;
0a7de745
A
9388 }
9389 if (flags != NULL) {
39236c6e 9390 *flags = ifp->if_log.flags;
0a7de745
A
9391 }
9392 if (category != NULL) {
39236c6e 9393 *category = ifp->if_log.category;
0a7de745
A
9394 }
9395 if (subcategory != NULL) {
39236c6e 9396 *subcategory = ifp->if_log.subcategory;
0a7de745 9397 }
39236c6e 9398
0a7de745 9399 return 0;
39236c6e
A
9400}
9401
9402int
9403ifnet_notify_address(struct ifnet *ifp, int af)
9404{
9405 struct ifnet_notify_address_params na;
9406
9407#if PF
9408 (void) pf_ifaddr_hook(ifp);
9409#endif /* PF */
9410
0a7de745
A
9411 if (ifp->if_output_ctl == NULL) {
9412 return EOPNOTSUPP;
9413 }
39236c6e 9414
0a7de745 9415 bzero(&na, sizeof(na));
39236c6e
A
9416 na.address_family = af;
9417
0a7de745
A
9418 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
9419 sizeof(na), &na);
39236c6e
A
9420}
9421
9422errno_t
9423ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
9424{
9425 if (ifp == NULL || flowid == NULL) {
0a7de745 9426 return EINVAL;
39236c6e 9427 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5ba3f43e 9428 !IF_FULLY_ATTACHED(ifp)) {
0a7de745 9429 return ENXIO;
39236c6e
A
9430 }
9431
9432 *flowid = ifp->if_flowhash;
9433
0a7de745 9434 return 0;
39236c6e
A
9435}
9436
9437errno_t
9438ifnet_disable_output(struct ifnet *ifp)
9439{
9440 int err;
9441
9442 if (ifp == NULL) {
0a7de745 9443 return EINVAL;
39236c6e 9444 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5ba3f43e 9445 !IF_FULLY_ATTACHED(ifp)) {
0a7de745 9446 return ENXIO;
39236c6e
A
9447 }
9448
9449 if ((err = ifnet_fc_add(ifp)) == 0) {
9450 lck_mtx_lock_spin(&ifp->if_start_lock);
9451 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
9452 lck_mtx_unlock(&ifp->if_start_lock);
9453 }
0a7de745 9454 return err;
39236c6e
A
9455}
9456
9457errno_t
9458ifnet_enable_output(struct ifnet *ifp)
9459{
9460 if (ifp == NULL) {
0a7de745 9461 return EINVAL;
39236c6e 9462 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5ba3f43e 9463 !IF_FULLY_ATTACHED(ifp)) {
0a7de745 9464 return ENXIO;
39236c6e
A
9465 }
9466
5c9f4661 9467 ifnet_start_common(ifp, TRUE);
0a7de745 9468 return 0;
39236c6e
A
9469}
9470
9471void
9472ifnet_flowadv(uint32_t flowhash)
9473{
9474 struct ifnet_fc_entry *ifce;
9475 struct ifnet *ifp;
9476
9477 ifce = ifnet_fc_get(flowhash);
0a7de745 9478 if (ifce == NULL) {
39236c6e 9479 return;
0a7de745 9480 }
39236c6e
A
9481
9482 VERIFY(ifce->ifce_ifp != NULL);
9483 ifp = ifce->ifce_ifp;
9484
9485 /* flow hash gets recalculated per attach, so check */
9486 if (ifnet_is_attached(ifp, 1)) {
0a7de745 9487 if (ifp->if_flowhash == flowhash) {
39236c6e 9488 (void) ifnet_enable_output(ifp);
0a7de745 9489 }
39236c6e
A
9490 ifnet_decr_iorefcnt(ifp);
9491 }
9492 ifnet_fc_entry_free(ifce);
9493}
9494
9495/*
9496 * Function to compare ifnet_fc_entries in ifnet flow control tree
9497 */
9498static inline int
9499ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
9500{
0a7de745 9501 return fc1->ifce_flowhash - fc2->ifce_flowhash;
39236c6e
A
9502}
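
/*
 * Note: subtracting two uint32_t values and truncating to int, as
 * above, wraps when the operands are more than 2^31 apart, so the
 * comparator is not a strict total order over the full 32-bit domain.
 * A hedged sketch of an overflow-proof three-way form, should strict
 * ordering ever be needed (the helper name is hypothetical):
 */
static inline int
u32_threeway_cmp(uint32_t a, uint32_t b)
{
	return (a > b) - (a < b);	/* -1, 0, or +1; no wraparound */
}
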
9503
9504static int
9505ifnet_fc_add(struct ifnet *ifp)
9506{
9507 struct ifnet_fc_entry keyfc, *ifce;
9508 uint32_t flowhash;
9509
9510 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
9511 VERIFY(ifp->if_flowhash != 0);
9512 flowhash = ifp->if_flowhash;
9513
0a7de745 9514 bzero(&keyfc, sizeof(keyfc));
39236c6e
A
9515 keyfc.ifce_flowhash = flowhash;
9516
9517 lck_mtx_lock_spin(&ifnet_fc_lock);
9518 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9519 if (ifce != NULL && ifce->ifce_ifp == ifp) {
9520 /* Entry is already in ifnet_fc_tree, return */
9521 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9522 return 0;
39236c6e
A
9523 }
9524
9525 if (ifce != NULL) {
9526 /*
9527 * There is a different fc entry with the same flow hash
9528 * but different ifp pointer. There can be a collision
9529 * on flow hash but the probability is low. Let's just
9530 * avoid adding a second one when there is a collision.
9531 */
9532 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9533 return EAGAIN;
39236c6e
A
9534 }
9535
9536 /* become regular mutex */
9537 lck_mtx_convert_spin(&ifnet_fc_lock);
9538
5c9f4661 9539 ifce = zalloc(ifnet_fc_zone);
39236c6e
A
9540 if (ifce == NULL) {
9541 /* memory allocation failed */
9542 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9543 return ENOMEM;
39236c6e
A
9544 }
9545 bzero(ifce, ifnet_fc_zone_size);
9546
9547 ifce->ifce_flowhash = flowhash;
9548 ifce->ifce_ifp = ifp;
9549
9550 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9551 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9552 return 0;
39236c6e
A
9553}
9554
9555static struct ifnet_fc_entry *
9556ifnet_fc_get(uint32_t flowhash)
9557{
9558 struct ifnet_fc_entry keyfc, *ifce;
9559 struct ifnet *ifp;
9560
0a7de745 9561 bzero(&keyfc, sizeof(keyfc));
39236c6e
A
9562 keyfc.ifce_flowhash = flowhash;
9563
9564 lck_mtx_lock_spin(&ifnet_fc_lock);
9565 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9566 if (ifce == NULL) {
9567 /* Entry is not present in ifnet_fc_tree, return */
9568 lck_mtx_unlock(&ifnet_fc_lock);
0a7de745 9569 return NULL;
39236c6e
A
9570 }
9571
9572 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9573
9574 VERIFY(ifce->ifce_ifp != NULL);
9575 ifp = ifce->ifce_ifp;
9576
9577 /* become regular mutex */
9578 lck_mtx_convert_spin(&ifnet_fc_lock);
9579
9580 if (!ifnet_is_attached(ifp, 0)) {
9581 /*
9582 * This ifp is not attached or in the process of being
9583 * detached; just don't process it.
9584 */
9585 ifnet_fc_entry_free(ifce);
9586 ifce = NULL;
9587 }
9588 lck_mtx_unlock(&ifnet_fc_lock);
9589
0a7de745 9590 return ifce;
39236c6e
A
9591}
9592
9593static void
9594ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
9595{
9596 zfree(ifnet_fc_zone, ifce);
9597}
9598
9599static uint32_t
9600ifnet_calc_flowhash(struct ifnet *ifp)
9601{
9602 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
9603 uint32_t flowhash = 0;
9604
0a7de745 9605 if (ifnet_flowhash_seed == 0) {
39236c6e 9606 ifnet_flowhash_seed = RandomULong();
0a7de745 9607 }
39236c6e 9608
0a7de745 9609 bzero(&fh, sizeof(fh));
39236c6e 9610
0a7de745 9611 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
39236c6e
A
9612 fh.ifk_unit = ifp->if_unit;
9613 fh.ifk_flags = ifp->if_flags;
9614 fh.ifk_eflags = ifp->if_eflags;
9615 fh.ifk_capabilities = ifp->if_capabilities;
9616 fh.ifk_capenable = ifp->if_capenable;
9617 fh.ifk_output_sched_model = ifp->if_output_sched_model;
9618 fh.ifk_rand1 = RandomULong();
9619 fh.ifk_rand2 = RandomULong();
9620
9621try_again:
0a7de745 9622 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
39236c6e
A
9623 if (flowhash == 0) {
9624 /* try to get a non-zero flowhash */
9625 ifnet_flowhash_seed = RandomULong();
9626 goto try_again;
9627 }
9628
0a7de745 9629 return flowhash;
39236c6e
A
9630}
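
/*
 * Zero is reserved to mean "no flow hash" (see the VERIFY in
 * ifnet_fc_add() and the checks in ifnet_flowid()), which is why the
 * loop above reseeds and retries until net_flowhash() returns a
 * non-zero value.
 */
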
9631
3e170ce0
A
9632int
9633ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
9634 uint16_t flags, uint8_t *data)
9635{
9636#pragma unused(flags)
9637 int error = 0;
9638
9639 switch (family) {
9640 case AF_INET:
9641 if_inetdata_lock_exclusive(ifp);
9642 if (IN_IFEXTRA(ifp) != NULL) {
9643 if (len == 0) {
9644 /* Allow clearing the signature */
9645 IN_IFEXTRA(ifp)->netsig_len = 0;
9646 bzero(IN_IFEXTRA(ifp)->netsig,
0a7de745 9647 sizeof(IN_IFEXTRA(ifp)->netsig));
3e170ce0
A
9648 if_inetdata_lock_done(ifp);
9649 break;
0a7de745 9650 } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
3e170ce0
A
9651 error = EINVAL;
9652 if_inetdata_lock_done(ifp);
9653 break;
9654 }
9655 IN_IFEXTRA(ifp)->netsig_len = len;
9656 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
9657 } else {
9658 error = ENOMEM;
9659 }
9660 if_inetdata_lock_done(ifp);
9661 break;
9662
9663 case AF_INET6:
9664 if_inet6data_lock_exclusive(ifp);
9665 if (IN6_IFEXTRA(ifp) != NULL) {
9666 if (len == 0) {
9667 /* Allow clearing the signature */
9668 IN6_IFEXTRA(ifp)->netsig_len = 0;
9669 bzero(IN6_IFEXTRA(ifp)->netsig,
0a7de745 9670 sizeof(IN6_IFEXTRA(ifp)->netsig));
3e170ce0
A
9671 if_inet6data_lock_done(ifp);
9672 break;
0a7de745 9673 } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
3e170ce0
A
9674 error = EINVAL;
9675 if_inet6data_lock_done(ifp);
9676 break;
9677 }
9678 IN6_IFEXTRA(ifp)->netsig_len = len;
9679 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
9680 } else {
9681 error = ENOMEM;
9682 }
9683 if_inet6data_lock_done(ifp);
9684 break;
9685
9686 default:
9687 error = EINVAL;
9688 break;
9689 }
9690
0a7de745 9691 return error;
3e170ce0
A
9692}
9693
9694int
9695ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
9696 uint16_t *flags, uint8_t *data)
9697{
9698 int error = 0;
9699
0a7de745
A
9700 if (ifp == NULL || len == NULL || data == NULL) {
9701 return EINVAL;
9702 }
3e170ce0
A
9703
9704 switch (family) {
9705 case AF_INET:
9706 if_inetdata_lock_shared(ifp);
9707 if (IN_IFEXTRA(ifp) != NULL) {
9708 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
9709 error = EINVAL;
9710 if_inetdata_lock_done(ifp);
9711 break;
9712 }
0a7de745 9713 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
3e170ce0 9714 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
0a7de745 9715 } else {
3e170ce0 9716 error = ENOENT;
0a7de745 9717 }
3e170ce0
A
9718 } else {
9719 error = ENOMEM;
9720 }
9721 if_inetdata_lock_done(ifp);
9722 break;
9723
9724 case AF_INET6:
9725 if_inet6data_lock_shared(ifp);
9726 if (IN6_IFEXTRA(ifp) != NULL) {
9727 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
9728 error = EINVAL;
9729 if_inet6data_lock_done(ifp);
9730 break;
9731 }
0a7de745 9732 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
3e170ce0 9733 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
0a7de745 9734 } else {
3e170ce0 9735 error = ENOENT;
0a7de745 9736 }
3e170ce0
A
9737 } else {
9738 error = ENOMEM;
9739 }
9740 if_inet6data_lock_done(ifp);
9741 break;
9742
9743 default:
9744 error = EINVAL;
9745 break;
9746 }
9747
0a7de745 9748 if (error == 0 && flags != NULL) {
3e170ce0 9749 *flags = 0;
0a7de745 9750 }
3e170ce0 9751
0a7de745 9752 return error;
3e170ce0
A
9753}
9754
5ba3f43e
A
9755#if INET6
9756int
9757ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9758{
9759 int i, error = 0, one_set = 0;
9760
9761 if_inet6data_lock_exclusive(ifp);
9762
9763 if (IN6_IFEXTRA(ifp) == NULL) {
9764 error = ENOMEM;
9765 goto out;
9766 }
9767
9768 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9769 uint32_t prefix_len =
9770 prefixes[i].prefix_len;
9771 struct in6_addr *prefix =
9772 &prefixes[i].ipv6_prefix;
9773
9774 if (prefix_len == 0) {
d9a64523
A
9775 clat_log0((LOG_DEBUG,
9776 "NAT64 prefixes purged from Interface %s\n",
9777 if_name(ifp)));
5ba3f43e
A
9778 /* Allow clearing the signature */
9779 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
9780 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9781 sizeof(struct in6_addr));
9782
9783 continue;
9784 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
0a7de745
A
9785 prefix_len != NAT64_PREFIX_LEN_40 &&
9786 prefix_len != NAT64_PREFIX_LEN_48 &&
9787 prefix_len != NAT64_PREFIX_LEN_56 &&
9788 prefix_len != NAT64_PREFIX_LEN_64 &&
9789 prefix_len != NAT64_PREFIX_LEN_96) {
d9a64523
A
9790 clat_log0((LOG_DEBUG,
9791 "NAT64 prefixlen is incorrect %d\n", prefix_len));
5ba3f43e
A
9792 error = EINVAL;
9793 goto out;
9794 }
9795
9796 if (IN6_IS_SCOPE_EMBED(prefix)) {
d9a64523
A
9797 clat_log0((LOG_DEBUG,
9798 "NAT64 prefix has interface/link local scope.\n"));
5ba3f43e
A
9799 error = EINVAL;
9800 goto out;
9801 }
9802
9803 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
9804 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9805 sizeof(struct in6_addr));
d9a64523
A
9806 clat_log0((LOG_DEBUG,
9807 "NAT64 prefix set to %s with prefixlen: %d\n",
9808 ip6_sprintf(prefix), prefix_len));
5ba3f43e
A
9809 one_set = 1;
9810 }
9811
9812out:
9813 if_inet6data_lock_done(ifp);
9814
0a7de745 9815 if (error == 0 && one_set != 0) {
5ba3f43e 9816 necp_update_all_clients();
0a7de745 9817 }
5ba3f43e 9818
0a7de745 9819 return error;
5ba3f43e
A
9820}
9821
9822int
9823ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9824{
9825 int i, found_one = 0, error = 0;
9826
0a7de745
A
9827 if (ifp == NULL) {
9828 return EINVAL;
9829 }
5ba3f43e
A
9830
9831 if_inet6data_lock_shared(ifp);
9832
9833 if (IN6_IFEXTRA(ifp) == NULL) {
9834 error = ENOMEM;
9835 goto out;
9836 }
9837
9838 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
0a7de745 9839 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
5ba3f43e 9840 found_one = 1;
0a7de745 9841 }
5ba3f43e
A
9842 }
9843
9844 if (found_one == 0) {
9845 error = ENOENT;
9846 goto out;
9847 }
9848
0a7de745 9849 if (prefixes) {
5ba3f43e
A
9850 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
9851 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
0a7de745 9852 }
5ba3f43e
A
9853
9854out:
9855 if_inet6data_lock_done(ifp);
9856
0a7de745 9857 return error;
5ba3f43e
A
9858}
9859#endif
9860
39236c6e
A
9861static void
9862dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
9863 protocol_family_t pf)
9864{
9865#pragma unused(ifp)
9866 uint32_t did_sw;
9867
9868 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
0a7de745 9869 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
39236c6e 9870 return;
0a7de745 9871 }
39236c6e
A
9872
9873 switch (pf) {
9874 case PF_INET:
9875 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
0a7de745 9876 if (did_sw & CSUM_DELAY_IP) {
39236c6e 9877 hwcksum_dbg_finalized_hdr++;
0a7de745
A
9878 }
9879 if (did_sw & CSUM_DELAY_DATA) {
39236c6e 9880 hwcksum_dbg_finalized_data++;
0a7de745 9881 }
39236c6e
A
9882 break;
9883#if INET6
9884 case PF_INET6:
9885 /*
9886 * Checksum offload should not have been enabled when
9887 * extension headers exist; that also means that we
9888 * cannot force-finalize packets with extension headers.
9889 * Indicate to the callee that it should skip such cases by
9890 * setting optlen to -1.
9891 */
9892 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
9893 m->m_pkthdr.csum_flags);
0a7de745 9894 if (did_sw & CSUM_DELAY_IPV6_DATA) {
39236c6e 9895 hwcksum_dbg_finalized_data++;
0a7de745 9896 }
39236c6e
A
9897 break;
9898#endif /* INET6 */
9899 default:
9900 return;
9901 }
9902}
9903
9904static void
9905dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
9906 protocol_family_t pf)
9907{
5ba3f43e 9908 uint16_t sum = 0;
39236c6e
A
9909 uint32_t hlen;
9910
9911 if (frame_header == NULL ||
9912 frame_header < (char *)mbuf_datastart(m) ||
9913 frame_header > (char *)m->m_data) {
cb323159 9914 DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
39236c6e
A
9915 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
9916 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
9917 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
9918 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
9919 (uint64_t)VM_KERNEL_ADDRPERM(m));
9920 return;
9921 }
9922 hlen = (m->m_data - frame_header);
9923
9924 switch (pf) {
9925 case PF_INET:
9926#if INET6
9927 case PF_INET6:
9928#endif /* INET6 */
9929 break;
9930 default:
9931 return;
9932 }
9933
9934 /*
9935 * Force partial checksum offload; useful to simulate cases
9936 * where the hardware does not support partial checksum offload,
9937 * in order to validate correctness throughout the layers above.
9938 */
9939 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
9940 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
9941
0a7de745 9942 if (foff > (uint32_t)m->m_pkthdr.len) {
39236c6e 9943 return;
0a7de745 9944 }
39236c6e
A
9945
9946 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
9947
9948 /* Compute 16-bit 1's complement sum from forced offset */
9949 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
9950
9951 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
9952 m->m_pkthdr.csum_rx_val = sum;
9953 m->m_pkthdr.csum_rx_start = (foff + hlen);
9954
9955 hwcksum_dbg_partial_forced++;
9956 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
9957 }
9958
9959 /*
9960 * Partial checksum offload verification (and adjustment);
9961 * useful to validate and test cases where the hardware
9962 * supports partial checksum offload.
9963 */
9964 if ((m->m_pkthdr.csum_flags &
9965 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
9966 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
9967 uint32_t rxoff;
9968
9969 /* Start offset must begin after frame header */
9970 rxoff = m->m_pkthdr.csum_rx_start;
9971 if (hlen > rxoff) {
9972 hwcksum_dbg_bad_rxoff++;
9973 if (dlil_verbose) {
cb323159 9974 DLIL_PRINTF("%s: partial cksum start offset %d "
39236c6e
A
9975 "is less than frame header length %d for "
9976 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
9977 (uint64_t)VM_KERNEL_ADDRPERM(m));
9978 }
9979 return;
9980 }
39037602 9981 rxoff -= hlen;
39236c6e
A
9982
9983 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
9984 /*
9985 * Compute the expected 16-bit 1's complement sum;
9986 * skip this if we've already computed it above
9987 * when partial checksum offload is forced.
9988 */
9989 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
9990
9991 /* Hardware or driver is buggy */
9992 if (sum != m->m_pkthdr.csum_rx_val) {
9993 hwcksum_dbg_bad_cksum++;
9994 if (dlil_verbose) {
cb323159 9995 DLIL_PRINTF("%s: bad partial cksum value "
39236c6e
A
9996 "0x%x (expected 0x%x) for mbuf "
9997 "0x%llx [rx_start %d]\n",
9998 if_name(ifp),
9999 m->m_pkthdr.csum_rx_val, sum,
10000 (uint64_t)VM_KERNEL_ADDRPERM(m),
10001 m->m_pkthdr.csum_rx_start);
10002 }
10003 return;
10004 }
10005 }
10006 hwcksum_dbg_verified++;
10007
10008 /*
10009 * This code allows us to emulate various hardware
10010 * implementations that perform the 16-bit 1's complement
10011 * sum beginning at various start offset values.
10012 */
10013 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
10014 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
10015
0a7de745 10016 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
39236c6e 10017 return;
0a7de745 10018 }
39236c6e 10019
5ba3f43e
A
10020 sum = m_adj_sum16(m, rxoff, aoff,
10021 m_pktlen(m) - aoff, sum);
39236c6e
A
10022
10023 m->m_pkthdr.csum_rx_val = sum;
10024 m->m_pkthdr.csum_rx_start = (aoff + hlen);
10025
10026 hwcksum_dbg_adjusted++;
10027 }
10028 }
10029}
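
/*
 * For reference, a hedged standalone sketch of the 16-bit 1's
 * complement sum that m_sum16() computes over an mbuf span, written
 * for a plain contiguous buffer (sum16_buf is a hypothetical name;
 * the big-endian word order shown is illustrative):
 */
static uint16_t
sum16_buf(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)buf[0] << 8) | buf[1]; /* 16-bit words */
		buf += 2;
		len -= 2;
	}
	if (len == 1) {
		sum += (uint32_t)buf[0] << 8;	/* zero-pad the odd byte */
	}
	while (sum >> 16) {
		sum = (sum & 0xffff) + (sum >> 16); /* fold the carries */
	}
	return (uint16_t)sum;
}
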
10030
10031static int
10032sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
10033{
10034#pragma unused(arg1, arg2)
10035 u_int32_t i;
10036 int err;
10037
10038 i = hwcksum_dbg_mode;
10039
10040 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
10041 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10042 return err;
10043 }
39236c6e 10044
0a7de745
A
10045 if (hwcksum_dbg == 0) {
10046 return ENODEV;
10047 }
39236c6e 10048
0a7de745
A
10049 if ((i & ~HWCKSUM_DBG_MASK) != 0) {
10050 return EINVAL;
10051 }
39236c6e
A
10052
10053 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
10054
0a7de745 10055 return err;
39236c6e
A
10056}
10057
10058static int
10059sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
10060{
10061#pragma unused(arg1, arg2)
10062 u_int32_t i;
10063 int err;
10064
10065 i = hwcksum_dbg_partial_rxoff_forced;
10066
10067 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
10068 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10069 return err;
10070 }
39236c6e 10071
0a7de745
A
10072 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10073 return ENODEV;
10074 }
39236c6e
A
10075
10076 hwcksum_dbg_partial_rxoff_forced = i;
10077
0a7de745 10078 return err;
39236c6e
A
10079}
10080
10081static int
10082sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
10083{
10084#pragma unused(arg1, arg2)
10085 u_int32_t i;
10086 int err;
10087
10088 i = hwcksum_dbg_partial_rxoff_adj;
10089
10090 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
10091 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10092 return err;
10093 }
39236c6e 10094
0a7de745
A
10095 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
10096 return ENODEV;
10097 }
39236c6e
A
10098
10099 hwcksum_dbg_partial_rxoff_adj = i;
10100
0a7de745 10101 return err;
39236c6e
A
10102}
10103
3e170ce0
A
10104static int
10105sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
10106{
10107#pragma unused(oidp, arg1, arg2)
10108 int err;
39037602 10109
3e170ce0 10110 if (req->oldptr == USER_ADDR_NULL) {
3e170ce0
A
10111 }
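	/*
	 * Note: the empty branch above appears vestigial; with a NULL
	 * oldptr, SYSCTL_OUT() below simply reports the required length.
	 */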
10112 if (req->newptr != USER_ADDR_NULL) {
0a7de745 10113 return EPERM;
3e170ce0
A
10114 }
10115 err = SYSCTL_OUT(req, &tx_chain_len_stats,
10116 sizeof(struct chain_len_stats));
10117
0a7de745 10118 return err;
3e170ce0
A
10119}


#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;
	uint16_t len;
	uint16_t sumr;  /* reference */
	uint16_t sumrp; /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
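
/*
 * Worked example for the precomputed values above: with len = 2 the
 * summed bytes are sumdata[0] = 0x1f and sumdata[1] = 0x8b, i.e. the
 * 16-bit little-endian word 0x8b1f, matching the len=2 entry. For
 * len = 3 the odd trailing byte 0x08 is folded in as a final word
 * 0x0008, and 0x8b1f + 0x0008 = 0x8b27, matching the len=3 entry.
 */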

static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);       /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sumr, sumtbl[n].sumrp);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */

#define CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
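
/*
 * For illustration only: a hedged sketch of how a caller might use
 * dlil_kev_dl_code_str() when logging a DLIL kernel event. The helper
 * and the source of "event_code" are hypothetical.
 */
#if 0 /* example sketch, not compiled */
static void
example_log_dl_event(struct ifnet *ifp, u_int32_t event_code)
{
	const char *name = dlil_kev_dl_code_str(event_code);

	/* dlil_kev_dl_code_str() returns "" for codes it doesn't know */
	DLIL_PRINTF("%s: event %s (0x%x)\n", if_name(ifp),
	    (*name != '\0') ? name : "KEV_DL_<unknown>", event_code);
}
#endif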

/*
 * Thread call invoked when an interface's data threshold is crossed;
 * holds an I/O reference across the nstat notification.
 */
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}

void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
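
/*
 * For illustration only: the rate-limiting above hinges on
 * OSCompareAndSwap64() electing a single winner among concurrent
 * callers. A minimal sketch of that pattern in isolation follows;
 * the helper name is hypothetical and the block is not built.
 */
#if 0 /* example sketch, not compiled */
static boolean_t
example_threshold_crossed(uint64_t bytes, uint64_t *last_bytes,
    uint64_t threshold)
{
	uint64_t old = *last_bytes;

	/*
	 * Only the thread whose CAS succeeds reports the crossing;
	 * racing threads see the updated counter and back off.
	 */
	return (bytes - old) > threshold &&
	    OSCompareAndSwap64(old, bytes, last_bytes);
}
#endif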

#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames():
 *	ifp (interface index):	name[0]
 *	frames_array_count:	name[1]
 *	frame_data_offset:	name[2]
 * The returned length gives used_frames_count.
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root may look at other processes' TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		goto done;
	}
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL) {
		_FREE(frames_array, M_TEMP);
	}
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
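
/*
 * For illustration only: a hedged user-space sketch of driving the
 * handler above. The OID name "net.link.generic.system.get_kao_frames"
 * and user-space visibility of struct ifnet_keepalive_offload_frame
 * are assumptions; the declarations live outside this excerpt. Must
 * run as root (see the proc_suser() check above).
 */
#if 0 /* user-space example, not part of the kernel build */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <stdio.h>

int
main(void)
{
	int mib[CTL_MAXNAME];
	size_t miblen = CTL_MAXNAME;
	struct ifnet_keepalive_offload_frame frames[8];
	size_t len = sizeof(frames);

	/* assumed OID name; verify against the SYSCTL_NODE declaration */
	if (sysctlnametomib("net.link.generic.system.get_kao_frames",
	    mib, &miblen) == -1) {
		perror("sysctlnametomib");
		return 1;
	}
	/* name[0..2]: interface index, frames_array_count, frame_data_offset */
	mib[miblen + 0] = 1;	/* example: ifindex 1 */
	mib[miblen + 1] = 8;
	mib[miblen + 2] = 0;

	if (sysctl(mib, (u_int)(miblen + 3), frames, &len, NULL, 0) == -1) {
		perror("sysctl");
		return 1;
	}
	printf("used_frames_count = %zu\n",
	    len / sizeof(struct ifnet_keepalive_offload_frame));
	return 0;
}
#endif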

void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}