/*
 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <stddef.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <netinet/in.h>
#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#endif /* INET */

#if INET6
#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#endif /* INET6 */
#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#include <net/pktsched/pktsched.h>

#if NECP
#include <net/necp.h>
#endif /* NECP */

#define DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define MAX_LINKADDR		4	/* LONGWORDS */
#define M_NKE			M_IFADDR

#if 1
#define DLIL_PRINTF	printf
#else
#define DLIL_PRINTF	kprintf
#endif

#define IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))

enum {
	kProtoKPI_v1 = 1,
	kProtoKPI_v2 = 2
};

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock. The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi	resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi	resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);

#define DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG	0x4	/* has debugging info */

#define IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

/* For gdb */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)	(&s->dl_if)
#define IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)

struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)	filt_next;
	u_int32_t			filt_skip;
	u_int32_t			filt_flags;
	ifnet_t				filt_ifp;
	const char			*filt_name;
	void				*filt_cookie;
	protocol_family_t		filt_protocol;
	iff_input_func			filt_input;
	iff_output_func			filt_output;
	iff_event_func			filt_event;
	iff_ioctl_func			filt_ioctl;
	iff_detached_func		filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define DLIF_ZONE_MAX		IFNETS_MAX	/* maximum elements in zone */
#define DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2)	/* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME	"ifnet_proto"		/* zone name */

static unsigned int dlif_tcpstat_size;	/* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	/* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	/* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	/* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;

static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static errno_t ifnet_enqueue_common(struct ifnet *, void *,
    classq_pkt_type_t, boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */

/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX	32

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };

SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN	32
#define IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times delayed start was disabled");

#define HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;	/* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;	/* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");


unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);

uint32_t dlil_rcv_mit_pkts_min = 5;
uint32_t dlil_rcv_mit_pkts_max = 64;
uint32_t dlil_rcv_mit_interval = (500 * 1000);

#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
#endif /* DEVELOPMENT || DEBUG */


#define DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

#define DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
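
/*
 * Worked example of the EWMA above: with decay = 2 the macro keeps
 * (2^2 - 1)/2^2 = 3/4 of the old average and mixes in 1/4 of the new
 * sample.  For old = 100 and new = 20:
 * (((100 << 2) - 100) + 20) >> 2 = 320 >> 2 = 80.
 */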

#define MBPS	(1ULL * 1000 * 1000)
#define GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;	/* downlink speed */
	u_int32_t	plowat;	/* packets low watermark */
	u_int32_t	phiwat;	/* packets high watermark */
	u_int32_t	blowat;	/* bytes low watermark */
	u_int32_t	bhiwat;	/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024) },
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
	{ 0, 0, 0, 0, 0 }
};

int
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return 0;
	case PF_INET6:
		return 1;
	case PF_VLAN:
		return 2;
	case PF_UNSPEC:
	default:
		return 3;
	}
}
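
/*
 * Bucket 3 above is the catch-all for every family other than PF_INET,
 * PF_INET6 and PF_VLAN; callers such as dlil_ifp_protolist() below walk
 * all PROTO_HASH_SLOTS buckets of if_proto_hash[], so only the mapping
 * itself (and dlil_proto_unplumb_all(), per the comment above) has to
 * stay in sync with these bucket indices.
 */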

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL) {
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
	}

	while (proto != NULL && proto->protocol_family != protocol_family) {
		proto = SLIST_NEXT(proto, next_hash);
	}

	if (proto != NULL) {
		if_proto_ref(proto);
	}

	return proto;
}

static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) {
		return;
	}

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still
	 * attached (subject to change).
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}

__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass) {
		LCK_RW_ASSERT(&ifp->if_lock, type);
	}
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}
#endif

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
#endif

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}

/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 *
 * Note:
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
	u_int32_t count = 0;
	int i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL) {
		goto done;
	}

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return count;
}

__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
{
	ifnet_lock_shared(ifp);
	count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return count;
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	_FREE(list, M_TEMP);
}

__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	bzero(&ev_data, sizeof(ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof(struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	/* Don't update interface generation for quality and RRC state changes */
	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
	    event_code != KEV_DL_RRC_STATE_CHANGED));

	dlil_event_internal(ifp, &ev_msg, update_generation);
}

__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof(void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return ret;
}
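
/*
 * Layout sketch for the aligned stats allocations above (assuming
 * 8-byte alignment; the exact padding depends on the address that
 * zalloc() returns):
 *
 *	buf                       base = P2ROUNDUP(buf + 8, 8)
 *	|                         |
 *	v                         v
 *	+---------+---------------+---------------------------------+
 *	| padding | original buf  | 64-bit aligned tcp/udp stats    |
 *	|         | pointer       | object (dlif_*stat_size bytes)  |
 *	+---------+---------------+---------------------------------+
 *	          ^
 *	          base - sizeof (void *): read back in the error path
 *	          above to recover the address that must be handed to
 *	          zfree().
 */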
1255
6d2010ae 1256static int
316670eb 1257dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
2d21ac55 1258{
316670eb
A
1259 thread_continue_t func;
1260 u_int32_t limit;
2d21ac55
A
1261 int error;
1262
316670eb
A
1263 /* NULL ifp indicates the main input thread, called at dlil_init time */
1264 if (ifp == NULL) {
1265 func = dlil_main_input_thread_func;
1266 VERIFY(inp == dlil_main_input_thread);
1267 (void) strlcat(inp->input_name,
1268 "main_input", DLIL_THREADNAME_LEN);
1269 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1270 func = dlil_rxpoll_input_thread_func;
1271 VERIFY(inp != dlil_main_input_thread);
1272 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
39236c6e 1273 "%s_input_poll", if_name(ifp));
6d2010ae 1274 } else {
316670eb
A
1275 func = dlil_input_thread_func;
1276 VERIFY(inp != dlil_main_input_thread);
1277 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
39236c6e 1278 "%s_input", if_name(ifp));
6d2010ae 1279 }
316670eb 1280 VERIFY(inp->input_thr == THREAD_NULL);
2d21ac55 1281
316670eb
A
1282 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1283 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1284
1285 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
0a7de745 1286 inp->ifp = ifp; /* NULL for main input thread */
316670eb
A
1287
1288 net_timerclear(&inp->mode_holdtime);
1289 net_timerclear(&inp->mode_lasttime);
1290 net_timerclear(&inp->sample_holdtime);
1291 net_timerclear(&inp->sample_lasttime);
1292 net_timerclear(&inp->dbg_lasttime);
1293
1294 /*
1295 * For interfaces that support opportunistic polling, set the
1296 * low and high watermarks for outstanding inbound packets/bytes.
1297 * Also define freeze times for transitioning between modes
1298 * and updating the average.
1299 */
1300 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1301 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
39236c6e 1302 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
316670eb
A
1303 } else {
1304 limit = (u_int32_t)-1;
1305 }
1306
5ba3f43e 1307 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
316670eb
A
1308 if (inp == dlil_main_input_thread) {
1309 struct dlil_main_threading_info *inpm =
1310 (struct dlil_main_threading_info *)inp;
5ba3f43e 1311 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
316670eb 1312 }
2d21ac55 1313
316670eb
A
1314 error = kernel_thread_start(func, inp, &inp->input_thr);
1315 if (error == KERN_SUCCESS) {
1316 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
0a7de745 1317 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
2d21ac55 1318 /*
316670eb
A
1319 * We create an affinity set so that the matching workloop
1320 * thread or the starter thread (for loopback) can be
1321 * scheduled on the same processor set as the input thread.
2d21ac55 1322 */
316670eb
A
1323 if (net_affinity) {
1324 struct thread *tp = inp->input_thr;
2d21ac55
A
1325 u_int32_t tag;
1326 /*
1327 * Randomize to reduce the probability
1328 * of affinity tag namespace collision.
1329 */
0a7de745 1330 read_frandom(&tag, sizeof(tag));
2d21ac55
A
1331 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1332 thread_reference(tp);
316670eb
A
1333 inp->tag = tag;
1334 inp->net_affinity = TRUE;
2d21ac55
A
1335 }
1336 }
316670eb
A
1337 } else if (inp == dlil_main_input_thread) {
1338 panic_plain("%s: couldn't create main input thread", __func__);
1339 /* NOTREACHED */
2d21ac55 1340 } else {
39236c6e
A
1341 panic_plain("%s: couldn't create %s input thread", __func__,
1342 if_name(ifp));
6d2010ae 1343 /* NOTREACHED */
2d21ac55 1344 }
b0d623f7 1345 OSAddAtomic(1, &cur_dlil_input_threads);
316670eb 1346
0a7de745 1347 return error;
2d21ac55
A
1348}
1349
5ba3f43e
A
1350#if TEST_INPUT_THREAD_TERMINATION
1351static int
1352sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
316670eb 1353{
5ba3f43e
A
1354#pragma unused(arg1, arg2)
1355 uint32_t i;
1356 int err;
316670eb 1357
5ba3f43e 1358 i = if_input_thread_termination_spin;
316670eb 1359
5ba3f43e 1360 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745
A
1361 if (err != 0 || req->newptr == USER_ADDR_NULL) {
1362 return err;
1363 }
5ba3f43e 1364
0a7de745
A
1365 if (net_rxpoll == 0) {
1366 return ENXIO;
1367 }
316670eb 1368
5ba3f43e 1369 if_input_thread_termination_spin = i;
0a7de745 1370 return err;
5ba3f43e
A
1371}
1372#endif /* TEST_INPUT_THREAD_TERMINATION */
1373
1374static void
1375dlil_clean_threading_info(struct dlil_threading_info *inp)
1376{
316670eb
A
1377 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1378 lck_grp_free(inp->lck_grp);
1379
1380 inp->input_waiting = 0;
1381 inp->wtot = 0;
0a7de745 1382 bzero(inp->input_name, sizeof(inp->input_name));
316670eb
A
1383 inp->ifp = NULL;
1384 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1385 qlimit(&inp->rcvq_pkts) = 0;
0a7de745 1386 bzero(&inp->stats, sizeof(inp->stats));
316670eb
A
1387
1388 VERIFY(!inp->net_affinity);
1389 inp->input_thr = THREAD_NULL;
1390 VERIFY(inp->wloop_thr == THREAD_NULL);
1391 VERIFY(inp->poll_thr == THREAD_NULL);
1392 VERIFY(inp->tag == 0);
1393
1394 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
0a7de745
A
1395 bzero(&inp->tstats, sizeof(inp->tstats));
1396 bzero(&inp->pstats, sizeof(inp->pstats));
1397 bzero(&inp->sstats, sizeof(inp->sstats));
316670eb
A
1398
1399 net_timerclear(&inp->mode_holdtime);
1400 net_timerclear(&inp->mode_lasttime);
1401 net_timerclear(&inp->sample_holdtime);
1402 net_timerclear(&inp->sample_lasttime);
1403 net_timerclear(&inp->dbg_lasttime);
1404
1405#if IFNET_INPUT_SANITY_CHK
1406 inp->input_mbuf_cnt = 0;
1407#endif /* IFNET_INPUT_SANITY_CHK */
5ba3f43e 1408}
316670eb 1409
5ba3f43e
A
1410static void
1411dlil_terminate_input_thread(struct dlil_threading_info *inp)
1412{
1413 struct ifnet *ifp = inp->ifp;
1414
1415 VERIFY(current_thread() == inp->input_thr);
1416 VERIFY(inp != dlil_main_input_thread);
1417
1418 OSAddAtomic(-1, &cur_dlil_input_threads);
1419
1420#if TEST_INPUT_THREAD_TERMINATION
1421 { /* do something useless that won't get optimized away */
0a7de745 1422 uint32_t v = 1;
5ba3f43e 1423 for (uint32_t i = 0;
0a7de745
A
1424 i < if_input_thread_termination_spin;
1425 i++) {
5ba3f43e
A
1426 v = (i + 1) * v;
1427 }
1428 printf("the value is %d\n", v);
316670eb 1429 }
5ba3f43e
A
1430#endif /* TEST_INPUT_THREAD_TERMINATION */
1431
1432 lck_mtx_lock_spin(&inp->input_lck);
1433 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1434 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1435 wakeup_one((caddr_t)&inp->input_waiting);
1436 lck_mtx_unlock(&inp->input_lck);
316670eb
A
1437
1438 /* for the extra refcnt from kernel_thread_start() */
1439 thread_deallocate(current_thread());
1440
5ba3f43e
A
1441 if (dlil_verbose) {
1442 printf("%s: input thread terminated\n",
1443 if_name(ifp));
1444 }
1445
316670eb
A
1446 /* this is the end */
1447 thread_terminate(current_thread());
1448 /* NOTREACHED */
1449}
1450
2d21ac55
A
1451static kern_return_t
1452dlil_affinity_set(struct thread *tp, u_int32_t tag)
1453{
1454 thread_affinity_policy_data_t policy;
1455
0a7de745 1456 bzero(&policy, sizeof(policy));
2d21ac55 1457 policy.affinity_tag = tag;
0a7de745
A
1458 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1459 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
2d21ac55
A
1460}
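/*
 * Illustrative sketch (not part of the original source): two threads
 * that should be co-scheduled are simply given the same non-zero
 * affinity tag.  This is how the input, workloop and poller threads
 * of an interface end up in the same affinity set:
 */
#if 0 /* example only */
	/* bind the input thread and the driver workloop together */
	(void) dlil_affinity_set(inp->input_thr, inp->tag);
	(void) dlil_affinity_set(wloop_thread, inp->tag);	/* hypothetical */
#endif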
1461
91447636
A
1462void
1463dlil_init(void)
1464{
6d2010ae
A
1465 thread_t thread = THREAD_NULL;
1466
1467 /*
1468 * The following fields must be 64-bit aligned for atomic operations.
1469 */
1470 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1471 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1472 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1473 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1474 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1475 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1476 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1477 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1478 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1479 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1480 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1481 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1482 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1483 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1484 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1485
1486 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
39037602 1487 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
6d2010ae
A
1488 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1489 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1490 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1491 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1492 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1493 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1494 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1495 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1496 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1497 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1498 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1499 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1500 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1501
1502 /*
1503 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1504 */
1505 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1506 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1507 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1508 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1509 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
39236c6e
A
1510 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1511 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1512 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1513 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
5ba3f43e 1514 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
6d2010ae
A
1515 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1516 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1517 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1518 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1519
39236c6e
A
1520 /*
1521 * ... as well as the mbuf checksum flags counterparts.
1522 */
1523 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1524 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1525 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1526 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1527 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1528 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1529 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1530 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1531 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
5ba3f43e 1532 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
39236c6e
A
1533 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1534
6d2010ae
A
1535 /*
1536 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1537 */
1538 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
316670eb 1539 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
6d2010ae 1540
39236c6e
A
1541 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1542 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1543 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1544 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1545
1546 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1547 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1548 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1549
1550 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1551 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1552 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1553 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1554 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1555 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1556 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1557 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1558 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1559 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1560 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1561 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1562 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1563 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1564 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1565 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1566
1567 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1568 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1569 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1570 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1571 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
fe8ab488 1572 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
39037602 1573 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
39236c6e
A
1574
1575 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1576 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1577
6d2010ae 1578 PE_parse_boot_argn("net_affinity", &net_affinity,
0a7de745 1579 sizeof(net_affinity));
b0d623f7 1580
0a7de745 1581 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
316670eb 1582
0a7de745 1583 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
6d2010ae 1584
0a7de745 1585 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
6d2010ae 1586
0a7de745
A
1587 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1588 sizeof(struct dlil_ifnet_dbg);
6d2010ae 1589 /* Enforce 64-bit alignment for dlil_ifnet structure */
0a7de745
A
1590 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
1591 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
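/*
 * Illustrative sketch (not part of the original source): the extra
 * sizeof (void *) + sizeof (u_int64_t) bytes added above leave room
 * both to 64-bit align the structure within the zone element and to
 * stash the original (possibly unaligned) zone pointer just below
 * the aligned address, so that it can be recovered at free time:
 */
#if 0 /* example only */
	void *buf, *base, **pbuf;

	buf = zalloc(dlif_zone);
	/* align the usable region to a 64-bit boundary */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (void *),
	    sizeof (u_int64_t));
	/* remember the raw zone pointer right below 'base' */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	/* ... and at teardown: zfree(dlif_zone, *pbuf); */
#endif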
6d2010ae
A
1592 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1593 0, DLIF_ZONE_NAME);
1594 if (dlif_zone == NULL) {
316670eb
A
1595 panic_plain("%s: failed allocating %s", __func__,
1596 DLIF_ZONE_NAME);
6d2010ae
A
1597 /* NOTREACHED */
1598 }
1599 zone_change(dlif_zone, Z_EXPAND, TRUE);
1600 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1601
0a7de745 1602 dlif_filt_size = sizeof(struct ifnet_filter);
6d2010ae
A
1603 dlif_filt_zone = zinit(dlif_filt_size,
1604 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1605 if (dlif_filt_zone == NULL) {
316670eb 1606 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1607 DLIF_FILT_ZONE_NAME);
1608 /* NOTREACHED */
1609 }
1610 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1611 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1612
0a7de745 1613 dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS;
6d2010ae
A
1614 dlif_phash_zone = zinit(dlif_phash_size,
1615 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1616 if (dlif_phash_zone == NULL) {
316670eb 1617 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1618 DLIF_PHASH_ZONE_NAME);
1619 /* NOTREACHED */
1620 }
1621 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1622 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1623
0a7de745 1624 dlif_proto_size = sizeof(struct if_proto);
6d2010ae
A
1625 dlif_proto_zone = zinit(dlif_proto_size,
1626 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1627 if (dlif_proto_zone == NULL) {
316670eb 1628 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1629 DLIF_PROTO_ZONE_NAME);
1630 /* NOTREACHED */
1631 }
1632 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1633 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1634
0a7de745 1635 dlif_tcpstat_size = sizeof(struct tcpstat_local);
316670eb
A
1636 /* Enforce 64-bit alignment for tcpstat_local structure */
1637 dlif_tcpstat_bufsize =
0a7de745 1638 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
316670eb 1639 dlif_tcpstat_bufsize =
0a7de745 1640 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
316670eb
A
1641 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1642 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1643 DLIF_TCPSTAT_ZONE_NAME);
1644 if (dlif_tcpstat_zone == NULL) {
1645 panic_plain("%s: failed allocating %s", __func__,
1646 DLIF_TCPSTAT_ZONE_NAME);
1647 /* NOTREACHED */
1648 }
1649 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1650 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1651
0a7de745 1652 dlif_udpstat_size = sizeof(struct udpstat_local);
316670eb
A
1653 /* Enforce 64-bit alignment for udpstat_local structure */
1654 dlif_udpstat_bufsize =
0a7de745 1655 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
316670eb 1656 dlif_udpstat_bufsize =
0a7de745 1657 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
316670eb
A
1658 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1659 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1660 DLIF_UDPSTAT_ZONE_NAME);
1661 if (dlif_udpstat_zone == NULL) {
1662 panic_plain("%s: failed allocating %s", __func__,
1663 DLIF_UDPSTAT_ZONE_NAME);
1664 /* NOTREACHED */
1665 }
1666 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1667 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1668
6d2010ae 1669 ifnet_llreach_init();
5ba3f43e 1670 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
d1ecb069 1671
91447636 1672 TAILQ_INIT(&dlil_ifnet_head);
91447636 1673 TAILQ_INIT(&ifnet_head);
6d2010ae 1674 TAILQ_INIT(&ifnet_detaching_head);
39037602 1675 TAILQ_INIT(&ifnet_ordered_head);
6d2010ae 1676
91447636 1677 /* Setup the lock groups we will use */
2d21ac55 1678 dlil_grp_attributes = lck_grp_attr_alloc_init();
91447636 1679
316670eb 1680 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
6d2010ae
A
1681 dlil_grp_attributes);
1682 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1683 dlil_grp_attributes);
1684 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1685 dlil_grp_attributes);
316670eb
A
1686 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1687 dlil_grp_attributes);
1688 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
6d2010ae
A
1689 dlil_grp_attributes);
1690
91447636 1691 /* Setup the lock attributes we will use */
2d21ac55 1692 dlil_lck_attributes = lck_attr_alloc_init();
6d2010ae 1693
91447636 1694 ifnet_lock_attr = lck_attr_alloc_init();
6d2010ae
A
1695
1696 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1697 dlil_lck_attributes);
1698 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1699
39236c6e
A
1700 /* Setup interface flow control related items */
1701 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
316670eb 1702
0a7de745 1703 ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry);
39236c6e
A
1704 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1705 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1706 if (ifnet_fc_zone == NULL) {
1707 panic_plain("%s: failed allocating %s", __func__,
1708 IFNET_FC_ZONE_NAME);
1709 /* NOTREACHED */
1710 }
1711 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1712 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
6d2010ae 1713
39236c6e 1714 /* Initialize interface address subsystem */
6d2010ae 1715 ifa_init();
39236c6e
A
1716
1717#if PF
1718 /* Initialize the packet filter */
1719 pfinit();
1720#endif /* PF */
1721
1722 /* Initialize queue algorithms */
1723 classq_init();
1724
1725 /* Initialize packet schedulers */
1726 pktsched_init();
1727
1728 /* Initialize flow advisory subsystem */
1729 flowadv_init();
1730
1731 /* Initialize the pktap virtual interface */
1732 pktap_init();
1733
39037602
A
1734 /* Initialize the service class to dscp map */
1735 net_qos_map_init();
1736
a39ff7e2
A
1737 /* Initialize the interface port list */
1738 if_ports_used_init();
1739
d9a64523
A
1740 /* Initialize the interface low power mode event handler */
1741 if_low_power_evhdlr_init();
1742
5ba3f43e 1743#if DEBUG || DEVELOPMENT
39236c6e
A
1744 /* Run self-tests */
1745 dlil_verify_sum16();
5ba3f43e
A
1746#endif /* DEBUG || DEVELOPMENT */
1747
1748 /* Initialize link layer table */
1749 lltable_glbl_init();
39236c6e 1750
91447636 1751 /*
316670eb
A
1752 * Create and start up the main DLIL input thread and the interface
1753 * detacher thread once everything is initialized.
91447636 1754 */
316670eb 1755 dlil_create_input_thread(NULL, dlil_main_input_thread);
2d21ac55 1756
316670eb
A
1757 if (kernel_thread_start(ifnet_detacher_thread_func,
1758 NULL, &thread) != KERN_SUCCESS) {
1759 panic_plain("%s: couldn't create detacher thread", __func__);
6d2010ae
A
1760 /* NOTREACHED */
1761 }
b0d623f7 1762 thread_deallocate(thread);
91447636 1763}
1c79356b 1764
6d2010ae
A
1765static void
1766if_flt_monitor_busy(struct ifnet *ifp)
1767{
5ba3f43e 1768 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1769
1770 ++ifp->if_flt_busy;
1771 VERIFY(ifp->if_flt_busy != 0);
1772}
1773
1774static void
1775if_flt_monitor_unbusy(struct ifnet *ifp)
1776{
1777 if_flt_monitor_leave(ifp);
1778}
1779
1780static void
1781if_flt_monitor_enter(struct ifnet *ifp)
1782{
5ba3f43e 1783 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1784
1785 while (ifp->if_flt_busy) {
1786 ++ifp->if_flt_waiters;
1787 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1788 (PZERO - 1), "if_flt_monitor", NULL);
1789 }
1790 if_flt_monitor_busy(ifp);
1791}
1792
1793static void
1794if_flt_monitor_leave(struct ifnet *ifp)
1795{
5ba3f43e 1796 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
1797
1798 VERIFY(ifp->if_flt_busy != 0);
1799 --ifp->if_flt_busy;
1800
1801 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1802 ifp->if_flt_waiters = 0;
1803 wakeup(&ifp->if_flt_head);
1804 }
1805}
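/*
 * Usage sketch (illustrative only): the helpers above form a small
 * monitor over the filter list.  Readers mark the list busy while
 * they walk it; writers first drain all readers:
 */
#if 0 /* example only */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);	/* blocks until no one is busy */
	/* ... insert or remove an entry on ifp->if_flt_head ... */
	if_flt_monitor_leave(ifp);	/* wakes any waiting writers */
	lck_mtx_unlock(&ifp->if_flt_lock);
#endif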
1806
2d21ac55 1807__private_extern__ int
0a7de745 1808dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
39236c6e 1809 interface_filter_t *filter_ref, u_int32_t flags)
6d2010ae
A
1810{
1811 int retval = 0;
1812 struct ifnet_filter *filter = NULL;
9bccf70c 1813
6d2010ae
A
1814 ifnet_head_lock_shared();
1815 /* Check that the interface is in the global list */
1816 if (!ifnet_lookup(ifp)) {
1817 retval = ENXIO;
1818 goto done;
1819 }
1820
1821 filter = zalloc(dlif_filt_zone);
1822 if (filter == NULL) {
1823 retval = ENOMEM;
1824 goto done;
1825 }
1826 bzero(filter, dlif_filt_size);
1827
1828 /* refcnt held above during lookup */
39236c6e 1829 filter->filt_flags = flags;
91447636
A
1830 filter->filt_ifp = ifp;
1831 filter->filt_cookie = if_filter->iff_cookie;
1832 filter->filt_name = if_filter->iff_name;
1833 filter->filt_protocol = if_filter->iff_protocol;
743345f9
A
1834 /*
1835 * Do not install filter callbacks for internal coproc interface
1836 */
1837 if (!IFNET_IS_INTCOPROC(ifp)) {
1838 filter->filt_input = if_filter->iff_input;
1839 filter->filt_output = if_filter->iff_output;
1840 filter->filt_event = if_filter->iff_event;
1841 filter->filt_ioctl = if_filter->iff_ioctl;
1842 }
91447636 1843 filter->filt_detached = if_filter->iff_detached;
6d2010ae
A
1844
1845 lck_mtx_lock(&ifp->if_flt_lock);
1846 if_flt_monitor_enter(ifp);
1847
5ba3f43e 1848 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 1849 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
6d2010ae
A
1850
1851 if_flt_monitor_leave(ifp);
1852 lck_mtx_unlock(&ifp->if_flt_lock);
1853
91447636 1854 *filter_ref = filter;
b0d623f7
A
1855
1856 /*
1857 * Bump filter count and route_generation ID to let TCP
1858 * know it shouldn't do TSO on this connection
1859 */
39236c6e
A
1860 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1861 OSAddAtomic(1, &dlil_filter_disable_tso_count);
b0d623f7 1862 routegenid_update();
39236c6e 1863 }
5ba3f43e
A
1864 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1865 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1866 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1867 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1868 }
6d2010ae 1869 if (dlil_verbose) {
39236c6e
A
1870 printf("%s: %s filter attached\n", if_name(ifp),
1871 if_filter->iff_name);
6d2010ae
A
1872 }
1873done:
1874 ifnet_head_done();
1875 if (retval != 0 && ifp != NULL) {
39236c6e
A
1876 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1877 if_name(ifp), if_filter->iff_name, retval);
6d2010ae 1878 }
0a7de745 1879 if (retval != 0 && filter != NULL) {
6d2010ae 1880 zfree(dlif_filt_zone, filter);
0a7de745 1881 }
6d2010ae 1882
0a7de745 1883 return retval;
1c79356b
A
1884}
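/*
 * Example (illustrative sketch): callers normally reach this code via
 * the public iflt_attach() KPI with a filled-in iff_filter; callback
 * and bundle names below are hypothetical.
 */
#if 0 /* example only */
static errno_t
example_iff_input(void *cookie, ifnet_t ifp, protocol_family_t pf,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, pf, data, frame_ptr)
	return 0;	/* 0 lets the packet continue up the stack */
}

static void
example_filter_attach(ifnet_t ifp, interface_filter_t *ref)
{
	struct iff_filter filt;

	bzero(&filt, sizeof (filt));
	filt.iff_name = "com.example.filter";	/* hypothetical */
	filt.iff_protocol = 0;			/* all protocols */
	filt.iff_input = example_iff_input;
	(void) iflt_attach(ifp, &filt, ref);
	/* later: iflt_detach(*ref) undoes this */
}
#endif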
1885
91447636 1886static int
0a7de745 1887dlil_detach_filter_internal(interface_filter_t filter, int detached)
1c79356b 1888{
91447636 1889 int retval = 0;
6d2010ae 1890
3a60a9f5 1891 if (detached == 0) {
6d2010ae
A
1892 ifnet_t ifp = NULL;
1893
3a60a9f5
A
1894 ifnet_head_lock_shared();
1895 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6d2010ae
A
1896 interface_filter_t entry = NULL;
1897
1898 lck_mtx_lock(&ifp->if_flt_lock);
3a60a9f5 1899 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
0a7de745 1900 if (entry != filter || entry->filt_skip) {
6d2010ae 1901 continue;
0a7de745 1902 }
6d2010ae
A
1903 /*
1904 * We've found a match; since it's possible
1905 * that the thread gets blocked in the monitor,
1906 * we do the lock dance. Interface should
1907 * not be detached since we still have a use
1908 * count held during filter attach.
1909 */
0a7de745 1910 entry->filt_skip = 1; /* skip input/output */
6d2010ae
A
1911 lck_mtx_unlock(&ifp->if_flt_lock);
1912 ifnet_head_done();
1913
1914 lck_mtx_lock(&ifp->if_flt_lock);
1915 if_flt_monitor_enter(ifp);
5ba3f43e 1916 LCK_MTX_ASSERT(&ifp->if_flt_lock,
6d2010ae
A
1917 LCK_MTX_ASSERT_OWNED);
1918
1919 /* Remove the filter from the list */
1920 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1921 filt_next);
1922
1923 if_flt_monitor_leave(ifp);
1924 lck_mtx_unlock(&ifp->if_flt_lock);
1925 if (dlil_verbose) {
39236c6e
A
1926 printf("%s: %s filter detached\n",
1927 if_name(ifp), filter->filt_name);
6d2010ae
A
1928 }
1929 goto destroy;
3a60a9f5 1930 }
6d2010ae 1931 lck_mtx_unlock(&ifp->if_flt_lock);
3a60a9f5
A
1932 }
1933 ifnet_head_done();
6d2010ae
A
1934
1935 /* filter parameter is not a valid filter ref */
1936 retval = EINVAL;
1937 goto done;
3a60a9f5 1938 }
6d2010ae 1939
0a7de745 1940 if (dlil_verbose) {
6d2010ae 1941 printf("%s filter detached\n", filter->filt_name);
0a7de745 1942 }
6d2010ae
A
1943
1944destroy:
1945
1946 /* Call the detached function if there is one */
0a7de745 1947 if (filter->filt_detached) {
91447636 1948 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
0a7de745 1949 }
9bccf70c 1950
b0d623f7
A
1951 /*
1952 * Decrease filter count and route_generation ID to let TCP
1953 * know it should reevaluate doing TSO or not
1954 */
39236c6e
A
1955 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1956 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
b0d623f7 1957 routegenid_update();
39236c6e 1958 }
39037602 1959
5ba3f43e
A
1960 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1961
39037602
A
1962 /* Free the filter */
1963 zfree(dlif_filt_zone, filter);
1964 filter = NULL;
6d2010ae 1965done:
39037602 1966 if (retval != 0 && filter != NULL) {
6d2010ae
A
1967 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1968 filter->filt_name, retval);
1969 }
39037602 1970
0a7de745 1971 return retval;
1c79356b
A
1972}
1973
2d21ac55 1974__private_extern__ void
91447636
A
1975dlil_detach_filter(interface_filter_t filter)
1976{
0a7de745 1977 if (filter == NULL) {
3a60a9f5 1978 return;
0a7de745 1979 }
91447636
A
1980 dlil_detach_filter_internal(filter, 0);
1981}
1c79356b 1982
316670eb
A
1983/*
1984 * Main input thread:
1985 *
1986 * a) handles all inbound packets for lo0
1987 * b) handles all inbound packets for interfaces with no dedicated
1988 * input thread (e.g. anything but Ethernet/PDP or those that support
1989 * opportunistic polling.)
1990 * c) protocol registrations
1991 * d) packet injections
1992 */
39037602 1993__attribute__((noreturn))
91447636 1994static void
316670eb 1995dlil_main_input_thread_func(void *v, wait_result_t w)
91447636 1996{
316670eb
A
1997#pragma unused(w)
1998 struct dlil_main_threading_info *inpm = v;
1999 struct dlil_threading_info *inp = v;
2000
2001 VERIFY(inp == dlil_main_input_thread);
2002 VERIFY(inp->ifp == NULL);
2003 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2004
91447636 2005 while (1) {
2d21ac55 2006 struct mbuf *m = NULL, *m_loop = NULL;
316670eb
A
2007 u_int32_t m_cnt, m_cnt_loop;
2008 boolean_t proto_req;
6d2010ae 2009
316670eb 2010 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 2011
2d21ac55 2012 /* Wait until there is work to be done */
316670eb
A
2013 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2014 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2015 (void) msleep(&inp->input_waiting, &inp->input_lck,
2016 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
2017 }
2018
316670eb
A
2019 inp->input_waiting |= DLIL_INPUT_RUNNING;
2020 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55 2021
316670eb
A
2022 /* Main input thread cannot be terminated */
2023 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2d21ac55 2024
316670eb
A
2025 proto_req = (inp->input_waiting &
2026 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
6d2010ae 2027
316670eb
A
2028 /* Packets for non-dedicated interfaces other than lo0 */
2029 m_cnt = qlen(&inp->rcvq_pkts);
39037602 2030 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
6d2010ae 2031
39236c6e 2032 /* Packets exclusive to lo0 */
316670eb 2033 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
39037602 2034 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
6d2010ae 2035
316670eb 2036 inp->wtot = 0;
6d2010ae 2037
316670eb 2038 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2039
316670eb 2040 /*
39037602
A
2041 * NOTE: we should consider adding thread starvation
2042 * safeguards here if we ever have to deal with long
2043 * chains of packets.
2044 */
0a7de745 2045 if (m_loop != NULL) {
316670eb
A
2046 dlil_input_packet_list_extended(lo_ifp, m_loop,
2047 m_cnt_loop, inp->mode);
0a7de745 2048 }
6d2010ae 2049
0a7de745 2050 if (m != NULL) {
316670eb
A
2051 dlil_input_packet_list_extended(NULL, m,
2052 m_cnt, inp->mode);
0a7de745 2053 }
316670eb 2054
0a7de745 2055 if (proto_req) {
316670eb 2056 proto_input_run();
0a7de745 2057 }
316670eb
A
2058 }
2059
2060 /* NOTREACHED */
0a7de745 2061 VERIFY(0); /* we should never get here */
316670eb
A
2062}
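/*
 * Illustrative sketch: packets land in this thread when a driver
 * without a dedicated input thread hands DLIL a chain via
 * ifnet_input(); the chain is queued on rcvq_pkts (lo_rcvq_pkts for
 * lo0) by dlil_input_handler() below and this thread is woken up.
 */
#if 0 /* example only */
static void
example_driver_rx(ifnet_t ifp, mbuf_t m_head)	/* hypothetical */
{
	/* with NULL stats, DLIL computes the packet/byte counts */
	(void) ifnet_input(ifp, m_head, NULL);
}
#endif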
2063
2064/*
2065 * Input thread for interfaces with legacy input model.
2066 */
2067static void
2068dlil_input_thread_func(void *v, wait_result_t w)
2069{
2070#pragma unused(w)
39037602 2071 char thread_name[MAXTHREADNAMESIZE];
316670eb
A
2072 struct dlil_threading_info *inp = v;
2073 struct ifnet *ifp = inp->ifp;
2074
39037602
A
2075 /* Construct the name for this thread, and then apply it. */
2076 bzero(thread_name, sizeof(thread_name));
2077 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2078 thread_set_thread_name(inp->input_thr, thread_name);
2079
316670eb
A
2080 VERIFY(inp != dlil_main_input_thread);
2081 VERIFY(ifp != NULL);
2082 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2083 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2d21ac55 2084
316670eb
A
2085 while (1) {
2086 struct mbuf *m = NULL;
2087 u_int32_t m_cnt;
2088
2089 lck_mtx_lock_spin(&inp->input_lck);
2d21ac55 2090
316670eb
A
2091 /* Wait until there is work to be done */
2092 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2093 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2094 (void) msleep(&inp->input_waiting, &inp->input_lck,
2095 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
2096 }
2097
316670eb
A
2098 inp->input_waiting |= DLIL_INPUT_RUNNING;
2099 inp->input_waiting &= ~DLIL_INPUT_WAITING;
6d2010ae 2100
316670eb
A
2101 /*
2102 * Protocol registration and injection must always use
2103 * the main input thread; in theory injection could use
2104 * the input thread of the interface the packet arrived
2105 * on, but that requires knowing the interface in advance
2106 * (and the benefits might not be worth the trouble.)
2107 */
2108 VERIFY(!(inp->input_waiting &
0a7de745 2109 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
6d2010ae 2110
316670eb
A
2111 /* Packets for this interface */
2112 m_cnt = qlen(&inp->rcvq_pkts);
39037602 2113 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
6d2010ae 2114
316670eb
A
2115 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2116 lck_mtx_unlock(&inp->input_lck);
2117
2118 /* Free up pending packets */
0a7de745 2119 if (m != NULL) {
316670eb 2120 mbuf_freem_list(m);
0a7de745 2121 }
316670eb
A
2122
2123 dlil_terminate_input_thread(inp);
2124 /* NOTREACHED */
2125 return;
2d21ac55
A
2126 }
2127
316670eb
A
2128 inp->wtot = 0;
2129
0a7de745 2130 dlil_input_stats_sync(ifp, inp);
316670eb
A
2131
2132 lck_mtx_unlock(&inp->input_lck);
2d21ac55 2133
91447636 2134 /*
39037602
A
2135 * NOTE: we should consider adding thread starvation
2136 * safeguards here if we ever have to deal with long
2137 * chains of packets.
2138 */
0a7de745 2139 if (m != NULL) {
316670eb
A
2140 dlil_input_packet_list_extended(NULL, m,
2141 m_cnt, inp->mode);
0a7de745 2142 }
2d21ac55 2143 }
316670eb
A
2144
2145 /* NOTREACHED */
0a7de745 2146 VERIFY(0); /* we should never get here */
2d21ac55
A
2147}
2148
316670eb
A
2149/*
2150 * Input thread for interfaces with opportunistic polling input model.
2151 */
2152static void
2153dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2d21ac55 2154{
316670eb
A
2155#pragma unused(w)
2156 struct dlil_threading_info *inp = v;
2157 struct ifnet *ifp = inp->ifp;
2158 struct timespec ts;
2d21ac55 2159
316670eb
A
2160 VERIFY(inp != dlil_main_input_thread);
2161 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2d21ac55 2162
2d21ac55 2163 while (1) {
316670eb
A
2164 struct mbuf *m = NULL;
2165 u_int32_t m_cnt, m_size, poll_req = 0;
2166 ifnet_model_t mode;
2167 struct timespec now, delta;
39236c6e 2168 u_int64_t ival;
6d2010ae 2169
316670eb 2170 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 2171
0a7de745 2172 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 2173 ival = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 2174 }
39236c6e 2175
316670eb
A
2176 /* Link parameters changed? */
2177 if (ifp->if_poll_update != 0) {
2178 ifp->if_poll_update = 0;
39236c6e 2179 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
91447636 2180 }
1c79356b 2181
316670eb
A
2182 /* Current operating mode */
2183 mode = inp->mode;
1c79356b 2184
316670eb 2185 /* Wait until there is work to be done */
39236c6e 2186 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
316670eb
A
2187 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2188 (void) msleep(&inp->input_waiting, &inp->input_lck,
2189 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2190 }
2d21ac55 2191
316670eb
A
2192 inp->input_waiting |= DLIL_INPUT_RUNNING;
2193 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55
A
2194
2195 /*
316670eb
A
2196 * Protocol registration and injection must always use
2197 * the main input thread; in theory injection could use
2198 * the input thread of the interface the packet arrived
2199 * on, but that requires knowing the interface in advance
2200 * (and the benefits might not be worth the trouble.)
2d21ac55 2201 */
316670eb 2202 VERIFY(!(inp->input_waiting &
0a7de745 2203 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2d21ac55 2204
316670eb
A
2205 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2206 /* Free up pending packets */
5ba3f43e 2207 lck_mtx_convert_spin(&inp->input_lck);
316670eb 2208 _flushq(&inp->rcvq_pkts);
5ba3f43e 2209 if (inp->input_mit_tcall != NULL) {
0a7de745 2210 if (thread_call_isactive(inp->input_mit_tcall)) {
5ba3f43e 2211 thread_call_cancel(inp->input_mit_tcall);
0a7de745 2212 }
5ba3f43e 2213 }
316670eb 2214 lck_mtx_unlock(&inp->input_lck);
2d21ac55 2215
316670eb
A
2216 dlil_terminate_input_thread(inp);
2217 /* NOTREACHED */
2218 return;
2d21ac55 2219 }
2d21ac55 2220
316670eb
A
2221 /* Total count of all packets */
2222 m_cnt = qlen(&inp->rcvq_pkts);
2223
2224 /* Total bytes of all packets */
2225 m_size = qsize(&inp->rcvq_pkts);
2226
2227 /* Packets for this interface */
39037602 2228 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
316670eb
A
2229 VERIFY(m != NULL || m_cnt == 0);
2230
2231 nanouptime(&now);
0a7de745 2232 if (!net_timerisset(&inp->sample_lasttime)) {
316670eb 2233 *(&inp->sample_lasttime) = *(&now);
0a7de745 2234 }
316670eb
A
2235
2236 net_timersub(&now, &inp->sample_lasttime, &delta);
2237 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2238 u_int32_t ptot, btot;
2239
2240 /* Accumulate statistics for current sampling */
2241 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2242
0a7de745 2243 if (net_timercmp(&delta, &inp->sample_holdtime, <)) {
316670eb 2244 goto skip;
0a7de745 2245 }
316670eb
A
2246
2247 *(&inp->sample_lasttime) = *(&now);
2248
2249 /* Calculate min/max of inbound bytes */
2250 btot = (u_int32_t)inp->sstats.bytes;
0a7de745 2251 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot) {
316670eb 2252 inp->rxpoll_bmin = btot;
0a7de745
A
2253 }
2254 if (btot > inp->rxpoll_bmax) {
316670eb 2255 inp->rxpoll_bmax = btot;
0a7de745 2256 }
316670eb
A
2257
2258 /* Calculate EWMA of inbound bytes */
2259 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2260
2261 /* Calculate min/max of inbound packets */
2262 ptot = (u_int32_t)inp->sstats.packets;
0a7de745 2263 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot) {
316670eb 2264 inp->rxpoll_pmin = ptot;
0a7de745
A
2265 }
2266 if (ptot > inp->rxpoll_pmax) {
316670eb 2267 inp->rxpoll_pmax = ptot;
0a7de745 2268 }
316670eb
A
2269
2270 /* Calculate EWMA of inbound packets */
2271 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2272
2273 /* Reset sampling statistics */
2274 PKTCNTR_CLEAR(&inp->sstats);
2275
2276 /* Calculate EWMA of wakeup requests */
2277 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2278 inp->wtot = 0;
2279
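			/*
			 * Worked example (illustrative, assuming DLIL_EWMA
			 * is the shift-based filter defined earlier in this
			 * file): with if_rxpoll_decay == 2 the update is
			 * avg' = ((avg << 2) - avg + new) >> 2, i.e.
			 * (3 * avg + new) / 4.  For avg = 80 pkts and a
			 * new sample of 120 pkts, avg' = (240 + 120) / 4
			 * = 90 pkts.
			 */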
2280 if (dlil_verbose) {
0a7de745 2281 if (!net_timerisset(&inp->dbg_lasttime)) {
316670eb 2282 *(&inp->dbg_lasttime) = *(&now);
0a7de745 2283 }
316670eb
A
2284 net_timersub(&now, &inp->dbg_lasttime, &delta);
2285 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2286 *(&inp->dbg_lasttime) = *(&now);
39236c6e 2287 printf("%s: [%s] pkts avg %d max %d "
316670eb
A
2288 "limits [%d/%d], wreq avg %d "
2289 "limits [%d/%d], bytes avg %d "
39236c6e
A
2290 "limits [%d/%d]\n", if_name(ifp),
2291 (inp->mode ==
316670eb
A
2292 IFNET_MODEL_INPUT_POLL_ON) ?
2293 "ON" : "OFF", inp->rxpoll_pavg,
2294 inp->rxpoll_pmax,
2295 inp->rxpoll_plowat,
2296 inp->rxpoll_phiwat,
2297 inp->rxpoll_wavg,
2298 inp->rxpoll_wlowat,
2299 inp->rxpoll_whiwat,
2300 inp->rxpoll_bavg,
2301 inp->rxpoll_blowat,
2302 inp->rxpoll_bhiwat);
2303 }
2304 }
2d21ac55 2305
316670eb 2306 /* Perform mode transition, if necessary */
0a7de745 2307 if (!net_timerisset(&inp->mode_lasttime)) {
316670eb 2308 *(&inp->mode_lasttime) = *(&now);
0a7de745 2309 }
316670eb
A
2310
2311 net_timersub(&now, &inp->mode_lasttime, &delta);
0a7de745 2312 if (net_timercmp(&delta, &inp->mode_holdtime, <)) {
316670eb 2313 goto skip;
0a7de745 2314 }
316670eb
A
2315
2316 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2317 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
316670eb
A
2318 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2319 mode = IFNET_MODEL_INPUT_POLL_OFF;
2320 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2321 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2322 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2323 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2324 mode = IFNET_MODEL_INPUT_POLL_ON;
2325 }
6d2010ae 2326
316670eb
A
2327 if (mode != inp->mode) {
2328 inp->mode = mode;
2329 *(&inp->mode_lasttime) = *(&now);
2330 poll_req++;
2331 }
2332 }
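		/*
		 * Worked example (illustrative): suppose plowat/phiwat
		 * are 10/100 packets.  The thread falls back to the
		 * interrupt model (POLL_OFF) only once both the packet
		 * and byte EWMAs drop to their low watermarks, and it
		 * switches to polling (POLL_ON) only once the packet
		 * EWMA reaches phiwat and either the byte or the wakeup
		 * EWMA reaches its high watermark; mode_holdtime keeps
		 * the mode from flapping in between.
		 */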
2333skip:
2334 dlil_input_stats_sync(ifp, inp);
6d2010ae 2335
316670eb 2336 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2337
316670eb
A
2338 /*
2339 * If there's a mode change and interface is still attached,
2340 * perform a downcall to the driver for the new mode. Also
2341 * hold an IO refcnt on the interface to prevent it from
2342 * being detached (will be released below.)
2343 */
2344 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2345 struct ifnet_model_params p = { mode, { 0 } };
2346 errno_t err;
2347
2348 if (dlil_verbose) {
39236c6e 2349 printf("%s: polling is now %s, "
316670eb
A
2350 "pkts avg %d max %d limits [%d/%d], "
2351 "wreq avg %d limits [%d/%d], "
2352 "bytes avg %d limits [%d/%d]\n",
39236c6e 2353 if_name(ifp),
316670eb
A
2354 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2355 "ON" : "OFF", inp->rxpoll_pavg,
2356 inp->rxpoll_pmax, inp->rxpoll_plowat,
2357 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2358 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2359 inp->rxpoll_bavg, inp->rxpoll_blowat,
2360 inp->rxpoll_bhiwat);
2361 }
2d21ac55 2362
316670eb 2363 if ((err = ((*ifp->if_input_ctl)(ifp,
0a7de745 2364 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
39236c6e
A
2365 printf("%s: error setting polling mode "
2366 "to %s (%d)\n", if_name(ifp),
316670eb
A
2367 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2368 "ON" : "OFF", err);
2369 }
1c79356b 2370
316670eb
A
2371 switch (mode) {
2372 case IFNET_MODEL_INPUT_POLL_OFF:
2373 ifnet_set_poll_cycle(ifp, NULL);
2374 inp->rxpoll_offreq++;
0a7de745 2375 if (err != 0) {
316670eb 2376 inp->rxpoll_offerr++;
0a7de745 2377 }
316670eb 2378 break;
2d21ac55 2379
316670eb 2380 case IFNET_MODEL_INPUT_POLL_ON:
39236c6e 2381 net_nsectimer(&ival, &ts);
316670eb
A
2382 ifnet_set_poll_cycle(ifp, &ts);
2383 ifnet_poll(ifp);
2384 inp->rxpoll_onreq++;
0a7de745 2385 if (err != 0) {
316670eb 2386 inp->rxpoll_onerr++;
0a7de745 2387 }
316670eb
A
2388 break;
2389
2390 default:
2391 VERIFY(0);
2392 /* NOTREACHED */
2393 }
2394
2395 /* Release the IO refcnt */
2396 ifnet_decr_iorefcnt(ifp);
2397 }
2398
2399 /*
39037602
A
2400 * NOTE: we should consider adding thread starvation
2401 * safeguards here if we ever have to deal with long
2402 * chains of packets.
2403 */
0a7de745 2404 if (m != NULL) {
316670eb 2405 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
0a7de745 2406 }
316670eb
A
2407 }
2408
2409 /* NOTREACHED */
0a7de745 2410 VERIFY(0); /* we should never get here */
316670eb
A
2411}
2412
39236c6e
A
2413/*
2414 * Must be called on an attached ifnet (caller is expected to check.)
2415 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2416 */
2417errno_t
2418dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2419 boolean_t locked)
316670eb 2420{
39236c6e 2421 struct dlil_threading_info *inp;
316670eb
A
2422 u_int64_t sample_holdtime, inbw;
2423
39236c6e 2424 VERIFY(ifp != NULL);
0a7de745
A
2425 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2426 return ENXIO;
2427 }
39236c6e
A
2428
2429 if (p != NULL) {
2430 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
0a7de745
A
2431 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2432 return EINVAL;
2433 }
2434 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2435 p->packets_lowat >= p->packets_hiwat) {
2436 return EINVAL;
2437 }
39236c6e 2438 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
0a7de745
A
2439 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2440 return EINVAL;
2441 }
2442 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2443 p->bytes_lowat >= p->bytes_hiwat) {
2444 return EINVAL;
2445 }
39236c6e 2446 if (p->interval_time != 0 &&
0a7de745 2447 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 2448 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 2449 }
39236c6e
A
2450 }
2451
0a7de745 2452 if (!locked) {
39236c6e 2453 lck_mtx_lock(&inp->input_lck);
0a7de745 2454 }
39236c6e 2455
5ba3f43e 2456 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
39236c6e
A
2457
2458 /*
2459 * Normally, we'd reset the parameters to the auto-tuned values
2460 * if the input thread detects a change in link rate. If the
2461 * driver provides its own parameters right after a link rate
2462 * change, but before the input thread gets to run, we want to
2463 * make sure to keep the driver's values. Clearing if_poll_update
2464 * will achieve that.
2465 */
0a7de745 2466 if (p != NULL && !locked && ifp->if_poll_update != 0) {
39236c6e 2467 ifp->if_poll_update = 0;
0a7de745 2468 }
316670eb 2469
39236c6e 2470 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
0a7de745 2471 sample_holdtime = 0; /* polling is disabled */
316670eb
A
2472 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2473 inp->rxpoll_blowat = 0;
2474 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2475 inp->rxpoll_bhiwat = (u_int32_t)-1;
39236c6e
A
2476 inp->rxpoll_plim = 0;
2477 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
316670eb 2478 } else {
39236c6e
A
2479 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2480 u_int64_t ival;
316670eb
A
2481 unsigned int n, i;
2482
39236c6e 2483 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
0a7de745 2484 if (inbw < rxpoll_tbl[i].speed) {
316670eb 2485 break;
0a7de745 2486 }
316670eb
A
2487 n = i;
2488 }
39236c6e
A
2489 /* auto-tune if caller didn't specify a value */
2490 plowat = ((p == NULL || p->packets_lowat == 0) ?
2491 rxpoll_tbl[n].plowat : p->packets_lowat);
2492 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2493 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2494 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2495 rxpoll_tbl[n].blowat : p->bytes_lowat);
2496 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2497 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2498 plim = ((p == NULL || p->packets_limit == 0) ?
2499 if_rxpoll_max : p->packets_limit);
2500 ival = ((p == NULL || p->interval_time == 0) ?
2501 if_rxpoll_interval_time : p->interval_time);
2502
2503 VERIFY(plowat != 0 && phiwat != 0);
2504 VERIFY(blowat != 0 && bhiwat != 0);
2505 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2506
316670eb
A
2507 sample_holdtime = if_rxpoll_sample_holdtime;
2508 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2509 inp->rxpoll_whiwat = if_rxpoll_whiwat;
39236c6e
A
2510 inp->rxpoll_plowat = plowat;
2511 inp->rxpoll_phiwat = phiwat;
2512 inp->rxpoll_blowat = blowat;
2513 inp->rxpoll_bhiwat = bhiwat;
2514 inp->rxpoll_plim = plim;
2515 inp->rxpoll_ival = ival;
316670eb
A
2516 }
2517
2518 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2519 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2520
2521 if (dlil_verbose) {
39236c6e
A
2522 printf("%s: speed %llu bps, sample per %llu nsec, "
2523 "poll interval %llu nsec, pkts per poll %u, "
2524 "pkt limits [%u/%u], wreq limits [%u/%u], "
2525 "bytes limits [%u/%u]\n", if_name(ifp),
2526 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2527 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2528 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
316670eb 2529 }
39236c6e 2530
0a7de745 2531 if (!locked) {
39236c6e 2532 lck_mtx_unlock(&inp->input_lck);
0a7de745 2533 }
39236c6e 2534
0a7de745 2535 return 0;
39236c6e
A
2536}
2537
2538/*
2539 * Must be called on an attached ifnet (caller is expected to check.)
2540 */
2541errno_t
2542dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2543{
2544 struct dlil_threading_info *inp;
2545
2546 VERIFY(ifp != NULL && p != NULL);
0a7de745
A
2547 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2548 return ENXIO;
2549 }
39236c6e 2550
0a7de745 2551 bzero(p, sizeof(*p));
39236c6e
A
2552
2553 lck_mtx_lock(&inp->input_lck);
2554 p->packets_limit = inp->rxpoll_plim;
2555 p->packets_lowat = inp->rxpoll_plowat;
2556 p->packets_hiwat = inp->rxpoll_phiwat;
2557 p->bytes_lowat = inp->rxpoll_blowat;
2558 p->bytes_hiwat = inp->rxpoll_bhiwat;
2559 p->interval_time = inp->rxpoll_ival;
2560 lck_mtx_unlock(&inp->input_lck);
2561
0a7de745 2562 return 0;
316670eb
A
2563}
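/*
 * Example (illustrative sketch): a driver that wants limits other
 * than the auto-tuned ones can push its own; ifnet_set_poll_params()
 * is assumed here to be the public wrapper that lands in
 * dlil_rxpoll_set_params() above.  Zero-valued fields keep their
 * auto-tuned values.
 */
#if 0 /* example only */
	struct ifnet_poll_params p;

	bzero(&p, sizeof (p));
	p.packets_lowat = 8;		/* leave polling below this */
	p.packets_hiwat = 512;		/* enter polling above this */
	p.interval_time = 1000 * 1000;	/* 1 ms poll cycle, in ns */
	(void) ifnet_set_poll_params(ifp, &p);
#endif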
2564
2565errno_t
2566ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2567 const struct ifnet_stat_increment_param *s)
2568{
0a7de745 2569 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
316670eb
A
2570}
2571
2572errno_t
2573ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2574 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2575{
0a7de745 2576 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
316670eb
A
2577}
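/*
 * Illustrative sketch: the extended variant is meant for drivers that
 * already know the chain's tail and exact counts, letting DLIL skip
 * walking the chain.  Stats are mandatory here, and packets_in must
 * equal the true packet count (asserted in ifnet_input_common below).
 */
#if 0 /* example only */
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = m_cnt;		/* exact number of packets */
	s.bytes_in = m_totlen;		/* may include link headers */
	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
#endif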
2578
2579static errno_t
2580ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2581 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2582{
5ba3f43e 2583 dlil_input_func input_func;
39037602 2584 struct ifnet_stat_increment_param _s;
316670eb 2585 u_int32_t m_cnt = 0, m_size = 0;
39037602
A
2586 struct mbuf *last;
2587 errno_t err = 0;
316670eb 2588
39236c6e 2589 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
0a7de745 2590 if (m_head != NULL) {
39236c6e 2591 mbuf_freem_list(m_head);
0a7de745
A
2592 }
2593 return EINVAL;
39236c6e
A
2594 }
2595
2596 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2597 VERIFY(m_tail == NULL || ext);
2598 VERIFY(s != NULL || !ext);
2599
316670eb
A
2600 /*
2601 * Drop the packet(s) if the parameters are invalid, or if the
2602 * interface is no longer attached; else hold an IO refcnt to
2603 * prevent it from being detached (will be released below.)
2604 */
39236c6e 2605 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
0a7de745 2606 if (m_head != NULL) {
316670eb 2607 mbuf_freem_list(m_head);
0a7de745
A
2608 }
2609 return EINVAL;
316670eb
A
2610 }
2611
5ba3f43e
A
2612 input_func = ifp->if_input_dlil;
2613 VERIFY(input_func != NULL);
39037602 2614
316670eb
A
2615 if (m_tail == NULL) {
2616 last = m_head;
39236c6e 2617 while (m_head != NULL) {
316670eb 2618#if IFNET_INPUT_SANITY_CHK
0a7de745 2619 if (dlil_input_sanity_check != 0) {
316670eb 2620 DLIL_INPUT_CHECK(last, ifp);
0a7de745 2621 }
316670eb
A
2622#endif /* IFNET_INPUT_SANITY_CHK */
2623 m_cnt++;
2624 m_size += m_length(last);
0a7de745 2625 if (mbuf_nextpkt(last) == NULL) {
316670eb 2626 break;
0a7de745 2627 }
316670eb
A
2628 last = mbuf_nextpkt(last);
2629 }
2630 m_tail = last;
2631 } else {
2632#if IFNET_INPUT_SANITY_CHK
2633 if (dlil_input_sanity_check != 0) {
2634 last = m_head;
2635 while (1) {
2636 DLIL_INPUT_CHECK(last, ifp);
2637 m_cnt++;
2638 m_size += m_length(last);
0a7de745 2639 if (mbuf_nextpkt(last) == NULL) {
316670eb 2640 break;
0a7de745 2641 }
316670eb
A
2642 last = mbuf_nextpkt(last);
2643 }
2644 } else {
2645 m_cnt = s->packets_in;
2646 m_size = s->bytes_in;
2647 last = m_tail;
2648 }
2649#else
2650 m_cnt = s->packets_in;
2651 m_size = s->bytes_in;
2652 last = m_tail;
2653#endif /* IFNET_INPUT_SANITY_CHK */
2654 }
2655
2656 if (last != m_tail) {
39236c6e
A
2657 panic_plain("%s: invalid input packet chain for %s, "
2658 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2659 m_tail, last);
316670eb
A
2660 }
2661
2662 /*
2663 * Assert packet count only for the extended variant, for backwards
2664 * compatibility, since this came directly from the device driver.
2665 * Relax this assertion for input bytes, as the driver may have
2666 * included the link-layer headers in the computation; hence
2667 * m_size is just an approximation.
2668 */
2669 if (ext && s->packets_in != m_cnt) {
39236c6e
A
2670 panic_plain("%s: input packet count mismatch for %s, "
2671 "%d instead of %d\n", __func__, if_name(ifp),
2672 s->packets_in, m_cnt);
316670eb
A
2673 }
2674
39037602 2675 if (s == NULL) {
0a7de745 2676 bzero(&_s, sizeof(_s));
39037602
A
2677 s = &_s;
2678 } else {
2679 _s = *s;
2680 }
2681 _s.packets_in = m_cnt;
2682 _s.bytes_in = m_size;
2683
5ba3f43e 2684 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
39037602
A
2685
2686 if (ifp != lo_ifp) {
2687 /* Release the IO refcnt */
2688 ifnet_decr_iorefcnt(ifp);
2689 }
2690
0a7de745 2691 return err;
39037602
A
2692}
2693
39037602
A
2694
2695errno_t
2696dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2697{
0a7de745 2698 return ifp->if_output(ifp, m);
39037602
A
2699}
2700
2701errno_t
2702dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2703 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2704 boolean_t poll, struct thread *tp)
2705{
2706 struct dlil_threading_info *inp;
2707 u_int32_t m_cnt = s->packets_in;
2708 u_int32_t m_size = s->bytes_in;
2709
0a7de745 2710 if ((inp = ifp->if_inp) == NULL) {
316670eb 2711 inp = dlil_main_input_thread;
0a7de745 2712 }
316670eb
A
2713
2714 /*
2715 * If there is a matching DLIL input thread associated with an
2716 * affinity set, associate this thread with the same set. We
2717 * will only do this once.
2718 */
2719 lck_mtx_lock_spin(&inp->input_lck);
39037602 2720 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
316670eb
A
2721 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2722 (poll && inp->poll_thr == THREAD_NULL))) {
2723 u_int32_t tag = inp->tag;
2724
2725 if (poll) {
2726 VERIFY(inp->poll_thr == THREAD_NULL);
2727 inp->poll_thr = tp;
2728 } else {
2729 VERIFY(inp->wloop_thr == THREAD_NULL);
2730 inp->wloop_thr = tp;
2731 }
2732 lck_mtx_unlock(&inp->input_lck);
2733
2734 /* Associate the current thread with the new affinity tag */
2735 (void) dlil_affinity_set(tp, tag);
2736
2737 /*
2738 * Take a reference on the current thread; during detach,
5ba3f43e 2739 * we will need to refer to it in order to tear down its
316670eb
A
2740 * affinity.
2741 */
2742 thread_reference(tp);
2743 lck_mtx_lock_spin(&inp->input_lck);
2744 }
2745
39236c6e
A
2746 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2747
39037602 2748 /*
316670eb
A
2749 * Because of looped-back multicast we cannot stuff the ifp in
2750 * the rcvif of the packet header: loopback (lo0) packets use a
2751 * dedicated list so that we can later associate them with lo_ifp
2752 * on their way up the stack. Packets for other interfaces without
2753 * dedicated input threads go to the regular list.
2754 */
39236c6e
A
2755 if (m_head != NULL) {
2756 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2757 struct dlil_main_threading_info *inpm =
2758 (struct dlil_main_threading_info *)inp;
2759 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2760 m_cnt, m_size);
2761 } else {
2762 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2763 m_cnt, m_size);
2764 }
316670eb
A
2765 }
2766
2767#if IFNET_INPUT_SANITY_CHK
2768 if (dlil_input_sanity_check != 0) {
2769 u_int32_t count;
2770 struct mbuf *m0;
2771
0a7de745 2772 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
316670eb 2773 count++;
0a7de745 2774 }
316670eb
A
2775
2776 if (count != m_cnt) {
39236c6e
A
2777 panic_plain("%s: invalid packet count %d "
2778 "(expected %d)\n", if_name(ifp),
316670eb
A
2779 count, m_cnt);
2780 /* NOTREACHED */
2781 }
2782
2783 inp->input_mbuf_cnt += m_cnt;
2784 }
2785#endif /* IFNET_INPUT_SANITY_CHK */
2786
39037602
A
2787 dlil_input_stats_add(s, inp, poll);
2788 /*
2789 * If we're using the main input thread, synchronize the
2790 * stats now since we have the interface context. All
2791 * other cases involving dedicated input threads will
2792 * have their stats synchronized there.
2793 */
0a7de745 2794 if (inp == dlil_main_input_thread) {
39037602 2795 dlil_input_stats_sync(ifp, inp);
0a7de745 2796 }
316670eb 2797
a39ff7e2
A
2798 if (inp->input_mit_tcall &&
2799 qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
5ba3f43e
A
2800 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2801 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2802 ifp->if_type == IFT_CELLULAR)
2803 ) {
2804 if (!thread_call_isactive(inp->input_mit_tcall)) {
2805 uint64_t deadline;
2806 clock_interval_to_deadline(dlil_rcv_mit_interval,
2807 1, &deadline);
2808 (void) thread_call_enter_delayed(
0a7de745 2809 inp->input_mit_tcall, deadline);
5ba3f43e
A
2810 }
2811 } else {
2812 inp->input_waiting |= DLIL_INPUT_WAITING;
2813 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2814 inp->wtot++;
2815 wakeup_one((caddr_t)&inp->input_waiting);
2816 }
316670eb
A
2817 }
2818 lck_mtx_unlock(&inp->input_lck);
2819
0a7de745 2820 return 0;
316670eb
A
2821}
2822
5ba3f43e 2823
39236c6e 2824static void
5c9f4661 2825ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
316670eb 2826{
0a7de745 2827 if (!(ifp->if_eflags & IFEF_TXSTART)) {
39236c6e 2828 return;
0a7de745 2829 }
316670eb 2830 /*
39236c6e
A
2831 * If the starter thread is inactive, signal it to do work,
2832 * unless the interface is being flow controlled from below,
2833 * e.g. a virtual interface being flow controlled by a real
5c9f4661
A
2834 * network interface beneath it, or it's been disabled via
2835 * a call to ifnet_disable_output().
316670eb
A
2836 */
2837 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e
A
2838 if (resetfc) {
2839 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2840 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2841 lck_mtx_unlock(&ifp->if_start_lock);
2842 return;
2843 }
316670eb 2844 ifp->if_start_req++;
3e170ce0
A
2845 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2846 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
39037602
A
2847 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2848 ifp->if_start_delayed == 0)) {
5ba3f43e
A
2849 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2850 ifp->if_start_thread);
316670eb
A
2851 }
2852 lck_mtx_unlock(&ifp->if_start_lock);
2853}
2854
39236c6e
A
2855void
2856ifnet_start(struct ifnet *ifp)
2857{
5c9f4661 2858 ifnet_start_common(ifp, FALSE);
39236c6e
A
2859}
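/*
 * Illustrative sketch: with the IFEF_TXSTART model the stack enqueues
 * into if_snd and calls ifnet_start() to kick the starter thread,
 * which then invokes the driver's if_start routine.  A driver also
 * calls ifnet_start() itself when transmit resources free up, e.g.
 * from a hypothetical TX-completion handler:
 */
#if 0 /* example only */
static void
example_tx_complete(ifnet_t ifp)	/* hypothetical */
{
	/* descriptors reclaimed; ask the starter thread for more work */
	ifnet_start(ifp);
}
#endif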
2860
316670eb
A
2861static void
2862ifnet_start_thread_fn(void *v, wait_result_t w)
2863{
2864#pragma unused(w)
2865 struct ifnet *ifp = v;
2866 char ifname[IFNAMSIZ + 1];
39037602 2867 char thread_name[MAXTHREADNAMESIZE];
316670eb
A
2868 struct timespec *ts = NULL;
2869 struct ifclassq *ifq = &ifp->if_snd;
3e170ce0 2870 struct timespec delay_start_ts;
316670eb 2871
39037602
A
2872 /* Construct the name for this thread, and then apply it. */
2873 bzero(thread_name, sizeof(thread_name));
0a7de745 2874 (void) snprintf(thread_name, sizeof(thread_name),
5ba3f43e 2875 "ifnet_start_%s", ifp->if_xname);
39037602
A
2876 thread_set_thread_name(ifp->if_start_thread, thread_name);
2877
316670eb
A
2878 /*
2879 * Treat the dedicated starter thread for lo0 as equivalent to
2880 * the driver workloop thread; if net_affinity is enabled for
2881 * the main input thread, associate this starter thread to it
2882 * by binding them with the same affinity tag. This is done
2883 * only once (as we only have one lo_ifp which never goes away.)
2884 */
2885 if (ifp == lo_ifp) {
2886 struct dlil_threading_info *inp = dlil_main_input_thread;
2887 struct thread *tp = current_thread();
2888
2889 lck_mtx_lock(&inp->input_lck);
2890 if (inp->net_affinity) {
2891 u_int32_t tag = inp->tag;
2892
2893 VERIFY(inp->wloop_thr == THREAD_NULL);
2894 VERIFY(inp->poll_thr == THREAD_NULL);
2895 inp->wloop_thr = tp;
2896 lck_mtx_unlock(&inp->input_lck);
2897
2898 /* Associate this thread with the affinity tag */
2899 (void) dlil_affinity_set(tp, tag);
2900 } else {
2901 lck_mtx_unlock(&inp->input_lck);
2902 }
2903 }
2904
0a7de745 2905 (void) snprintf(ifname, sizeof(ifname), "%s_starter", if_name(ifp));
316670eb
A
2906
2907 lck_mtx_lock_spin(&ifp->if_start_lock);
2908
2909 for (;;) {
5ba3f43e 2910 if (ifp->if_start_thread != NULL) {
39037602
A
2911 (void) msleep(&ifp->if_start_thread,
2912 &ifp->if_start_lock,
3e170ce0 2913 (PZERO - 1) | PSPIN, ifname, ts);
5ba3f43e 2914 }
316670eb
A
2915 /* interface is detached? */
2916 if (ifp->if_start_thread == THREAD_NULL) {
2917 ifnet_set_start_cycle(ifp, NULL);
2918 lck_mtx_unlock(&ifp->if_start_lock);
2919 ifnet_purge(ifp);
2920
2921 if (dlil_verbose) {
39236c6e
A
2922 printf("%s: starter thread terminated\n",
2923 if_name(ifp));
316670eb
A
2924 }
2925
2926 /* for the extra refcnt from kernel_thread_start() */
2927 thread_deallocate(current_thread());
2928 /* this is the end */
2929 thread_terminate(current_thread());
2930 /* NOTREACHED */
2931 return;
2932 }
2933
2934 ifp->if_start_active = 1;
3e170ce0 2935
316670eb
A
2936 for (;;) {
2937 u_int32_t req = ifp->if_start_req;
3e170ce0
A
2938 if (!IFCQ_IS_EMPTY(ifq) &&
2939 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2940 ifp->if_start_delayed == 0 &&
2941 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2942 (ifp->if_eflags & IFEF_DELAY_START)) {
2943 ifp->if_start_delayed = 1;
2944 ifnet_start_delayed++;
2945 break;
2946 } else {
2947 ifp->if_start_delayed = 0;
2948 }
316670eb 2949 lck_mtx_unlock(&ifp->if_start_lock);
3e170ce0
A
2950
2951 /*
2952 * If no longer attached, don't call start because ifp
2953 * is being destroyed; else hold an IO refcnt to
2954 * prevent the interface from being detached (will be
2955 * released below.)
2956 */
2957 if (!ifnet_is_attached(ifp, 1)) {
2958 lck_mtx_lock_spin(&ifp->if_start_lock);
2959 break;
2960 }
2961
316670eb
A
2962 /* invoke the driver's start routine */
2963 ((*ifp->if_start)(ifp));
3e170ce0
A
2964
2965 /*
2966 * Release the io ref count taken by ifnet_is_attached.
2967 */
2968 ifnet_decr_iorefcnt(ifp);
2969
316670eb
A
2970 lck_mtx_lock_spin(&ifp->if_start_lock);
2971
5c9f4661
A
2972 /*
2973 * If there's no pending request or if the
2974 * interface has been disabled, we're done.
2975 */
2976 if (req == ifp->if_start_req ||
2977 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
316670eb 2978 break;
5c9f4661 2979 }
316670eb 2980 }
3e170ce0 2981
316670eb
A
2982 ifp->if_start_req = 0;
2983 ifp->if_start_active = 0;
3e170ce0 2984
316670eb
A
2985 /*
2986 * Wake up N ns from now if rate-controlled by TBR, and if
2987 * there are still packets in the send queue which haven't
2988 * been dequeued so far; else sleep indefinitely (ts = NULL)
2989 * until ifnet_start() is called again.
2990 */
2991 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2992 &ifp->if_start_cycle : NULL);
2993
3e170ce0
A
2994 if (ts == NULL && ifp->if_start_delayed == 1) {
2995 delay_start_ts.tv_sec = 0;
2996 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2997 ts = &delay_start_ts;
2998 }
2999
0a7de745 3000 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
316670eb 3001 ts = NULL;
0a7de745 3002 }
316670eb
A
3003 }
3004
3005 /* NOTREACHED */
316670eb
A
3006}
3007
3008void
3009ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
3010{
0a7de745
A
3011 if (ts == NULL) {
3012 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
3013 } else {
316670eb 3014 *(&ifp->if_start_cycle) = *ts;
0a7de745 3015 }
316670eb 3016
0a7de745 3017 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
39236c6e
A
3018 printf("%s: restart interval set to %lu nsec\n",
3019 if_name(ifp), ts->tv_nsec);
0a7de745 3020 }
316670eb
A
3021}
3022
3023static void
3024ifnet_poll(struct ifnet *ifp)
3025{
3026 /*
3027 * If the poller thread is inactive, signal it to do work.
3028 */
3029 lck_mtx_lock_spin(&ifp->if_poll_lock);
3030 ifp->if_poll_req++;
3031 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
3032 wakeup_one((caddr_t)&ifp->if_poll_thread);
3033 }
3034 lck_mtx_unlock(&ifp->if_poll_lock);
3035}
3036
3037static void
3038ifnet_poll_thread_fn(void *v, wait_result_t w)
3039{
3040#pragma unused(w)
3041 struct dlil_threading_info *inp;
3042 struct ifnet *ifp = v;
3043 char ifname[IFNAMSIZ + 1];
3044 struct timespec *ts = NULL;
3045 struct ifnet_stat_increment_param s;
3046
0a7de745 3047 snprintf(ifname, sizeof(ifname), "%s_poller",
39236c6e 3048 if_name(ifp));
0a7de745 3049 bzero(&s, sizeof(s));
316670eb
A
3050
3051 lck_mtx_lock_spin(&ifp->if_poll_lock);
3052
3053 inp = ifp->if_inp;
3054 VERIFY(inp != NULL);
3055
3056 for (;;) {
3057 if (ifp->if_poll_thread != THREAD_NULL) {
3058 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
3059 (PZERO - 1) | PSPIN, ifname, ts);
3060 }
3061
3062 /* interface is detached (maybe while asleep)? */
3063 if (ifp->if_poll_thread == THREAD_NULL) {
3064 ifnet_set_poll_cycle(ifp, NULL);
3065 lck_mtx_unlock(&ifp->if_poll_lock);
3066
3067 if (dlil_verbose) {
39236c6e
A
3068 printf("%s: poller thread terminated\n",
3069 if_name(ifp));
316670eb
A
3070 }
3071
3072 /* for the extra refcnt from kernel_thread_start() */
3073 thread_deallocate(current_thread());
3074 /* this is the end */
3075 thread_terminate(current_thread());
3076 /* NOTREACHED */
3077 return;
3078 }
3079
3080 ifp->if_poll_active = 1;
3081 for (;;) {
3082 struct mbuf *m_head, *m_tail;
3083 u_int32_t m_lim, m_cnt, m_totlen;
3084 u_int16_t req = ifp->if_poll_req;
3085
3086 lck_mtx_unlock(&ifp->if_poll_lock);
3087
3088 /*
3089 * If no longer attached, there's nothing to do;
3090 * else hold an IO refcnt to prevent the interface
3091 * from being detached (will be released below.)
3092 */
db609669
A
3093 if (!ifnet_is_attached(ifp, 1)) {
3094 lck_mtx_lock_spin(&ifp->if_poll_lock);
316670eb 3095 break;
db609669 3096 }
316670eb 3097
39236c6e 3098 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
316670eb
A
3099 MAX((qlimit(&inp->rcvq_pkts)),
3100 (inp->rxpoll_phiwat << 2));
3101
3102 if (dlil_verbose > 1) {
39236c6e 3103 printf("%s: polling up to %d pkts, "
316670eb
A
3104 "pkts avg %d max %d, wreq avg %d, "
3105 "bytes avg %d\n",
39236c6e 3106 if_name(ifp), m_lim,
316670eb
A
3107 inp->rxpoll_pavg, inp->rxpoll_pmax,
3108 inp->rxpoll_wavg, inp->rxpoll_bavg);
3109 }
3110
3111 /* invoke the driver's input poll routine */
3112 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
0a7de745 3113 &m_cnt, &m_totlen));
316670eb
A
3114
3115 if (m_head != NULL) {
3116 VERIFY(m_tail != NULL && m_cnt > 0);
3117
3118 if (dlil_verbose > 1) {
39236c6e 3119 printf("%s: polled %d pkts, "
316670eb
A
3120 "pkts avg %d max %d, wreq avg %d, "
3121 "bytes avg %d\n",
39236c6e 3122 if_name(ifp), m_cnt,
316670eb
A
3123 inp->rxpoll_pavg, inp->rxpoll_pmax,
3124 inp->rxpoll_wavg, inp->rxpoll_bavg);
3125 }
3126
3127 /* stats are required for extended variant */
3128 s.packets_in = m_cnt;
3129 s.bytes_in = m_totlen;
3130
3131 (void) ifnet_input_common(ifp, m_head, m_tail,
3132 &s, TRUE, TRUE);
39236c6e
A
3133 } else {
3134 if (dlil_verbose > 1) {
3135 printf("%s: no packets, "
3136 "pkts avg %d max %d, wreq avg %d, "
3137 "bytes avg %d\n",
3138 if_name(ifp), inp->rxpoll_pavg,
3139 inp->rxpoll_pmax, inp->rxpoll_wavg,
3140 inp->rxpoll_bavg);
3141 }
3142
3143 (void) ifnet_input_common(ifp, NULL, NULL,
3144 NULL, FALSE, TRUE);
316670eb
A
3145 }
3146
3147 /* Release the io ref count */
3148 ifnet_decr_iorefcnt(ifp);
3149
3150 lck_mtx_lock_spin(&ifp->if_poll_lock);
3151
3152 /* if there's no pending request, we're done */
5c9f4661 3153 if (req == ifp->if_poll_req) {
316670eb 3154 break;
5c9f4661 3155 }
316670eb
A
3156 }
3157 ifp->if_poll_req = 0;
3158 ifp->if_poll_active = 0;
3159
3160 /*
3161 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3162 * until ifnet_poll() is called again.
3163 */
3164 ts = &ifp->if_poll_cycle;
0a7de745 3165 if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
316670eb 3166 ts = NULL;
0a7de745 3167 }
316670eb
A
3168 }
3169
3170 /* NOTREACHED */
316670eb
A
3171}
3172
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
	} else {
		*(&ifp->if_poll_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
3187
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
		if_qflush(ifp, 0);
	}
}
3195
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		return;
	}

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
					 ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}
3213
void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
			ifp->if_poll_update++;
		}
		break;

	default:
		break;
	}
}
3228
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
		ifp->if_output_sched_model = omodel;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}
3253
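/*
 * Illustrative sketch (not part of the original source): a driver that
 * performs its own prioritization could switch the send queue to the
 * driver-managed scheduling model.  `example_ifp' is a hypothetical
 * interface and IFNET_SCHED_MODEL_DRIVER_MANAGED is assumed to be one
 * of the models below IFNET_SCHED_MODEL_MAX checked above.
 */
#if 0
static void
example_select_sched_model(struct ifnet *example_ifp)
{
	errno_t err;

	err = ifnet_set_output_sched_model(example_ifp,
	    IFNET_SCHED_MODEL_DRIVER_MANAGED);
	if (err != 0) {
		printf("sched model change failed (err=%d)\n", err);
	}
}
#endif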
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return 0;
}
3267
errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return 0;
}
3281
errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);
	}

	return err;
}
316670eb 3298
errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL)) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
	}

	return err;
}
3316
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	if (maxqlen == 0) {
		maxqlen = if_rcvq_maxlen;
	} else if (maxqlen < IF_RCVQ_MINLEN) {
		maxqlen = IF_RCVQ_MINLEN;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}
3341
errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);
	return 0;
}
3359
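/*
 * Illustrative sketch (not part of the original source): sizing the
 * opportunistic-polling receive queue.  Passing 0 restores the system
 * default (if_rcvq_maxlen) and values below IF_RCVQ_MINLEN are clamped,
 * per ifnet_set_rcvq_maxlen() above.  `example_ifp' is a hypothetical
 * RXPOLL-capable interface.
 */
#if 0
static void
example_size_rcvq(struct ifnet *example_ifp)
{
	u_int32_t qlen = 0;

	(void) ifnet_set_rcvq_maxlen(example_ifp, 256);
	(void) ifnet_get_rcvq_maxlen(example_ifp, &qlen);
	printf("%s: rcvq limit now %u\n", if_name(example_ifp), qlen);
}
#endif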
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}
3377
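/*
 * Illustrative sketch (not part of the original source): a driver opting
 * in to start-callback coalescing.  Note the units implied by the code
 * above: delay_qlen is in packets (clamped to 100) and delay_timeout is
 * in microseconds (clamped to 20000, then converted to nanoseconds).
 * `example_ifp' is a hypothetical interface.
 */
#if 0
static void
example_enable_enqueue_multi(struct ifnet *example_ifp)
{
	/* delay start until ~16 packets or ~1 ms, whichever comes first */
	ifnet_enqueue_multi_setup(example_ifp, 16, 1000);
}
#endif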
3378static inline errno_t
3379ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
3380 boolean_t flush, boolean_t *pdrop)
316670eb 3381{
5ba3f43e
A
3382 volatile uint64_t *fg_ts = NULL;
3383 volatile uint64_t *rt_ts = NULL;
3384 struct mbuf *m = p;
3e170ce0 3385 struct timespec now;
5ba3f43e
A
3386 u_int64_t now_nsec = 0;
3387 int error = 0;
316670eb 3388
5ba3f43e
A
3389 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3390
3391 /*
3392 * If packet already carries a timestamp, either from dlil_output()
3393 * or from flowswitch, use it here. Otherwise, record timestamp.
3394 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3395 * the timestamp value is used internally there.
3396 */
3397 switch (ptype) {
3398 case QP_MBUF:
3399 ASSERT(m->m_flags & M_PKTHDR);
3400 ASSERT(m->m_nextpkt == NULL);
3401
3402 if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3403 m->m_pkthdr.pkt_timestamp == 0) {
3404 nanouptime(&now);
3405 net_timernsec(&now, &now_nsec);
3406 m->m_pkthdr.pkt_timestamp = now_nsec;
3407 }
3408 m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3409 /*
3410 * If the packet service class is not background,
3411 * update the timestamp to indicate recent activity
3412 * on a foreground socket.
3413 */
3414 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3415 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3416 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
3417 ifp->if_fg_sendts = _net_uptime;
0a7de745 3418 if (fg_ts != NULL) {
5ba3f43e 3419 *fg_ts = _net_uptime;
0a7de745 3420 }
5ba3f43e
A
3421 }
3422 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3423 ifp->if_rt_sendts = _net_uptime;
0a7de745 3424 if (rt_ts != NULL) {
5ba3f43e 3425 *rt_ts = _net_uptime;
0a7de745 3426 }
5ba3f43e
A
3427 }
3428 }
3429 break;
316670eb 3430
5ba3f43e
A
3431
3432 default:
3433 VERIFY(0);
3434 /* NOTREACHED */
3435 }
3e170ce0
A
3436
3437 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
5ba3f43e
A
3438 if (now_nsec == 0) {
3439 nanouptime(&now);
3440 net_timernsec(&now, &now_nsec);
3441 }
3e170ce0
A
3442 /*
 3443 * If the driver chose to delay the start callback for
 3444 * coalescing multiple packets, then use the following
 3445 * heuristics to make sure that the start callback will
 3446 * be delayed only when a bulk data transfer is detected:
 3447 * 1. The number of packets enqueued in (delay_win * 2) is
 3448 * greater than or equal to the delay qlen.
 3449 * 2. Once delay_start is enabled, it stays enabled for
 3450 * another 10 idle windows, to account for variable RTT
 3451 * and burst traffic.
 3452 * 3. If the time elapsed since the last enqueue exceeds
 3453 * 200ms, delaying the start callback is disabled, to
 3454 * take idle time into account.
 3455 */
3e170ce0
A
3456 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3457 if (ifp->if_start_delay_swin > 0) {
3458 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3459 ifp->if_start_delay_cnt++;
3460 } else if ((now_nsec - ifp->if_start_delay_swin)
3461 >= (200 * 1000 * 1000)) {
3462 ifp->if_start_delay_swin = now_nsec;
3463 ifp->if_start_delay_cnt = 1;
3464 ifp->if_start_delay_idle = 0;
3465 if (ifp->if_eflags & IFEF_DELAY_START) {
3466 ifp->if_eflags &=
3467 ~(IFEF_DELAY_START);
3468 ifnet_delay_start_disabled++;
3469 }
3470 } else {
3471 if (ifp->if_start_delay_cnt >=
3472 ifp->if_start_delay_qlen) {
3473 ifp->if_eflags |= IFEF_DELAY_START;
3474 ifp->if_start_delay_idle = 0;
3475 } else {
3476 if (ifp->if_start_delay_idle >= 10) {
3477 ifp->if_eflags &= ~(IFEF_DELAY_START);
3478 ifnet_delay_start_disabled++;
3479 } else {
3480 ifp->if_start_delay_idle++;
3481 }
39037602 3482 }
3e170ce0
A
3483 ifp->if_start_delay_swin = now_nsec;
3484 ifp->if_start_delay_cnt = 1;
3485 }
3486 } else {
3487 ifp->if_start_delay_swin = now_nsec;
3488 ifp->if_start_delay_cnt = 1;
3489 ifp->if_start_delay_idle = 0;
3490 ifp->if_eflags &= ~(IFEF_DELAY_START);
3491 }
3492 } else {
3493 ifp->if_eflags &= ~(IFEF_DELAY_START);
3494 }
3495
5ba3f43e
A
3496 switch (ptype) {
3497 case QP_MBUF:
3498 /* enqueue the packet (caller consumes object) */
3499 error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
3500 m = NULL;
3501 break;
3502
3503
3504 default:
3505 break;
3506 }
316670eb
A
3507
3508 /*
3509 * Tell the driver to start dequeueing; do this even when the queue
3510 * for the packet is suspended (EQSUSPENDED), as the driver could still
3511 * be dequeueing from other unsuspended queues.
3512 */
3e170ce0 3513 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
0a7de745 3514 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
316670eb 3515 ifnet_start(ifp);
0a7de745 3516 }
316670eb 3517
0a7de745 3518 return error;
316670eb
A
3519}
3520
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;
	return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
}
3527
errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL) {
			m_freem_list(m);
			*pdrop = TRUE;
		}
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return ENXIO;
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return ENETDOWN;
	}

	return ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop);
}
3553
3554
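/*
 * Illustrative sketch (not part of the original source): how a new-style
 * (IFEF_TXSTART) transmit path might hand a packet to the classq.  On
 * queue-full or suspension the classq flow-controls the caller via the
 * advisory mechanism; `pdrop' reports whether the packet was dropped.
 * `example_ifp' and the packet `m' are assumed to be valid.
 */
#if 0
static errno_t
example_tx(struct ifnet *example_ifp, struct mbuf *m)
{
	boolean_t pdrop = FALSE;
	errno_t err;

	/* flush == TRUE kicks ifnet_start() once the packet is queued */
	err = ifnet_enqueue_mbuf(example_ifp, m, TRUE, &pdrop);
	if (err != 0 && pdrop) {
		/* the mbuf has already been freed on our behalf */
	}
	return err;
}
#endif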
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || mp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    (void **)mp, NULL, NULL, NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return rc;
}
3577
errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
	    NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);
	return rc;
}
3601
errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || pkt_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
	    len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);
	return rc;
}
3625
errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || byte_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);
	return rc;
}
3648
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
	    (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);
	return rc;
}
3674
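/*
 * Illustrative sketch (not part of the original source): the shape of a
 * driver if_start routine that drains the send queue in batches using
 * ifnet_dequeue_multi().  `example_hw_tx' stands in for a hypothetical
 * hardware submit routine; real drivers also handle ring-full conditions
 * and requeueing.
 */
#if 0
static void
example_if_start(struct ifnet *example_ifp)
{
	struct mbuf *head, *tail;
	u_int32_t cnt, len;

	/* pull up to 32 packets per batch until the queue is empty */
	while (ifnet_dequeue_multi(example_ifp, 32, &head, &tail,
	    &cnt, &len) == 0) {
		example_hw_tx(example_ifp, head, cnt, len);
	}
}
#endif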
#if !CONFIG_EMBEDDED
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
#endif /* !CONFIG_EMBEDDED */

316670eb
A
3692static int
3693dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3694 char **frame_header_p, protocol_family_t protocol_family)
3695{
3696 struct ifnet_filter *filter;
3697
3698 /*
3699 * Pass the inbound packet to the interface filters
6d2010ae
A
3700 */
3701 lck_mtx_lock_spin(&ifp->if_flt_lock);
3702 /* prevent filter list from changing in case we drop the lock */
3703 if_flt_monitor_busy(ifp);
2d21ac55
A
3704 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3705 int result;
3706
6d2010ae
A
3707 if (!filter->filt_skip && filter->filt_input != NULL &&
3708 (filter->filt_protocol == 0 ||
3709 filter->filt_protocol == protocol_family)) {
3710 lck_mtx_unlock(&ifp->if_flt_lock);
3711
2d21ac55 3712 result = (*filter->filt_input)(filter->filt_cookie,
6d2010ae
A
3713 ifp, protocol_family, m_p, frame_header_p);
3714
3715 lck_mtx_lock_spin(&ifp->if_flt_lock);
2d21ac55 3716 if (result != 0) {
6d2010ae
A
3717 /* we're done with the filter list */
3718 if_flt_monitor_unbusy(ifp);
3719 lck_mtx_unlock(&ifp->if_flt_lock);
0a7de745 3720 return result;
2d21ac55
A
3721 }
3722 }
3723 }
6d2010ae
A
3724 /* we're done with the filter list */
3725 if_flt_monitor_unbusy(ifp);
3726 lck_mtx_unlock(&ifp->if_flt_lock);
b7266188
A
3727
3728 /*
6d2010ae 3729 * Strip away M_PROTO1 bit prior to sending packet up the stack as
b7266188
A
3730 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
3731 */
0a7de745 3732 if (*m_p != NULL) {
b7266188 3733 (*m_p)->m_flags &= ~M_PROTO1;
0a7de745 3734 }
b7266188 3735
0a7de745 3736 return 0;
1c79356b
A
3737}
3738
6d2010ae
A
3739static int
3740dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3741 protocol_family_t protocol_family)
3742{
3743 struct ifnet_filter *filter;
3744
3745 /*
3746 * Pass the outbound packet to the interface filters
3747 */
3748 lck_mtx_lock_spin(&ifp->if_flt_lock);
3749 /* prevent filter list from changing in case we drop the lock */
3750 if_flt_monitor_busy(ifp);
3751 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3752 int result;
3753
3754 if (!filter->filt_skip && filter->filt_output != NULL &&
3755 (filter->filt_protocol == 0 ||
3756 filter->filt_protocol == protocol_family)) {
3757 lck_mtx_unlock(&ifp->if_flt_lock);
3758
3759 result = filter->filt_output(filter->filt_cookie, ifp,
3760 protocol_family, m_p);
3761
3762 lck_mtx_lock_spin(&ifp->if_flt_lock);
3763 if (result != 0) {
3764 /* we're done with the filter list */
3765 if_flt_monitor_unbusy(ifp);
3766 lck_mtx_unlock(&ifp->if_flt_lock);
0a7de745 3767 return result;
6d2010ae
A
3768 }
3769 }
3770 }
3771 /* we're done with the filter list */
3772 if_flt_monitor_unbusy(ifp);
3773 lck_mtx_unlock(&ifp->if_flt_lock);
3774
0a7de745 3775 return 0;
6d2010ae
A
3776}
3777
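/*
 * Illustrative sketch (not part of the original source): an interface
 * filter whose output callback matches the dispatch above.  Returning 0
 * passes the packet along; EJUSTRETURN tells dlil the filter consumed
 * the packet; any other error causes dlil to free it.  The iff_filter
 * KPI is from <net/kpi_interfacefilter.h>; the names here are
 * hypothetical.
 */
#if 0
static errno_t
example_filt_output(void *cookie, ifnet_t ifp,
    protocol_family_t protocol, mbuf_t *data)
{
#pragma unused(cookie, ifp, protocol)
	/* inspect or rewrite *data here */
	return 0;	/* let the packet continue down to the driver */
}

static struct iff_filter example_filter = {
	.iff_name = "com.example.filter",
	.iff_output = example_filt_output,
};
#endif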
2d21ac55
A
3778static void
3779dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
1c79356b 3780{
2d21ac55 3781 int error;
1c79356b 3782
2d21ac55
A
3783 if (ifproto->proto_kpi == kProtoKPI_v1) {
3784 /* Version 1 protocols get one packet at a time */
3785 while (m != NULL) {
0a7de745
A
3786 char * frame_header;
3787 mbuf_t next_packet;
6d2010ae 3788
2d21ac55
A
3789 next_packet = m->m_nextpkt;
3790 m->m_nextpkt = NULL;
39236c6e
A
3791 frame_header = m->m_pkthdr.pkt_hdr;
3792 m->m_pkthdr.pkt_hdr = NULL;
6d2010ae
A
3793 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3794 ifproto->protocol_family, m, frame_header);
0a7de745 3795 if (error != 0 && error != EJUSTRETURN) {
2d21ac55 3796 m_freem(m);
0a7de745 3797 }
2d21ac55
A
3798 m = next_packet;
3799 }
6d2010ae 3800 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
2d21ac55
A
3801 /* Version 2 protocols support packet lists */
3802 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
6d2010ae 3803 ifproto->protocol_family, m);
0a7de745 3804 if (error != 0 && error != EJUSTRETURN) {
2d21ac55 3805 m_freem_list(m);
0a7de745 3806 }
91447636 3807 }
2d21ac55 3808}
1c79356b 3809
316670eb
A
3810static void
3811dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3812 struct dlil_threading_info *inp, boolean_t poll)
3813{
3814 struct ifnet_stat_increment_param *d = &inp->stats;
3815
0a7de745 3816 if (s->packets_in != 0) {
316670eb 3817 d->packets_in += s->packets_in;
0a7de745
A
3818 }
3819 if (s->bytes_in != 0) {
316670eb 3820 d->bytes_in += s->bytes_in;
0a7de745
A
3821 }
3822 if (s->errors_in != 0) {
316670eb 3823 d->errors_in += s->errors_in;
0a7de745 3824 }
316670eb 3825
0a7de745 3826 if (s->packets_out != 0) {
316670eb 3827 d->packets_out += s->packets_out;
0a7de745
A
3828 }
3829 if (s->bytes_out != 0) {
316670eb 3830 d->bytes_out += s->bytes_out;
0a7de745
A
3831 }
3832 if (s->errors_out != 0) {
316670eb 3833 d->errors_out += s->errors_out;
0a7de745 3834 }
316670eb 3835
0a7de745 3836 if (s->collisions != 0) {
316670eb 3837 d->collisions += s->collisions;
0a7de745
A
3838 }
3839 if (s->dropped != 0) {
316670eb 3840 d->dropped += s->dropped;
0a7de745 3841 }
316670eb 3842
0a7de745 3843 if (poll) {
316670eb 3844 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
0a7de745 3845 }
316670eb
A
3846}
3847
3848static void
3849dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3850{
3851 struct ifnet_stat_increment_param *s = &inp->stats;
3852
3853 /*
3854 * Use of atomic operations is unavoidable here because
3855 * these stats may also be incremented elsewhere via KPIs.
3856 */
3857 if (s->packets_in != 0) {
3858 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3859 s->packets_in = 0;
3860 }
3861 if (s->bytes_in != 0) {
3862 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3863 s->bytes_in = 0;
3864 }
3865 if (s->errors_in != 0) {
3866 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3867 s->errors_in = 0;
3868 }
3869
3870 if (s->packets_out != 0) {
3871 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3872 s->packets_out = 0;
3873 }
3874 if (s->bytes_out != 0) {
3875 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3876 s->bytes_out = 0;
3877 }
3878 if (s->errors_out != 0) {
3879 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3880 s->errors_out = 0;
3881 }
3882
3883 if (s->collisions != 0) {
3884 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3885 s->collisions = 0;
3886 }
3887 if (s->dropped != 0) {
3888 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3889 s->dropped = 0;
3890 }
39037602 3891
5ba3f43e 3892 if (ifp->if_data_threshold != 0) {
39037602 3893 lck_mtx_convert_spin(&inp->input_lck);
5ba3f43e 3894 ifnet_notify_data_threshold(ifp);
39236c6e 3895 }
5ba3f43e 3896
316670eb
A
3897 /*
3898 * No need for atomic operations as they are modified here
3899 * only from within the DLIL input thread context.
3900 */
3901 if (inp->tstats.packets != 0) {
3902 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3903 inp->tstats.packets = 0;
3904 }
3905 if (inp->tstats.bytes != 0) {
3906 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3907 inp->tstats.bytes = 0;
3908 }
3909}
3910
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE);
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
}
3924
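/*
 * Illustrative sketch (not part of the original source): the two
 * wrappers above are dlil-internal; a driver delivers its receive chain
 * through the ifnet_input() KPI, which eventually funnels into
 * dlil_input_packet_list_common().  `example_ifp' and the chain are
 * assumed valid; the stat increment is required when the driver does
 * not update interface counters itself.
 */
#if 0
static void
example_rx_deliver(ifnet_t example_ifp, mbuf_t head,
    u_int32_t npkts, u_int32_t nbytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof(s));
	s.packets_in = npkts;
	s.bytes_in = nbytes;
	(void) ifnet_input(example_ifp, head, &s);
}
#endif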
3925static void
3926dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3927 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
2d21ac55 3928{
d9a64523
A
3929 int error = 0;
3930 protocol_family_t protocol_family;
3931 mbuf_t next_packet;
0a7de745 3932 ifnet_t ifp = ifp_param;
d9a64523 3933 char *frame_header = NULL;
0a7de745 3934 struct if_proto *last_ifproto = NULL;
d9a64523
A
3935 mbuf_t pkt_first = NULL;
3936 mbuf_t *pkt_next = NULL;
3937 u_int32_t poll_thresh = 0, poll_ival = 0;
2d21ac55 3938
39037602 3939 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2d21ac55 3940
316670eb 3941 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
0a7de745 3942 (poll_ival = if_rxpoll_interval_pkts) > 0) {
316670eb 3943 poll_thresh = cnt;
0a7de745 3944 }
6d2010ae 3945
2d21ac55 3946 while (m != NULL) {
6d2010ae
A
3947 struct if_proto *ifproto = NULL;
3948 int iorefcnt = 0;
0a7de745 3949 uint32_t pktf_mask; /* pkt flags to preserve */
2d21ac55 3950
0a7de745 3951 if (ifp_param == NULL) {
2d21ac55 3952 ifp = m->m_pkthdr.rcvif;
0a7de745 3953 }
6d2010ae 3954
316670eb 3955 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
0a7de745 3956 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
316670eb 3957 ifnet_poll(ifp);
0a7de745 3958 }
316670eb 3959
6d2010ae 3960 /* Check if this mbuf looks valid */
316670eb 3961 MBUF_INPUT_CHECK(m, ifp);
6d2010ae
A
3962
3963 next_packet = m->m_nextpkt;
3964 m->m_nextpkt = NULL;
39236c6e
A
3965 frame_header = m->m_pkthdr.pkt_hdr;
3966 m->m_pkthdr.pkt_hdr = NULL;
2d21ac55 3967
316670eb
A
3968 /*
3969 * Get an IO reference count if the interface is not
3970 * loopback (lo0) and it is attached; lo0 never goes
3971 * away, so optimize for that.
6d2010ae
A
3972 */
3973 if (ifp != lo_ifp) {
3974 if (!ifnet_is_attached(ifp, 1)) {
3975 m_freem(m);
3976 goto next;
3977 }
3978 iorefcnt = 1;
5ba3f43e
A
3979 /*
3980 * Preserve the time stamp if it was set.
3981 */
3982 pktf_mask = PKTF_TS_VALID;
39236c6e
A
3983 } else {
3984 /*
3985 * If this arrived on lo0, preserve interface addr
3986 * info to allow for connectivity between loopback
3987 * and local interface addresses.
3988 */
0a7de745 3989 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
2d21ac55 3990 }
d41d1dae 3991
39236c6e
A
3992 /* make sure packet comes in clean */
3993 m_classifier_init(m, pktf_mask);
3994
316670eb 3995 ifp_inc_traffic_class_in(ifp, m);
d41d1dae 3996
2d21ac55 3997 /* find which protocol family this packet is for */
6d2010ae 3998 ifnet_lock_shared(ifp);
2d21ac55 3999 error = (*ifp->if_demux)(ifp, m, frame_header,
6d2010ae
A
4000 &protocol_family);
4001 ifnet_lock_done(ifp);
2d21ac55 4002 if (error != 0) {
0a7de745 4003 if (error == EJUSTRETURN) {
2d21ac55 4004 goto next;
0a7de745 4005 }
2d21ac55
A
4006 protocol_family = 0;
4007 }
6d2010ae 4008
d9a64523
A
4009 pktap_input(ifp, protocol_family, m, frame_header);
4010
4011 /* Drop v4 packets received on CLAT46 enabled interface */
4012 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4013 m_freem(m);
4014 ip6stat.ip6s_clat464_in_v4_drop++;
4015 goto next;
4016 }
4017
4018 /* Translate the packet if it is received on CLAT interface */
4019 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4020 && dlil_is_clat_needed(protocol_family, m)) {
4021 char *data = NULL;
4022 struct ether_header eh;
4023 struct ether_header *ehp = NULL;
4024
4025 if (ifp->if_type == IFT_ETHER) {
4026 ehp = (struct ether_header *)(void *)frame_header;
4027 /* Skip RX Ethernet packets if they are not IPV6 */
0a7de745 4028 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
d9a64523 4029 goto skip_clat;
0a7de745 4030 }
d9a64523
A
4031
4032 /* Keep a copy of frame_header for Ethernet packets */
4033 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4034 }
4035 error = dlil_clat64(ifp, &protocol_family, &m);
4036 data = (char *) mbuf_data(m);
4037 if (error != 0) {
4038 m_freem(m);
4039 ip6stat.ip6s_clat464_in_drop++;
4040 goto next;
4041 }
4042 /* Native v6 should be No-op */
0a7de745 4043 if (protocol_family != PF_INET) {
d9a64523 4044 goto skip_clat;
0a7de745 4045 }
d9a64523
A
4046
4047 /* Do this only for translated v4 packets. */
4048 switch (ifp->if_type) {
4049 case IFT_CELLULAR:
4050 frame_header = data;
4051 break;
4052 case IFT_ETHER:
4053 /*
4054 * Drop if the mbuf doesn't have enough
4055 * space for Ethernet header
4056 */
4057 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4058 m_free(m);
4059 ip6stat.ip6s_clat464_in_drop++;
4060 goto next;
4061 }
4062 /*
4063 * Set the frame_header ETHER_HDR_LEN bytes
 4064 * preceding the data pointer. Change
4065 * the ether_type too.
4066 */
4067 frame_header = data - ETHER_HDR_LEN;
4068 eh.ether_type = htons(ETHERTYPE_IP);
4069 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4070 break;
4071 }
4072 }
4073skip_clat:
39236c6e 4074 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
0a7de745 4075 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
39236c6e
A
4076 dlil_input_cksum_dbg(ifp, m, frame_header,
4077 protocol_family);
0a7de745 4078 }
39236c6e
A
4079
4080 /*
4081 * For partial checksum offload, we expect the driver to
4082 * set the start offset indicating the start of the span
4083 * that is covered by the hardware-computed checksum;
4084 * adjust this start offset accordingly because the data
4085 * pointer has been advanced beyond the link-layer header.
4086 *
4087 * Don't adjust if the interface is a bridge member, as
4088 * the adjustment will occur from the context of the
4089 * bridge interface during input.
4090 */
4091 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
4092 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4093 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4094 int adj;
39236c6e
A
4095 if (frame_header == NULL ||
4096 frame_header < (char *)mbuf_datastart(m) ||
4097 frame_header > (char *)m->m_data ||
4098 (adj = (m->m_data - frame_header)) >
4099 m->m_pkthdr.csum_rx_start) {
4100 m->m_pkthdr.csum_data = 0;
4101 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4102 hwcksum_in_invalidated++;
4103 } else {
4104 m->m_pkthdr.csum_rx_start -= adj;
4105 }
4106 }
4107
0a7de745 4108 if (clat_debug) {
d9a64523 4109 pktap_input(ifp, protocol_family, m, frame_header);
0a7de745 4110 }
316670eb 4111
0a7de745 4112 if (m->m_flags & (M_BCAST | M_MCAST)) {
6d2010ae 4113 atomic_add_64(&ifp->if_imcasts, 1);
0a7de745 4114 }
1c79356b 4115
2d21ac55
A
4116 /* run interface filters, exclude VLAN packets PR-3586856 */
4117 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
6d2010ae
A
4118 error = dlil_interface_filters_input(ifp, &m,
4119 &frame_header, protocol_family);
4120 if (error != 0) {
0a7de745 4121 if (error != EJUSTRETURN) {
2d21ac55 4122 m_freem(m);
0a7de745 4123 }
2d21ac55 4124 goto next;
91447636
A
4125 }
4126 }
39037602 4127 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
91447636 4128 m_freem(m);
2d21ac55 4129 goto next;
91447636 4130 }
6d2010ae 4131
2d21ac55
A
4132 /* Lookup the protocol attachment to this interface */
4133 if (protocol_family == 0) {
4134 ifproto = NULL;
6d2010ae
A
4135 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4136 (last_ifproto->protocol_family == protocol_family)) {
4137 VERIFY(ifproto == NULL);
2d21ac55 4138 ifproto = last_ifproto;
6d2010ae
A
4139 if_proto_ref(last_ifproto);
4140 } else {
4141 VERIFY(ifproto == NULL);
4142 ifnet_lock_shared(ifp);
4143 /* callee holds a proto refcnt upon success */
0a7de745 4144 ifproto = find_attached_proto(ifp, protocol_family);
6d2010ae 4145 ifnet_lock_done(ifp);
2d21ac55
A
4146 }
4147 if (ifproto == NULL) {
4148 /* no protocol for this packet, discard */
4149 m_freem(m);
4150 goto next;
4151 }
4152 if (ifproto != last_ifproto) {
2d21ac55
A
4153 if (last_ifproto != NULL) {
4154 /* pass up the list for the previous protocol */
2d21ac55
A
4155 dlil_ifproto_input(last_ifproto, pkt_first);
4156 pkt_first = NULL;
4157 if_proto_free(last_ifproto);
2d21ac55
A
4158 }
4159 last_ifproto = ifproto;
6d2010ae 4160 if_proto_ref(ifproto);
2d21ac55
A
4161 }
4162 /* extend the list */
39236c6e 4163 m->m_pkthdr.pkt_hdr = frame_header;
2d21ac55
A
4164 if (pkt_first == NULL) {
4165 pkt_first = m;
4166 } else {
4167 *pkt_next = m;
4168 }
4169 pkt_next = &m->m_nextpkt;
1c79356b 4170
6d2010ae 4171next:
2d21ac55
A
4172 if (next_packet == NULL && last_ifproto != NULL) {
4173 /* pass up the last list of packets */
2d21ac55
A
4174 dlil_ifproto_input(last_ifproto, pkt_first);
4175 if_proto_free(last_ifproto);
6d2010ae
A
4176 last_ifproto = NULL;
4177 }
4178 if (ifproto != NULL) {
4179 if_proto_free(ifproto);
4180 ifproto = NULL;
2d21ac55 4181 }
316670eb 4182
2d21ac55 4183 m = next_packet;
1c79356b 4184
6d2010ae 4185 /* update the driver's multicast filter, if needed */
0a7de745 4186 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
6d2010ae 4187 ifp->if_updatemcasts = 0;
0a7de745
A
4188 }
4189 if (iorefcnt == 1) {
6d2010ae 4190 ifnet_decr_iorefcnt(ifp);
0a7de745 4191 }
91447636 4192 }
6d2010ae 4193
39037602 4194 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b
A
4195}
4196
errno_t
if_mcasts_update(struct ifnet *ifp)
{
	errno_t err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT) {
		err = 0;
	}
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return 0;
}
4214
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	return kev_post_msg(event);
}
4229
__private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)
{
	struct kev_msg ev_msg;
	struct net_event_data ev_data;

	bzero(&ev_data, sizeof(ev_data));
	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
	ev_msg.event_code = KEV_DL_SIFFLAGS;
	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
	ev_data.if_family = ifp->if_family;
	ev_data.if_unit = (u_int32_t) ifp->if_unit;
	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
	ev_msg.dv[0].data_ptr = &ev_data;
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(ifp, &ev_msg);
}
4250
0a7de745 4251#define TMP_IF_PROTO_ARR_SIZE 10
91447636 4252static int
39037602 4253dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
1c79356b 4254{
a1c7dba1
A
4255 struct ifnet_filter *filter = NULL;
4256 struct if_proto *proto = NULL;
4257 int if_proto_count = 0;
4258 struct if_proto **tmp_ifproto_arr = NULL;
4259 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4260 int tmp_ifproto_arr_idx = 0;
4261 bool tmp_malloc = false;
6d2010ae 4262
6d2010ae
A
4263 /*
4264 * Pass the event to the interface filters
4265 */
4266 lck_mtx_lock_spin(&ifp->if_flt_lock);
4267 /* prevent filter list from changing in case we drop the lock */
4268 if_flt_monitor_busy(ifp);
4269 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4270 if (filter->filt_event != NULL) {
4271 lck_mtx_unlock(&ifp->if_flt_lock);
4272
4273 filter->filt_event(filter->filt_cookie, ifp,
4274 filter->filt_protocol, event);
4275
4276 lck_mtx_lock_spin(&ifp->if_flt_lock);
91447636 4277 }
6d2010ae
A
4278 }
4279 /* we're done with the filter list */
4280 if_flt_monitor_unbusy(ifp);
4281 lck_mtx_unlock(&ifp->if_flt_lock);
4282
3e170ce0 4283 /* Get an io ref count if the interface is attached */
0a7de745 4284 if (!ifnet_is_attached(ifp, 1)) {
3e170ce0 4285 goto done;
0a7de745 4286 }
3e170ce0 4287
a1c7dba1
A
4288 /*
4289 * An embedded tmp_list_entry in if_proto may still get
 4290 * overwritten by another thread after the ifnet lock is
 4291 * dropped; therefore we avoid embedded pointers here.
4292 */
6d2010ae 4293 ifnet_lock_shared(ifp);
a39ff7e2 4294 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
a1c7dba1 4295 if (if_proto_count) {
6d2010ae 4296 int i;
a1c7dba1
A
4297 VERIFY(ifp->if_proto_hash != NULL);
4298 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4299 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4300 } else {
4301 MALLOC(tmp_ifproto_arr, struct if_proto **,
0a7de745 4302 sizeof(*tmp_ifproto_arr) * if_proto_count,
a1c7dba1
A
4303 M_TEMP, M_ZERO);
4304 if (tmp_ifproto_arr == NULL) {
4305 ifnet_lock_done(ifp);
4306 goto cleanup;
4307 }
4308 tmp_malloc = true;
4309 }
6d2010ae
A
4310
4311 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6d2010ae
A
4312 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4313 next_hash) {
a1c7dba1
A
4314 if_proto_ref(proto);
4315 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4316 tmp_ifproto_arr_idx++;
91447636
A
4317 }
4318 }
a1c7dba1 4319 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
91447636 4320 }
6d2010ae
A
4321 ifnet_lock_done(ifp);
4322
a1c7dba1
A
4323 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4324 tmp_ifproto_arr_idx++) {
4325 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4326 VERIFY(proto != NULL);
4327 proto_media_event eventp =
4328 (proto->proto_kpi == kProtoKPI_v1 ?
4329 proto->kpi.v1.event :
4330 proto->kpi.v2.event);
4331
4332 if (eventp != NULL) {
4333 eventp(ifp, proto->protocol_family,
4334 event);
4335 }
4336 if_proto_free(proto);
4337 }
4338
39037602 4339cleanup:
a1c7dba1
A
4340 if (tmp_malloc) {
4341 FREE(tmp_ifproto_arr, M_TEMP);
4342 }
4343
6d2010ae 4344 /* Pass the event to the interface */
0a7de745 4345 if (ifp->if_event != NULL) {
6d2010ae 4346 ifp->if_event(ifp, event);
0a7de745 4347 }
6d2010ae
A
4348
4349 /* Release the io ref count */
4350 ifnet_decr_iorefcnt(ifp);
6d2010ae 4351done:
0a7de745 4352 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
1c79356b
A
4353}
4354
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL) {
		return EINVAL;
	}

	bzero(&kev_msg, sizeof(kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return result;
}
1c79356b 4378
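/*
 * Illustrative sketch (not part of the original source): a driver posting
 * a link-status event through ifnet_event(), which repackages it as a
 * kev_msg for dlil_event_internal() above.  Only the fields this function
 * actually consumes are filled in; the event carries no payload here.
 */
#if 0
static void
example_post_link_event(ifnet_t example_ifp, u_int32_t code)
{
	struct kern_event_msg ev;

	bzero(&ev, sizeof(ev));
	ev.total_size = KEV_MSG_HEADER_SIZE;	/* no event_data payload */
	ev.vendor_code = KEV_VENDOR_APPLE;
	ev.kev_class = KEV_NETWORK_CLASS;
	ev.kev_subclass = KEV_DL_SUBCLASS;
	ev.event_code = code;			/* e.g. KEV_DL_LINK_ON */
	(void) ifnet_event(example_ifp, &ev);
}
#endif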
2d21ac55
A
4379#if CONFIG_MACF_NET
4380#include <netinet/ip6.h>
4381#include <netinet/ip.h>
6d2010ae
A
4382static int
4383dlil_get_socket_type(struct mbuf **mp, int family, int raw)
2d21ac55
A
4384{
4385 struct mbuf *m;
4386 struct ip *ip;
4387 struct ip6_hdr *ip6;
4388 int type = SOCK_RAW;
4389
4390 if (!raw) {
4391 switch (family) {
4392 case PF_INET:
4393 m = m_pullup(*mp, sizeof(struct ip));
0a7de745 4394 if (m == NULL) {
2d21ac55 4395 break;
0a7de745 4396 }
2d21ac55
A
4397 *mp = m;
4398 ip = mtod(m, struct ip *);
0a7de745 4399 if (ip->ip_p == IPPROTO_TCP) {
2d21ac55 4400 type = SOCK_STREAM;
0a7de745 4401 } else if (ip->ip_p == IPPROTO_UDP) {
2d21ac55 4402 type = SOCK_DGRAM;
0a7de745 4403 }
2d21ac55
A
4404 break;
4405 case PF_INET6:
4406 m = m_pullup(*mp, sizeof(struct ip6_hdr));
0a7de745 4407 if (m == NULL) {
2d21ac55 4408 break;
0a7de745 4409 }
2d21ac55
A
4410 *mp = m;
4411 ip6 = mtod(m, struct ip6_hdr *);
0a7de745 4412 if (ip6->ip6_nxt == IPPROTO_TCP) {
2d21ac55 4413 type = SOCK_STREAM;
0a7de745 4414 } else if (ip6->ip6_nxt == IPPROTO_UDP) {
2d21ac55 4415 type = SOCK_DGRAM;
0a7de745 4416 }
2d21ac55
A
4417 break;
4418 }
4419 }
4420
0a7de745 4421 return type;
2d21ac55
A
4422}
4423#endif
4424
static void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_next;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}
4456
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 * application.
 */
6d2010ae
A
4474errno_t
4475dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
316670eb 4476 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
6d2010ae
A
4477{
4478 char *frame_type = NULL;
4479 char *dst_linkaddr = NULL;
4480 int retval = 0;
4481 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4482 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
0a7de745
A
4483 struct if_proto *proto = NULL;
4484 mbuf_t m = NULL;
4485 mbuf_t send_head = NULL;
4486 mbuf_t *send_tail = &send_head;
6d2010ae 4487 int iorefcnt = 0;
316670eb 4488 u_int32_t pre = 0, post = 0;
39236c6e
A
4489 u_int32_t fpkts = 0, fbytes = 0;
4490 int32_t flen = 0;
5ba3f43e
A
4491 struct timespec now;
4492 u_int64_t now_nsec;
d9a64523
A
4493 boolean_t did_clat46 = FALSE;
4494 protocol_family_t old_proto_family = proto_family;
4495 struct rtentry *rt = NULL;
6d2010ae 4496
39236c6e 4497 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
6d2010ae 4498
39037602
A
4499 /*
4500 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4501 * from happening while this operation is in progress
4502 */
6d2010ae
A
4503 if (!ifnet_is_attached(ifp, 1)) {
4504 retval = ENXIO;
4505 goto cleanup;
4506 }
4507 iorefcnt = 1;
4508
5ba3f43e 4509 VERIFY(ifp->if_output_dlil != NULL);
39037602 4510
6d2010ae 4511 /* update the driver's multicast filter, if needed */
0a7de745 4512 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
6d2010ae 4513 ifp->if_updatemcasts = 0;
0a7de745 4514 }
6d2010ae
A
4515
4516 frame_type = frame_type_buffer;
4517 dst_linkaddr = dst_linkaddr_buffer;
4518
91447636 4519 if (raw == 0) {
6d2010ae
A
4520 ifnet_lock_shared(ifp);
4521 /* callee holds a proto refcnt upon success */
91447636
A
4522 proto = find_attached_proto(ifp, proto_family);
4523 if (proto == NULL) {
6d2010ae 4524 ifnet_lock_done(ifp);
91447636
A
4525 retval = ENXIO;
4526 goto cleanup;
4527 }
6d2010ae 4528 ifnet_lock_done(ifp);
2d21ac55 4529 }
6d2010ae 4530
2d21ac55 4531preout_again:
0a7de745 4532 if (packetlist == NULL) {
2d21ac55 4533 goto cleanup;
0a7de745 4534 }
6d2010ae 4535
2d21ac55
A
4536 m = packetlist;
4537 packetlist = packetlist->m_nextpkt;
4538 m->m_nextpkt = NULL;
6d2010ae 4539
d9a64523
A
4540 /*
4541 * Perform address family translation for the first
4542 * packet outside the loop in order to perform address
4543 * lookup for the translated proto family.
4544 */
4545 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
4546 (ifp->if_type == IFT_CELLULAR ||
0a7de745 4547 dlil_is_clat_needed(proto_family, m))) {
d9a64523
A
4548 retval = dlil_clat46(ifp, &proto_family, &m);
4549 /*
4550 * Go to the next packet if translation fails
4551 */
4552 if (retval != 0) {
4553 m_freem(m);
4554 m = NULL;
4555 ip6stat.ip6s_clat464_out_drop++;
4556 /* Make sure that the proto family is PF_INET */
4557 ASSERT(proto_family == PF_INET);
4558 goto preout_again;
4559 }
4560 /*
4561 * Free the old one and make it point to the IPv6 proto structure.
4562 *
4563 * Change proto for the first time we have successfully
4564 * performed address family translation.
4565 */
4566 if (!did_clat46 && proto_family == PF_INET6) {
4567 struct sockaddr_in6 dest6;
4568 did_clat46 = TRUE;
4569
0a7de745 4570 if (proto != NULL) {
d9a64523 4571 if_proto_free(proto);
0a7de745 4572 }
d9a64523
A
4573 ifnet_lock_shared(ifp);
4574 /* callee holds a proto refcnt upon success */
4575 proto = find_attached_proto(ifp, proto_family);
4576 if (proto == NULL) {
4577 ifnet_lock_done(ifp);
4578 retval = ENXIO;
4579 m_freem(m);
4580 m = NULL;
4581 goto cleanup;
4582 }
4583 ifnet_lock_done(ifp);
4584 if (ifp->if_type == IFT_ETHER) {
4585 /* Update the dest to translated v6 address */
4586 dest6.sin6_len = sizeof(struct sockaddr_in6);
4587 dest6.sin6_family = AF_INET6;
4588 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
4589 dest = (const struct sockaddr *)&dest6;
4590
4591 /*
4592 * Lookup route to the translated destination
4593 * Free this route ref during cleanup
4594 */
4595 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
4596 0, 0, ifp->if_index);
4597
4598 route = rt;
4599 }
4600 }
4601 }
4602
4603 /*
 4604 * This path handles a packet chain going to the same destination.
 4605 * The pre-output routine is used to either trigger resolution of
 4606 * the next hop or retrieve the next hop's link-layer addressing;
 4607 * for example, the ether_inet(6)_pre_output routine.
 4608 *
 4609 * If the routine returns EJUSTRETURN, it implies that the packet
 4610 * has been queued, and therefore we have to call preout_again for
 4611 * the following packet in the chain.
 4612 *
 4613 * For errors other than EJUSTRETURN, the current packet is freed
 4614 * and the rest of the chain (pointed to by packetlist) is freed
 4615 * as part of cleanup.
 4616 *
 4617 * Otherwise, if there is no error, the retrieved information is
 4618 * used for all the packets in the chain.
4619 */
2d21ac55 4620 if (raw == 0) {
6d2010ae
A
4621 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4622 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
91447636 4623 retval = 0;
6d2010ae
A
4624 if (preoutp != NULL) {
4625 retval = preoutp(ifp, proto_family, &m, dest, route,
4626 frame_type, dst_linkaddr);
4627
4628 if (retval != 0) {
0a7de745 4629 if (retval == EJUSTRETURN) {
6d2010ae 4630 goto preout_again;
0a7de745 4631 }
6d2010ae 4632 m_freem(m);
d9a64523 4633 m = NULL;
6d2010ae 4634 goto cleanup;
91447636 4635 }
1c79356b 4636 }
1c79356b 4637 }
2d21ac55
A
4638
4639#if CONFIG_MACF_NET
4640 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4641 dlil_get_socket_type(&m, proto_family, raw));
39236c6e 4642 if (retval != 0) {
2d21ac55
A
4643 m_freem(m);
4644 goto cleanup;
4645 }
4646#endif
4647
4648 do {
d9a64523
A
4649 /*
4650 * Perform address family translation if needed.
4651 * For now we only support stateless 4 to 6 translation
4652 * on the out path.
4653 *
4654 * The routine below translates IP header, updates protocol
4655 * checksum and also translates ICMP.
4656 *
4657 * We skip the first packet as it is already translated and
4658 * the proto family is set to PF_INET6.
4659 */
4660 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
4661 (ifp->if_type == IFT_CELLULAR ||
0a7de745 4662 dlil_is_clat_needed(proto_family, m))) {
d9a64523 4663 retval = dlil_clat46(ifp, &proto_family, &m);
0a7de745 4664 /* Goto the next packet if the translation fails */
d9a64523
A
4665 if (retval != 0) {
4666 m_freem(m);
4667 m = NULL;
4668 ip6stat.ip6s_clat464_out_drop++;
4669 goto next;
4670 }
4671 }
4672
6d2010ae 4673#if CONFIG_DTRACE
316670eb 4674 if (!raw && proto_family == PF_INET) {
39037602
A
4675 struct ip *ip = mtod(m, struct ip *);
4676 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
0a7de745
A
4677 struct ip *, ip, struct ifnet *, ifp,
4678 struct ip *, ip, struct ip6_hdr *, NULL);
316670eb 4679 } else if (!raw && proto_family == PF_INET6) {
39037602
A
4680 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4681 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
0a7de745
A
4682 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4683 struct ip *, NULL, struct ip6_hdr *, ip6);
6d2010ae
A
4684 }
4685#endif /* CONFIG_DTRACE */
4686
39236c6e 4687 if (raw == 0 && ifp->if_framer != NULL) {
7e4a7d39
A
4688 int rcvif_set = 0;
4689
4690 /*
4691 * If this is a broadcast packet that needs to be
4692 * looped back into the system, set the inbound ifp
4693 * to that of the outbound ifp. This will allow
4694 * us to determine that it is a legitimate packet
4695 * for the system. Only set the ifp if it's not
4696 * already set, just to be safe.
4697 */
4698 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4699 m->m_pkthdr.rcvif == NULL) {
4700 m->m_pkthdr.rcvif = ifp;
4701 rcvif_set = 1;
4702 }
4703
6d2010ae 4704 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
39236c6e
A
4705 frame_type, &pre, &post);
4706 if (retval != 0) {
0a7de745 4707 if (retval != EJUSTRETURN) {
2d21ac55 4708 m_freem(m);
0a7de745 4709 }
2d21ac55 4710 goto next;
91447636 4711 }
7e4a7d39 4712
39236c6e
A
4713 /*
4714 * For partial checksum offload, adjust the start
4715 * and stuff offsets based on the prepended header.
4716 */
4717 if ((m->m_pkthdr.csum_flags &
4718 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4719 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4720 m->m_pkthdr.csum_tx_stuff += pre;
4721 m->m_pkthdr.csum_tx_start += pre;
4722 }
4723
0a7de745 4724 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
39236c6e
A
4725 dlil_output_cksum_dbg(ifp, m, pre,
4726 proto_family);
0a7de745 4727 }
39236c6e 4728
7e4a7d39
A
4729 /*
4730 * Clear the ifp if it was set above, and to be
4731 * safe, only if it is still the same as the
4732 * outbound ifp we have in context. If it was
4733 * looped back, then a copy of it was sent to the
4734 * loopback interface with the rcvif set, and we
4735 * are clearing the one that will go down to the
4736 * layer below.
4737 */
0a7de745 4738 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
7e4a7d39 4739 m->m_pkthdr.rcvif = NULL;
0a7de745 4740 }
91447636 4741 }
6d2010ae
A
4742
4743 /*
2d21ac55
A
4744 * Let interface filters (if any) do their thing ...
4745 */
4746 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4747 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
6d2010ae
A
4748 retval = dlil_interface_filters_output(ifp,
4749 &m, proto_family);
4750 if (retval != 0) {
0a7de745 4751 if (retval != EJUSTRETURN) {
6d2010ae 4752 m_freem(m);
0a7de745 4753 }
6d2010ae 4754 goto next;
1c79356b 4755 }
1c79356b 4756 }
b7266188 4757 /*
39236c6e
A
4758 * Strip away M_PROTO1 bit prior to sending packet
4759 * to the driver as this field may be used by the driver
b7266188
A
4760 */
4761 m->m_flags &= ~M_PROTO1;
4762
2d21ac55
A
4763 /*
4764 * If the underlying interface is not capable of handling a
4765 * packet whose data portion spans across physically disjoint
4766 * pages, we need to "normalize" the packet so that we pass
4767 * down a chain of mbufs where each mbuf points to a span that
4768 * resides in the system page boundary. If the packet does
4769 * not cross page(s), the following is a no-op.
4770 */
4771 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
0a7de745 4772 if ((m = m_normalize(m)) == NULL) {
2d21ac55 4773 goto next;
0a7de745 4774 }
2d21ac55
A
4775 }
4776
6d2010ae
A
4777 /*
 4778 * If this is a TSO packet, make sure the interface still
 4779 * advertises TSO capability.
b0d623f7 4780 */
39236c6e 4781 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
6d2010ae
A
4782 retval = EMSGSIZE;
4783 m_freem(m);
4784 goto cleanup;
b0d623f7
A
4785 }
4786
39236c6e
A
4787 ifp_inc_traffic_class_out(ifp, m);
4788 pktap_output(ifp, proto_family, m, pre, post);
6d2010ae 4789
3e170ce0
A
4790 /*
4791 * Count the number of elements in the mbuf chain
4792 */
4793 if (tx_chain_len_count) {
4794 dlil_count_chain_len(m, &tx_chain_len_stats);
4795 }
4796
5ba3f43e
A
4797 /*
4798 * Record timestamp; ifnet_enqueue() will use this info
4799 * rather than redoing the work. An optimization could
4800 * involve doing this just once at the top, if there are
4801 * no interface filters attached, but that's probably
4802 * not a big deal.
4803 */
4804 nanouptime(&now);
4805 net_timernsec(&now, &now_nsec);
4806 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
4807
4808 /*
4809 * Discard partial sum information if this packet originated
4810 * from another interface; the packet would already have the
4811 * final checksum and we shouldn't recompute it.
4812 */
4813 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
0a7de745
A
4814 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4815 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5ba3f43e
A
4816 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4817 m->m_pkthdr.csum_data = 0;
4818 }
4819
2d21ac55
A
4820 /*
4821 * Finally, call the driver.
4822 */
3e170ce0 4823 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
39236c6e
A
4824 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4825 flen += (m_pktlen(m) - (pre + post));
4826 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4827 }
2d21ac55
A
4828 *send_tail = m;
4829 send_tail = &m->m_nextpkt;
6d2010ae 4830 } else {
39236c6e
A
4831 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4832 flen = (m_pktlen(m) - (pre + post));
4833 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4834 } else {
4835 flen = 0;
4836 }
6d2010ae 4837 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
39236c6e 4838 0, 0, 0, 0, 0);
5ba3f43e 4839 retval = (*ifp->if_output_dlil)(ifp, m);
316670eb
A
4840 if (retval == EQFULL || retval == EQSUSPENDED) {
4841 if (adv != NULL && adv->code == FADV_SUCCESS) {
4842 adv->code = (retval == EQFULL ?
4843 FADV_FLOW_CONTROLLED :
4844 FADV_SUSPENDED);
4845 }
4846 retval = 0;
4847 }
39236c6e
A
4848 if (retval == 0 && flen > 0) {
4849 fbytes += flen;
4850 fpkts++;
4851 }
4852 if (retval != 0 && dlil_verbose) {
4853 printf("%s: output error on %s retval = %d\n",
4854 __func__, if_name(ifp),
6d2010ae 4855 retval);
2d21ac55 4856 }
6d2010ae 4857 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
39236c6e 4858 0, 0, 0, 0, 0);
2d21ac55 4859 }
39236c6e 4860 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
2d21ac55
A
4861
4862next:
4863 m = packetlist;
39236c6e 4864 if (m != NULL) {
2d21ac55
A
4865 packetlist = packetlist->m_nextpkt;
4866 m->m_nextpkt = NULL;
4867 }
d9a64523 4868 /* Reset the proto family to old proto family for CLAT */
0a7de745 4869 if (did_clat46) {
d9a64523 4870 proto_family = old_proto_family;
0a7de745 4871 }
39236c6e 4872 } while (m != NULL);
d41d1dae 4873
39236c6e 4874 if (send_head != NULL) {
39236c6e
A
4875 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4876 0, 0, 0, 0, 0);
3e170ce0 4877 if (ifp->if_eflags & IFEF_SENDLIST) {
5ba3f43e 4878 retval = (*ifp->if_output_dlil)(ifp, send_head);
3e170ce0
A
4879 if (retval == EQFULL || retval == EQSUSPENDED) {
4880 if (adv != NULL) {
4881 adv->code = (retval == EQFULL ?
4882 FADV_FLOW_CONTROLLED :
4883 FADV_SUSPENDED);
4884 }
4885 retval = 0;
4886 }
4887 if (retval == 0 && flen > 0) {
4888 fbytes += flen;
4889 fpkts++;
4890 }
4891 if (retval != 0 && dlil_verbose) {
4892 printf("%s: output error on %s retval = %d\n",
4893 __func__, if_name(ifp), retval);
4894 }
4895 } else {
4896 struct mbuf *send_m;
4897 int enq_cnt = 0;
4898 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4899 while (send_head != NULL) {
4900 send_m = send_head;
4901 send_head = send_m->m_nextpkt;
4902 send_m->m_nextpkt = NULL;
5ba3f43e 4903 retval = (*ifp->if_output_dlil)(ifp, send_m);
3e170ce0
A
4904 if (retval == EQFULL || retval == EQSUSPENDED) {
4905 if (adv != NULL) {
4906 adv->code = (retval == EQFULL ?
4907 FADV_FLOW_CONTROLLED :
4908 FADV_SUSPENDED);
4909 }
4910 retval = 0;
4911 }
4912 if (retval == 0) {
4913 enq_cnt++;
0a7de745 4914 if (flen > 0) {
3e170ce0 4915 fpkts++;
0a7de745 4916 }
3e170ce0
A
4917 }
4918 if (retval != 0 && dlil_verbose) {
39037602
A
4919 printf("%s: output error on %s "
4920 "retval = %d\n",
3e170ce0
A
4921 __func__, if_name(ifp), retval);
4922 }
4923 }
4924 if (enq_cnt > 0) {
4925 fbytes += flen;
4926 ifnet_start(ifp);
316670eb 4927 }
39236c6e
A
4928 }
4929 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b 4930 }
6d2010ae 4931
39236c6e 4932 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b 4933
91447636 4934cleanup:
0a7de745 4935 if (fbytes > 0) {
39236c6e 4936 ifp->if_fbytes += fbytes;
0a7de745
A
4937 }
4938 if (fpkts > 0) {
39236c6e 4939 ifp->if_fpackets += fpkts;
0a7de745
A
4940 }
4941 if (proto != NULL) {
6d2010ae 4942 if_proto_free(proto);
0a7de745
A
4943 }
4944 if (packetlist) { /* if any packets are left, clean up */
2d21ac55 4945 mbuf_freem_list(packetlist);
0a7de745
A
4946 }
4947 if (retval == EJUSTRETURN) {
91447636 4948 retval = 0;
0a7de745
A
4949 }
4950 if (iorefcnt == 1) {
6d2010ae 4951 ifnet_decr_iorefcnt(ifp);
0a7de745 4952 }
d9a64523
A
4953 if (rt != NULL) {
4954 rtfree(rt);
4955 rt = NULL;
4956 }
6d2010ae 4957
0a7de745 4958 return retval;
1c79356b
A
4959}
4960
d9a64523
A
4961/*
 4962 * This routine checks that the destination address is not a loopback, link-local,
4963 * multicast or broadcast address.
4964 */
4965static int
4966dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
4967{
4968 int ret = 0;
0a7de745 4969 switch (proto_family) {
d9a64523
A
4970 case PF_INET: {
4971 struct ip *iph = mtod(m, struct ip *);
0a7de745 4972 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
d9a64523 4973 ret = 1;
0a7de745 4974 }
d9a64523
A
4975 break;
4976 }
4977 case PF_INET6: {
4978 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
4979 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
0a7de745 4980 CLAT64_NEEDED(&ip6h->ip6_dst)) {
d9a64523 4981 ret = 1;
0a7de745 4982 }
d9a64523
A
4983 break;
4984 }
4985 }
4986
0a7de745 4987 return ret;
d9a64523
A
4988}
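
/*
 * Illustrative sketch (not part of the original source): how an output
 * path might combine dlil_is_clat_needed() with dlil_clat46() below.
 * The variables (ifp, proto_family, m) are assumed to be set up as in
 * dlil_output() above; error handling is reduced to a drop.
 */
#if 0	/* example only */
	if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
	    dlil_is_clat_needed(proto_family, m)) {
		/* may replace the mbuf and flips proto_family to PF_INET6 */
		if (dlil_clat46(ifp, &proto_family, &m) != 0) {
			m_freem(m);
			goto next;
		}
	}
#endif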
4989/*
 4990 * @brief This routine translates an IPv4 packet to IPv6,
 4991 * updates the protocol checksum and also translates the ICMP
 4992 * type/code along with the inner header.
4993 *
4994 * @param ifp Pointer to the interface
 4995 * @param proto_family pointer to protocol family. It is updated if the function
4996 * performs the translation successfully.
 4997 * @param m Pointer to the mbuf pointer for the packet. Needed because this
4998 * routine can end up changing the mbuf to a different one.
4999 *
5000 * @return 0 on success or else a negative value.
5001 */
5002static errno_t
5003dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5004{
5005 VERIFY(*proto_family == PF_INET);
5006 VERIFY(IS_INTF_CLAT46(ifp));
5007
5008 pbuf_t pbuf_store, *pbuf = NULL;
5009 struct ip *iph = NULL;
5010 struct in_addr osrc, odst;
5011 uint8_t proto = 0;
5012 struct in6_ifaddr *ia6_clat_src = NULL;
5013 struct in6_addr *src = NULL;
5014 struct in6_addr dst;
5015 int error = 0;
5016 uint32_t off = 0;
5017 uint64_t tot_len = 0;
5018 uint16_t ip_id_val = 0;
5019 uint16_t ip_frag_off = 0;
5020
5021 boolean_t is_frag = FALSE;
5022 boolean_t is_first_frag = TRUE;
5023 boolean_t is_last_frag = TRUE;
5024
5025 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5026 pbuf = &pbuf_store;
5027 iph = pbuf->pb_data;
5028
5029 osrc = iph->ip_src;
5030 odst = iph->ip_dst;
5031 proto = iph->ip_p;
5032 off = iph->ip_hl << 2;
5033 ip_id_val = iph->ip_id;
5034 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5035
5036 tot_len = ntohs(iph->ip_len);
5037
5038 /*
 5039 * For packets that are not first fragments
 5040 * we only need to adjust the checksum.
 5041 * For 4-to-6 translation, the fragmentation header gets
 5042 * appended after the protocol translation.
5043 */
5044 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5045 is_frag = TRUE;
5046
5047 /* If the offset is not zero, it is not first frag */
0a7de745 5048 if (ip_frag_off != 0) {
d9a64523 5049 is_first_frag = FALSE;
0a7de745 5050 }
d9a64523
A
5051
5052 /* If IP_MF is set, then it is not last frag */
0a7de745 5053 if (ntohs(iph->ip_off) & IP_MF) {
d9a64523 5054 is_last_frag = FALSE;
0a7de745 5055 }
d9a64523
A
5056 }
5057
5058 /*
 5059 * Retrieve the local IPv6 CLAT46 address reserved for stateless
5060 * translation.
5061 */
5062 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5063 if (ia6_clat_src == NULL) {
5064 ip6stat.ip6s_clat464_out_nov6addr_drop++;
5065 error = -1;
5066 goto cleanup;
5067 }
5068
5069 src = &ia6_clat_src->ia_addr.sin6_addr;
5070
5071 /*
5072 * Translate IPv4 destination to IPv6 destination by using the
5073 * prefixes learned through prior PLAT discovery.
5074 */
5075 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5076 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5077 goto cleanup;
5078 }
5079
5080 /* Translate the IP header part first */
5081 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5082 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5083
0a7de745 5084 iph = NULL; /* Invalidate iph as pbuf has been modified */
d9a64523
A
5085
5086 if (error != 0) {
5087 ip6stat.ip6s_clat464_out_46transfail_drop++;
5088 goto cleanup;
5089 }
5090
5091 /*
5092 * Translate protocol header, update checksum, checksum flags
5093 * and related fields.
5094 */
5095 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5096 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5097
5098 if (error != 0) {
5099 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5100 goto cleanup;
5101 }
5102
5103 /* Now insert the IPv6 fragment header */
5104 if (is_frag) {
5105 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5106
5107 if (error != 0) {
5108 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5109 goto cleanup;
5110 }
5111 }
5112
5113cleanup:
0a7de745 5114 if (ia6_clat_src != NULL) {
d9a64523 5115 IFA_REMREF(&ia6_clat_src->ia_ifa);
0a7de745 5116 }
d9a64523
A
5117
5118 if (pbuf_is_valid(pbuf)) {
5119 *m = pbuf->pb_mbuf;
5120 pbuf->pb_mbuf = NULL;
5121 pbuf_destroy(pbuf);
5122 } else {
5123 error = -1;
5124 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5125 }
5126
5127 if (error == 0) {
5128 *proto_family = PF_INET6;
5129 ip6stat.ip6s_clat464_out_success++;
5130 }
5131
0a7de745 5132 return error;
d9a64523
A
5133}
5134
5135/*
 5136 * @brief This routine translates an incoming IPv6 packet to IPv4,
 5137 * updates the protocol checksum and also translates the ICMPv6 outer
 5138 * and inner headers.
5139 *
5140 * @return 0 on success or else a negative value.
5141 */
5142static errno_t
5143dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5144{
5145 VERIFY(*proto_family == PF_INET6);
5146 VERIFY(IS_INTF_CLAT46(ifp));
5147
5148 struct ip6_hdr *ip6h = NULL;
5149 struct in6_addr osrc, odst;
5150 uint8_t proto = 0;
5151 struct in6_ifaddr *ia6_clat_dst = NULL;
5152 struct in_ifaddr *ia4_clat_dst = NULL;
5153 struct in_addr *dst = NULL;
5154 struct in_addr src;
5155 int error = 0;
5156 uint32_t off = 0;
5157 u_int64_t tot_len = 0;
5158 uint8_t tos = 0;
5159 boolean_t is_first_frag = TRUE;
5160
 5161 /* Drop the packet if the incoming mbuf does not contain a valid IPv6 header */
5162 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5163 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5164 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5165 ip6stat.ip6s_clat464_in_tooshort_drop++;
0a7de745 5166 return -1;
d9a64523
A
5167 }
5168
5169 ip6h = mtod(*m, struct ip6_hdr *);
5170 /* Validate that mbuf contains IP payload equal to ip6_plen */
5171 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5172 ip6stat.ip6s_clat464_in_tooshort_drop++;
0a7de745 5173 return -1;
d9a64523
A
5174 }
5175
5176 osrc = ip6h->ip6_src;
5177 odst = ip6h->ip6_dst;
5178
5179 /*
5180 * Retrieve the local CLAT46 reserved IPv6 address.
5181 * Let the packet pass if we don't find one, as the flag
5182 * may get set before IPv6 configuration has taken place.
5183 */
5184 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
0a7de745 5185 if (ia6_clat_dst == NULL) {
d9a64523 5186 goto done;
0a7de745 5187 }
d9a64523
A
5188
5189 /*
 5190 * Check if the original destination in the packet is the same as the reserved
5191 * CLAT46 IPv6 address
5192 */
5193 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5194 pbuf_t pbuf_store, *pbuf = NULL;
5195 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5196 pbuf = &pbuf_store;
5197
5198 /*
 5199 * Retrieve the local CLAT46 IPv4 address reserved for stateless
5200 * translation.
5201 */
5202 ia4_clat_dst = inifa_ifpclatv4(ifp);
5203 if (ia4_clat_dst == NULL) {
5204 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5205 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5206 error = -1;
5207 goto cleanup;
5208 }
5209 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5210
5211 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5212 dst = &ia4_clat_dst->ia_addr.sin_addr;
5213 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5214 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5215 error = -1;
5216 goto cleanup;
5217 }
5218
5219 ip6h = pbuf->pb_data;
5220 off = sizeof(struct ip6_hdr);
5221 proto = ip6h->ip6_nxt;
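		/*
		 * The IPv6 traffic class occupies bits 20-27 of the
		 * host-order ip6_flow word (version:4, tclass:8,
		 * flow label:20), hence the shift and mask below.
		 */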
5222 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5223 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5224
5225 /*
5226 * Translate the IP header and update the fragmentation
5227 * header if needed
5228 */
5229 error = (nat464_translate_64(pbuf, off, tos, &proto,
5230 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5231 0 : -1;
5232
5233 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5234
5235 if (error != 0) {
5236 ip6stat.ip6s_clat464_in_64transfail_drop++;
5237 goto cleanup;
5238 }
5239
5240 /*
5241 * Translate protocol header, update checksum, checksum flags
5242 * and related fields.
5243 */
5244 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5245 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5246 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5247
5248 if (error != 0) {
5249 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5250 goto cleanup;
5251 }
5252
5253cleanup:
0a7de745 5254 if (ia4_clat_dst != NULL) {
d9a64523 5255 IFA_REMREF(&ia4_clat_dst->ia_ifa);
0a7de745 5256 }
d9a64523
A
5257
5258 if (pbuf_is_valid(pbuf)) {
5259 *m = pbuf->pb_mbuf;
5260 pbuf->pb_mbuf = NULL;
5261 pbuf_destroy(pbuf);
5262 } else {
5263 error = -1;
5264 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5265 }
5266
5267 if (error == 0) {
5268 *proto_family = PF_INET;
5269 ip6stat.ip6s_clat464_in_success++;
5270 }
5271 } /* CLAT traffic */
5272
5273done:
0a7de745 5274 return error;
d9a64523
A
5275}
5276
2d21ac55 5277errno_t
6d2010ae
A
5278ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5279 void *ioctl_arg)
5280{
5281 struct ifnet_filter *filter;
5282 int retval = EOPNOTSUPP;
5283 int result = 0;
5284
0a7de745
A
5285 if (ifp == NULL || ioctl_code == 0) {
5286 return EINVAL;
5287 }
6d2010ae
A
5288
5289 /* Get an io ref count if the interface is attached */
0a7de745
A
5290 if (!ifnet_is_attached(ifp, 1)) {
5291 return EOPNOTSUPP;
5292 }
6d2010ae 5293
39037602
A
5294 /*
5295 * Run the interface filters first.
91447636
A
5296 * We want to run all filters before calling the protocol,
5297 * interface family, or interface.
5298 */
6d2010ae
A
5299 lck_mtx_lock_spin(&ifp->if_flt_lock);
5300 /* prevent filter list from changing in case we drop the lock */
5301 if_flt_monitor_busy(ifp);
91447636 5302 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
6d2010ae
A
5303 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5304 filter->filt_protocol == proto_fam)) {
5305 lck_mtx_unlock(&ifp->if_flt_lock);
5306
5307 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5308 proto_fam, ioctl_code, ioctl_arg);
5309
5310 lck_mtx_lock_spin(&ifp->if_flt_lock);
5311
91447636
A
5312 /* Only update retval if no one has handled the ioctl */
5313 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 5314 if (result == ENOTSUP) {
91447636 5315 result = EOPNOTSUPP;
0a7de745 5316 }
91447636 5317 retval = result;
6d2010ae
A
5318 if (retval != 0 && retval != EOPNOTSUPP) {
5319 /* we're done with the filter list */
5320 if_flt_monitor_unbusy(ifp);
5321 lck_mtx_unlock(&ifp->if_flt_lock);
91447636
A
5322 goto cleanup;
5323 }
5324 }
5325 }
5326 }
6d2010ae
A
5327 /* we're done with the filter list */
5328 if_flt_monitor_unbusy(ifp);
5329 lck_mtx_unlock(&ifp->if_flt_lock);
5330
91447636 5331 /* Allow the protocol to handle the ioctl */
6d2010ae 5332 if (proto_fam != 0) {
0a7de745 5333 struct if_proto *proto;
6d2010ae
A
5334
5335 /* callee holds a proto refcnt upon success */
5336 ifnet_lock_shared(ifp);
5337 proto = find_attached_proto(ifp, proto_fam);
5338 ifnet_lock_done(ifp);
5339 if (proto != NULL) {
5340 proto_media_ioctl ioctlp =
5341 (proto->proto_kpi == kProtoKPI_v1 ?
5342 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
91447636 5343 result = EOPNOTSUPP;
0a7de745 5344 if (ioctlp != NULL) {
6d2010ae
A
5345 result = ioctlp(ifp, proto_fam, ioctl_code,
5346 ioctl_arg);
0a7de745 5347 }
6d2010ae
A
5348 if_proto_free(proto);
5349
91447636
A
5350 /* Only update retval if no one has handled the ioctl */
5351 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 5352 if (result == ENOTSUP) {
91447636 5353 result = EOPNOTSUPP;
0a7de745 5354 }
91447636 5355 retval = result;
0a7de745 5356 if (retval && retval != EOPNOTSUPP) {
91447636 5357 goto cleanup;
0a7de745 5358 }
91447636
A
5359 }
5360 }
5361 }
6d2010ae 5362
91447636 5363 /* retval is either 0 or EOPNOTSUPP */
6d2010ae 5364
91447636
A
5365 /*
5366 * Let the interface handle this ioctl.
5367 * If it returns EOPNOTSUPP, ignore that, we may have
5368 * already handled this in the protocol or family.
5369 */
0a7de745 5370 if (ifp->if_ioctl) {
91447636 5371 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
0a7de745 5372 }
6d2010ae 5373
91447636
A
5374 /* Only update retval if no one has handled the ioctl */
5375 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
0a7de745 5376 if (result == ENOTSUP) {
91447636 5377 result = EOPNOTSUPP;
0a7de745 5378 }
91447636
A
5379 retval = result;
5380 if (retval && retval != EOPNOTSUPP) {
5381 goto cleanup;
5382 }
5383 }
1c79356b 5384
6d2010ae 5385cleanup:
0a7de745 5386 if (retval == EJUSTRETURN) {
91447636 5387 retval = 0;
0a7de745 5388 }
6d2010ae
A
5389
5390 ifnet_decr_iorefcnt(ifp);
5391
0a7de745 5392 return retval;
91447636 5393}
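
/*
 * Illustrative sketch (not part of the original source): a filter ioctl
 * handler cooperating with the dispatch logic above.  Returning
 * EOPNOTSUPP passes the ioctl on to the protocol and the interface,
 * EJUSTRETURN swallows it without error, and 0/errno marks it handled.
 * The handler name is hypothetical; the signature matches filt_ioctl
 * as invoked above.
 */
#if 0	/* example only */
static errno_t
example_filt_ioctl(void *cookie, ifnet_t ifp, protocol_family_t proto,
    unsigned long cmd, void *arg)
{
#pragma unused(cookie, ifp, proto, arg)
	switch (cmd) {
	case SIOCSIFMTU:
		/* observe MTU changes, but let lower layers act on them */
		return EOPNOTSUPP;
	default:
		return EOPNOTSUPP;
	}
}
#endif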
1c79356b 5394
91447636 5395__private_extern__ errno_t
6d2010ae 5396dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
91447636 5397{
0a7de745 5398 errno_t error = 0;
6d2010ae
A
5399
5400
5401 if (ifp->if_set_bpf_tap) {
5402 /* Get an io reference on the interface if it is attached */
0a7de745
A
5403 if (!ifnet_is_attached(ifp, 1)) {
5404 return ENXIO;
5405 }
91447636 5406 error = ifp->if_set_bpf_tap(ifp, mode, callback);
6d2010ae
A
5407 ifnet_decr_iorefcnt(ifp);
5408 }
0a7de745 5409 return error;
1c79356b
A
5410}
5411
2d21ac55 5412errno_t
6d2010ae
A
5413dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
5414 struct sockaddr *ll_addr, size_t ll_len)
1c79356b 5415{
0a7de745 5416 errno_t result = EOPNOTSUPP;
91447636
A
5417 struct if_proto *proto;
5418 const struct sockaddr *verify;
2d21ac55 5419 proto_media_resolve_multi resolvep;
6d2010ae 5420
0a7de745
A
5421 if (!ifnet_is_attached(ifp, 1)) {
5422 return result;
5423 }
6d2010ae 5424
91447636 5425 bzero(ll_addr, ll_len);
6d2010ae
A
5426
5427 /* Call the protocol first; callee holds a proto refcnt upon success */
5428 ifnet_lock_shared(ifp);
91447636 5429 proto = find_attached_proto(ifp, proto_addr->sa_family);
6d2010ae 5430 ifnet_lock_done(ifp);
2d21ac55 5431 if (proto != NULL) {
6d2010ae
A
5432 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
5433 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
0a7de745 5434 if (resolvep != NULL) {
6d2010ae 5435 result = resolvep(ifp, proto_addr,
39037602 5436 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
0a7de745 5437 }
6d2010ae 5438 if_proto_free(proto);
91447636 5439 }
6d2010ae 5440
91447636
A
5441 /* Let the interface verify the multicast address */
5442 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
0a7de745 5443 if (result == 0) {
91447636 5444 verify = ll_addr;
0a7de745 5445 } else {
91447636 5446 verify = proto_addr;
0a7de745 5447 }
91447636
A
5448 result = ifp->if_check_multi(ifp, verify);
5449 }
6d2010ae
A
5450
5451 ifnet_decr_iorefcnt(ifp);
0a7de745 5452 return result;
91447636 5453}
1c79356b 5454
91447636 5455__private_extern__ errno_t
6d2010ae 5456dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
39037602
A
5457 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5458 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
91447636
A
5459{
5460 struct if_proto *proto;
0a7de745 5461 errno_t result = 0;
6d2010ae
A
5462
5463 /* callee holds a proto refcnt upon success */
5464 ifnet_lock_shared(ifp);
91447636 5465 proto = find_attached_proto(ifp, target_proto->sa_family);
6d2010ae 5466 ifnet_lock_done(ifp);
2d21ac55 5467 if (proto == NULL) {
91447636 5468 result = ENOTSUP;
6d2010ae 5469 } else {
0a7de745 5470 proto_media_send_arp arpp;
6d2010ae
A
5471 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
5472 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
39236c6e 5473 if (arpp == NULL) {
2d21ac55 5474 result = ENOTSUP;
39236c6e
A
5475 } else {
5476 switch (arpop) {
5477 case ARPOP_REQUEST:
5478 arpstat.txrequests++;
0a7de745 5479 if (target_hw != NULL) {
39236c6e 5480 arpstat.txurequests++;
0a7de745 5481 }
39236c6e
A
5482 break;
5483 case ARPOP_REPLY:
5484 arpstat.txreplies++;
5485 break;
5486 }
6d2010ae
A
5487 result = arpp(ifp, arpop, sender_hw, sender_proto,
5488 target_hw, target_proto);
39236c6e 5489 }
6d2010ae 5490 if_proto_free(proto);
91447636 5491 }
6d2010ae 5492
0a7de745 5493 return result;
91447636 5494}
1c79356b 5495
39236c6e
A
5496struct net_thread_marks { };
5497static const struct net_thread_marks net_thread_marks_base = { };
5498
5499__private_extern__ const net_thread_marks_t net_thread_marks_none =
0a7de745 5500 &net_thread_marks_base;
39236c6e
A
5501
5502__private_extern__ net_thread_marks_t
5503net_thread_marks_push(u_int32_t push)
316670eb 5504{
39236c6e
A
5505 static const char *const base = (const void*)&net_thread_marks_base;
5506 u_int32_t pop = 0;
5507
5508 if (push != 0) {
5509 struct uthread *uth = get_bsdthread_info(current_thread());
5510
5511 pop = push & ~uth->uu_network_marks;
0a7de745 5512 if (pop != 0) {
39236c6e 5513 uth->uu_network_marks |= pop;
0a7de745 5514 }
39236c6e
A
5515 }
5516
0a7de745 5517 return (net_thread_marks_t)&base[pop];
316670eb
A
5518}
5519
39236c6e
A
5520__private_extern__ net_thread_marks_t
5521net_thread_unmarks_push(u_int32_t unpush)
316670eb 5522{
39236c6e
A
5523 static const char *const base = (const void*)&net_thread_marks_base;
5524 u_int32_t unpop = 0;
5525
5526 if (unpush != 0) {
5527 struct uthread *uth = get_bsdthread_info(current_thread());
316670eb 5528
39236c6e 5529 unpop = unpush & uth->uu_network_marks;
0a7de745 5530 if (unpop != 0) {
39236c6e 5531 uth->uu_network_marks &= ~unpop;
0a7de745 5532 }
39236c6e
A
5533 }
5534
0a7de745 5535 return (net_thread_marks_t)&base[unpop];
316670eb
A
5536}
5537
5538__private_extern__ void
39236c6e 5539net_thread_marks_pop(net_thread_marks_t popx)
316670eb 5540{
39236c6e 5541 static const char *const base = (const void*)&net_thread_marks_base;
3e170ce0 5542 const ptrdiff_t pop = (const char *)popx - (const char *)base;
316670eb 5543
39236c6e
A
5544 if (pop != 0) {
5545 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
5546 struct uthread *uth = get_bsdthread_info(current_thread());
5547
5548 VERIFY((pop & ones) == pop);
5549 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
5550 uth->uu_network_marks &= ~pop;
5551 }
5552}
5553
5554__private_extern__ void
5555net_thread_unmarks_pop(net_thread_marks_t unpopx)
5556{
5557 static const char *const base = (const void*)&net_thread_marks_base;
3e170ce0 5558 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
39236c6e
A
5559
5560 if (unpop != 0) {
5561 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
5562 struct uthread *uth = get_bsdthread_info(current_thread());
5563
5564 VERIFY((unpop & ones) == unpop);
5565 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
5566 uth->uu_network_marks |= unpop;
5567 }
5568}
5569
5570__private_extern__ u_int32_t
5571net_thread_is_marked(u_int32_t check)
5572{
5573 if (check != 0) {
5574 struct uthread *uth = get_bsdthread_info(current_thread());
0a7de745
A
5575 return uth->uu_network_marks & check;
5576 } else {
5577 return 0;
39236c6e 5578 }
39236c6e
A
5579}
5580
5581__private_extern__ u_int32_t
5582net_thread_is_unmarked(u_int32_t check)
5583{
5584 if (check != 0) {
5585 struct uthread *uth = get_bsdthread_info(current_thread());
0a7de745
A
5586 return ~uth->uu_network_marks & check;
5587 } else {
5588 return 0;
39236c6e 5589 }
316670eb
A
5590}
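
/*
 * Illustrative sketch (not part of the original source): the push/pop
 * pair records only the bits that were newly set, encoded as a byte
 * offset from net_thread_marks_base, so nested sections compose and
 * each pop restores exactly the pre-push state.  NET_THREAD_HELD_PF is
 * assumed to be one of the mark bits defined for this facility.
 */
#if 0	/* example only */
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
	/* ... code observed via net_thread_is_marked(NET_THREAD_HELD_PF) ... */
	net_thread_marks_pop(marks);
#endif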
5591
2d21ac55
A
5592static __inline__ int
5593_is_announcement(const struct sockaddr_in * sender_sin,
6d2010ae 5594 const struct sockaddr_in * target_sin)
2d21ac55
A
5595{
5596 if (sender_sin == NULL) {
0a7de745 5597 return FALSE;
2d21ac55 5598 }
0a7de745 5599 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
2d21ac55
A
5600}
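
/*
 * A sender protocol address equal to the target address is an ARP
 * "announcement" (gratuitous ARP): it advertises an address rather
 * than resolving one, so the IPv4LL logic in dlil_send_arp() below
 * keeps it on the originating interface only.
 */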
5601
91447636 5602__private_extern__ errno_t
39037602
A
5603dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
5604 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
5605 const struct sockaddr *target_proto0, u_int32_t rtflags)
91447636 5606{
0a7de745 5607 errno_t result = 0;
2d21ac55
A
5608 const struct sockaddr_in * sender_sin;
5609 const struct sockaddr_in * target_sin;
316670eb
A
5610 struct sockaddr_inarp target_proto_sinarp;
5611 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6d2010ae
A
5612
5613 if (target_proto == NULL || (sender_proto != NULL &&
0a7de745
A
5614 sender_proto->sa_family != target_proto->sa_family)) {
5615 return EINVAL;
5616 }
6d2010ae 5617
316670eb
A
5618 /*
5619 * If the target is a (default) router, provide that
5620 * information to the send_arp callback routine.
5621 */
5622 if (rtflags & RTF_ROUTER) {
5623 bcopy(target_proto, &target_proto_sinarp,
0a7de745 5624 sizeof(struct sockaddr_in));
316670eb
A
5625 target_proto_sinarp.sin_other |= SIN_ROUTER;
5626 target_proto = (struct sockaddr *)&target_proto_sinarp;
5627 }
5628
91447636
A
5629 /*
5630 * If this is an ARP request and the target IP is IPv4LL,
2d21ac55
A
5631 * send the request on all interfaces. The exception is
5632 * an announcement, which must only appear on the specific
5633 * interface.
91447636 5634 */
316670eb
A
5635 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
5636 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6d2010ae
A
5637 if (target_proto->sa_family == AF_INET &&
5638 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
5639 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
5640 !_is_announcement(target_sin, sender_sin)) {
0a7de745
A
5641 ifnet_t *ifp_list;
5642 u_int32_t count;
5643 u_int32_t ifp_on;
6d2010ae 5644
91447636
A
5645 result = ENOTSUP;
5646
5647 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
5648 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6d2010ae
A
5649 errno_t new_result;
5650 ifaddr_t source_hw = NULL;
5651 ifaddr_t source_ip = NULL;
5652 struct sockaddr_in source_ip_copy;
5653 struct ifnet *cur_ifp = ifp_list[ifp_on];
5654
91447636 5655 /*
6d2010ae
A
5656 * Only arp on interfaces marked for IPv4LL
5657 * ARPing. This may mean that we don't ARP on
5658 * the interface the subnet route points to.
91447636 5659 */
0a7de745 5660 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
91447636 5661 continue;
0a7de745 5662 }
b0d623f7 5663
91447636 5664 /* Find the source IP address */
6d2010ae
A
5665 ifnet_lock_shared(cur_ifp);
5666 source_hw = cur_ifp->if_lladdr;
5667 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
5668 ifa_link) {
5669 IFA_LOCK(source_ip);
5670 if (source_ip->ifa_addr != NULL &&
5671 source_ip->ifa_addr->sa_family ==
5672 AF_INET) {
5673 /* Copy the source IP address */
5674 source_ip_copy =
5675 *(struct sockaddr_in *)
316670eb 5676 (void *)source_ip->ifa_addr;
6d2010ae 5677 IFA_UNLOCK(source_ip);
91447636
A
5678 break;
5679 }
6d2010ae 5680 IFA_UNLOCK(source_ip);
91447636 5681 }
6d2010ae 5682
91447636
A
5683 /* No IP Source, don't arp */
5684 if (source_ip == NULL) {
6d2010ae 5685 ifnet_lock_done(cur_ifp);
91447636
A
5686 continue;
5687 }
6d2010ae
A
5688
5689 IFA_ADDREF(source_hw);
5690 ifnet_lock_done(cur_ifp);
5691
91447636 5692 /* Send the ARP */
6d2010ae 5693 new_result = dlil_send_arp_internal(cur_ifp,
316670eb
A
5694 arpop, (struct sockaddr_dl *)(void *)
5695 source_hw->ifa_addr,
6d2010ae
A
5696 (struct sockaddr *)&source_ip_copy, NULL,
5697 target_proto);
b0d623f7 5698
6d2010ae 5699 IFA_REMREF(source_hw);
91447636
A
5700 if (result == ENOTSUP) {
5701 result = new_result;
5702 }
5703 }
6d2010ae 5704 ifnet_list_free(ifp_list);
91447636 5705 }
6d2010ae
A
5706 } else {
5707 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
5708 sender_proto, target_hw, target_proto);
91447636 5709 }
6d2010ae 5710
0a7de745 5711 return result;
91447636 5712}
1c79356b 5713
6d2010ae
A
5714/*
5715 * Caller must hold ifnet head lock.
5716 */
5717static int
5718ifnet_lookup(struct ifnet *ifp)
91447636 5719{
6d2010ae
A
5720 struct ifnet *_ifp;
5721
5ba3f43e 5722 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
6d2010ae 5723 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
0a7de745 5724 if (_ifp == ifp) {
91447636 5725 break;
0a7de745 5726 }
6d2010ae 5727 }
0a7de745 5728 return _ifp != NULL;
91447636 5729}
39037602 5730
6d2010ae
A
5731/*
 5732 * Caller has to pass a non-zero refio argument to get an
5733 * IO reference count. This will prevent ifnet_detach from
39037602 5734 * being called when there are outstanding io reference counts.
91447636 5735 */
6d2010ae
A
5736int
5737ifnet_is_attached(struct ifnet *ifp, int refio)
5738{
5739 int ret;
5740
5741 lck_mtx_lock_spin(&ifp->if_ref_lock);
5ba3f43e 5742 if ((ret = IF_FULLY_ATTACHED(ifp))) {
0a7de745 5743 if (refio > 0) {
6d2010ae 5744 ifp->if_refio++;
0a7de745 5745 }
6d2010ae
A
5746 }
5747 lck_mtx_unlock(&ifp->if_ref_lock);
5748
0a7de745 5749 return ret;
6d2010ae
A
5750}
5751
39037602
A
5752/*
5753 * Caller must ensure the interface is attached; the assumption is that
5754 * there is at least an outstanding IO reference count held already.
5755 * Most callers would call ifnet_is_attached() instead.
5756 */
5757void
5758ifnet_incr_iorefcnt(struct ifnet *ifp)
5759{
5760 lck_mtx_lock_spin(&ifp->if_ref_lock);
5ba3f43e 5761 VERIFY(IF_FULLY_ATTACHED(ifp));
39037602
A
5762 VERIFY(ifp->if_refio > 0);
5763 ifp->if_refio++;
5764 lck_mtx_unlock(&ifp->if_ref_lock);
5765}
5766
6d2010ae
A
5767void
5768ifnet_decr_iorefcnt(struct ifnet *ifp)
5769{
5770 lck_mtx_lock_spin(&ifp->if_ref_lock);
5771 VERIFY(ifp->if_refio > 0);
5ba3f43e 5772 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6d2010ae
A
5773 ifp->if_refio--;
5774
39037602
A
5775 /*
 5776 * If there are no more outstanding IO references, wake up the
6d2010ae
A
 5777 * ifnet_detach thread if the detaching flag is set.
5778 */
0a7de745 5779 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
6d2010ae 5780 wakeup(&(ifp->if_refio));
0a7de745 5781 }
5ba3f43e 5782
6d2010ae
A
5783 lck_mtx_unlock(&ifp->if_ref_lock);
5784}
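
/*
 * Illustrative sketch (not part of the original source): the common
 * pattern used throughout this file for holding an IO reference across
 * a call that must not race with ifnet_detach().
 */
#if 0	/* example only */
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;		/* detached or detaching */
	}
	/* ... safe to use the interface and its callbacks here ... */
	ifnet_decr_iorefcnt(ifp);
#endif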
b0d623f7 5785
6d2010ae
A
5786static void
5787dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
5788{
5789 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
5790 ctrace_t *tr;
5791 u_int32_t idx;
5792 u_int16_t *cnt;
1c79356b 5793
6d2010ae
A
5794 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
5795 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
5796 /* NOTREACHED */
5797 }
5798
5799 if (refhold) {
5800 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
5801 tr = dl_if_dbg->dldbg_if_refhold;
5802 } else {
5803 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
5804 tr = dl_if_dbg->dldbg_if_refrele;
5805 }
5806
5807 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
5808 ctrace_record(&tr[idx]);
91447636 5809}
1c79356b 5810
6d2010ae
A
5811errno_t
5812dlil_if_ref(struct ifnet *ifp)
5813{
5814 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5815
0a7de745
A
5816 if (dl_if == NULL) {
5817 return EINVAL;
5818 }
6d2010ae
A
5819
5820 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5821 ++dl_if->dl_if_refcnt;
5822 if (dl_if->dl_if_refcnt == 0) {
5823 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
5824 /* NOTREACHED */
5825 }
0a7de745 5826 if (dl_if->dl_if_trace != NULL) {
6d2010ae 5827 (*dl_if->dl_if_trace)(dl_if, TRUE);
0a7de745 5828 }
6d2010ae
A
5829 lck_mtx_unlock(&dl_if->dl_if_lock);
5830
0a7de745 5831 return 0;
91447636 5832}
1c79356b 5833
6d2010ae
A
5834errno_t
5835dlil_if_free(struct ifnet *ifp)
5836{
5837 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5ba3f43e 5838 bool need_release = FALSE;
6d2010ae 5839
0a7de745
A
5840 if (dl_if == NULL) {
5841 return EINVAL;
5842 }
6d2010ae
A
5843
5844 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5ba3f43e
A
5845 switch (dl_if->dl_if_refcnt) {
5846 case 0:
6d2010ae
A
5847 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
5848 /* NOTREACHED */
5ba3f43e
A
5849 break;
5850 case 1:
5851 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
5852 need_release = TRUE;
5853 }
5854 break;
5855 default:
5856 break;
6d2010ae
A
5857 }
5858 --dl_if->dl_if_refcnt;
0a7de745 5859 if (dl_if->dl_if_trace != NULL) {
6d2010ae 5860 (*dl_if->dl_if_trace)(dl_if, FALSE);
0a7de745 5861 }
6d2010ae 5862 lck_mtx_unlock(&dl_if->dl_if_lock);
5ba3f43e
A
5863 if (need_release) {
5864 dlil_if_release(ifp);
5865 }
0a7de745 5866 return 0;
6d2010ae 5867}
1c79356b 5868
2d21ac55 5869static errno_t
6d2010ae 5870dlil_attach_protocol_internal(struct if_proto *proto,
5ba3f43e
A
5871 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
5872 uint32_t * proto_count)
91447636 5873{
6d2010ae 5874 struct kev_dl_proto_data ev_pr_data;
91447636
A
5875 struct ifnet *ifp = proto->ifp;
5876 int retval = 0;
b0d623f7 5877 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6d2010ae
A
5878 struct if_proto *prev_proto;
5879 struct if_proto *_proto;
5880
5881 /* callee holds a proto refcnt upon success */
5882 ifnet_lock_exclusive(ifp);
5883 _proto = find_attached_proto(ifp, proto->protocol_family);
5884 if (_proto != NULL) {
91447636 5885 ifnet_lock_done(ifp);
6d2010ae 5886 if_proto_free(_proto);
0a7de745 5887 return EEXIST;
91447636 5888 }
6d2010ae 5889
91447636
A
5890 /*
5891 * Call family module add_proto routine so it can refine the
5892 * demux descriptors as it wishes.
5893 */
6d2010ae
A
5894 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5895 demux_count);
91447636 5896 if (retval) {
6d2010ae 5897 ifnet_lock_done(ifp);
0a7de745 5898 return retval;
91447636 5899 }
6d2010ae 5900
91447636
A
5901 /*
5902 * Insert the protocol in the hash
5903 */
6d2010ae 5904 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
0a7de745 5905 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
6d2010ae 5906 prev_proto = SLIST_NEXT(prev_proto, next_hash);
0a7de745
A
5907 }
5908 if (prev_proto) {
6d2010ae 5909 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
0a7de745 5910 } else {
6d2010ae
A
5911 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5912 proto, next_hash);
0a7de745 5913 }
6d2010ae
A
5914
5915 /* hold a proto refcnt for attach */
5916 if_proto_ref(proto);
1c79356b 5917
91447636 5918 /*
6d2010ae
A
 5919 * The reserved field carries the number of protocols still attached
5920 * (subject to change)
91447636 5921 */
91447636 5922 ev_pr_data.proto_family = proto->protocol_family;
a39ff7e2
A
5923 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
5924
6d2010ae
A
5925 ifnet_lock_done(ifp);
5926
5927 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5928 (struct net_event_data *)&ev_pr_data,
0a7de745 5929 sizeof(struct kev_dl_proto_data));
5ba3f43e
A
5930 if (proto_count != NULL) {
5931 *proto_count = ev_pr_data.proto_remaining_count;
5932 }
0a7de745 5933 return retval;
91447636 5934}
0b4e3aa0 5935
2d21ac55
A
5936errno_t
5937ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 5938 const struct ifnet_attach_proto_param *proto_details)
91447636
A
5939{
5940 int retval = 0;
5941 struct if_proto *ifproto = NULL;
5ba3f43e 5942 uint32_t proto_count = 0;
6d2010ae
A
5943
5944 ifnet_head_lock_shared();
5945 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5946 retval = EINVAL;
5947 goto end;
5948 }
5949 /* Check that the interface is in the global list */
5950 if (!ifnet_lookup(ifp)) {
5951 retval = ENXIO;
5952 goto end;
5953 }
5954
5955 ifproto = zalloc(dlif_proto_zone);
5956 if (ifproto == NULL) {
91447636
A
5957 retval = ENOMEM;
5958 goto end;
5959 }
6d2010ae
A
5960 bzero(ifproto, dlif_proto_size);
5961
5962 /* refcnt held above during lookup */
91447636
A
5963 ifproto->ifp = ifp;
5964 ifproto->protocol_family = protocol;
5965 ifproto->proto_kpi = kProtoKPI_v1;
5966 ifproto->kpi.v1.input = proto_details->input;
5967 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5968 ifproto->kpi.v1.event = proto_details->event;
5969 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5970 ifproto->kpi.v1.detached = proto_details->detached;
5971 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5972 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6d2010ae 5973
2d21ac55 5974 retval = dlil_attach_protocol_internal(ifproto,
0a7de745
A
5975 proto_details->demux_list, proto_details->demux_count,
5976 &proto_count);
6d2010ae 5977
9bccf70c 5978end:
6d2010ae 5979 if (retval != 0 && retval != EEXIST && ifp != NULL) {
39236c6e
A
5980 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5981 if_name(ifp), protocol, retval);
5ba3f43e
A
5982 } else {
5983 if (dlil_verbose) {
5984 printf("%s: attached v1 protocol %d (count = %d)\n",
0a7de745
A
5985 if_name(ifp),
5986 protocol, proto_count);
5ba3f43e 5987 }
6d2010ae
A
5988 }
5989 ifnet_head_done();
5ba3f43e 5990 if (retval == 0) {
a39ff7e2
A
5991 /*
5992 * A protocol has been attached, mark the interface up.
5993 * This used to be done by configd.KernelEventMonitor, but that
5994 * is inherently prone to races (rdar://problem/30810208).
5995 */
5996 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
5997 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
5998 dlil_post_sifflags_msg(ifp);
5ba3f43e 5999 } else if (ifproto != NULL) {
6d2010ae 6000 zfree(dlif_proto_zone, ifproto);
5ba3f43e 6001 }
0a7de745 6002 return retval;
1c79356b
A
6003}
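
/*
 * Illustrative sketch (not part of the original source): a minimal v1
 * protocol attach.  The family value and handler are hypothetical, and
 * a real client would normally also supply demux descriptors.
 */
#if 0	/* example only */
static errno_t
example_proto_input(ifnet_t ifp, protocol_family_t protocol,
    mbuf_t packet, char *header)
{
#pragma unused(ifp, protocol, header)
	mbuf_freem(packet);	/* consume and drop in this sketch */
	return 0;
}

static errno_t
example_attach(ifnet_t ifp)
{
	struct ifnet_attach_proto_param proto;

	bzero(&proto, sizeof(proto));
	proto.input = example_proto_input;
	return ifnet_attach_protocol(ifp,
	    (protocol_family_t)1234 /* hypothetical family */, &proto);
}
#endif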
6004
2d21ac55
A
6005errno_t
6006ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 6007 const struct ifnet_attach_proto_param_v2 *proto_details)
91447636 6008{
2d21ac55 6009 int retval = 0;
91447636 6010 struct if_proto *ifproto = NULL;
5ba3f43e 6011 uint32_t proto_count = 0;
6d2010ae
A
6012
6013 ifnet_head_lock_shared();
6014 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6015 retval = EINVAL;
6016 goto end;
6017 }
6018 /* Check that the interface is in the global list */
6019 if (!ifnet_lookup(ifp)) {
6020 retval = ENXIO;
6021 goto end;
6022 }
6023
6024 ifproto = zalloc(dlif_proto_zone);
6025 if (ifproto == NULL) {
91447636
A
6026 retval = ENOMEM;
6027 goto end;
6028 }
2d21ac55 6029 bzero(ifproto, sizeof(*ifproto));
6d2010ae
A
6030
6031 /* refcnt held above during lookup */
2d21ac55
A
6032 ifproto->ifp = ifp;
6033 ifproto->protocol_family = protocol;
6034 ifproto->proto_kpi = kProtoKPI_v2;
6035 ifproto->kpi.v2.input = proto_details->input;
6036 ifproto->kpi.v2.pre_output = proto_details->pre_output;
6037 ifproto->kpi.v2.event = proto_details->event;
6038 ifproto->kpi.v2.ioctl = proto_details->ioctl;
6039 ifproto->kpi.v2.detached = proto_details->detached;
6040 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
6041 ifproto->kpi.v2.send_arp = proto_details->send_arp;
1c79356b 6042
6d2010ae 6043 retval = dlil_attach_protocol_internal(ifproto,
0a7de745
A
6044 proto_details->demux_list, proto_details->demux_count,
6045 &proto_count);
6d2010ae
A
6046
6047end:
6048 if (retval != 0 && retval != EEXIST && ifp != NULL) {
39236c6e
A
6049 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
6050 if_name(ifp), protocol, retval);
5ba3f43e
A
6051 } else {
6052 if (dlil_verbose) {
6053 printf("%s: attached v2 protocol %d (count = %d)\n",
0a7de745
A
6054 if_name(ifp),
6055 protocol, proto_count);
5ba3f43e 6056 }
2d21ac55 6057 }
6d2010ae 6058 ifnet_head_done();
5ba3f43e 6059 if (retval == 0) {
a39ff7e2
A
6060 /*
6061 * A protocol has been attached, mark the interface up.
6062 * This used to be done by configd.KernelEventMonitor, but that
6063 * is inherently prone to races (rdar://problem/30810208).
6064 */
6065 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6066 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6067 dlil_post_sifflags_msg(ifp);
5ba3f43e 6068 } else if (ifproto != NULL) {
6d2010ae 6069 zfree(dlif_proto_zone, ifproto);
5ba3f43e 6070 }
0a7de745 6071 return retval;
91447636 6072}
1c79356b 6073
2d21ac55
A
6074errno_t
6075ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
91447636
A
6076{
6077 struct if_proto *proto = NULL;
0a7de745 6078 int retval = 0;
6d2010ae
A
6079
6080 if (ifp == NULL || proto_family == 0) {
6081 retval = EINVAL;
91447636
A
6082 goto end;
6083 }
6d2010ae
A
6084
6085 ifnet_lock_exclusive(ifp);
6086 /* callee holds a proto refcnt upon success */
91447636 6087 proto = find_attached_proto(ifp, proto_family);
91447636
A
6088 if (proto == NULL) {
6089 retval = ENXIO;
6d2010ae 6090 ifnet_lock_done(ifp);
91447636
A
6091 goto end;
6092 }
6d2010ae
A
6093
6094 /* call family module del_proto */
0a7de745 6095 if (ifp->if_del_proto) {
91447636 6096 ifp->if_del_proto(ifp, proto->protocol_family);
0a7de745 6097 }
1c79356b 6098
6d2010ae
A
6099 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
6100 proto, if_proto, next_hash);
6101
6102 if (proto->proto_kpi == kProtoKPI_v1) {
6103 proto->kpi.v1.input = ifproto_media_input_v1;
39037602 6104 proto->kpi.v1.pre_output = ifproto_media_preout;
6d2010ae
A
6105 proto->kpi.v1.event = ifproto_media_event;
6106 proto->kpi.v1.ioctl = ifproto_media_ioctl;
6107 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
6108 proto->kpi.v1.send_arp = ifproto_media_send_arp;
6109 } else {
6110 proto->kpi.v2.input = ifproto_media_input_v2;
6111 proto->kpi.v2.pre_output = ifproto_media_preout;
6112 proto->kpi.v2.event = ifproto_media_event;
6113 proto->kpi.v2.ioctl = ifproto_media_ioctl;
6114 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
6115 proto->kpi.v2.send_arp = ifproto_media_send_arp;
6116 }
6117 proto->detached = 1;
6118 ifnet_lock_done(ifp);
6119
6120 if (dlil_verbose) {
39236c6e
A
6121 printf("%s: detached %s protocol %d\n", if_name(ifp),
6122 (proto->proto_kpi == kProtoKPI_v1) ?
6d2010ae
A
6123 "v1" : "v2", proto_family);
6124 }
6125
6126 /* release proto refcnt held during protocol attach */
6127 if_proto_free(proto);
91447636
A
6128
6129 /*
6d2010ae
A
6130 * Release proto refcnt held during lookup; the rest of
6131 * protocol detach steps will happen when the last proto
6132 * reference is released.
91447636 6133 */
6d2010ae
A
6134 if_proto_free(proto);
6135
91447636 6136end:
0a7de745 6137 return retval;
91447636 6138}
1c79356b 6139
6d2010ae
A
6140
6141static errno_t
6142ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
6143 struct mbuf *packet, char *header)
91447636 6144{
6d2010ae 6145#pragma unused(ifp, protocol, packet, header)
0a7de745 6146 return ENXIO;
6d2010ae
A
6147}
6148
6149static errno_t
6150ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
6151 struct mbuf *packet)
6152{
6153#pragma unused(ifp, protocol, packet)
0a7de745 6154 return ENXIO;
6d2010ae
A
6155}
6156
6157static errno_t
6158ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
6159 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
6160 char *link_layer_dest)
6161{
6162#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
0a7de745 6163 return ENXIO;
91447636 6164}
9bccf70c 6165
91447636 6166static void
6d2010ae
A
6167ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6168 const struct kev_msg *event)
6169{
6170#pragma unused(ifp, protocol, event)
6171}
6172
6173static errno_t
6174ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6175 unsigned long command, void *argument)
6176{
6177#pragma unused(ifp, protocol, command, argument)
0a7de745 6178 return ENXIO;
6d2010ae
A
6179}
6180
6181static errno_t
6182ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6183 struct sockaddr_dl *out_ll, size_t ll_len)
6184{
6185#pragma unused(ifp, proto_addr, out_ll, ll_len)
0a7de745 6186 return ENXIO;
6d2010ae
A
6187}
6188
6189static errno_t
6190ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6191 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6192 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6193{
6194#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
0a7de745 6195 return ENXIO;
91447636 6196}
9bccf70c 6197
91447636 6198extern int if_next_index(void);
4bd07ac2 6199extern int tcp_ecn_outbound;
91447636 6200
2d21ac55 6201errno_t
6d2010ae 6202ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
91447636 6203{
91447636 6204 struct ifnet *tmp_if;
6d2010ae
A
6205 struct ifaddr *ifa;
6206 struct if_data_internal if_data_saved;
6207 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
316670eb
A
6208 struct dlil_threading_info *dl_inp;
6209 u_int32_t sflags = 0;
6210 int err;
1c79356b 6211
0a7de745
A
6212 if (ifp == NULL) {
6213 return EINVAL;
6214 }
6d2010ae 6215
7ddcb079
A
6216 /*
6217 * Serialize ifnet attach using dlil_ifnet_lock, in order to
6218 * prevent the interface from being configured while it is
6219 * embryonic, as ifnet_head_lock is dropped and reacquired
6220 * below prior to marking the ifnet with IFRF_ATTACHED.
6221 */
6222 dlil_if_lock();
6d2010ae 6223 ifnet_head_lock_exclusive();
91447636
A
6224 /* Verify we aren't already on the list */
6225 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
6226 if (tmp_if == ifp) {
6227 ifnet_head_done();
7ddcb079 6228 dlil_if_unlock();
0a7de745 6229 return EEXIST;
91447636
A
6230 }
6231 }
0b4e3aa0 6232
6d2010ae 6233 lck_mtx_lock_spin(&ifp->if_ref_lock);
5ba3f43e
A
6234 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
6235 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
6d2010ae
A
6236 __func__, ifp);
6237 /* NOTREACHED */
91447636 6238 }
6d2010ae 6239 lck_mtx_unlock(&ifp->if_ref_lock);
1c79356b 6240
6d2010ae 6241 ifnet_lock_exclusive(ifp);
b0d623f7 6242
6d2010ae
A
6243 /* Sanity check */
6244 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6245 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6246
6247 if (ll_addr != NULL) {
6248 if (ifp->if_addrlen == 0) {
6249 ifp->if_addrlen = ll_addr->sdl_alen;
6250 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
6251 ifnet_lock_done(ifp);
6252 ifnet_head_done();
7ddcb079 6253 dlil_if_unlock();
0a7de745 6254 return EINVAL;
b0d623f7
A
6255 }
6256 }
6257
91447636 6258 /*
b0d623f7 6259 * Allow interfaces without protocol families to attach
91447636
A
6260 * only if they have the necessary fields filled out.
6261 */
6d2010ae
A
6262 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
6263 DLIL_PRINTF("%s: Attempt to attach interface without "
6264 "family module - %d\n", __func__, ifp->if_family);
6265 ifnet_lock_done(ifp);
6266 ifnet_head_done();
7ddcb079 6267 dlil_if_unlock();
0a7de745 6268 return ENODEV;
1c79356b
A
6269 }
6270
6d2010ae
A
6271 /* Allocate protocol hash table */
6272 VERIFY(ifp->if_proto_hash == NULL);
6273 ifp->if_proto_hash = zalloc(dlif_phash_zone);
6274 if (ifp->if_proto_hash == NULL) {
6275 ifnet_lock_done(ifp);
6276 ifnet_head_done();
7ddcb079 6277 dlil_if_unlock();
0a7de745 6278 return ENOBUFS;
6d2010ae
A
6279 }
6280 bzero(ifp->if_proto_hash, dlif_phash_size);
91447636 6281
6d2010ae
A
6282 lck_mtx_lock_spin(&ifp->if_flt_lock);
6283 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
91447636 6284 TAILQ_INIT(&ifp->if_flt_head);
6d2010ae
A
6285 VERIFY(ifp->if_flt_busy == 0);
6286 VERIFY(ifp->if_flt_waiters == 0);
6287 lck_mtx_unlock(&ifp->if_flt_lock);
6288
6d2010ae
A
6289 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
6290 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
91447636 6291 LIST_INIT(&ifp->if_multiaddrs);
6d2010ae 6292 }
1c79356b 6293
6d2010ae
A
6294 VERIFY(ifp->if_allhostsinm == NULL);
6295 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6296 TAILQ_INIT(&ifp->if_addrhead);
6297
6d2010ae
A
6298 if (ifp->if_index == 0) {
6299 int idx = if_next_index();
6300
6301 if (idx == -1) {
6302 ifp->if_index = 0;
6303 ifnet_lock_done(ifp);
6304 ifnet_head_done();
7ddcb079 6305 dlil_if_unlock();
0a7de745 6306 return ENOBUFS;
1c79356b 6307 }
6d2010ae
A
6308 ifp->if_index = idx;
6309 }
6310 /* There should not be anything occupying this slot */
6311 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6312
6313 /* allocate (if needed) and initialize a link address */
6d2010ae
A
6314 ifa = dlil_alloc_lladdr(ifp, ll_addr);
6315 if (ifa == NULL) {
6316 ifnet_lock_done(ifp);
6317 ifnet_head_done();
7ddcb079 6318 dlil_if_unlock();
0a7de745 6319 return ENOBUFS;
6d2010ae
A
6320 }
6321
6322 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
6323 ifnet_addrs[ifp->if_index - 1] = ifa;
6324
6325 /* make this address the first on the list */
6326 IFA_LOCK(ifa);
6327 /* hold a reference for ifnet_addrs[] */
6328 IFA_ADDREF_LOCKED(ifa);
6329 /* if_attach_link_ifa() holds a reference for ifa_link */
6330 if_attach_link_ifa(ifp, ifa);
6331 IFA_UNLOCK(ifa);
6332
2d21ac55 6333#if CONFIG_MACF_NET
6d2010ae 6334 mac_ifnet_label_associate(ifp);
2d21ac55 6335#endif
2d21ac55 6336
6d2010ae
A
6337 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
6338 ifindex2ifnet[ifp->if_index] = ifp;
2d21ac55 6339
6d2010ae
A
6340 /* Hold a reference to the underlying dlil_ifnet */
6341 ifnet_reference(ifp);
6342
316670eb
A
 6343 /* Clear stats (save and restore other fields that we care about) */
6344 if_data_saved = ifp->if_data;
0a7de745 6345 bzero(&ifp->if_data, sizeof(ifp->if_data));
316670eb
A
6346 ifp->if_data.ifi_type = if_data_saved.ifi_type;
6347 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
6348 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
6349 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
6350 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
6351 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
6352 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
6353 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
6354 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
6355 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
6356 ifnet_touch_lastchange(ifp);
6357
6358 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
39037602
A
6359 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
6360 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
316670eb
A
6361
6362 /* By default, use SFB and enable flow advisory */
6363 sflags = PKTSCHEDF_QALG_SFB;
0a7de745 6364 if (if_flowadv) {
316670eb 6365 sflags |= PKTSCHEDF_QALG_FLOWCTL;
0a7de745 6366 }
316670eb 6367
0a7de745 6368 if (if_delaybased_queue) {
fe8ab488 6369 sflags |= PKTSCHEDF_QALG_DELAYBASED;
0a7de745 6370 }
fe8ab488 6371
5ba3f43e 6372 if (ifp->if_output_sched_model ==
0a7de745 6373 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
5ba3f43e 6374 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
0a7de745 6375 }
5ba3f43e 6376
316670eb
A
6377 /* Initialize transmit queue(s) */
6378 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
6379 if (err != 0) {
6380 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
6381 "err=%d", __func__, ifp, err);
6382 /* NOTREACHED */
6383 }
6384
6385 /* Sanity checks on the input thread storage */
6386 dl_inp = &dl_if->dl_if_inpstorage;
0a7de745 6387 bzero(&dl_inp->stats, sizeof(dl_inp->stats));
316670eb
A
6388 VERIFY(dl_inp->input_waiting == 0);
6389 VERIFY(dl_inp->wtot == 0);
6390 VERIFY(dl_inp->ifp == NULL);
6391 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
6392 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
6393 VERIFY(!dl_inp->net_affinity);
6394 VERIFY(ifp->if_inp == NULL);
6395 VERIFY(dl_inp->input_thr == THREAD_NULL);
6396 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
6397 VERIFY(dl_inp->poll_thr == THREAD_NULL);
6398 VERIFY(dl_inp->tag == 0);
6399 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
0a7de745
A
6400 bzero(&dl_inp->tstats, sizeof(dl_inp->tstats));
6401 bzero(&dl_inp->pstats, sizeof(dl_inp->pstats));
6402 bzero(&dl_inp->sstats, sizeof(dl_inp->sstats));
316670eb
A
6403#if IFNET_INPUT_SANITY_CHK
6404 VERIFY(dl_inp->input_mbuf_cnt == 0);
6405#endif /* IFNET_INPUT_SANITY_CHK */
6406
6407 /*
6408 * A specific DLIL input thread is created per Ethernet/cellular
6409 * interface or for an interface which supports opportunistic
6410 * input polling. Pseudo interfaces or other types of interfaces
6411 * use the main input thread instead.
6412 */
6413 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
6414 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
6415 ifp->if_inp = dl_inp;
6416 err = dlil_create_input_thread(ifp, ifp->if_inp);
6417 if (err != 0) {
6418 panic_plain("%s: ifp=%p couldn't get an input thread; "
6419 "err=%d", __func__, ifp, err);
6420 /* NOTREACHED */
6421 }
6422 }
6423
5ba3f43e
A
6424 if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
6425 ifp->if_inp->input_mit_tcall =
6426 thread_call_allocate_with_priority(dlil_mit_tcall_fn,
0a7de745 6427 ifp, THREAD_CALL_PRIORITY_KERNEL);
5ba3f43e
A
6428 }
6429
6d2010ae 6430 /*
39236c6e
A
6431 * If the driver supports the new transmit model, calculate flow hash
6432 * and create a workloop starter thread to invoke the if_start callback
6433 * where the packets may be dequeued and transmitted.
6d2010ae 6434 */
316670eb 6435 if (ifp->if_eflags & IFEF_TXSTART) {
39236c6e
A
6436 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
6437 VERIFY(ifp->if_flowhash != 0);
316670eb
A
6438 VERIFY(ifp->if_start_thread == THREAD_NULL);
6439
6440 ifnet_set_start_cycle(ifp, NULL);
6441 ifp->if_start_active = 0;
6442 ifp->if_start_req = 0;
39236c6e 6443 ifp->if_start_flags = 0;
5ba3f43e
A
6444 VERIFY(ifp->if_start != NULL);
6445 if ((err = kernel_thread_start(ifnet_start_thread_fn,
6446 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
6447 panic_plain("%s: "
6448 "ifp=%p couldn't get a start thread; "
316670eb 6449 "err=%d", __func__, ifp, err);
0a7de745 6450 /* NOTREACHED */
6d2010ae 6451 }
316670eb 6452 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
0a7de745 6453 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
39236c6e
A
6454 } else {
6455 ifp->if_flowhash = 0;
316670eb
A
6456 }
6457
6458 /*
6459 * If the driver supports the new receive model, create a poller
6460 * thread to invoke if_input_poll callback where the packets may
6461 * be dequeued from the driver and processed for reception.
6462 */
6463 if (ifp->if_eflags & IFEF_RXPOLL) {
6464 VERIFY(ifp->if_input_poll != NULL);
6465 VERIFY(ifp->if_input_ctl != NULL);
6466 VERIFY(ifp->if_poll_thread == THREAD_NULL);
6467
6468 ifnet_set_poll_cycle(ifp, NULL);
6469 ifp->if_poll_update = 0;
6470 ifp->if_poll_active = 0;
6471 ifp->if_poll_req = 0;
6472 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
6473 &ifp->if_poll_thread)) != KERN_SUCCESS) {
6474 panic_plain("%s: ifp=%p couldn't get a poll thread; "
6d2010ae
A
6475 "err=%d", __func__, ifp, err);
6476 /* NOTREACHED */
6477 }
316670eb 6478 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
0a7de745 6479 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
91447636 6480 }
6d2010ae 6481
316670eb
A
6482 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6483 VERIFY(ifp->if_desc.ifd_len == 0);
6484 VERIFY(ifp->if_desc.ifd_desc != NULL);
6d2010ae
A
6485
6486 /* Record attach PC stacktrace */
6487 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
6488
6489 ifp->if_updatemcasts = 0;
6490 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
6491 struct ifmultiaddr *ifma;
6492 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
6493 IFMA_LOCK(ifma);
6494 if (ifma->ifma_addr->sa_family == AF_LINK ||
0a7de745 6495 ifma->ifma_addr->sa_family == AF_UNSPEC) {
6d2010ae 6496 ifp->if_updatemcasts++;
0a7de745 6497 }
6d2010ae
A
6498 IFMA_UNLOCK(ifma);
6499 }
6500
39236c6e
A
6501 printf("%s: attached with %d suspended link-layer multicast "
6502 "membership(s)\n", if_name(ifp),
6d2010ae
A
6503 ifp->if_updatemcasts);
6504 }
6505
39236c6e 6506 /* Clear logging parameters */
0a7de745 6507 bzero(&ifp->if_log, sizeof(ifp->if_log));
5ba3f43e
A
6508
6509 /* Clear foreground/realtime activity timestamps */
39236c6e 6510 ifp->if_fg_sendts = 0;
5ba3f43e 6511 ifp->if_rt_sendts = 0;
39236c6e
A
6512
6513 VERIFY(ifp->if_delegated.ifp == NULL);
6514 VERIFY(ifp->if_delegated.type == 0);
6515 VERIFY(ifp->if_delegated.family == 0);
6516 VERIFY(ifp->if_delegated.subfamily == 0);
fe8ab488 6517 VERIFY(ifp->if_delegated.expensive == 0);
39236c6e 6518
39037602
A
6519 VERIFY(ifp->if_agentids == NULL);
6520 VERIFY(ifp->if_agentcount == 0);
3e170ce0
A
6521
6522 /* Reset interface state */
6523 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
39037602 6524 ifp->if_interface_state.valid_bitmask |=
0a7de745 6525 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
3e170ce0 6526 ifp->if_interface_state.interface_availability =
0a7de745 6527 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
3e170ce0
A
6528
6529 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
6530 if (ifp == lo_ifp) {
6531 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
6532 ifp->if_interface_state.valid_bitmask |=
6533 IF_INTERFACE_STATE_LQM_STATE_VALID;
6534 } else {
6535 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
6536 }
4bd07ac2
A
6537
6538 /*
6539 * Enable ECN capability on this interface depending on the
6540 * value of ECN global setting
6541 */
6542 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
6543 ifp->if_eflags |= IFEF_ECN_ENABLE;
6544 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6545 }
6546
39037602
A
6547 /*
6548 * Built-in Cyclops always on policy for WiFi infra
6549 */
6550 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
6551 errno_t error;
6552
6553 error = if_set_qosmarking_mode(ifp,
6554 IFRTYPE_QOSMARKING_FASTLANE);
6555 if (error != 0) {
6556 printf("%s if_set_qosmarking_mode(%s) error %d\n",
6557 __func__, ifp->if_xname, error);
6558 } else {
6559 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
6560#if (DEVELOPMENT || DEBUG)
6561 printf("%s fastlane enabled on %s\n",
0a7de745 6562 __func__, ifp->if_xname);
39037602
A
6563#endif /* (DEVELOPMENT || DEBUG) */
6564 }
6565 }
6566
0c530ab8 6567 ifnet_lock_done(ifp);
b0d623f7 6568 ifnet_head_done();
6d2010ae 6569
5ba3f43e 6570
6d2010ae
A
6571 lck_mtx_lock(&ifp->if_cached_route_lock);
6572 /* Enable forwarding cached route */
6573 ifp->if_fwd_cacheok = 1;
6574 /* Clean up any existing cached routes */
39236c6e 6575 ROUTE_RELEASE(&ifp->if_fwd_route);
0a7de745 6576 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
39236c6e 6577 ROUTE_RELEASE(&ifp->if_src_route);
0a7de745 6578 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
39236c6e 6579 ROUTE_RELEASE(&ifp->if_src_route6);
0a7de745 6580 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
6d2010ae 6581 lck_mtx_unlock(&ifp->if_cached_route_lock);
6582
6583 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
6584
b0d623f7 6585 /*
6d2010ae 6586 * Allocate and attach IGMPv3/MLDv2 interface specific variables
6587 * and trees; do this before the ifnet is marked as attached.
6588 * The ifnet keeps the reference to the info structures even after
6589 * the ifnet is detached, since the network-layer records still
6590 * refer to the info structures even after that. This also
6591 * makes it possible for them to still function after the ifnet
6592 * is recycled or reattached.
b0d623f7 6593 */
6d2010ae 6594#if INET
6595 if (IGMP_IFINFO(ifp) == NULL) {
6596 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
6597 VERIFY(IGMP_IFINFO(ifp) != NULL);
6598 } else {
6599 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
6600 igmp_domifreattach(IGMP_IFINFO(ifp));
6601 }
6602#endif /* INET */
6603#if INET6
6604 if (MLD_IFINFO(ifp) == NULL) {
6605 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
6606 VERIFY(MLD_IFINFO(ifp) != NULL);
6607 } else {
6608 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
6609 mld_domifreattach(MLD_IFINFO(ifp));
6610 }
6611#endif /* INET6 */
b0d623f7 6612
39236c6e 6613 VERIFY(ifp->if_data_threshold == 0);
5ba3f43e 6614 VERIFY(ifp->if_dt_tcall != NULL);
39236c6e 6615
6d2010ae 6616 /*
6617 * Finally, mark this ifnet as attached.
6618 */
6619 lck_mtx_lock(rnh_lock);
6620 ifnet_lock_exclusive(ifp);
6621 lck_mtx_lock_spin(&ifp->if_ref_lock);
0a7de745 6622 ifp->if_refflags = IFRF_ATTACHED; /* clears embryonic */
6d2010ae 6623 lck_mtx_unlock(&ifp->if_ref_lock);
d1ecb069 6624 if (net_rtref) {
6d2010ae 6625 /* boot-args override; enable idle notification */
6626 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
d1ecb069 6627 IFRF_IDLE_NOTIFY);
6d2010ae 6628 } else {
6629 /* apply previous request(s) to set the idle flags, if any */
6630 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
6631 ifp->if_idle_new_flags_mask);
d1ecb069 6632 }
6d2010ae 6633 ifnet_lock_done(ifp);
6634 lck_mtx_unlock(rnh_lock);
7ddcb079 6635 dlil_if_unlock();
6d2010ae 6636
6637#if PF
6638 /*
6639 * Attach packet filter to this interface, if enabled.
6640 */
6641 pf_ifnet_hook(ifp, 1);
6642#endif /* PF */
d1ecb069 6643
2d21ac55 6644 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
1c79356b 6645
6d2010ae 6646 if (dlil_verbose) {
39236c6e 6647 printf("%s: attached%s\n", if_name(ifp),
6d2010ae 6648 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
6649 }
6650
0a7de745 6651 return 0;
6d2010ae 6652}
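/*
 * For context, a driver reaches the attach path above through the ifnet
 * KPIs. A minimal sketch follows (hedged: "exm", example_output and the
 * locally-administered MAC are made up for illustration, and error
 * handling is abbreviated; only the ifnet_* and ether_* calls are real
 * XNU KPIs).
 */
static errno_t example_output(ifnet_t ifp, mbuf_t m);	/* assumed driver callback */

static errno_t
example_driver_attach(ifnet_t *pifp)
{
	struct ifnet_init_params init;
	struct sockaddr_dl sdl;
	u_char mac[ETHER_ADDR_LEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	errno_t err;

	bzero(&init, sizeof(init));
	init.name = "exm";			/* hypothetical name */
	init.unit = 0;
	init.family = IFNET_FAMILY_ETHERNET;
	init.type = IFT_ETHER;
	init.output = example_output;
	init.demux = ether_demux;		/* stock Ethernet helpers */
	init.add_proto = ether_add_proto;
	init.del_proto = ether_del_proto;

	/* allocation ultimately lands in dlil_if_acquire() below */
	err = ifnet_allocate(&init, pifp);
	if (err != 0) {
		return err;
	}

	bzero(&sdl, sizeof(sdl));
	sdl.sdl_len = sizeof(sdl);
	sdl.sdl_family = AF_LINK;
	sdl.sdl_alen = ETHER_ADDR_LEN;
	bcopy(mac, LLADDR(&sdl), ETHER_ADDR_LEN);

	/* ifnet_attach() drives the dlil attach sequence shown above */
	err = ifnet_attach(*pifp, &sdl);
	if (err != 0) {
		ifnet_release(*pifp);
		*pifp = NULL;
	}
	return err;
}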
6653
6654/*
6655 * Prepare the storage for the first/permanent link address, which
6656 * must have the same lifetime as the ifnet itself. Although the link
6657 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
6658 * its location in memory must never change as it may still be referred
6659 * to by some parts of the system afterwards (unfortunate implementation
6660 * artifacts inherited from BSD.)
6661 *
6662 * Caller must hold ifnet lock as writer.
6663 */
6664static struct ifaddr *
6665dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
6666{
6667 struct ifaddr *ifa, *oifa;
6668 struct sockaddr_dl *asdl, *msdl;
0a7de745 6669 char workbuf[IFNAMSIZ * 2];
6d2010ae 6670 int namelen, masklen, socksize;
6671 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6672
6673 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
6674 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
6675
0a7de745 6676 namelen = snprintf(workbuf, sizeof(workbuf), "%s",
39236c6e 6677 if_name(ifp));
39037602 6678 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
6679 + ((namelen > 0) ? namelen : 0);
6d2010ae 6680 socksize = masklen + ifp->if_addrlen;
0a7de745 6681#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
6682 if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
6d2010ae 6683 socksize = sizeof(struct sockaddr_dl);
0a7de745 6684 }
6d2010ae 6685 socksize = ROUNDUP(socksize);
6686#undef ROUNDUP
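	/*
	 * Worked example (illustrative, not from the original source):
	 * for "en0" (namelen 3) with a 6-byte Ethernet address, and
	 * offsetof(struct sockaddr_dl, sdl_data[0]) == 8:
	 *
	 *   masklen  = 8 + 3  = 11
	 *   socksize = 11 + 6 = 17
	 *   17 < sizeof (struct sockaddr_dl) (20), so socksize = 20
	 *   ROUNDUP(20) = 1 + ((20 - 1) | 3) = 20, a 4-byte multiple
	 *
	 * so the common case fits comfortably within DLIL_SDLMAXLEN.
	 */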
6687
6688 ifa = ifp->if_lladdr;
6689 if (socksize > DLIL_SDLMAXLEN ||
6690 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
6691 /*
6692 * Rare, but in the event that the link address requires
6693 * more storage space than DLIL_SDLMAXLEN, allocate the
6694 * largest possible storage for address and mask, such
6695 * that we can reuse the same space when if_addrlen grows.
6696 * This same space will be used when if_addrlen shrinks.
6697 */
6698 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
0a7de745 6699 int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
6d2010ae 6700 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
0a7de745 6701 if (ifa == NULL) {
6702 return NULL;
6703 }
6d2010ae 6704 ifa_lock_init(ifa);
6705 /* Don't set IFD_ALLOC, as this is permanent */
6706 ifa->ifa_debug = IFD_LINK;
6707 }
6708 IFA_LOCK(ifa);
6709 /* address and mask sockaddr_dl locations */
6710 asdl = (struct sockaddr_dl *)(ifa + 1);
6711 bzero(asdl, SOCK_MAXADDRLEN);
316670eb 6712 msdl = (struct sockaddr_dl *)(void *)
6713 ((char *)asdl + SOCK_MAXADDRLEN);
6d2010ae 6714 bzero(msdl, SOCK_MAXADDRLEN);
6715 } else {
6716 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
6717 /*
6718 * Use the storage areas for address and mask within the
6719 * dlil_ifnet structure. This is the most common case.
6720 */
6721 if (ifa == NULL) {
6722 ifa = &dl_if->dl_if_lladdr.ifa;
6723 ifa_lock_init(ifa);
6724 /* Don't set IFD_ALLOC, as this is permanent */
6725 ifa->ifa_debug = IFD_LINK;
6726 }
6727 IFA_LOCK(ifa);
6728 /* address and mask sockaddr_dl locations */
316670eb 6729 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
0a7de745 6730 bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
316670eb 6731 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
0a7de745 6732 bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
6d2010ae 6733 }
6734
6735 /* hold a permanent reference for the ifnet itself */
6736 IFA_ADDREF_LOCKED(ifa);
6737 oifa = ifp->if_lladdr;
6738 ifp->if_lladdr = ifa;
6739
6740 VERIFY(ifa->ifa_debug == IFD_LINK);
6741 ifa->ifa_ifp = ifp;
6742 ifa->ifa_rtrequest = link_rtrequest;
6743 ifa->ifa_addr = (struct sockaddr *)asdl;
6744 asdl->sdl_len = socksize;
6745 asdl->sdl_family = AF_LINK;
39037602 6746 if (namelen > 0) {
6747 bcopy(workbuf, asdl->sdl_data, min(namelen,
0a7de745 6748 sizeof(asdl->sdl_data)));
39037602 6749 asdl->sdl_nlen = namelen;
6750 } else {
6751 asdl->sdl_nlen = 0;
6752 }
6d2010ae 6753 asdl->sdl_index = ifp->if_index;
6754 asdl->sdl_type = ifp->if_type;
6755 if (ll_addr != NULL) {
6756 asdl->sdl_alen = ll_addr->sdl_alen;
6757 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
6758 } else {
6759 asdl->sdl_alen = 0;
6760 }
39037602 6761 ifa->ifa_netmask = (struct sockaddr *)msdl;
6d2010ae 6762 msdl->sdl_len = masklen;
0a7de745 6763 while (namelen > 0) {
6d2010ae 6764 msdl->sdl_data[--namelen] = 0xff;
0a7de745 6765 }
6d2010ae 6766 IFA_UNLOCK(ifa);
6767
0a7de745 6768 if (oifa != NULL) {
6d2010ae 6769 IFA_REMREF(oifa);
0a7de745 6770 }
6d2010ae 6771
0a7de745 6772 return ifa;
6d2010ae 6773}
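/*
 * Layout sketch (illustrative): sdl_data[] holds the interface name
 * followed immediately by the link-layer address, e.g. for "en0" with
 * a 6-byte MAC:
 *
 *   sdl_data: | 'e' 'n' '0' | aa bb cc dd ee ff |
 *              \_ sdl_nlen _/ \___ sdl_alen ___/
 *
 * LLADDR(sdl) points at sdl_data + sdl_nlen, which appears to be why
 * the mask (msdl) above writes 0xff over exactly the first sdl_nlen
 * bytes: link-level route lookups match on the name portion, not the
 * hardware address.
 */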
6774
6775static void
6776if_purgeaddrs(struct ifnet *ifp)
6777{
6778#if INET
6779 in_purgeaddrs(ifp);
6780#endif /* INET */
6781#if INET6
6782 in6_purgeaddrs(ifp);
6783#endif /* INET6 */
1c79356b 6784}
6785
2d21ac55 6786errno_t
6d2010ae 6787ifnet_detach(ifnet_t ifp)
1c79356b 6788{
39236c6e 6789 struct ifnet *delegated_ifp;
39037602 6790 struct nd_ifinfo *ndi = NULL;
39236c6e 6791
0a7de745 6792 if (ifp == NULL) {
6793 return EINVAL;
6794 }
6d2010ae 6795
39037602 6796 ndi = ND_IFINFO(ifp);
0a7de745 6797 if (NULL != ndi) {
39037602 6798 ndi->cga_initialized = FALSE;
0a7de745 6799 }
39037602 6800
6d2010ae 6801 lck_mtx_lock(rnh_lock);
316670eb 6802 ifnet_head_lock_exclusive();
91447636 6803 ifnet_lock_exclusive(ifp);
6d2010ae 6804
6805 /*
6806 * Check to see if this interface has previously triggered
6807 * aggressive protocol draining; if so, decrement the global
6808 * refcnt and clear PR_AGGDRAIN on the route domain if
6809 * there are no more of such an interface around.
6810 */
6811 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
6812
6813 lck_mtx_lock_spin(&ifp->if_ref_lock);
39037602 6814 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
6d2010ae 6815 lck_mtx_unlock(&ifp->if_ref_lock);
6816 ifnet_lock_done(ifp);
6d2010ae 6817 ifnet_head_done();
13f56ec4 6818 lck_mtx_unlock(rnh_lock);
0a7de745 6819 return EINVAL;
6d2010ae 6820 } else if (ifp->if_refflags & IFRF_DETACHING) {
91447636 6821 /* Interface has already been detached */
6d2010ae 6822 lck_mtx_unlock(&ifp->if_ref_lock);
91447636 6823 ifnet_lock_done(ifp);
6d2010ae 6824 ifnet_head_done();
13f56ec4 6825 lck_mtx_unlock(rnh_lock);
0a7de745 6826 return ENXIO;
55e303ae 6827 }
5ba3f43e 6828 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
6d2010ae 6829 /* Indicate this interface is being detached */
6830 ifp->if_refflags &= ~IFRF_ATTACHED;
6831 ifp->if_refflags |= IFRF_DETACHING;
6832 lck_mtx_unlock(&ifp->if_ref_lock);
6833
5c9f4661 6834 if (dlil_verbose) {
39236c6e 6835 printf("%s: detaching\n", if_name(ifp));
5c9f4661 6836 }
6837
6838 /* clean up flow control entry object if there's any */
6839 if (ifp->if_eflags & IFEF_TXSTART) {
6840 ifnet_flowadv(ifp->if_flowhash);
6841 }
6d2010ae 6842
490019cf 6843 /* Reset ECN enable/disable flags */
6844 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6845 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
6846
d9a64523 6847 /* Reset CLAT46 flag */
6848 ifp->if_eflags &= ~IFEF_CLAT46;
6849
91447636 6850 /*
6d2010ae 6851 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
6852 * no longer be visible during lookups from this point.
91447636 6853 */
6d2010ae 6854 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
6855 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
6856 ifp->if_link.tqe_next = NULL;
6857 ifp->if_link.tqe_prev = NULL;
39037602 6858 if (ifp->if_ordered_link.tqe_next != NULL ||
0a7de745 6859 ifp->if_ordered_link.tqe_prev != NULL) {
39037602 6860 ifnet_remove_from_ordered_list(ifp);
6861 }
6d2010ae 6862 ifindex2ifnet[ifp->if_index] = NULL;
6863
3e170ce0 6864 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
6865 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
6866
6d2010ae 6867 /* Record detach PC stacktrace */
6868 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
6869
39236c6e 6870 /* Clear logging parameters */
0a7de745 6871 bzero(&ifp->if_log, sizeof(ifp->if_log));
39236c6e 6872
6873 /* Clear delegated interface info (reference released below) */
6874 delegated_ifp = ifp->if_delegated.ifp;
0a7de745 6875 bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
39236c6e 6876
3e170ce0 6877 /* Reset interface state */
6878 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6879
91447636 6880 ifnet_lock_done(ifp);
6d2010ae 6881 ifnet_head_done();
13f56ec4 6882 lck_mtx_unlock(rnh_lock);
6d2010ae 6883
5ba3f43e 6884
39236c6e 6885 /* Release reference held on the delegated interface */
0a7de745 6886 if (delegated_ifp != NULL) {
39236c6e 6887 ifnet_release(delegated_ifp);
0a7de745 6888 }
39236c6e 6889
316670eb 6890 /* Reset Link Quality Metric (unless loopback [lo0]) */
0a7de745 6891 if (ifp != lo_ifp) {
3e170ce0 6892 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
0a7de745 6893 }
316670eb 6894
6895 /* Reset TCP local statistics */
0a7de745 6896 if (ifp->if_tcp_stat != NULL) {
316670eb 6897 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
0a7de745 6898 }
316670eb 6899
6900 /* Reset UDP local statistics */
0a7de745 6901 if (ifp->if_udp_stat != NULL) {
316670eb 6902 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
0a7de745 6903 }
316670eb 6904
4bd07ac2 6905 /* Reset ifnet IPv4 stats */
0a7de745 6906 if (ifp->if_ipv4_stat != NULL) {
4bd07ac2 6907 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
0a7de745 6908 }
4bd07ac2 6909
6910 /* Reset ifnet IPv6 stats */
0a7de745 6911 if (ifp->if_ipv6_stat != NULL) {
4bd07ac2 6912 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
0a7de745 6913 }
4bd07ac2 6914
3e170ce0 6915 /* Release memory held for interface link status report */
6916 if (ifp->if_link_status != NULL) {
6917 FREE(ifp->if_link_status, M_TEMP);
6918 ifp->if_link_status = NULL;
6919 }
6920
39037602 6921 /* Clear agent IDs */
6922 if (ifp->if_agentids != NULL) {
6923 FREE(ifp->if_agentids, M_NETAGENT);
6924 ifp->if_agentids = NULL;
6925 }
6926 ifp->if_agentcount = 0;
6927
6928
2d21ac55 6929 /* Let BPF know we're detaching */
6930 bpfdetach(ifp);
6d2010ae 6931
6932 /* Mark the interface as DOWN */
6933 if_down(ifp);
6934
6935 /* Disable forwarding cached route */
6936 lck_mtx_lock(&ifp->if_cached_route_lock);
6937 ifp->if_fwd_cacheok = 0;
6938 lck_mtx_unlock(&ifp->if_cached_route_lock);
6939
5ba3f43e 6940 /* Disable data threshold and wait for any pending event posting */
39236c6e 6941 ifp->if_data_threshold = 0;
5ba3f43e 6942 VERIFY(ifp->if_dt_tcall != NULL);
6943 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
6944
d1ecb069 6945 /*
6d2010ae 6946 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
6947 * references to the info structures and leave them attached to
6948 * this ifnet.
d1ecb069 6949 */
6d2010ae 6950#if INET
6951 igmp_domifdetach(ifp);
6952#endif /* INET */
6953#if INET6
6954 mld_domifdetach(ifp);
6955#endif /* INET6 */
6956
6957 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
6958
6959 /* Let worker thread take care of the rest, to avoid reentrancy */
7ddcb079 6960 dlil_if_lock();
6d2010ae 6961 ifnet_detaching_enqueue(ifp);
7ddcb079 6962 dlil_if_unlock();
6d2010ae 6963
0a7de745 6964 return 0;
6d2010ae 6965}
6966
6967static void
6968ifnet_detaching_enqueue(struct ifnet *ifp)
6969{
7ddcb079 6970 dlil_if_lock_assert();
6d2010ae 6971
6972 ++ifnet_detaching_cnt;
6973 VERIFY(ifnet_detaching_cnt != 0);
6974 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6975 wakeup((caddr_t)&ifnet_delayed_run);
6976}
6977
6978static struct ifnet *
6979ifnet_detaching_dequeue(void)
6980{
6981 struct ifnet *ifp;
6982
7ddcb079 6983 dlil_if_lock_assert();
6d2010ae 6984
6985 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6986 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6987 if (ifp != NULL) {
6988 VERIFY(ifnet_detaching_cnt != 0);
6989 --ifnet_detaching_cnt;
6990 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6991 ifp->if_detaching_link.tqe_next = NULL;
6992 ifp->if_detaching_link.tqe_prev = NULL;
6993 }
0a7de745 6994 return ifp;
6d2010ae 6995}
6996
316670eb 6997static int
6998ifnet_detacher_thread_cont(int err)
6d2010ae 6999{
316670eb 7000#pragma unused(err)
6d2010ae 7001 struct ifnet *ifp;
7002
7003 for (;;) {
316670eb 7004 dlil_if_lock_assert();
6d2010ae 7005 while (ifnet_detaching_cnt == 0) {
316670eb 7006 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
7007 (PZERO - 1), "ifnet_detacher_cont", 0,
7008 ifnet_detacher_thread_cont);
7009 /* NOTREACHED */
6d2010ae 7010 }
7011
7012 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
7013
7014 /* Take care of detaching ifnet */
7015 ifp = ifnet_detaching_dequeue();
316670eb 7016 if (ifp != NULL) {
7017 dlil_if_unlock();
6d2010ae 7018 ifnet_detach_final(ifp);
316670eb 7019 dlil_if_lock();
7020 }
55e303ae 7021 }
316670eb 7022}
7023
7024static void
7025ifnet_detacher_thread_func(void *v, wait_result_t w)
7026{
7027#pragma unused(v, w)
7028 dlil_if_lock();
7029 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
7030 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
7031 /*
7032 * msleep0() shouldn't have returned as PCATCH was not set;
7033 * therefore assert in this case.
7034 */
7035 dlil_if_unlock();
7036 VERIFY(0);
6d2010ae 7037}
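/*
 * Pattern sketch (illustrative; the example_* names are made up): the
 * detacher thread above never returns from msleep0(). With a non-NULL
 * continuation the thread's kernel stack is discarded while it sleeps
 * and the continuation is re-entered from the top on wakeup, with the
 * mutex held again, so all state must live outside the stack frame.
 */
static lck_mtx_t example_lock;		/* assumed initialized elsewhere */
static uint32_t example_pending;	/* assumed producer-updated count */
static uint32_t example_wchan;		/* wait channel */

static int
example_worker_cont(int err)
{
#pragma unused(err)
	for (;;) {
		/* resumed by msleep0()/wakeup() with example_lock held */
		LCK_MTX_ASSERT(&example_lock, LCK_MTX_ASSERT_OWNED);
		while (example_pending == 0) {
			(void) msleep0(&example_wchan, &example_lock,
			    (PZERO - 1), "example_cont", 0,
			    example_worker_cont);
			/* NOTREACHED: wakeup restarts the continuation */
		}
		--example_pending;
		lck_mtx_unlock(&example_lock);
		/* ... perform one unit of work without the lock ... */
		lck_mtx_lock(&example_lock);
	}
}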
b0d623f7 7038
6d2010ae 7039static void
7040ifnet_detach_final(struct ifnet *ifp)
7041{
7042 struct ifnet_filter *filter, *filter_next;
7043 struct ifnet_filter_head fhead;
316670eb 7044 struct dlil_threading_info *inp;
6d2010ae 7045 struct ifaddr *ifa;
7046 ifnet_detached_func if_free;
7047 int i;
7048
7049 lck_mtx_lock(&ifp->if_ref_lock);
7050 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7051 panic("%s: flags mismatch (detaching not set) ifp=%p",
7052 __func__, ifp);
7053 /* NOTREACHED */
7054 }
7055
316670eb 7056 /*
7057 * Wait until the existing IO references get released
7058 * before we proceed with ifnet_detach. This is not a
7059 * common case, so block without using a continuation.
b0d623f7 7060 */
6d2010ae 7061 while (ifp->if_refio > 0) {
39236c6e 7062 printf("%s: Waiting for IO references on %s interface "
7063 "to be released\n", __func__, if_name(ifp));
6d2010ae 7064 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
0a7de745 7065 (PZERO - 1), "ifnet_ioref_wait", NULL);
6d2010ae 7066 }
7067 lck_mtx_unlock(&ifp->if_ref_lock);
7068
fe8ab488 7069 /* Drain and destroy send queue */
7070 ifclassq_teardown(ifp);
7071
6d2010ae 7072 /* Detach interface filters */
7073 lck_mtx_lock(&ifp->if_flt_lock);
7074 if_flt_monitor_enter(ifp);
b0d623f7 7075
5ba3f43e 7076 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 7077 fhead = ifp->if_flt_head;
7078 TAILQ_INIT(&ifp->if_flt_head);
2d21ac55 7079
6d2010ae 7080 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
7081 filter_next = TAILQ_NEXT(filter, filt_next);
7082 lck_mtx_unlock(&ifp->if_flt_lock);
7083
7084 dlil_detach_filter_internal(filter, 1);
7085 lck_mtx_lock(&ifp->if_flt_lock);
7086 }
7087 if_flt_monitor_leave(ifp);
7088 lck_mtx_unlock(&ifp->if_flt_lock);
7089
7090 /* Tell upper layers to drop their network addresses */
7091 if_purgeaddrs(ifp);
7092
7093 ifnet_lock_exclusive(ifp);
7094
7095 /* Unplumb all protocols */
7096 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
7097 struct if_proto *proto;
7098
7099 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7100 while (proto != NULL) {
7101 protocol_family_t family = proto->protocol_family;
7102 ifnet_lock_done(ifp);
7103 proto_unplumb(family, ifp);
7104 ifnet_lock_exclusive(ifp);
7105 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7106 }
7107 /* There should not be any protocols left */
7108 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
7109 }
7110 zfree(dlif_phash_zone, ifp->if_proto_hash);
7111 ifp->if_proto_hash = NULL;
7112
7113 /* Detach (permanent) link address from if_addrhead */
7114 ifa = TAILQ_FIRST(&ifp->if_addrhead);
7115 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
7116 IFA_LOCK(ifa);
7117 if_detach_link_ifa(ifp, ifa);
7118 IFA_UNLOCK(ifa);
7119
7120 /* Remove (permanent) link address from ifnet_addrs[] */
7121 IFA_REMREF(ifa);
7122 ifnet_addrs[ifp->if_index - 1] = NULL;
7123
7124 /* This interface should not be on {ifnet_head,detaching} */
7125 VERIFY(ifp->if_link.tqe_next == NULL);
7126 VERIFY(ifp->if_link.tqe_prev == NULL);
7127 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7128 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
39037602 7129 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
7130 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6d2010ae 7131
7132 /* The slot should have been emptied */
7133 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7134
7135 /* There should not be any addresses left */
7136 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
1c79356b 7137
316670eb 7138 /*
7139 * Signal the starter thread to terminate itself.
7140 */
7141 if (ifp->if_start_thread != THREAD_NULL) {
7142 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e 7143 ifp->if_start_flags = 0;
316670eb 7144 ifp->if_start_thread = THREAD_NULL;
7145 wakeup_one((caddr_t)&ifp->if_start_thread);
7146 lck_mtx_unlock(&ifp->if_start_lock);
7147 }
7148
7149 /*
7150 * Signal the poller thread to terminate itself.
7151 */
7152 if (ifp->if_poll_thread != THREAD_NULL) {
7153 lck_mtx_lock_spin(&ifp->if_poll_lock);
7154 ifp->if_poll_thread = THREAD_NULL;
7155 wakeup_one((caddr_t)&ifp->if_poll_thread);
7156 lck_mtx_unlock(&ifp->if_poll_lock);
7157 }
7158
2d21ac55 7159 /*
7160 * If thread affinity was set for the workloop thread, we will need
7161 * to tear down the affinity and release the extra reference count
316670eb 7162 * taken at attach time. Does not apply to lo0 or other interfaces
7163 * without dedicated input threads.
2d21ac55 7164 */
316670eb 7165 if ((inp = ifp->if_inp) != NULL) {
7166 VERIFY(inp != dlil_main_input_thread);
7167
7168 if (inp->net_affinity) {
7169 struct thread *tp, *wtp, *ptp;
7170
7171 lck_mtx_lock_spin(&inp->input_lck);
7172 wtp = inp->wloop_thr;
7173 inp->wloop_thr = THREAD_NULL;
7174 ptp = inp->poll_thr;
7175 inp->poll_thr = THREAD_NULL;
0a7de745 7176 tp = inp->input_thr; /* don't nullify now */
316670eb 7177 inp->tag = 0;
7178 inp->net_affinity = FALSE;
7179 lck_mtx_unlock(&inp->input_lck);
7180
7181 /* Tear down poll thread affinity */
7182 if (ptp != NULL) {
7183 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
7184 (void) dlil_affinity_set(ptp,
7185 THREAD_AFFINITY_TAG_NULL);
7186 thread_deallocate(ptp);
6d2010ae 7187 }
2d21ac55 7188
2d21ac55 7189 /* Tear down workloop thread affinity */
316670eb 7190 if (wtp != NULL) {
7191 (void) dlil_affinity_set(wtp,
2d21ac55 7192 THREAD_AFFINITY_TAG_NULL);
316670eb 7193 thread_deallocate(wtp);
2d21ac55 7194 }
1c79356b 7195
316670eb 7196 /* Tear down DLIL input thread affinity */
2d21ac55 7197 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
7198 thread_deallocate(tp);
9bccf70c 7199 }
1c79356b 7200
316670eb 7201 /* disassociate ifp DLIL input thread */
7202 ifp->if_inp = NULL;
6d2010ae 7203
5ba3f43e 7204 /* tell the input thread to terminate */
316670eb 7205 lck_mtx_lock_spin(&inp->input_lck);
7206 inp->input_waiting |= DLIL_INPUT_TERMINATE;
7207 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
7208 wakeup_one((caddr_t)&inp->input_waiting);
91447636 7209 }
316670eb 7210 lck_mtx_unlock(&inp->input_lck);
5c9f4661 7211 ifnet_lock_done(ifp);
5ba3f43e 7212
7213 /* wait for the input thread to terminate */
7214 lck_mtx_lock_spin(&inp->input_lck);
7215 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
0a7de745 7216 == 0) {
5ba3f43e 7217 (void) msleep(&inp->input_waiting, &inp->input_lck,
7218 (PZERO - 1) | PSPIN, inp->input_name, NULL);
7219 }
7220 lck_mtx_unlock(&inp->input_lck);
5c9f4661 7221 ifnet_lock_exclusive(ifp);
5ba3f43e 7222
7223 /* clean-up input thread state */
7224 dlil_clean_threading_info(inp);
55e303ae 7225 }
6d2010ae 7226
7227 /* The driver might unload, so point these to ourselves */
7228 if_free = ifp->if_free;
5ba3f43e 7229 ifp->if_output_dlil = ifp_if_output;
6d2010ae 7230 ifp->if_output = ifp_if_output;
316670eb 7231 ifp->if_pre_enqueue = ifp_if_output;
7232 ifp->if_start = ifp_if_start;
7233 ifp->if_output_ctl = ifp_if_ctl;
5ba3f43e 7234 ifp->if_input_dlil = ifp_if_input;
316670eb 7235 ifp->if_input_poll = ifp_if_input_poll;
7236 ifp->if_input_ctl = ifp_if_ctl;
6d2010ae 7237 ifp->if_ioctl = ifp_if_ioctl;
7238 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
7239 ifp->if_free = ifp_if_free;
7240 ifp->if_demux = ifp_if_demux;
7241 ifp->if_event = ifp_if_event;
39236c6e 7242 ifp->if_framer_legacy = ifp_if_framer;
7243 ifp->if_framer = ifp_if_framer_extended;
6d2010ae 7244 ifp->if_add_proto = ifp_if_add_proto;
7245 ifp->if_del_proto = ifp_if_del_proto;
7246 ifp->if_check_multi = ifp_if_check_multi;
7247
316670eb 7248 /* wipe out interface description */
7249 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7250 ifp->if_desc.ifd_len = 0;
7251 VERIFY(ifp->if_desc.ifd_desc != NULL);
7252 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
7253
39236c6e 7254 /* there shouldn't be any delegation by now */
7255 VERIFY(ifp->if_delegated.ifp == NULL);
7256 VERIFY(ifp->if_delegated.type == 0);
7257 VERIFY(ifp->if_delegated.family == 0);
7258 VERIFY(ifp->if_delegated.subfamily == 0);
fe8ab488 7259 VERIFY(ifp->if_delegated.expensive == 0);
39236c6e 7260
39037602 7261 /* QoS marking gets cleared */
7262 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
7263 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
7264
5ba3f43e 7265
6d2010ae 7266 ifnet_lock_done(ifp);
7267
7268#if PF
7269 /*
7270 * Detach this interface from packet filter, if enabled.
7271 */
7272 pf_ifnet_hook(ifp, 0);
7273#endif /* PF */
7274
7275 /* Filter list should be empty */
7276 lck_mtx_lock_spin(&ifp->if_flt_lock);
7277 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
7278 VERIFY(ifp->if_flt_busy == 0);
7279 VERIFY(ifp->if_flt_waiters == 0);
7280 lck_mtx_unlock(&ifp->if_flt_lock);
7281
316670eb 7282 /* Last chance to drain send queue */
7283 if_qflush(ifp, 0);
7284
6d2010ae 7285 /* Last chance to clean up any cached route */
7286 lck_mtx_lock(&ifp->if_cached_route_lock);
7287 VERIFY(!ifp->if_fwd_cacheok);
39236c6e 7288 ROUTE_RELEASE(&ifp->if_fwd_route);
0a7de745 7289 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
39236c6e 7290 ROUTE_RELEASE(&ifp->if_src_route);
0a7de745 7291 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
39236c6e 7292 ROUTE_RELEASE(&ifp->if_src_route6);
0a7de745 7293 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
6d2010ae 7294 lck_mtx_unlock(&ifp->if_cached_route_lock);
7295
39236c6e 7296 VERIFY(ifp->if_data_threshold == 0);
5ba3f43e 7297 VERIFY(ifp->if_dt_tcall != NULL);
7298 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
39236c6e 7299
6d2010ae 7300 ifnet_llreach_ifdetach(ifp);
7301
7302 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
7303
6d2010ae 7304 /*
7305 * Finally, mark this ifnet as detached.
7306 */
7307 lck_mtx_lock_spin(&ifp->if_ref_lock);
7308 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7309 panic("%s: flags mismatch (detaching not set) ifp=%p",
7310 __func__, ifp);
7311 /* NOTREACHED */
55e303ae 7312 }
6d2010ae 7313 ifp->if_refflags &= ~IFRF_DETACHING;
7314 lck_mtx_unlock(&ifp->if_ref_lock);
0a7de745 7315 if (if_free != NULL) {
39037602 7316 if_free(ifp);
0a7de745 7317 }
6d2010ae 7318
0a7de745 7319 if (dlil_verbose) {
39236c6e 7320 printf("%s: detached\n", if_name(ifp));
0a7de745 7321 }
6d2010ae 7322
7323 /* Release reference held during ifnet attach */
7324 ifnet_release(ifp);
1c79356b 7325}
9bccf70c 7326
5ba3f43e 7327errno_t
6d2010ae 7328ifp_if_output(struct ifnet *ifp, struct mbuf *m)
9bccf70c 7329{
6d2010ae 7330#pragma unused(ifp)
39037602 7331 m_freem_list(m);
0a7de745 7332 return 0;
9bccf70c 7333}
7334
5ba3f43e 7335void
316670eb 7336ifp_if_start(struct ifnet *ifp)
7337{
7338 ifnet_purge(ifp);
7339}
7340
39037602 7341static errno_t
7342ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
7343 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
7344 boolean_t poll, struct thread *tp)
7345{
7346#pragma unused(ifp, m_tail, s, poll, tp)
7347 m_freem_list(m_head);
0a7de745 7348 return ENXIO;
39037602 7349}
7350
316670eb 7351static void
7352ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
7353 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
7354{
7355#pragma unused(ifp, flags, max_cnt)
0a7de745 7356 if (m_head != NULL) {
316670eb 7357 *m_head = NULL;
0a7de745 7358 }
7359 if (m_tail != NULL) {
316670eb 7360 *m_tail = NULL;
0a7de745 7361 }
7362 if (cnt != NULL) {
316670eb 7363 *cnt = 0;
0a7de745 7364 }
7365 if (len != NULL) {
316670eb 7366 *len = 0;
0a7de745 7367 }
316670eb 7368}
7369
7370static errno_t
7371ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
7372{
7373#pragma unused(ifp, cmd, arglen, arg)
0a7de745 7374 return EOPNOTSUPP;
316670eb 7375}
7376
6d2010ae 7377static errno_t
7378ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
9bccf70c 7379{
6d2010ae 7380#pragma unused(ifp, fh, pf)
7381 m_freem(m);
0a7de745 7382 return EJUSTRETURN;
9bccf70c 7383}
7384
6d2010ae 7385static errno_t
7386ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
7387 const struct ifnet_demux_desc *da, u_int32_t dc)
9bccf70c 7388{
6d2010ae 7389#pragma unused(ifp, pf, da, dc)
0a7de745 7390 return EINVAL;
9bccf70c 7391}
7392
91447636 7393static errno_t
6d2010ae 7394ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
9bccf70c 7395{
6d2010ae 7396#pragma unused(ifp, pf)
0a7de745 7397 return EINVAL;
6d2010ae 7398}
7399
7400static errno_t
7401ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
7402{
7403#pragma unused(ifp, sa)
0a7de745 7404 return EOPNOTSUPP;
6d2010ae 7405}
7406
5ba3f43e 7407#if CONFIG_EMBEDDED
7408static errno_t
7409ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
7410 const struct sockaddr *sa, const char *ll, const char *t,
7411 u_int32_t *pre, u_int32_t *post)
7412#else
39236c6e 7413static errno_t
7414ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
7415 const struct sockaddr *sa, const char *ll, const char *t)
5ba3f43e 7416#endif /* !CONFIG_EMBEDDED */
6d2010ae 7417{
7418#pragma unused(ifp, m, sa, ll, t)
5ba3f43e 7419#if CONFIG_EMBEDDED
0a7de745 7420 return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
5ba3f43e 7421#else
0a7de745 7422 return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
5ba3f43e 7423#endif /* !CONFIG_EMBEDDED */
39236c6e 7424}
7425
7426static errno_t
7427ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
7428 const struct sockaddr *sa, const char *ll, const char *t,
7429 u_int32_t *pre, u_int32_t *post)
7430{
7431#pragma unused(ifp, sa, ll, t)
6d2010ae 7432 m_freem(*m);
7433 *m = NULL;
39236c6e 7434
0a7de745 7435 if (pre != NULL) {
39236c6e 7436 *pre = 0;
0a7de745 7437 }
7438 if (post != NULL) {
39236c6e 7439 *post = 0;
0a7de745 7440 }
39236c6e 7441
0a7de745 7442 return EJUSTRETURN;
6d2010ae 7443}
7444
316670eb 7445errno_t
6d2010ae 7446ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
7447{
7448#pragma unused(ifp, cmd, arg)
0a7de745 7449 return EOPNOTSUPP;
6d2010ae 7450}
7451
7452static errno_t
7453ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
7454{
7455#pragma unused(ifp, tm, f)
7456 /* XXX not sure what to do here */
0a7de745 7457 return 0;
6d2010ae 7458}
7459
7460static void
7461ifp_if_free(struct ifnet *ifp)
7462{
7463#pragma unused(ifp)
7464}
7465
7466static void
7467ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
7468{
7469#pragma unused(ifp, e)
9bccf70c 7470}
7471
0a7de745 7472int
7473dlil_if_acquire(u_int32_t family, const void *uniqueid,
a39ff7e2 7474 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
6d2010ae 7475{
7476 struct ifnet *ifp1 = NULL;
7477 struct dlil_ifnet *dlifp1 = NULL;
7478 void *buf, *base, **pbuf;
7479 int ret = 0;
7480
a39ff7e2 7481 VERIFY(*ifp == NULL);
7ddcb079 7482 dlil_if_lock();
a39ff7e2 7483 /*
7484 * We absolutely can't have two in-use interfaces with the
7485 * same name; to guarantee that, the whole list has to be
7486 * traversed completely.
7487 */
6d2010ae 7488 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
7489 ifp1 = (struct ifnet *)dlifp1;
7490
0a7de745 7491 if (ifp1->if_family != family) {
6d2010ae 7492 continue;
0a7de745 7493 }
6d2010ae 7494
a39ff7e2 7495 /*
7496 * If the interface is in use, return EBUSY if either the unique
7497 * id or the interface extended name is the same.
7498 */
6d2010ae 7499 lck_mtx_lock(&dlifp1->dl_if_lock);
a39ff7e2 7500 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
6d2010ae 7501 if (dlifp1->dl_if_flags & DLIF_INUSE) {
a39ff7e2 7502 lck_mtx_unlock(&dlifp1->dl_if_lock);
7503 ret = EBUSY;
7504 goto end;
7505 }
7506 }
7507
7508 if (uniqueid_len) {
7509 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
7510 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
7511 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6d2010ae 7512 lck_mtx_unlock(&dlifp1->dl_if_lock);
a39ff7e2 7513 ret = EBUSY;
9bccf70c 7514 goto end;
a39ff7e2 7515 } else {
0a7de745 7516 dlifp1->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
a39ff7e2 7517 /* Cache the first interface that can be recycled */
0a7de745 7518 if (*ifp == NULL) {
a39ff7e2 7519 *ifp = ifp1;
0a7de745 7520 }
a39ff7e2 7521 /*
7522 * XXX Do not break or jump to end as we have to traverse
7523 * the whole list to ensure there are no name collisions
7524 */
6d2010ae 7525 }
6d2010ae 7526 }
7527 }
7528 lck_mtx_unlock(&dlifp1->dl_if_lock);
7529 }
7530
a39ff7e2 7531 /* If there's an interface that can be recycled, use that */
0a7de745 7532 if (*ifp != NULL) {
a39ff7e2 7533 goto end;
0a7de745 7534 }
a39ff7e2 7535
6d2010ae 7536 /* no interface found, allocate a new one */
7537 buf = zalloc(dlif_zone);
7538 if (buf == NULL) {
7539 ret = ENOMEM;
7540 goto end;
7541 }
7542 bzero(buf, dlif_bufsize);
7543
7544 /* Get the 64-bit aligned base address for this object */
0a7de745 7545 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
7546 sizeof(u_int64_t));
6d2010ae 7547 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
7548
7549 /*
7550 * Wind back a pointer size from the aligned base and
7551 * save the original address so we can free it later.
7552 */
0a7de745 7553 pbuf = (void **)((intptr_t)base - sizeof(void *));
6d2010ae 7554 *pbuf = buf;
7555 dlifp1 = base;
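	/*
	 * Worked example (illustrative; addresses made up): if zalloc()
	 * returns buf == 0x1004, then base = P2ROUNDUP(0x1004 + 8, 8)
	 * == 0x1010, and the original pointer is stashed at base - 8.
	 * The free side reads it back the same way, so zfree() is always
	 * handed buf rather than the aligned alias.
	 */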
7556
7557 if (uniqueid_len) {
7558 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
7559 M_NKE, M_WAITOK);
7560 if (dlifp1->dl_if_uniqueid == NULL) {
5ba3f43e 7561 zfree(dlif_zone, buf);
6d2010ae 7562 ret = ENOMEM;
7563 goto end;
7564 }
7565 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
7566 dlifp1->dl_if_uniqueid_len = uniqueid_len;
7567 }
7568
7569 ifp1 = (struct ifnet *)dlifp1;
7570 dlifp1->dl_if_flags = DLIF_INUSE;
7571 if (ifnet_debug) {
7572 dlifp1->dl_if_flags |= DLIF_DEBUG;
7573 dlifp1->dl_if_trace = dlil_if_trace;
7574 }
7575 ifp1->if_name = dlifp1->dl_if_namestorage;
39236c6e 7576 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
316670eb 7577
7578 /* initialize interface description */
7579 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
7580 ifp1->if_desc.ifd_len = 0;
7581 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
7582
5ba3f43e 7583
2d21ac55 7584#if CONFIG_MACF_NET
6d2010ae 7585 mac_ifnet_label_init(ifp1);
2d21ac55 7586#endif
9bccf70c 7587
316670eb 7588 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
7589 DLIL_PRINTF("%s: failed to allocate if local stats, "
7590 "error: %d\n", __func__, ret);
7591 /* This probably shouldn't be fatal */
7592 ret = 0;
7593 }
7594
6d2010ae 7595 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
7596 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
7597 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
7598 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6d2010ae 7599 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
7600 ifnet_lock_attr);
7601 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
3e170ce0 7602#if INET
7603 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
7604 ifnet_lock_attr);
7605 ifp1->if_inetdata = NULL;
7606#endif
39236c6e 7607#if INET6
3e170ce0 7608 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
7609 ifnet_lock_attr);
39236c6e 7610 ifp1->if_inet6data = NULL;
7611#endif
3e170ce0 7612 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
7613 ifnet_lock_attr);
7614 ifp1->if_link_status = NULL;
6d2010ae 7615
316670eb 7616 /* for send data paths */
7617 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
7618 ifnet_lock_attr);
7619 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
7620 ifnet_lock_attr);
7621 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
7622 ifnet_lock_attr);
7623
7624 /* for receive data paths */
7625 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
7626 ifnet_lock_attr);
7627
5ba3f43e 7628 /* thread call allocation is done with sleeping zalloc */
7629 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
7630 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
7631 if (ifp1->if_dt_tcall == NULL) {
7632 panic_plain("%s: couldn't create if_dt_tcall", __func__);
7633 /* NOTREACHED */
7634 }
7635
6d2010ae 7636 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
7637
7638 *ifp = ifp1;
9bccf70c 7639
7640end:
7ddcb079 7641 dlil_if_unlock();
9bccf70c 7642
0a7de745 7643 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
7644 IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
6d2010ae 7645
0a7de745 7646 return ret;
9bccf70c 7647}
7648
2d21ac55 7649__private_extern__ void
0a7de745 7650dlil_if_release(ifnet_t ifp)
6d2010ae 7651{
7652 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
7653
5ba3f43e 7654 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
7655 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
7656 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
7657 }
7658
6d2010ae 7659 ifnet_lock_exclusive(ifp);
7660 lck_mtx_lock(&dlifp->dl_if_lock);
7661 dlifp->dl_if_flags &= ~DLIF_INUSE;
fe8ab488 7662 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6d2010ae 7663 ifp->if_name = dlifp->dl_if_namestorage;
39236c6e 7664 /* Reset external name (name + unit) */
7665 ifp->if_xname = dlifp->dl_if_xnamestorage;
39037602 7666 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
39236c6e 7667 "%s?", ifp->if_name);
6d2010ae 7668 lck_mtx_unlock(&dlifp->dl_if_lock);
2d21ac55 7669#if CONFIG_MACF_NET
6d2010ae 7670 /*
39037602 7671 * We can either recycle the MAC label here or in dlil_if_acquire().
7672 * It seems logical to do it here but this means that anything that
7673 * still has a handle on ifp will now see it as unlabeled.
7674 * Since the interface is "dead" that may be OK. Revisit later.
7675 */
6d2010ae 7676 mac_ifnet_label_recycle(ifp);
2d21ac55 7677#endif
6d2010ae 7678 ifnet_lock_done(ifp);
9bccf70c 7679}
4a3eedf9 7680
7ddcb079 7681__private_extern__ void
7682dlil_if_lock(void)
7683{
7684 lck_mtx_lock(&dlil_ifnet_lock);
7685}
7686
7687__private_extern__ void
7688dlil_if_unlock(void)
7689{
7690 lck_mtx_unlock(&dlil_ifnet_lock);
7691}
7692
7693__private_extern__ void
7694dlil_if_lock_assert(void)
7695{
5ba3f43e 7696 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
7ddcb079 7697}
7698
4a3eedf9 7699__private_extern__ void
7700dlil_proto_unplumb_all(struct ifnet *ifp)
7701{
7702 /*
39236c6e 7703 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
7704 * each bucket contains exactly one entry; PF_VLAN does not need an
7705 * explicit unplumb.
4a3eedf9 7706 *
39236c6e 7707 * if_proto_hash[3] is for other protocols; we expect anything
4a3eedf9 7708 * in this bucket to respond to the DETACHING event (which would
7709 * have happened by now) and do the unplumb then.
7710 */
7711 (void) proto_unplumb(PF_INET, ifp);
7712#if INET6
7713 (void) proto_unplumb(PF_INET6, ifp);
7714#endif /* INET6 */
4a3eedf9 7715}
6d2010ae 7716
7717static void
7718ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
7719{
7720 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7721 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7722
0a7de745 7723 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
6d2010ae 7724
7725 lck_mtx_unlock(&ifp->if_cached_route_lock);
7726}
7727
7728static void
7729ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
7730{
7731 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7732 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7733
7734 if (ifp->if_fwd_cacheok) {
0a7de745 7735 route_copyin(src, &ifp->if_src_route, sizeof(*src));
6d2010ae 7736 } else {
39236c6e 7737 ROUTE_RELEASE(src);
6d2010ae 7738 }
7739 lck_mtx_unlock(&ifp->if_cached_route_lock);
7740}
7741
7742#if INET6
7743static void
7744ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
7745{
7746 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7747 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7748
7749 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
0a7de745 7750 sizeof(*dst));
6d2010ae 7751
7752 lck_mtx_unlock(&ifp->if_cached_route_lock);
7753}
7754
7755static void
7756ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
7757{
7758 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7759 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7760
7761 if (ifp->if_fwd_cacheok) {
7762 route_copyin((struct route *)src,
0a7de745 7763 (struct route *)&ifp->if_src_route6, sizeof(*src));
6d2010ae 7764 } else {
39236c6e 7765 ROUTE_RELEASE(src);
6d2010ae 7766 }
7767 lck_mtx_unlock(&ifp->if_cached_route_lock);
7768}
7769#endif /* INET6 */
7770
7771struct rtentry *
0a7de745 7772ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6d2010ae 7773{
0a7de745 7774 struct route src_rt;
7775 struct sockaddr_in *dst;
316670eb 7776
7777 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6d2010ae 7778
7779 ifp_src_route_copyout(ifp, &src_rt);
7780
39236c6e 7781 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
7782 ROUTE_RELEASE(&src_rt);
7783 if (dst->sin_family != AF_INET) {
0a7de745 7784 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
7785 dst->sin_len = sizeof(src_rt.ro_dst);
6d2010ae 7786 dst->sin_family = AF_INET;
7787 }
7788 dst->sin_addr = src_ip;
7789
5ba3f43e 7790 VERIFY(src_rt.ro_rt == NULL);
7791 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
7792 0, 0, ifp->if_index);
6d2010ae 7793
5ba3f43e 7794 if (src_rt.ro_rt != NULL) {
7795 /* retain a ref, copyin consumes one */
0a7de745 7796 struct rtentry *rte = src_rt.ro_rt;
5ba3f43e 7797 RT_ADDREF(rte);
7798 ifp_src_route_copyin(ifp, &src_rt);
7799 src_rt.ro_rt = rte;
6d2010ae 7800 }
7801 }
7802
0a7de745 7803 return src_rt.ro_rt;
6d2010ae 7804}
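/*
 * Usage sketch (illustrative; ifp and ip_src are assumed): the rtentry
 * returned above, if any, carries a reference that the caller owns and
 * must drop.
 */
static void
example_use_cached_route(struct ifnet *ifp, struct in_addr ip_src)
{
	struct rtentry *rt = ifnet_cached_rtlookup_inet(ifp, ip_src);

	if (rt != NULL) {
		/* ... consult rt->rt_ifp, rt->rt_flags, ... */
		rtfree(rt);	/* drop the reference the lookup took */
	}
}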
7805
7806#if INET6
39037602 7807struct rtentry *
6d2010ae 7808ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
7809{
7810 struct route_in6 src_rt;
7811
7812 ifp_src_route6_copyout(ifp, &src_rt);
7813
39236c6e 7814 if (ROUTE_UNUSABLE(&src_rt) ||
7815 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
7816 ROUTE_RELEASE(&src_rt);
7817 if (src_rt.ro_dst.sin6_family != AF_INET6) {
0a7de745 7818 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
7819 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
6d2010ae 7820 src_rt.ro_dst.sin6_family = AF_INET6;
7821 }
7822 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
316670eb 7823 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
0a7de745 7824 sizeof(src_rt.ro_dst.sin6_addr));
6d2010ae 7825
7826 if (src_rt.ro_rt == NULL) {
7827 src_rt.ro_rt = rtalloc1_scoped(
0a7de745 7828 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
7829 ifp->if_index);
6d2010ae 7830
7831 if (src_rt.ro_rt != NULL) {
7832 /* retain a ref, copyin consumes one */
0a7de745 7833 struct rtentry *rte = src_rt.ro_rt;
6d2010ae 7834 RT_ADDREF(rte);
7835 ifp_src_route6_copyin(ifp, &src_rt);
7836 src_rt.ro_rt = rte;
7837 }
7838 }
7839 }
7840
0a7de745 7841 return src_rt.ro_rt;
6d2010ae 7842}
7843#endif /* INET6 */
316670eb 7844
7845void
3e170ce0 7846if_lqm_update(struct ifnet *ifp, int lqm, int locked)
316670eb 7847{
7848 struct kev_dl_link_quality_metric_data ev_lqm_data;
7849
7850 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
7851
7852 /* Normalize to edge */
5ba3f43e 7853 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
7854 lqm = IFNET_LQM_THRESH_ABORT;
7855 atomic_bitset_32(&tcbinfo.ipi_flags,
7856 INPCBINFO_HANDLE_LQM_ABORT);
7857 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
7858 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
7859 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
7860 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
7861 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
7862 lqm <= IFNET_LQM_THRESH_POOR) {
316670eb 7863 lqm = IFNET_LQM_THRESH_POOR;
5ba3f43e 7864 } else if (lqm > IFNET_LQM_THRESH_POOR &&
7865 lqm <= IFNET_LQM_THRESH_GOOD) {
316670eb 7866 lqm = IFNET_LQM_THRESH_GOOD;
5ba3f43e 7867 }
316670eb 7868
3e170ce0 7869 /*
7870 * Take the lock if needed
7871 */
0a7de745 7872 if (!locked) {
3e170ce0 7873 ifnet_lock_exclusive(ifp);
0a7de745 7874 }
3e170ce0 7875
7876 if (lqm == ifp->if_interface_state.lqm_state &&
39037602 7877 (ifp->if_interface_state.valid_bitmask &
3e170ce0 7878 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
7879 /*
7880 * Release the lock if was not held by the caller
7881 */
0a7de745 7882 if (!locked) {
3e170ce0 7883 ifnet_lock_done(ifp);
0a7de745 7884 }
7885 return; /* nothing to update */
316670eb 7886 }
3e170ce0 7887 ifp->if_interface_state.valid_bitmask |=
0a7de745 7888 IF_INTERFACE_STATE_LQM_STATE_VALID;
3e170ce0 7889 ifp->if_interface_state.lqm_state = lqm;
7890
7891 /*
7892 * Don't want to hold the lock when issuing kernel events
7893 */
316670eb 7894 ifnet_lock_done(ifp);
7895
0a7de745 7896 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
316670eb 7897 ev_lqm_data.link_quality_metric = lqm;
7898
7899 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
0a7de745 7900 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
3e170ce0 7901
7902 /*
7903 * Reacquire the lock for the caller
7904 */
0a7de745 7905 if (locked) {
3e170ce0 7906 ifnet_lock_exclusive(ifp);
0a7de745 7907 }
3e170ce0 7908}
7909
7910static void
7911if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
7912{
7913 struct kev_dl_rrc_state kev;
39037602 7914
3e170ce0 7915 if (rrc_state == ifp->if_interface_state.rrc_state &&
7916 (ifp->if_interface_state.valid_bitmask &
0a7de745 7917 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
3e170ce0 7918 return;
0a7de745 7919 }
3e170ce0 7920
7921 ifp->if_interface_state.valid_bitmask |=
7922 IF_INTERFACE_STATE_RRC_STATE_VALID;
7923
7924 ifp->if_interface_state.rrc_state = rrc_state;
7925
7926 /*
7927 * Don't want to hold the lock when issuing kernel events
7928 */
7929 ifnet_lock_done(ifp);
7930
7931 bzero(&kev, sizeof(struct kev_dl_rrc_state));
7932 kev.rrc_state = rrc_state;
7933
7934 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
7935 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
7936
7937 ifnet_lock_exclusive(ifp);
7938}
7939
7940errno_t
7941if_state_update(struct ifnet *ifp,
39037602 7942 struct if_interface_state *if_interface_state)
3e170ce0 7943{
7944 u_short if_index_available = 0;
7945
7946 ifnet_lock_exclusive(ifp);
7947
7948 if ((ifp->if_type != IFT_CELLULAR) &&
7949 (if_interface_state->valid_bitmask &
7950 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
7951 ifnet_lock_done(ifp);
0a7de745 7952 return ENOTSUP;
3e170ce0 7953 }
7954 if ((if_interface_state->valid_bitmask &
7955 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
7956 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
7957 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
7958 ifnet_lock_done(ifp);
0a7de745 7959 return EINVAL;
3e170ce0 7960 }
7961 if ((if_interface_state->valid_bitmask &
7962 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
7963 if_interface_state->rrc_state !=
7964 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
7965 if_interface_state->rrc_state !=
7966 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
7967 ifnet_lock_done(ifp);
0a7de745 7968 return EINVAL;
3e170ce0 7969 }
7970
7971 if (if_interface_state->valid_bitmask &
7972 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7973 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
7974 }
7975 if (if_interface_state->valid_bitmask &
7976 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7977 if_rrc_state_update(ifp, if_interface_state->rrc_state);
7978 }
7979 if (if_interface_state->valid_bitmask &
7980 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7981 ifp->if_interface_state.valid_bitmask |=
7982 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7983 ifp->if_interface_state.interface_availability =
7984 if_interface_state->interface_availability;
7985
7986 if (ifp->if_interface_state.interface_availability ==
7987 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
7988 if_index_available = ifp->if_index;
7989 }
7990 }
7991 ifnet_lock_done(ifp);
7992
7993 /*
7994 * Check if the TCP connections going on this interface should be
7995 * forced to send probe packets instead of waiting for TCP timers
7996 * to fire. This will be done when there is an explicit
7997 * notification that the interface became available.
7998 */
0a7de745 7999 if (if_index_available > 0) {
3e170ce0 8000 tcp_interface_send_probe(if_index_available);
0a7de745 8001 }
3e170ce0 8002
0a7de745 8003 return 0;
3e170ce0 8004}
8005
8006void
8007if_get_state(struct ifnet *ifp,
39037602 8008 struct if_interface_state *if_interface_state)
3e170ce0 8009{
8010 ifnet_lock_shared(ifp);
8011
8012 if_interface_state->valid_bitmask = 0;
8013
8014 if (ifp->if_interface_state.valid_bitmask &
8015 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8016 if_interface_state->valid_bitmask |=
8017 IF_INTERFACE_STATE_RRC_STATE_VALID;
8018 if_interface_state->rrc_state =
8019 ifp->if_interface_state.rrc_state;
8020 }
8021 if (ifp->if_interface_state.valid_bitmask &
8022 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8023 if_interface_state->valid_bitmask |=
8024 IF_INTERFACE_STATE_LQM_STATE_VALID;
8025 if_interface_state->lqm_state =
8026 ifp->if_interface_state.lqm_state;
8027 }
8028 if (ifp->if_interface_state.valid_bitmask &
8029 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8030 if_interface_state->valid_bitmask |=
8031 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8032 if_interface_state->interface_availability =
8033 ifp->if_interface_state.interface_availability;
8034 }
8035
8036 ifnet_lock_done(ifp);
8037}
8038
8039errno_t
8040if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8041{
8042 ifnet_lock_exclusive(ifp);
8043 if (conn_probe > 1) {
8044 ifnet_lock_done(ifp);
0a7de745 8045 return EINVAL;
3e170ce0 8046 }
0a7de745 8047 if (conn_probe == 0) {
3e170ce0 8048 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
0a7de745 8049 } else {
3e170ce0 8050 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
0a7de745 8051 }
3e170ce0 8052 ifnet_lock_done(ifp);
8053
5ba3f43e 8054#if NECP
8055 necp_update_all_clients();
8056#endif /* NECP */
8057
3e170ce0 8058 tcp_probe_connectivity(ifp, conn_probe);
0a7de745 8059 return 0;
316670eb 8060}
8061
8062/* for uuid.c */
8063int
8064uuid_get_ethernet(u_int8_t *node)
8065{
8066 struct ifnet *ifp;
8067 struct sockaddr_dl *sdl;
8068
8069 ifnet_head_lock_shared();
8070 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
8071 ifnet_lock_shared(ifp);
8072 IFA_LOCK_SPIN(ifp->if_lladdr);
8073 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
8074 if (sdl->sdl_type == IFT_ETHER) {
8075 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
8076 IFA_UNLOCK(ifp->if_lladdr);
8077 ifnet_lock_done(ifp);
8078 ifnet_head_done();
0a7de745 8079 return 0;
316670eb 8080 }
8081 IFA_UNLOCK(ifp->if_lladdr);
8082 ifnet_lock_done(ifp);
8083 }
8084 ifnet_head_done();
8085
0a7de745 8086 return -1;
316670eb 8087}
8088
8089static int
8090sysctl_rxpoll SYSCTL_HANDLER_ARGS
8091{
8092#pragma unused(arg1, arg2)
39236c6e 8093 uint32_t i;
8094 int err;
316670eb 8095
8096 i = if_rxpoll;
8097
8098 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745 8099 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8100 return err;
8101 }
316670eb 8102
0a7de745 8103 if (net_rxpoll == 0) {
8104 return ENXIO;
8105 }
316670eb 8106
8107 if_rxpoll = i;
0a7de745 8108 return err;
316670eb 8109}
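/*
 * Userland sketch (illustrative, not kernel code; the OID string is
 * assumed from the SYSCTL_PROC declarations earlier in dlil.c):
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint32_t cur = 0, on = 1;
 *		size_t len = sizeof (cur);
 *
 *		if (sysctlbyname("net.link.generic.system.rxpoll",
 *		    &cur, &len, NULL, 0) == 0)
 *			printf("rxpoll = %u\n", cur);
 *		// writes round-trip through sysctl_rxpoll() above
 *		(void) sysctlbyname("net.link.generic.system.rxpoll",
 *		    NULL, NULL, &on, sizeof (on));
 *		return 0;
 *	}
 */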
8110
8111static int
39236c6e 8112sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
316670eb 8113{
8114#pragma unused(arg1, arg2)
39236c6e 8115 uint64_t q;
8116 int err;
316670eb 8117
39236c6e 8118 q = if_rxpoll_mode_holdtime;
316670eb 8119
39236c6e 8120 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745 8121 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8122 return err;
8123 }
316670eb 8124
0a7de745 8125 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
39236c6e 8126 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
0a7de745 8127 }
39236c6e 8128
8129 if_rxpoll_mode_holdtime = q;
316670eb 8130
0a7de745 8131 return err;
316670eb 8132}
8133
8134static int
39236c6e 8135sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
316670eb 8136{
8137#pragma unused(arg1, arg2)
39236c6e 8138 uint64_t q;
8139 int err;
316670eb 8140
39236c6e 8141 q = if_rxpoll_sample_holdtime;
316670eb 8142
39236c6e 8143 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745 8144 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8145 return err;
8146 }
316670eb 8147
0a7de745 8148 if (q < IF_RXPOLL_SAMPLETIME_MIN) {
39236c6e 8149 q = IF_RXPOLL_SAMPLETIME_MIN;
0a7de745 8150 }
39236c6e 8151
8152 if_rxpoll_sample_holdtime = q;
316670eb 8153
0a7de745 8154 return err;
316670eb 8155}
8156
39236c6e 8157static int
8158sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
316670eb 8159{
39236c6e 8160#pragma unused(arg1, arg2)
8161 uint64_t q;
8162 int err;
316670eb 8163
39236c6e 8164 q = if_rxpoll_interval_time;
316670eb 8165
39236c6e 8166 err = sysctl_handle_quad(oidp, &q, 0, req);
0a7de745 8167 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8168 return err;
8169 }
39236c6e 8170
0a7de745 8171 if (q < IF_RXPOLL_INTERVALTIME_MIN) {
39236c6e 8172 q = IF_RXPOLL_INTERVALTIME_MIN;
0a7de745 8173 }
316670eb 8174
39236c6e 8175 if_rxpoll_interval_time = q;
316670eb 8176
0a7de745 8177 return err;
316670eb 8178}
8179
39236c6e 8180static int
8181sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
316670eb 8182{
39236c6e 8183#pragma unused(arg1, arg2)
8184 uint32_t i;
8185 int err;
316670eb 8186
39236c6e 8187 i = if_rxpoll_wlowat;
316670eb 8188
39236c6e 8189 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745 8190 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8191 return err;
8192 }
316670eb 8193
0a7de745 8194 if (i == 0 || i >= if_rxpoll_whiwat) {
8195 return EINVAL;
8196 }
39236c6e 8197
8198 if_rxpoll_wlowat = i;
0a7de745 8199 return err;
316670eb 8200}
8201
39236c6e 8202static int
8203sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
316670eb 8204{
39236c6e 8205#pragma unused(arg1, arg2)
8206 uint32_t i;
8207 int err;
316670eb 8208
39236c6e 8209 i = if_rxpoll_whiwat;
316670eb 8210
39236c6e 8211 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745 8212 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8213 return err;
8214 }
316670eb 8215
0a7de745 8216 if (i <= if_rxpoll_wlowat) {
8217 return EINVAL;
8218 }
39236c6e 8219
8220 if_rxpoll_whiwat = i;
0a7de745 8221 return err;
316670eb 8222}
8223
8224static int
39236c6e 8225sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
316670eb 8226{
39236c6e 8227#pragma unused(arg1, arg2)
8228 int i, err;
316670eb 8229
39236c6e 8230 i = if_sndq_maxlen;
316670eb 8231
39236c6e 8232 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745 8233 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8234 return err;
8235 }
316670eb 8236
0a7de745 8237 if (i < IF_SNDQ_MINLEN) {
39236c6e 8238 i = IF_SNDQ_MINLEN;
0a7de745 8239 }
316670eb 8240
39236c6e 8241 if_sndq_maxlen = i;
0a7de745 8242 return err;
316670eb 8243}
8244
39236c6e 8245static int
8246sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
316670eb 8247{
39236c6e
A
8248#pragma unused(arg1, arg2)
8249 int i, err;
8250
8251 i = if_rcvq_maxlen;
8252
8253 err = sysctl_handle_int(oidp, &i, 0, req);
0a7de745 8254 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8255 return err;
8256 }
39236c6e 8257
0a7de745 8258 if (i < IF_RCVQ_MINLEN) {
39236c6e 8259 i = IF_RCVQ_MINLEN;
0a7de745 8260 }
39236c6e 8261
8262 if_rcvq_maxlen = i;
0a7de745 8263 return err;
316670eb 8264}
8265
8266void
8267dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
8268 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
8269{
8270 struct kev_dl_node_presence kev;
8271 struct sockaddr_dl *sdl;
8272 struct sockaddr_in6 *sin6;
8273
8274 VERIFY(ifp);
8275 VERIFY(sa);
8276 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8277
0a7de745 8278 bzero(&kev, sizeof(kev));
316670eb 8279 sin6 = &kev.sin6_node_address;
8280 sdl = &kev.sdl_node_address;
8281 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8282 kev.rssi = rssi;
8283 kev.link_quality_metric = lqm;
8284 kev.node_proximity_metric = npm;
0a7de745 8285 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
316670eb 8286
8287 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
8288 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
0a7de745 8289 &kev.link_data, sizeof(kev));
316670eb
A
8290}
8291
8292void
8293dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
8294{
8295 struct kev_dl_node_absence kev;
8296 struct sockaddr_in6 *sin6;
8297 struct sockaddr_dl *sdl;
8298
8299 VERIFY(ifp);
8300 VERIFY(sa);
8301 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8302
0a7de745 8303 bzero(&kev, sizeof(kev));
316670eb 8304 sin6 = &kev.sin6_node_address;
8305 sdl = &kev.sdl_node_address;
8306 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8307
8308 nd6_alt_node_absent(ifp, sin6);
8309 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
0a7de745 8310 &kev.link_data, sizeof(kev));
316670eb 8311}
8312
39236c6e 8313const void *
8314dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
0a7de745 8315 kauth_cred_t *credp)
39236c6e
A
8316{
8317 const u_int8_t *bytes;
8318 size_t size;
8319
8320 bytes = CONST_LLADDR(sdl);
8321 size = sdl->sdl_alen;
8322
8323#if CONFIG_MACF
8324 if (dlil_lladdr_ckreq) {
8325 switch (sdl->sdl_type) {
8326 case IFT_ETHER:
39236c6e 8327 case IFT_IEEE1394:
39236c6e
A
8328 break;
8329 default:
8330 credp = NULL;
8331 break;
0a7de745
A
8332 }
8333 ;
39236c6e
A
8334
8335 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
8336 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
0a7de745 8337 [0] = 2
39236c6e
A
8338 };
8339
5ba3f43e 8340 bytes = unspec;
39236c6e
A
8341 }
8342 }
8343#else
8344#pragma unused(credp)
8345#endif
8346
0a7de745
A
8347 if (sizep != NULL) {
8348 *sizep = size;
8349 }
8350 return bytes;
39236c6e
A
8351}

void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof(kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL) {
		bcopy(info, &kev.info, DLIL_MODARGLEN);
	}

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof(kev));
}
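
/*
 * Illustrative only: a module that detects a malfunction can surface
 * it to user space as a KEV_DL_ISSUES event.  The module id and
 * argument bytes below are made-up placeholders; real values are
 * defined by the reporting module:
 *
 *	u_int8_t modid[DLIL_MODIDLEN] = { 0xde, 0xad };
 *	u_int8_t info[DLIL_MODARGLEN] = { 0x01 };
 *
 *	dlil_report_issues(ifp, modid, info);
 */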

errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
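
/*
 * From user space this path is reached through the
 * SIOCSIFOPPORTUNISTIC/SIOCGIFOPPORTUNISTIC ioctls.  A hedged sketch
 * (ifr_opportunistic is private to this ioctl pair; error handling
 * elided for brevity):
 *
 *	struct ifreq ifr;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	bzero(&ifr, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *	ifr.ifr_opportunistic.ifo_flags = IFRIFOF_BLOCK_OPPORTUNISTIC;
 *	(void) ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr);
 *	// on return, ifo_inuse carries the opportunistic flow count
 */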

int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq)) {
		IFCQ_GET_THROTTLE(ifq, *level, err);
	}
	IFCQ_UNLOCK(ifq);

	return err;
}

int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return EINVAL;
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq)) {
		IFCQ_SET_THROTTLE(ifq, level, err);
	}
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF) {
			ifnet_start(ifp);
		}
	}

	return err;
}
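
/*
 * Note on use: only interfaces with a starter thread (IFEF_TXSTART)
 * can be throttled, and the setting applies to the IFCQ send queue.
 * An in-kernel caller would simply do, for example:
 *
 *	if (ifnet_set_throttle(ifp, IFNET_THROTTLE_OPPORTUNISTIC) == 0) {
 *		// background traffic is now suppressed on ifp
 *	}
 *
 * Dropping back to IFNET_THROTTLE_OFF kicks ifnet_start() so that
 * packets held back while throttled get flushed promptly.
 */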

errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}
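
/*
 * A hedged user-space sketch of the SIOCSIFLOG path (field names as
 * used above; s is an already-open AF_INET socket, and the caller
 * needs PRIV_NET_INTERFACE_CONTROL, so effectively root):
 *
 *	struct ifreq ifr;
 *
 *	bzero(&ifr, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *	ifr.ifr_log.ifl_level = 10;	// some [IFNET_LOG_MIN, IFNET_LOG_MAX] value
 *	ifr.ifr_log.ifl_flags = IFNET_LOGF_DLIL;
 *	ifr.ifr_log.ifl_category = 0;	// illustrative values
 *	ifr.ifr_log.ifl_subcategory = 0;
 *	(void) ioctl(s, SIOCSIFLOG, &ifr);
 */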

int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return err;
}

int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL) {
		*level = ifp->if_log.level;
	}
	if (flags != NULL) {
		*flags = ifp->if_log.flags;
	}
	if (category != NULL) {
		*category = ifp->if_log.category;
	}
	if (subcategory != NULL) {
		*subcategory = ifp->if_log.subcategory;
	}

	return 0;
}

int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL) {
		return EOPNOTSUPP;
	}

	bzero(&na, sizeof(na));
	na.address_family = af;

	return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof(na), &na);
}

errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	*flowid = ifp->if_flowhash;

	return 0;
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return err;
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	ifnet_start_common(ifp, TRUE);
	return 0;
}

void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			(void) ifnet_enable_output(ifp);
		}
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
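
/*
 * Flow-control lifecycle, as sketched by the routines above: a
 * driver whose transmit resources fill up calls
 * ifnet_disable_output(), which registers the interface's flow hash
 * in ifnet_fc_tree and marks the starter thread flow-controlled.
 * When the lower layer later issues a flow advisory carrying that
 * same hash, ifnet_flowadv() looks the entry up and re-enables
 * output.  Schematically:
 *
 *	// driver TX ring full
 *	(void) ifnet_disable_output(ifp);
 *	// ... later, advisory from below (e.g. packet completion)
 *	ifnet_flowadv(ifp->if_flowhash);
 */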

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return fc1->ifce_flowhash - fc2->ifce_flowhash;
}

static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return ENOMEM;
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}

static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}

static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0) {
		ifnet_flowhash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;
}

int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}

int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}

#if INET6
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len = prefixes[i].prefix_len;
		struct in6_addr *prefix = &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}

int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
			found_one = 1;
		}
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes) {
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
	}

out:
	if_inet6data_lock_done(ifp);

	return error;
}
#endif /* INET6 */
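
/*
 * Illustrative only: installing the well-known NAT64 prefix
 * 64:ff9b::/96 into slot 0, with the remaining slots left cleared.
 * A real caller would typically derive the prefix from RFC 7050
 * discovery; inet_pton() is shown for brevity and stands in for
 * whatever address-parsing facility the caller has available:
 *
 *	struct ipv6_prefix prefixes[NAT64_MAX_NUM_PREFIXES];
 *
 *	bzero(prefixes, sizeof(prefixes));
 *	(void) inet_pton(AF_INET6, "64:ff9b::",
 *	    &prefixes[0].ipv6_prefix);
 *	prefixes[0].prefix_len = NAT64_PREFIX_LEN_96;
 *	(void) ifnet_set_nat64prefix(ifp, prefixes);
 */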

static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
		return;
	}

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP) {
			hwcksum_dbg_finalized_hdr++;
		}
		if (did_sw & CSUM_DELAY_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
#endif /* INET6 */
	default:
		return;
	}
}

static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len) {
			return;
		}

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware that
		 * performs 16-bit 1's complement sums beginning at
		 * various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
				return;
			}

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
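
/*
 * Worked example of the 16-bit 1's complement arithmetic used above
 * (illustrative): over the bytes { 0x01, 0x02, 0x03, 0x04 }, the
 * network-order words are 0x0102 and 0x0304, so m_sum16(m, 0, 4)
 * yields 0x0102 + 0x0304 = 0x0406.  m_adj_sum16() can then move the
 * sum to a different start offset by folding in only the bytes
 * between the old and new offsets, which is what
 * HWCKSUM_DBG_PARTIAL_RXOFF_ADJ exercises against a freshly
 * computed reference.
 */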

static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (hwcksum_dbg == 0) {
		return ENODEV;
	}

	if ((i & ~HWCKSUM_DBG_MASK) != 0) {
		return EINVAL;
	}

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return err;
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_forced = i;

	return err;
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_adj = i;

	return err;
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	/* read-only: reject any attempt to set */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return err;
}


#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;
	uint16_t len;
	uint16_t sumr;	/* reference */
	uint16_t sumrp;	/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))

static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);	/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sumr, sumtbl[n].sumrp);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */

#define CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}

static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}

void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
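
/*
 * Timing sketch: with threshold_interval set to N seconds, the
 * rate-limit above works out to at most one nstat notification per
 * interval.  clock_deadline_for_periodic_event() keeps deadlines
 * phase-aligned to the interval, i.e. conceptually:
 *
 *	deadline = now + (ival - (now % ival));	// next interval edge
 *
 * so bursts that repeatedly cross the byte threshold still coalesce
 * into one thread call per interval.
 */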

#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames():
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The return length gives used_frames_count.
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root may look at other processes' TCP frames.
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		goto done;
	}
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL) {
		_FREE(frames_array, M_TEMP);
	}
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
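
/*
 * A hedged user-space sketch of driving the sysctl above by MIB.
 * The OID leaf name below is hypothetical (check the SYSCTL_NODE
 * registration); it is resolved by name first, then the three input
 * parameters are appended per the comment above:
 *
 *	int mib[CTL_MAXNAME] = { 0 };
 *	size_t miblen = CTL_MAXNAME;
 *	struct ifnet_keepalive_offload_frame frames[8];
 *	size_t len = sizeof(frames);
 *
 *	// hypothetical leaf name; DEVELOPMENT/DEBUG kernels only
 *	sysctlnametomib("net.link.generic.system.get_kao_frames",
 *	    mib, &miblen);
 *	mib[miblen + 0] = if_nametoindex("en0");	// ifp
 *	mib[miblen + 1] = 8;	// frames_array_count
 *	mib[miblen + 2] = 0;	// frame_data_offset
 *	if (sysctl(mib, miblen + 3, frames, &len, NULL, 0) == 0) {
 *		// len / sizeof(frames[0]) == used_frames_count
 *	}
 */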

void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}

static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}