bsd/net/dlil.c (apple/xnu, tag xnu-7195.101.1)
1 /*
2 * Copyright (c) 1999-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35 #include <ptrauth.h>
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/if_arp.h>
53 #include <net/iptap.h>
54 #include <net/pktap.h>
55 #include <sys/kern_event.h>
56 #include <sys/kdebug.h>
57 #include <sys/mcache.h>
58 #include <sys/syslog.h>
59 #include <sys/protosw.h>
60 #include <sys/priv.h>
61
62 #include <kern/assert.h>
63 #include <kern/task.h>
64 #include <kern/thread.h>
65 #include <kern/sched_prim.h>
66 #include <kern/locks.h>
67 #include <kern/zalloc.h>
68
69 #include <net/kpi_protocol.h>
70 #include <net/if_types.h>
71 #include <net/if_ipsec.h>
72 #include <net/if_llreach.h>
73 #include <net/if_utun.h>
74 #include <net/kpi_interfacefilter.h>
75 #include <net/classq/classq.h>
76 #include <net/classq/classq_sfb.h>
77 #include <net/flowhash.h>
78 #include <net/ntstat.h>
79 #include <net/if_llatbl.h>
80 #include <net/net_api_stats.h>
81 #include <net/if_ports_used.h>
82 #include <net/if_vlan_var.h>
83 #include <netinet/in.h>
84 #if INET
85 #include <netinet/in_var.h>
86 #include <netinet/igmp_var.h>
87 #include <netinet/ip_var.h>
88 #include <netinet/tcp.h>
89 #include <netinet/tcp_var.h>
90 #include <netinet/udp.h>
91 #include <netinet/udp_var.h>
92 #include <netinet/if_ether.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/in_tclass.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_icmp.h>
97 #include <netinet/icmp_var.h>
98 #endif /* INET */
99
100 #include <net/nat464_utils.h>
101 #include <netinet6/in6_var.h>
102 #include <netinet6/nd6.h>
103 #include <netinet6/mld6_var.h>
104 #include <netinet6/scope6_var.h>
105 #include <netinet/ip6.h>
106 #include <netinet/icmp6.h>
107 #include <net/pf_pbuf.h>
108 #include <libkern/OSAtomic.h>
109 #include <libkern/tree.h>
110
111 #include <dev/random/randomdev.h>
112 #include <machine/machine_routines.h>
113
114 #include <mach/thread_act.h>
115 #include <mach/sdt.h>
116
117 #if CONFIG_MACF
118 #include <sys/kauth.h>
119 #include <security/mac_framework.h>
120 #include <net/ethernet.h>
121 #include <net/firewire.h>
122 #endif
123
124 #if PF
125 #include <net/pfvar.h>
126 #endif /* PF */
127 #include <net/pktsched/pktsched.h>
128 #include <net/pktsched/pktsched_netem.h>
129
130 #if NECP
131 #include <net/necp.h>
132 #endif /* NECP */
133
134
135 #include <os/log.h>
136
137 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
138 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
139 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
140 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
141 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
142
143 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
144 #define MAX_LINKADDR 4 /* LONGWORDS */
145 #define M_NKE M_IFADDR
146
147 #if 1
148 #define DLIL_PRINTF printf
149 #else
150 #define DLIL_PRINTF kprintf
151 #endif
152
153 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
154 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
155
156 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
157 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
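/*
 * Both macros above are compile-time checks: _CASSERT() fails the build
 * unless the named field lives at an offset that is a multiple of
 * sizeof (u_int64_t), i.e. 8 bytes.  dlil_init() applies them to the
 * 64-bit interface counters that are updated with atomic operations and
 * therefore must be naturally aligned.
 */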
158
159 enum {
160 kProtoKPI_v1 = 1,
161 kProtoKPI_v2 = 2
162 };
163
164 /*
165 * List of if_proto structures in if_proto_hash[] is protected by
166 * the ifnet lock. The rest of the fields are initialized at protocol
167 * attach time and never change, thus no lock required as long as
168 * a reference to it is valid, via if_proto_ref().
169 */
170 struct if_proto {
171 SLIST_ENTRY(if_proto) next_hash;
172 u_int32_t refcount;
173 u_int32_t detached;
174 struct ifnet *ifp;
175 protocol_family_t protocol_family;
176 int proto_kpi;
177 union {
178 struct {
179 proto_media_input input;
180 proto_media_preout pre_output;
181 proto_media_event event;
182 proto_media_ioctl ioctl;
183 proto_media_detached detached;
184 proto_media_resolve_multi resolve_multi;
185 proto_media_send_arp send_arp;
186 } v1;
187 struct {
188 proto_media_input_v2 input;
189 proto_media_preout pre_output;
190 proto_media_event event;
191 proto_media_ioctl ioctl;
192 proto_media_detached detached;
193 proto_media_resolve_multi resolve_multi;
194 proto_media_send_arp send_arp;
195 } v2;
196 } kpi;
197 };
198
199 SLIST_HEAD(proto_hash_entry, if_proto);
200
201 #define DLIL_SDLDATALEN \
202 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
203
204 struct dlil_ifnet {
205 struct ifnet dl_if; /* public ifnet */
206 /*
207 * DLIL private fields, protected by dl_if_lock
208 */
209 decl_lck_mtx_data(, dl_if_lock);
210 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
211 u_int32_t dl_if_flags; /* flags (below) */
212 u_int32_t dl_if_refcnt; /* refcnt */
213 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
214 void *dl_if_uniqueid; /* unique interface id */
215 size_t dl_if_uniqueid_len; /* length of the unique id */
216 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
217 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
218 struct {
219 struct ifaddr ifa; /* lladdr ifa */
220 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
221 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
222 } dl_if_lladdr;
223 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
224 u_int8_t dl_if_permanent_ether[ETHER_ADDR_LEN]; /* permanent address */
225 u_int8_t dl_if_permanent_ether_is_set;
226 u_int8_t dl_if_unused;
227 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
228 ctrace_t dl_if_attach; /* attach PC stacktrace */
229 ctrace_t dl_if_detach; /* detach PC stacktrace */
230 };
231
232 /* Values for dl_if_flags (private to DLIL) */
233 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
234 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
235 #define DLIF_DEBUG 0x4 /* has debugging info */
236
237 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
238
239 /* For gdb */
240 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
241
242 struct dlil_ifnet_dbg {
243 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
244 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
245 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
246 /*
247 * Circular lists of ifnet_{reference,release} callers.
248 */
249 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
250 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
251 };
252
253 #define DLIL_TO_IFP(s) (&s->dl_if)
254 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
255
256 struct ifnet_filter {
257 TAILQ_ENTRY(ifnet_filter) filt_next;
258 u_int32_t filt_skip;
259 u_int32_t filt_flags;
260 ifnet_t filt_ifp;
261 const char *filt_name;
262 void *filt_cookie;
263 protocol_family_t filt_protocol;
264 iff_input_func filt_input;
265 iff_output_func filt_output;
266 iff_event_func filt_event;
267 iff_ioctl_func filt_ioctl;
268 iff_detached_func filt_detached;
269 };
270
271 struct proto_input_entry;
272
273 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
274 static lck_grp_t *dlil_lock_group;
275 lck_grp_t *ifnet_lock_group;
276 static lck_grp_t *ifnet_head_lock_group;
277 static lck_grp_t *ifnet_snd_lock_group;
278 static lck_grp_t *ifnet_rcv_lock_group;
279 lck_attr_t *ifnet_lock_attr;
280 decl_lck_rw_data(static, ifnet_head_lock);
281 decl_lck_mtx_data(static, dlil_ifnet_lock);
282 u_int32_t dlil_filter_disable_tso_count = 0;
283
284 #if DEBUG
285 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
286 #else
287 static unsigned int ifnet_debug; /* debugging (disabled) */
288 #endif /* !DEBUG */
289 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
290 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
291 static struct zone *dlif_zone; /* zone for dlil_ifnet */
292 #define DLIF_ZONE_NAME "ifnet" /* zone name */
293
294 static ZONE_DECLARE(dlif_filt_zone, "ifnet_filter",
295 sizeof(struct ifnet_filter), ZC_ZFREE_CLEARMEM);
296
297 static ZONE_DECLARE(dlif_phash_zone, "ifnet_proto_hash",
298 sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS, ZC_ZFREE_CLEARMEM);
299
300 static ZONE_DECLARE(dlif_proto_zone, "ifnet_proto",
301 sizeof(struct if_proto), ZC_ZFREE_CLEARMEM);
302
303 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
304 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
305 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
306 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
307
308 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
309 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
310 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
311 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
312
313 static u_int32_t net_rtref;
314
315 static struct dlil_main_threading_info dlil_main_input_thread_info;
316 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
317 (struct dlil_threading_info *)&dlil_main_input_thread_info;
318
319 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
320 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
321 static void dlil_if_trace(struct dlil_ifnet *, int);
322 static void if_proto_ref(struct if_proto *);
323 static void if_proto_free(struct if_proto *);
324 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
325 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
326 u_int32_t list_count);
327 static void if_flt_monitor_busy(struct ifnet *);
328 static void if_flt_monitor_unbusy(struct ifnet *);
329 static void if_flt_monitor_enter(struct ifnet *);
330 static void if_flt_monitor_leave(struct ifnet *);
331 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
332 char **, protocol_family_t);
333 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
334 protocol_family_t);
335 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
336 const struct sockaddr_dl *);
337 static int ifnet_lookup(struct ifnet *);
338 static void if_purgeaddrs(struct ifnet *);
339
340 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
341 struct mbuf *, char *);
342 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
343 struct mbuf *);
344 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
345 mbuf_t *, const struct sockaddr *, void *, char *, char *);
346 static void ifproto_media_event(struct ifnet *, protocol_family_t,
347 const struct kev_msg *);
348 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
349 unsigned long, void *);
350 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
351 struct sockaddr_dl *, size_t);
352 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
353 const struct sockaddr_dl *, const struct sockaddr *,
354 const struct sockaddr_dl *, const struct sockaddr *);
355
356 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
357 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
358 boolean_t poll, struct thread *tp);
359 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
360 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
361 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
362 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
363 protocol_family_t *);
364 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
365 const struct ifnet_demux_desc *, u_int32_t);
366 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
367 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
368 #if !XNU_TARGET_OS_OSX
369 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
370 const struct sockaddr *, const char *, const char *,
371 u_int32_t *, u_int32_t *);
372 #else /* XNU_TARGET_OS_OSX */
373 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
374 const struct sockaddr *, const char *, const char *);
375 #endif /* XNU_TARGET_OS_OSX */
376 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
377 const struct sockaddr *, const char *, const char *,
378 u_int32_t *, u_int32_t *);
379 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
380 static void ifp_if_free(struct ifnet *);
381 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
382 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
383 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
384
385 static errno_t dlil_input_async(struct dlil_threading_info *, struct ifnet *,
386 struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
387 boolean_t, struct thread *);
388 static errno_t dlil_input_sync(struct dlil_threading_info *, struct ifnet *,
389 struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
390 boolean_t, struct thread *);
391
392 static void dlil_main_input_thread_func(void *, wait_result_t);
393 static void dlil_main_input_thread_cont(void *, wait_result_t);
394
395 static void dlil_input_thread_func(void *, wait_result_t);
396 static void dlil_input_thread_cont(void *, wait_result_t);
397
398 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
399 static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);
400
401 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *,
402 thread_continue_t *);
403 static void dlil_terminate_input_thread(struct dlil_threading_info *);
404 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
405 struct dlil_threading_info *, struct ifnet *, boolean_t);
406 static boolean_t dlil_input_stats_sync(struct ifnet *,
407 struct dlil_threading_info *);
408 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
409 u_int32_t, ifnet_model_t, boolean_t);
410 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
411 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
412 static int dlil_is_clat_needed(protocol_family_t, mbuf_t );
413 static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
414 static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
415 #if DEBUG || DEVELOPMENT
416 static void dlil_verify_sum16(void);
417 #endif /* DEBUG || DEVELOPMENT */
418 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
419 protocol_family_t);
420 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
421 protocol_family_t);
422
423 static void dlil_incr_pending_thread_count(void);
424 static void dlil_decr_pending_thread_count(void);
425
426 static void ifnet_detacher_thread_func(void *, wait_result_t);
427 static void ifnet_detacher_thread_cont(void *, wait_result_t);
428 static void ifnet_detach_final(struct ifnet *);
429 static void ifnet_detaching_enqueue(struct ifnet *);
430 static struct ifnet *ifnet_detaching_dequeue(void);
431
432 static void ifnet_start_thread_func(void *, wait_result_t);
433 static void ifnet_start_thread_cont(void *, wait_result_t);
434
435 static void ifnet_poll_thread_func(void *, wait_result_t);
436 static void ifnet_poll_thread_cont(void *, wait_result_t);
437
438 static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
439 boolean_t, boolean_t *);
440
441 static void ifp_src_route_copyout(struct ifnet *, struct route *);
442 static void ifp_src_route_copyin(struct ifnet *, struct route *);
443 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
444 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
445
446 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
447 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
448 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
449 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
450 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
451 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
452 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
453 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
454 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
455 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
456 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
457
458 struct chain_len_stats tx_chain_len_stats;
459 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
460
461 #if TEST_INPUT_THREAD_TERMINATION
462 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
463 #endif /* TEST_INPUT_THREAD_TERMINATION */
464
465 /* The following are protected by dlil_ifnet_lock */
466 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
467 static u_int32_t ifnet_detaching_cnt;
468 static boolean_t ifnet_detaching_embryonic;
469 static void *ifnet_delayed_run; /* wait channel for detaching thread */
470
471 decl_lck_mtx_data(static, ifnet_fc_lock);
472
473 static uint32_t ifnet_flowhash_seed;
474
475 struct ifnet_flowhash_key {
476 char ifk_name[IFNAMSIZ];
477 uint32_t ifk_unit;
478 uint32_t ifk_flags;
479 uint32_t ifk_eflags;
480 uint32_t ifk_capabilities;
481 uint32_t ifk_capenable;
482 uint32_t ifk_output_sched_model;
483 uint32_t ifk_rand1;
484 uint32_t ifk_rand2;
485 };
486
487 /* Flow control entry per interface */
488 struct ifnet_fc_entry {
489 RB_ENTRY(ifnet_fc_entry) ifce_entry;
490 u_int32_t ifce_flowhash;
491 struct ifnet *ifce_ifp;
492 };
493
494 static uint32_t ifnet_calc_flowhash(struct ifnet *);
495 static int ifce_cmp(const struct ifnet_fc_entry *,
496 const struct ifnet_fc_entry *);
497 static int ifnet_fc_add(struct ifnet *);
498 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
499 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
500
501 /* protected by ifnet_fc_lock */
502 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
503 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
504 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
505
506 static ZONE_DECLARE(ifnet_fc_zone, "ifnet_fc_zone",
507 sizeof(struct ifnet_fc_entry), ZC_ZFREE_CLEARMEM);
508
509 extern void bpfdetach(struct ifnet *);
510 extern void proto_input_run(void);
511
512 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
513 u_int32_t flags);
514 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
515 u_int32_t flags);
516
517 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
518
519 #if CONFIG_MACF
520 #if !XNU_TARGET_OS_OSX
521 int dlil_lladdr_ckreq = 1;
522 #else /* XNU_TARGET_OS_OSX */
523 int dlil_lladdr_ckreq = 0;
524 #endif /* XNU_TARGET_OS_OSX */
525 #endif /* CONFIG_MACF */
526
527 #if DEBUG
528 int dlil_verbose = 1;
529 #else
530 int dlil_verbose = 0;
531 #endif /* DEBUG */
532 #if IFNET_INPUT_SANITY_CHK
533 /* sanity checking of input packet lists received */
534 static u_int32_t dlil_input_sanity_check = 0;
535 #endif /* IFNET_INPUT_SANITY_CHK */
536 /* rate limit debug messages */
537 struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
538
539 SYSCTL_DECL(_net_link_generic_system);
540
541 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
542 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
543
544 #define IF_SNDQ_MINLEN 32
545 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
546 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
547 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
548 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
549
550 #define IF_RCVQ_MINLEN 32
551 #define IF_RCVQ_MAXLEN 256
552 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
553 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
554 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
555 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
556
557 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
558 u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
559 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
560 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
561 "ilog2 of EWMA decay rate of avg inbound packets");
562
563 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
564 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
565 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
566 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
567 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
568 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
569 "Q", "input poll mode freeze time");
570
571 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
572 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
573 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
574 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
575 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
576 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
577 "Q", "input poll sampling time");
578
579 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
580 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
581 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
582 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
583 "Q", "input poll interval (time)");
584
585 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
586 u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
587 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
588 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
589 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
590
591 #define IF_RXPOLL_WLOWAT 10
592 static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
593 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
594 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
595 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
596 "I", "input poll wakeup low watermark");
597
598 #define IF_RXPOLL_WHIWAT 100
599 static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
600 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
601 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
602 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
603 "I", "input poll wakeup high watermark");
604
605 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
606 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
607 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
608 "max packets per poll call");
609
610 u_int32_t if_rxpoll = 1;
611 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
612 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
613 sysctl_rxpoll, "I", "enable opportunistic input polling");
614
615 #if TEST_INPUT_THREAD_TERMINATION
616 static u_int32_t if_input_thread_termination_spin = 0;
617 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
618 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
619 &if_input_thread_termination_spin, 0,
620 sysctl_input_thread_termination_spin,
621 "I", "input thread termination spin limit");
622 #endif /* TEST_INPUT_THREAD_TERMINATION */
623
624 static u_int32_t cur_dlil_input_threads = 0;
625 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
626 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
627 "Current number of DLIL input threads");
628
629 #if IFNET_INPUT_SANITY_CHK
630 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
631 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
632 "Turn on sanity checking in DLIL input");
633 #endif /* IFNET_INPUT_SANITY_CHK */
634
635 static u_int32_t if_flowadv = 1;
636 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
637 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
638 "enable flow-advisory mechanism");
639
640 static u_int32_t if_delaybased_queue = 1;
641 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
642 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
643 "enable delay based dynamic queue sizing");
644
645 static uint64_t hwcksum_in_invalidated = 0;
646 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
647 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
648 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
649
650 uint32_t hwcksum_dbg = 0;
651 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
652 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
653 "enable hardware cksum debugging");
654
655 u_int32_t ifnet_start_delayed = 0;
656 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
657 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
658 "number of times start was delayed");
659
660 u_int32_t ifnet_delay_start_disabled = 0;
661 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
662 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
663 "number of times start was delayed");
664
665 static inline void
666 ifnet_delay_start_disabled_increment(void)
667 {
668 OSIncrementAtomic(&ifnet_delay_start_disabled);
669 }
670
671 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
672 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
673 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
674 #define HWCKSUM_DBG_MASK \
675 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
676 HWCKSUM_DBG_FINALIZE_FORCED)
677
678 static uint32_t hwcksum_dbg_mode = 0;
679 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
680 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
681 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
682
683 static uint64_t hwcksum_dbg_partial_forced = 0;
684 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
685 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
686 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
687
688 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
689 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
690 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
691 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
692
693 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
694 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
695 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
696 &hwcksum_dbg_partial_rxoff_forced, 0,
697 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
698 "forced partial cksum rx offset");
699
700 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
701 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
702 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
703 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
704 "adjusted partial cksum rx offset");
705
706 static uint64_t hwcksum_dbg_verified = 0;
707 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
708 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
709 &hwcksum_dbg_verified, "packets verified for having good checksum");
710
711 static uint64_t hwcksum_dbg_bad_cksum = 0;
712 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
713 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
714 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
715
716 static uint64_t hwcksum_dbg_bad_rxoff = 0;
717 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
718 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
719 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
720
721 static uint64_t hwcksum_dbg_adjusted = 0;
722 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
723 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
724 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
725
726 static uint64_t hwcksum_dbg_finalized_hdr = 0;
727 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
728 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
729 &hwcksum_dbg_finalized_hdr, "finalized headers");
730
731 static uint64_t hwcksum_dbg_finalized_data = 0;
732 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
733 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
734 &hwcksum_dbg_finalized_data, "finalized payloads");
735
736 uint32_t hwcksum_tx = 1;
737 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
738 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
739 "enable transmit hardware checksum offload");
740
741 uint32_t hwcksum_rx = 1;
742 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
743 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
744 "enable receive hardware checksum offload");
745
746 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
747 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
748 sysctl_tx_chain_len_stats, "S", "");
749
750 uint32_t tx_chain_len_count = 0;
751 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
752 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
753
754 static uint32_t threshold_notify = 1; /* enable/disable */
755 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
756 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
757
758 static uint32_t threshold_interval = 2; /* in seconds */
759 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
760 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
761
762 #if (DEVELOPMENT || DEBUG)
763 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
764 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
765 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
766 #endif /* DEVELOPMENT || DEBUG */
767
768 struct net_api_stats net_api_stats;
769 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
770 &net_api_stats, net_api_stats, "");
771
772 unsigned int net_rxpoll = 1;
773 unsigned int net_affinity = 1;
774 unsigned int net_async = 1; /* 0: synchronous, 1: asynchronous */
775
776 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
777
778 extern u_int32_t inject_buckets;
779
780 static lck_grp_attr_t *dlil_grp_attributes = NULL;
781 static lck_attr_t *dlil_lck_attributes = NULL;
782
783 /* DLIL data threshold thread call */
784 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
785
786 void
787 ifnet_filter_update_tso(boolean_t filter_enable)
788 {
789 /*
790 * update filter count and route_generation ID to let TCP
791  * know it should reevaluate doing TSO or not
792 */
793 OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
794 routegenid_update();
795 }
796
797
798 #define DLIL_INPUT_CHECK(m, ifp) { \
799 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
800 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
801 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
802 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
803 /* NOTREACHED */ \
804 } \
805 }
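/*
 * DLIL_INPUT_CHECK() is a cheap sanity guard for the input path: it
 * panics if the mbuf has no packet header, carries no receive interface,
 * or was received on an interface other than the one being processed.
 * The lo_ifp comparison exempts loopback, presumably because packets
 * queued to the loopback path may still carry the rcvif of the interface
 * they originated on.
 */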
806
807 #define DLIL_EWMA(old, new, decay) do { \
808 u_int32_t _avg; \
809 if ((_avg = (old)) > 0) \
810 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
811 else \
812 _avg = (new); \
813 (old) = _avg; \
814 } while (0)
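/*
 * DLIL_EWMA() maintains an exponentially weighted moving average without
 * division:  avg' = (avg * (2^decay - 1) + new) / 2^decay.  With the
 * default decay of 2 (IF_RXPOLL_DECAY) each update weighs the previous
 * average at 3/4 and the new sample at 1/4; e.g. avg = 100, new = 20
 * yields ((100 << 2) - 100 + 20) >> 2 = 80.
 */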
815
816 #define MBPS (1ULL * 1000 * 1000)
817 #define GBPS (MBPS * 1000)
818
819 struct rxpoll_time_tbl {
820 u_int64_t speed; /* downlink speed */
821 u_int32_t plowat; /* packets low watermark */
822 u_int32_t phiwat; /* packets high watermark */
823 u_int32_t blowat; /* bytes low watermark */
824 u_int32_t bhiwat; /* bytes high watermark */
825 };
826
827 static struct rxpoll_time_tbl rxpoll_tbl[] = {
828 { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
829 { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
830 { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
831 { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
832 { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
833 { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
834 };
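/*
 * The table above is keyed by downlink speed and supplies the per-speed
 * packet and byte watermarks used by the opportunistic input polling
 * logic when deciding whether an RXPOLL-capable interface should stay in
 * interrupt mode or switch to polling; the final all-zero entry acts as
 * a terminator.  Entries at or above 100 Mbps currently share the same
 * thresholds.
 */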
835
836 decl_lck_mtx_data(static, dlil_thread_sync_lock);
837 static uint32_t dlil_pending_thread_cnt = 0;
838
839 static void
840 dlil_incr_pending_thread_count(void)
841 {
842 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
843 lck_mtx_lock(&dlil_thread_sync_lock);
844 dlil_pending_thread_cnt++;
845 lck_mtx_unlock(&dlil_thread_sync_lock);
846 }
847
848 static void
849 dlil_decr_pending_thread_count(void)
850 {
851 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
852 lck_mtx_lock(&dlil_thread_sync_lock);
853 VERIFY(dlil_pending_thread_cnt > 0);
854 dlil_pending_thread_cnt--;
855 if (dlil_pending_thread_cnt == 0) {
856 wakeup(&dlil_pending_thread_cnt);
857 }
858 lck_mtx_unlock(&dlil_thread_sync_lock);
859 }
860
861 int
862 proto_hash_value(u_int32_t protocol_family)
863 {
864 /*
865 * dlil_proto_unplumb_all() depends on the mapping between
866 * the hash bucket index and the protocol family defined
867 * here; future changes must be applied there as well.
868 */
869 switch (protocol_family) {
870 case PF_INET:
871 return 0;
872 case PF_INET6:
873 return 1;
874 case PF_VLAN:
875 return 2;
876 case PF_802154:
877 return 3;
878 case PF_UNSPEC:
879 default:
880 return 4;
881 }
882 }
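/*
 * The mapping above yields five buckets: PF_INET -> 0, PF_INET6 -> 1,
 * PF_VLAN -> 2, PF_802154 -> 3, and everything else (including
 * PF_UNSPEC) -> 4; if_proto_hash[] is therefore expected to be sized
 * with at least that many slots (PROTO_HASH_SLOTS).
 */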
883
884 /*
885 * Caller must already be holding ifnet lock.
886 */
887 static struct if_proto *
888 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
889 {
890 struct if_proto *proto = NULL;
891 u_int32_t i = proto_hash_value(protocol_family);
892
893 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
894
895 if (ifp->if_proto_hash != NULL) {
896 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
897 }
898
899 while (proto != NULL && proto->protocol_family != protocol_family) {
900 proto = SLIST_NEXT(proto, next_hash);
901 }
902
903 if (proto != NULL) {
904 if_proto_ref(proto);
905 }
906
907 return proto;
908 }
909
910 static void
911 if_proto_ref(struct if_proto *proto)
912 {
913 atomic_add_32(&proto->refcount, 1);
914 }
915
916 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
917
918 static void
919 if_proto_free(struct if_proto *proto)
920 {
921 u_int32_t oldval;
922 struct ifnet *ifp = proto->ifp;
923 u_int32_t proto_family = proto->protocol_family;
924 struct kev_dl_proto_data ev_pr_data;
925
926 oldval = atomic_add_32_ov(&proto->refcount, -1);
927 if (oldval > 1) {
928 return;
929 }
930
931 if (proto->proto_kpi == kProtoKPI_v1) {
932 if (proto->kpi.v1.detached) {
933 proto->kpi.v1.detached(ifp, proto->protocol_family);
934 }
935 }
936 if (proto->proto_kpi == kProtoKPI_v2) {
937 if (proto->kpi.v2.detached) {
938 proto->kpi.v2.detached(ifp, proto->protocol_family);
939 }
940 }
941
942 /*
943 * Cleanup routes that may still be in the routing table for that
944 * interface/protocol pair.
945 */
946 if_rtproto_del(ifp, proto_family);
947
948 ifnet_lock_shared(ifp);
949
950 /* No more reference on this, protocol must have been detached */
951 VERIFY(proto->detached);
952
953 /*
954  * The reserved field carries the number of protocols still attached
955 * (subject to change)
956 */
957 ev_pr_data.proto_family = proto_family;
958 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
959
960 ifnet_lock_done(ifp);
961
962 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
963 (struct net_event_data *)&ev_pr_data,
964 sizeof(struct kev_dl_proto_data));
965
966 if (ev_pr_data.proto_remaining_count == 0) {
967 /*
968 * The protocol count has gone to zero, mark the interface down.
969 * This used to be done by configd.KernelEventMonitor, but that
970 * is inherently prone to races (rdar://problem/30810208).
971 */
972 (void) ifnet_set_flags(ifp, 0, IFF_UP);
973 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
974 dlil_post_sifflags_msg(ifp);
975 }
976
977 zfree(dlif_proto_zone, proto);
978 }
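/*
 * Reference counting note: atomic_add_32_ov() returns the value the
 * refcount held before the decrement, so if_proto_free() only proceeds
 * with the detached callback, route cleanup, event posting and zfree()
 * when the caller dropped the last reference (oldval == 1); earlier
 * callers simply return after decrementing.
 */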
979
980 __private_extern__ void
981 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
982 {
983 #if !MACH_ASSERT
984 #pragma unused(ifp)
985 #endif
986 unsigned int type = 0;
987 int ass = 1;
988
989 switch (what) {
990 case IFNET_LCK_ASSERT_EXCLUSIVE:
991 type = LCK_RW_ASSERT_EXCLUSIVE;
992 break;
993
994 case IFNET_LCK_ASSERT_SHARED:
995 type = LCK_RW_ASSERT_SHARED;
996 break;
997
998 case IFNET_LCK_ASSERT_OWNED:
999 type = LCK_RW_ASSERT_HELD;
1000 break;
1001
1002 case IFNET_LCK_ASSERT_NOTOWNED:
1003 /* nothing to do here for RW lock; bypass assert */
1004 ass = 0;
1005 break;
1006
1007 default:
1008 panic("bad ifnet assert type: %d", what);
1009 /* NOTREACHED */
1010 }
1011 if (ass) {
1012 LCK_RW_ASSERT(&ifp->if_lock, type);
1013 }
1014 }
1015
1016 __private_extern__ void
1017 ifnet_lock_shared(struct ifnet *ifp)
1018 {
1019 lck_rw_lock_shared(&ifp->if_lock);
1020 }
1021
1022 __private_extern__ void
1023 ifnet_lock_exclusive(struct ifnet *ifp)
1024 {
1025 lck_rw_lock_exclusive(&ifp->if_lock);
1026 }
1027
1028 __private_extern__ void
1029 ifnet_lock_done(struct ifnet *ifp)
1030 {
1031 lck_rw_done(&ifp->if_lock);
1032 }
1033
1034 #if INET
1035 __private_extern__ void
1036 if_inetdata_lock_shared(struct ifnet *ifp)
1037 {
1038 lck_rw_lock_shared(&ifp->if_inetdata_lock);
1039 }
1040
1041 __private_extern__ void
1042 if_inetdata_lock_exclusive(struct ifnet *ifp)
1043 {
1044 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1045 }
1046
1047 __private_extern__ void
1048 if_inetdata_lock_done(struct ifnet *ifp)
1049 {
1050 lck_rw_done(&ifp->if_inetdata_lock);
1051 }
1052 #endif
1053
1054 __private_extern__ void
1055 if_inet6data_lock_shared(struct ifnet *ifp)
1056 {
1057 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1058 }
1059
1060 __private_extern__ void
1061 if_inet6data_lock_exclusive(struct ifnet *ifp)
1062 {
1063 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1064 }
1065
1066 __private_extern__ void
1067 if_inet6data_lock_done(struct ifnet *ifp)
1068 {
1069 lck_rw_done(&ifp->if_inet6data_lock);
1070 }
1071
1072 __private_extern__ void
1073 ifnet_head_lock_shared(void)
1074 {
1075 lck_rw_lock_shared(&ifnet_head_lock);
1076 }
1077
1078 __private_extern__ void
1079 ifnet_head_lock_exclusive(void)
1080 {
1081 lck_rw_lock_exclusive(&ifnet_head_lock);
1082 }
1083
1084 __private_extern__ void
1085 ifnet_head_done(void)
1086 {
1087 lck_rw_done(&ifnet_head_lock);
1088 }
1089
1090 __private_extern__ void
1091 ifnet_head_assert_exclusive(void)
1092 {
1093 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1094 }
1095
1096 /*
1097 * dlil_ifp_protolist
1098 * - get the list of protocols attached to the interface, or just the number
1099 * of attached protocols
1100 * - if the number returned is greater than 'list_count', truncation occurred
1101 *
1102 * Note:
1103 * - caller must already be holding ifnet lock.
1104 */
1105 static u_int32_t
1106 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1107 u_int32_t list_count)
1108 {
1109 u_int32_t count = 0;
1110 int i;
1111
1112 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1113
1114 if (ifp->if_proto_hash == NULL) {
1115 goto done;
1116 }
1117
1118 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1119 struct if_proto *proto;
1120 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1121 if (list != NULL && count < list_count) {
1122 list[count] = proto->protocol_family;
1123 }
1124 count++;
1125 }
1126 }
1127 done:
1128 return count;
1129 }
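/*
 * dlil_ifp_protolist() can be called with a NULL list (or a list_count
 * of 0) to obtain just the number of attached protocols; a second call
 * with a suitably sized buffer then fetches the families themselves.
 * if_proto_free() above uses the counting form to fill in the
 * proto_remaining_count of its KEV_DL_PROTO_DETACHED event.
 */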
1130
1131 __private_extern__ u_int32_t
1132 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1133 {
1134 ifnet_lock_shared(ifp);
1135 count = dlil_ifp_protolist(ifp, protolist, count);
1136 ifnet_lock_done(ifp);
1137 return count;
1138 }
1139
1140 __private_extern__ void
1141 if_free_protolist(u_int32_t *list)
1142 {
1143 _FREE(list, M_TEMP);
1144 }
1145
1146 __private_extern__ int
1147 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1148 u_int32_t event_code, struct net_event_data *event_data,
1149 u_int32_t event_data_len)
1150 {
1151 struct net_event_data ev_data;
1152 struct kev_msg ev_msg;
1153
1154 bzero(&ev_msg, sizeof(ev_msg));
1155 bzero(&ev_data, sizeof(ev_data));
1156 /*
1157 * a net event always starts with a net_event_data structure
1158 * but the caller can generate a simple net event or
1159 * provide a longer event structure to post
1160 */
1161 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1162 ev_msg.kev_class = KEV_NETWORK_CLASS;
1163 ev_msg.kev_subclass = event_subclass;
1164 ev_msg.event_code = event_code;
1165
1166 if (event_data == NULL) {
1167 event_data = &ev_data;
1168 event_data_len = sizeof(struct net_event_data);
1169 }
1170
1171 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1172 event_data->if_family = ifp->if_family;
1173 event_data->if_unit = (u_int32_t)ifp->if_unit;
1174
1175 ev_msg.dv[0].data_length = event_data_len;
1176 ev_msg.dv[0].data_ptr = event_data;
1177 ev_msg.dv[1].data_length = 0;
1178
1179 bool update_generation = true;
1180 if (event_subclass == KEV_DL_SUBCLASS) {
1181 /* Don't update interface generation for frequent link quality and state changes */
1182 switch (event_code) {
1183 case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1184 case KEV_DL_RRC_STATE_CHANGED:
1185 case KEV_DL_NODE_PRESENCE:
1186 case KEV_DL_NODE_ABSENCE:
1187 case KEV_DL_MASTER_ELECTED:
1188 update_generation = false;
1189 break;
1190 default:
1191 break;
1192 }
1193 }
1194
1195 return dlil_event_internal(ifp, &ev_msg, update_generation);
1196 }
1197
1198 __private_extern__ int
1199 dlil_alloc_local_stats(struct ifnet *ifp)
1200 {
1201 int ret = EINVAL;
1202 void *buf, *base, **pbuf;
1203
1204 if (ifp == NULL) {
1205 goto end;
1206 }
1207
1208 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1209 /* allocate tcpstat_local structure */
1210 buf = zalloc_flags(dlif_tcpstat_zone, Z_WAITOK | Z_ZERO);
1211 if (buf == NULL) {
1212 ret = ENOMEM;
1213 goto end;
1214 }
1215
1216 /* Get the 64-bit aligned base address for this object */
1217 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1218 sizeof(u_int64_t));
1219 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1220 ((intptr_t)buf + dlif_tcpstat_bufsize));
1221
1222 /*
1223 * Wind back a pointer size from the aligned base and
1224 * save the original address so we can free it later.
1225 */
1226 pbuf = (void **)((intptr_t)base - sizeof(void *));
1227 *pbuf = buf;
1228 ifp->if_tcp_stat = base;
1229
1230 /* allocate udpstat_local structure */
1231 buf = zalloc_flags(dlif_udpstat_zone, Z_WAITOK | Z_ZERO);
1232 if (buf == NULL) {
1233 ret = ENOMEM;
1234 goto end;
1235 }
1236
1237 /* Get the 64-bit aligned base address for this object */
1238 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1239 sizeof(u_int64_t));
1240 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1241 ((intptr_t)buf + dlif_udpstat_bufsize));
1242
1243 /*
1244 * Wind back a pointer size from the aligned base and
1245 * save the original address so we can free it later.
1246 */
1247 pbuf = (void **)((intptr_t)base - sizeof(void *));
1248 *pbuf = buf;
1249 ifp->if_udp_stat = base;
1250
1251 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
1252 IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
1253
1254 ret = 0;
1255 }
1256
1257 if (ifp->if_ipv4_stat == NULL) {
1258 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1259 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1260 if (ifp->if_ipv4_stat == NULL) {
1261 ret = ENOMEM;
1262 goto end;
1263 }
1264 }
1265
1266 if (ifp->if_ipv6_stat == NULL) {
1267 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1268 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1269 if (ifp->if_ipv6_stat == NULL) {
1270 ret = ENOMEM;
1271 goto end;
1272 }
1273 }
1274 end:
1275 if (ifp != NULL && ret != 0) {
1276 if (ifp->if_tcp_stat != NULL) {
1277 pbuf = (void **)
1278 ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
1279 zfree(dlif_tcpstat_zone, *pbuf);
1280 ifp->if_tcp_stat = NULL;
1281 }
1282 if (ifp->if_udp_stat != NULL) {
1283 pbuf = (void **)
1284 ((intptr_t)ifp->if_udp_stat - sizeof(void *));
1285 zfree(dlif_udpstat_zone, *pbuf);
1286 ifp->if_udp_stat = NULL;
1287 }
1288 if (ifp->if_ipv4_stat != NULL) {
1289 FREE(ifp->if_ipv4_stat, M_TEMP);
1290 ifp->if_ipv4_stat = NULL;
1291 }
1292 if (ifp->if_ipv6_stat != NULL) {
1293 FREE(ifp->if_ipv6_stat, M_TEMP);
1294 ifp->if_ipv6_stat = NULL;
1295 }
1296 }
1297
1298 return ret;
1299 }
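/*
 * Layout of the per-ifnet TCP/UDP stat allocations made above.  The zone
 * element (dlif_*stat_bufsize bytes) is larger than the structure so the
 * stats can be placed on a 64-bit boundary regardless of where the zone
 * returns memory:
 *
 *     buf                           base (stored in ifp->if_*_stat)
 *      |                             |
 *      v                             v
 *      +-----------+-----------------+--------------------------+
 *      | padding   | void * = buf    | 64-bit aligned stats ... |
 *      +-----------+-----------------+--------------------------+
 *
 * base is rounded up past buf so that it is 8-byte aligned, and the
 * pointer-sized slot immediately before it stores the original zone
 * address, which the error path above (and interface teardown) reads
 * back in order to zfree() the element.
 */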
1300
1301 static void
1302 dlil_reset_rxpoll_params(ifnet_t ifp)
1303 {
1304 ASSERT(ifp != NULL);
1305 ifnet_set_poll_cycle(ifp, NULL);
1306 ifp->if_poll_update = 0;
1307 ifp->if_poll_flags = 0;
1308 ifp->if_poll_req = 0;
1309 ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
1310 bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
1311 bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
1312 bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
1313 net_timerclear(&ifp->if_poll_mode_holdtime);
1314 net_timerclear(&ifp->if_poll_mode_lasttime);
1315 net_timerclear(&ifp->if_poll_sample_holdtime);
1316 net_timerclear(&ifp->if_poll_sample_lasttime);
1317 net_timerclear(&ifp->if_poll_dbg_lasttime);
1318 }
1319
1320 static int
1321 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp,
1322 thread_continue_t *thfunc)
1323 {
1324 boolean_t dlil_rxpoll_input;
1325 thread_continue_t func = NULL;
1326 u_int32_t limit;
1327 int error = 0;
1328
1329 dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
1330 (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));
1331
1332 /* default strategy utilizes the DLIL worker thread */
1333 inp->dlth_strategy = dlil_input_async;
1334
1335 /* NULL ifp indicates the main input thread, called at dlil_init time */
1336 if (ifp == NULL) {
1337 /*
1338 * Main input thread only.
1339 */
1340 func = dlil_main_input_thread_func;
1341 VERIFY(inp == dlil_main_input_thread);
1342 (void) strlcat(inp->dlth_name,
1343 "main_input", DLIL_THREADNAME_LEN);
1344 } else if (dlil_rxpoll_input) {
1345 /*
1346 * Legacy (non-netif) hybrid polling.
1347 */
1348 func = dlil_rxpoll_input_thread_func;
1349 VERIFY(inp != dlil_main_input_thread);
1350 (void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
1351 "%s_input_poll", if_name(ifp));
1352 } else if (net_async || (ifp->if_xflags & IFXF_LEGACY)) {
1353 /*
1354 * Asynchronous strategy.
1355 */
1356 func = dlil_input_thread_func;
1357 VERIFY(inp != dlil_main_input_thread);
1358 (void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
1359 "%s_input", if_name(ifp));
1360 } else {
1361 /*
1362 * Synchronous strategy if there's a netif below and
1363 * the device isn't capable of hybrid polling.
1364 */
1365 ASSERT(func == NULL);
1366 ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
1367 VERIFY(inp != dlil_main_input_thread);
1368 ASSERT(!inp->dlth_affinity);
1369 inp->dlth_strategy = dlil_input_sync;
1370 }
1371 VERIFY(inp->dlth_thread == THREAD_NULL);
1372
1373 /* let caller know */
1374 if (thfunc != NULL) {
1375 *thfunc = func;
1376 }
1377
1378 inp->dlth_lock_grp = lck_grp_alloc_init(inp->dlth_name,
1379 dlil_grp_attributes);
1380 lck_mtx_init(&inp->dlth_lock, inp->dlth_lock_grp, dlil_lck_attributes);
1381
1382 inp->dlth_ifp = ifp; /* NULL for main input thread */
1383 /*
1384 * For interfaces that support opportunistic polling, set the
1385 * low and high watermarks for outstanding inbound packets/bytes.
1386 * Also define freeze times for transitioning between modes
1387 * and updating the average.
1388 */
1389 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1390 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1391 if (ifp->if_xflags & IFXF_LEGACY) {
1392 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1393 }
1394 } else {
1395 limit = (u_int32_t)-1;
1396 }
1397
1398 _qinit(&inp->dlth_pkts, Q_DROPTAIL, limit, QP_MBUF);
1399 if (inp == dlil_main_input_thread) {
1400 struct dlil_main_threading_info *inpm =
1401 (struct dlil_main_threading_info *)inp;
1402 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1403 }
1404
1405 if (func == NULL) {
1406 ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
1407 ASSERT(error == 0);
1408 error = ENODEV;
1409 goto done;
1410 }
1411
1412 error = kernel_thread_start(func, inp, &inp->dlth_thread);
1413 if (error == KERN_SUCCESS) {
1414 thread_precedence_policy_data_t info;
1415 __unused kern_return_t kret;
1416
1417 bzero(&info, sizeof(info));
1418 info.importance = 0;
1419 kret = thread_policy_set(inp->dlth_thread,
1420 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
1421 THREAD_PRECEDENCE_POLICY_COUNT);
1422 ASSERT(kret == KERN_SUCCESS);
1423 /*
1424 * We create an affinity set so that the matching workloop
1425 * thread or the starter thread (for loopback) can be
1426 * scheduled on the same processor set as the input thread.
1427 */
1428 if (net_affinity) {
1429 struct thread *tp = inp->dlth_thread;
1430 u_int32_t tag;
1431 /*
1432 * Randomize to reduce the probability
1433 * of affinity tag namespace collision.
1434 */
1435 read_frandom(&tag, sizeof(tag));
1436 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1437 thread_reference(tp);
1438 inp->dlth_affinity_tag = tag;
1439 inp->dlth_affinity = TRUE;
1440 }
1441 }
1442 } else if (inp == dlil_main_input_thread) {
1443 panic_plain("%s: couldn't create main input thread", __func__);
1444 /* NOTREACHED */
1445 } else {
1446 panic_plain("%s: couldn't create %s input thread", __func__,
1447 if_name(ifp));
1448 /* NOTREACHED */
1449 }
1450 OSAddAtomic(1, &cur_dlil_input_threads);
1451
1452 done:
1453 return error;
1454 }
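/*
 * Summary of the strategy selection in dlil_create_input_thread():
 *
 *   ifp == NULL                          -> dlil_main_input_thread_func
 *                                           (the shared main input thread)
 *   legacy ifnet with IFEF_RXPOLL set
 *   and net_rxpoll enabled               -> dlil_rxpoll_input_thread_func
 *                                           (hybrid interrupt/poll model)
 *   net_async, or a legacy ifnet         -> dlil_input_thread_func
 *                                           (dedicated asynchronous thread)
 *   otherwise (netif-backed, no polling) -> no dedicated thread; the
 *                                           strategy is set to
 *                                           dlil_input_sync and the
 *                                           function returns ENODEV.
 */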
1455
1456 #if TEST_INPUT_THREAD_TERMINATION
1457 static int
1458 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1459 {
1460 #pragma unused(arg1, arg2)
1461 uint32_t i;
1462 int err;
1463
1464 i = if_input_thread_termination_spin;
1465
1466 err = sysctl_handle_int(oidp, &i, 0, req);
1467 if (err != 0 || req->newptr == USER_ADDR_NULL) {
1468 return err;
1469 }
1470
1471 if (net_rxpoll == 0) {
1472 return ENXIO;
1473 }
1474
1475 if_input_thread_termination_spin = i;
1476 return err;
1477 }
1478 #endif /* TEST_INPUT_THREAD_TERMINATION */
1479
1480 static void
1481 dlil_clean_threading_info(struct dlil_threading_info *inp)
1482 {
1483 lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
1484 lck_grp_free(inp->dlth_lock_grp);
1485 inp->dlth_lock_grp = NULL;
1486
1487 inp->dlth_flags = 0;
1488 inp->dlth_wtot = 0;
1489 bzero(inp->dlth_name, sizeof(inp->dlth_name));
1490 inp->dlth_ifp = NULL;
1491 VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
1492 qlimit(&inp->dlth_pkts) = 0;
1493 bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));
1494
1495 VERIFY(!inp->dlth_affinity);
1496 inp->dlth_thread = THREAD_NULL;
1497 inp->dlth_strategy = NULL;
1498 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
1499 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
1500 VERIFY(inp->dlth_affinity_tag == 0);
1501 #if IFNET_INPUT_SANITY_CHK
1502 inp->dlth_pkts_cnt = 0;
1503 #endif /* IFNET_INPUT_SANITY_CHK */
1504 }
1505
1506 static void
1507 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1508 {
1509 struct ifnet *ifp = inp->dlth_ifp;
1510 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
1511
1512 VERIFY(current_thread() == inp->dlth_thread);
1513 VERIFY(inp != dlil_main_input_thread);
1514
1515 OSAddAtomic(-1, &cur_dlil_input_threads);
1516
1517 #if TEST_INPUT_THREAD_TERMINATION
1518 { /* do something useless that won't get optimized away */
1519 uint32_t v = 1;
1520 for (uint32_t i = 0;
1521 i < if_input_thread_termination_spin;
1522 i++) {
1523 v = (i + 1) * v;
1524 }
1525 DLIL_PRINTF("the value is %d\n", v);
1526 }
1527 #endif /* TEST_INPUT_THREAD_TERMINATION */
1528
1529 lck_mtx_lock_spin(&inp->dlth_lock);
1530 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
1531 VERIFY((inp->dlth_flags & DLIL_INPUT_TERMINATE) != 0);
1532 inp->dlth_flags |= DLIL_INPUT_TERMINATE_COMPLETE;
1533 wakeup_one((caddr_t)&inp->dlth_flags);
1534 lck_mtx_unlock(&inp->dlth_lock);
1535
1536 /* free up pending packets */
1537 if (pkt.cp_mbuf != NULL) {
1538 mbuf_freem_list(pkt.cp_mbuf);
1539 }
1540
1541 /* for the extra refcnt from kernel_thread_start() */
1542 thread_deallocate(current_thread());
1543
1544 if (dlil_verbose) {
1545 DLIL_PRINTF("%s: input thread terminated\n",
1546 if_name(ifp));
1547 }
1548
1549 /* this is the end */
1550 thread_terminate(current_thread());
1551 /* NOTREACHED */
1552 }
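/*
 * Termination handshake: the side requesting termination is expected to
 * have set DLIL_INPUT_TERMINATE and to be waiting on dlth_flags (not
 * shown here).  The input thread drains any queued packets, sets
 * DLIL_INPUT_TERMINATE_COMPLETE, wakes the waiter, drops the reference
 * taken by kernel_thread_start() and then terminates itself.
 */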
1553
1554 static kern_return_t
1555 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1556 {
1557 thread_affinity_policy_data_t policy;
1558
1559 bzero(&policy, sizeof(policy));
1560 policy.affinity_tag = tag;
1561 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1562 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
1563 }
1564
1565 void
1566 dlil_init(void)
1567 {
1568 thread_t thread = THREAD_NULL;
1569
1570 /*
1571 * The following fields must be 64-bit aligned for atomic operations.
1572 */
1573 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1574 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1575 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1576 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1577 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1578 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1579 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1580 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1581 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1582 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1583 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1584 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1585 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1586 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1587 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1588
1589 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1590 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1591 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1592 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1593 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1594 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1595 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1596 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1597 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1598 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1599 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1600 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1601 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1602 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1603 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1604
1605 /*
1606 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1607 */
1608 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1609 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1610 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1611 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1612 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1613 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1614 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1615 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1616 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1617 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1618 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1619 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1620 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1621 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1622
1623 /*
1624 * ... as well as the mbuf checksum flags counterparts.
1625 */
1626 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1627 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1628 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1629 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1630 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1631 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1632 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1633 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1634 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1635 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1636 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1637
1638 /*
1639 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1640 */
1641 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1642 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1643
1644 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1645 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1646 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1647 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1648
1649 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1650 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1651 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1652
1653 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1654 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1655 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1656 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1657 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1658 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1659 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1660 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1661 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1662 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1663 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1664 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1665 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1666 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1667 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1668 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1669 _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
1670 _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
1671 _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
1672
1673 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1674 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1675 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1676 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1677 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1678 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1679 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1680 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
1681 _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);
1682
1683 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1684 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1685
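/*
 * Pick up boot-arg overrides for the tunables consumed below
 * (net_affinity, net_rxpoll, net_rtref, net_async, ifnet_debug);
 * ifnet_debug in particular selects the larger debug variant of
 * struct dlil_ifnet allocated from dlif_zone.
 */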
1686 PE_parse_boot_argn("net_affinity", &net_affinity,
1687 sizeof(net_affinity));
1688
1689 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
1690
1691 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
1692
1693 PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));
1694
1695 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
1696
1697 VERIFY(dlil_pending_thread_cnt == 0);
1698 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1699 sizeof(struct dlil_ifnet_dbg);
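/*
 * Each zone element is sized to hold the structure plus room for a
 * pointer and up to 8 bytes of padding, so the structure itself can be
 * placed on a 64-bit boundary within the buffer; the extra pointer slot
 * presumably records the unaligned base address of the element.  The
 * tcpstat/udpstat zones below use the same layout.
 */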
1700 /* Enforce 64-bit alignment for dlil_ifnet structure */
1701 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
1702 dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
1703 dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);
1704
1705 dlif_tcpstat_size = sizeof(struct tcpstat_local);
1706 /* Enforce 64-bit alignment for tcpstat_local structure */
1707 dlif_tcpstat_bufsize =
1708 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
1709 dlif_tcpstat_bufsize = (uint32_t)
1710 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
1711 dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
1712 dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);
1713
1714 dlif_udpstat_size = sizeof(struct udpstat_local);
1715 /* Enforce 64-bit alignment for udpstat_local structure */
1716 dlif_udpstat_bufsize =
1717 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
1718 dlif_udpstat_bufsize = (uint32_t)
1719 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
1720 dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
1721 dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);
1722
1723 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1724
1725 TAILQ_INIT(&dlil_ifnet_head);
1726 TAILQ_INIT(&ifnet_head);
1727 TAILQ_INIT(&ifnet_detaching_head);
1728 TAILQ_INIT(&ifnet_ordered_head);
1729
1730 /* Setup the lock groups we will use */
1731 dlil_grp_attributes = lck_grp_attr_alloc_init();
1732
1733 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1734 dlil_grp_attributes);
1735 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1736 dlil_grp_attributes);
1737 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1738 dlil_grp_attributes);
1739 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1740 dlil_grp_attributes);
1741 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1742 dlil_grp_attributes);
1743
1744 /* Setup the lock attributes we will use */
1745 dlil_lck_attributes = lck_attr_alloc_init();
1746
1747 ifnet_lock_attr = lck_attr_alloc_init();
1748
1749 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1750 dlil_lck_attributes);
1751 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1752 lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);
1753
1754 /* Setup interface flow control related items */
1755 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1756
1757 /* Initialize interface address subsystem */
1758 ifa_init();
1759
1760 #if PF
1761 /* Initialize the packet filter */
1762 pfinit();
1763 #endif /* PF */
1764
1765 /* Initialize queue algorithms */
1766 classq_init();
1767
1768 /* Initialize packet schedulers */
1769 pktsched_init();
1770
1771 /* Initialize flow advisory subsystem */
1772 flowadv_init();
1773
1774 /* Initialize the pktap virtual interface */
1775 pktap_init();
1776
1777 /* Initialize the service class to dscp map */
1778 net_qos_map_init();
1779
1780 /* Initialize the interface port list */
1781 if_ports_used_init();
1782
1783 /* Initialize the interface low power mode event handler */
1784 if_low_power_evhdlr_init();
1785
1786 #if DEBUG || DEVELOPMENT
1787 /* Run self-tests */
1788 dlil_verify_sum16();
1789 #endif /* DEBUG || DEVELOPMENT */
1790
1791 /* Initialize link layer table */
1792 lltable_glbl_init();
1793
1794 /*
1795 * Create and start up the main DLIL input thread and the interface
1796 * detacher thread once everything is initialized.
1797 */
1798 dlil_incr_pending_thread_count();
1799 (void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);
1800
1801 /*
1802 * Create ifnet detacher thread.
1803 * When an interface gets detached, part of the detach processing
1804 * is delayed. The interface is added to delayed detach list
1805 * and this thread is woken up to call ifnet_detach_final
1806 * on these interfaces.
1807 */
1808 dlil_incr_pending_thread_count();
1809 if (kernel_thread_start(ifnet_detacher_thread_func,
1810 NULL, &thread) != KERN_SUCCESS) {
1811 panic_plain("%s: couldn't create detacher thread", __func__);
1812 /* NOTREACHED */
1813 }
1814 thread_deallocate(thread);
1815
1816 /*
1817 * Wait for the dlil kernel threads created above to get
1818 * scheduled and run at least once before we proceed
1819 */
1820 lck_mtx_lock(&dlil_thread_sync_lock);
1821 while (dlil_pending_thread_cnt != 0) {
1822 DLIL_PRINTF("%s: Waiting for all the created dlil kernel "
1823 "threads to get scheduled at least once.\n", __func__);
1824 (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
1825 (PZERO - 1), __func__, NULL);
1826 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
1827 }
1828 lck_mtx_unlock(&dlil_thread_sync_lock);
1829 DLIL_PRINTF("%s: All the created dlil kernel threads have been "
1830 "scheduled at least once. Proceeding.\n", __func__);
1831 }
1832
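/*
 * The if_flt_monitor_* routines implement a simple sleep-based monitor
 * over the interface filter list, protected by if_flt_lock: enter()
 * sleeps while another thread is busy walking or mutating the list,
 * busy()/leave() bracket the critical region, and leave() wakes any
 * waiters once the busy count drops to zero.
 */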
1833 static void
1834 if_flt_monitor_busy(struct ifnet *ifp)
1835 {
1836 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1837
1838 ++ifp->if_flt_busy;
1839 VERIFY(ifp->if_flt_busy != 0);
1840 }
1841
1842 static void
1843 if_flt_monitor_unbusy(struct ifnet *ifp)
1844 {
1845 if_flt_monitor_leave(ifp);
1846 }
1847
1848 static void
1849 if_flt_monitor_enter(struct ifnet *ifp)
1850 {
1851 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1852
1853 while (ifp->if_flt_busy) {
1854 ++ifp->if_flt_waiters;
1855 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1856 (PZERO - 1), "if_flt_monitor", NULL);
1857 }
1858 if_flt_monitor_busy(ifp);
1859 }
1860
1861 static void
1862 if_flt_monitor_leave(struct ifnet *ifp)
1863 {
1864 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1865
1866 VERIFY(ifp->if_flt_busy != 0);
1867 --ifp->if_flt_busy;
1868
1869 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1870 ifp->if_flt_waiters = 0;
1871 wakeup(&ifp->if_flt_head);
1872 }
1873 }
1874
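/*
 * A minimal sketch of how a filter is attached (my_ctx and the my_*_cb
 * callbacks are hypothetical; in practice this path is reached through
 * the interface filter KPI rather than called directly):
 *
 *	struct iff_filter filt = {
 *		.iff_cookie   = my_ctx,
 *		.iff_name     = "com.example.filter",
 *		.iff_input    = my_input_cb,
 *		.iff_output   = my_output_cb,
 *		.iff_detached = my_detached_cb,
 *	};
 *	interface_filter_t ref;
 *	int err = dlil_attach_filter(ifp, &filt, &ref, 0);
 *
 * The returned reference is what dlil_detach_filter() later expects.
 */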
1875 __private_extern__ int
1876 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1877 interface_filter_t *filter_ref, u_int32_t flags)
1878 {
1879 int retval = 0;
1880 struct ifnet_filter *filter = NULL;
1881
1882 ifnet_head_lock_shared();
1883 /* Check that the interface is in the global list */
1884 if (!ifnet_lookup(ifp)) {
1885 retval = ENXIO;
1886 goto done;
1887 }
1888
1889 filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO);
1890 if (filter == NULL) {
1891 retval = ENOMEM;
1892 goto done;
1893 }
1894
1895 /* refcnt held above during lookup */
1896 filter->filt_flags = flags;
1897 filter->filt_ifp = ifp;
1898 filter->filt_cookie = if_filter->iff_cookie;
1899 filter->filt_name = if_filter->iff_name;
1900 filter->filt_protocol = if_filter->iff_protocol;
1901 /*
1902 * Do not install filter callbacks for internal coproc interface
1903 */
1904 if (!IFNET_IS_INTCOPROC(ifp)) {
1905 filter->filt_input = if_filter->iff_input;
1906 filter->filt_output = if_filter->iff_output;
1907 filter->filt_event = if_filter->iff_event;
1908 filter->filt_ioctl = if_filter->iff_ioctl;
1909 }
1910 filter->filt_detached = if_filter->iff_detached;
1911
1912 lck_mtx_lock(&ifp->if_flt_lock);
1913 if_flt_monitor_enter(ifp);
1914
1915 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1916 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1917
1918 if_flt_monitor_leave(ifp);
1919 lck_mtx_unlock(&ifp->if_flt_lock);
1920
1921 *filter_ref = filter;
1922
1923 /*
1924 * Bump filter count and route_generation ID to let TCP
1925 * know it shouldn't do TSO on this connection
1926 */
1927 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1928 ifnet_filter_update_tso(TRUE);
1929 }
1930 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1931 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1932 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1933 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1934 }
1935 if (dlil_verbose) {
1936 DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
1937 if_filter->iff_name);
1938 }
1939 done:
1940 ifnet_head_done();
1941 if (retval != 0 && ifp != NULL) {
1942 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1943 if_name(ifp), if_filter->iff_name, retval);
1944 }
1945 if (retval != 0 && filter != NULL) {
1946 zfree(dlif_filt_zone, filter);
1947 }
1948
1949 return retval;
1950 }
1951
1952 static int
1953 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1954 {
1955 int retval = 0;
1956
1957 if (detached == 0) {
1958 ifnet_t ifp = NULL;
1959
1960 ifnet_head_lock_shared();
1961 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1962 interface_filter_t entry = NULL;
1963
1964 lck_mtx_lock(&ifp->if_flt_lock);
1965 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1966 if (entry != filter || entry->filt_skip) {
1967 continue;
1968 }
1969 /*
1970 * We've found a match; since it's possible
1971 * that the thread gets blocked in the monitor,
1972 * we do the lock dance. Interface should
1973 * not be detached since we still have a use
1974 * count held during filter attach.
1975 */
1976 entry->filt_skip = 1; /* skip input/output */
1977 lck_mtx_unlock(&ifp->if_flt_lock);
1978 ifnet_head_done();
1979
1980 lck_mtx_lock(&ifp->if_flt_lock);
1981 if_flt_monitor_enter(ifp);
1982 LCK_MTX_ASSERT(&ifp->if_flt_lock,
1983 LCK_MTX_ASSERT_OWNED);
1984
1985 /* Remove the filter from the list */
1986 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1987 filt_next);
1988
1989 if_flt_monitor_leave(ifp);
1990 lck_mtx_unlock(&ifp->if_flt_lock);
1991 if (dlil_verbose) {
1992 DLIL_PRINTF("%s: %s filter detached\n",
1993 if_name(ifp), filter->filt_name);
1994 }
1995 goto destroy;
1996 }
1997 lck_mtx_unlock(&ifp->if_flt_lock);
1998 }
1999 ifnet_head_done();
2000
2001 /* filter parameter is not a valid filter ref */
2002 retval = EINVAL;
2003 goto done;
2004 }
2005
2006 if (dlil_verbose) {
2007 DLIL_PRINTF("%s filter detached\n", filter->filt_name);
2008 }
2009
2010 destroy:
2011
2012 /* Call the detached function if there is one */
2013 if (filter->filt_detached) {
2014 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
2015 }
2016
2017 /*
2018 * Decrease filter count and bump route_generation ID to let TCP
2019 * know it should reevaluate doing TSO or not
2020 */
2021 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
2022 ifnet_filter_update_tso(FALSE);
2023 }
2024
2025 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
2026
2027 /* Free the filter */
2028 zfree(dlif_filt_zone, filter);
2029 filter = NULL;
2030 done:
2031 if (retval != 0 && filter != NULL) {
2032 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
2033 filter->filt_name, retval);
2034 }
2035
2036 return retval;
2037 }
2038
2039 __private_extern__ void
2040 dlil_detach_filter(interface_filter_t filter)
2041 {
2042 if (filter == NULL) {
2043 return;
2044 }
2045 dlil_detach_filter_internal(filter, 0);
2046 }
2047
2048 __private_extern__ boolean_t
2049 dlil_has_ip_filter(void)
2050 {
2051 boolean_t has_filter = (net_api_stats.nas_ipf_add_count > 0);
2052 DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
2053 return has_filter;
2054 }
2055
2056 __private_extern__ boolean_t
2057 dlil_has_if_filter(struct ifnet *ifp)
2058 {
2059 boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
2060 DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
2061 return has_filter;
2062 }
2063
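/*
 * Mark the input thread as having pending work and wake it if it is not
 * already running.  Caller must hold dlth_lock.  dlth_wtot counts the
 * wakeup requests between polls; the opportunistic-polling logic uses
 * its EWMA to decide when to switch between interrupt and poll mode.
 */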
2064 static inline void
2065 dlil_input_wakeup(struct dlil_threading_info *inp)
2066 {
2067 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2068
2069 inp->dlth_flags |= DLIL_INPUT_WAITING;
2070 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
2071 inp->dlth_wtot++;
2072 wakeup_one((caddr_t)&inp->dlth_flags);
2073 }
2074 }
2075
2076 __attribute__((noreturn))
2077 static void
2078 dlil_main_input_thread_func(void *v, wait_result_t w)
2079 {
2080 #pragma unused(w)
2081 struct dlil_threading_info *inp = v;
2082
2083 VERIFY(inp == dlil_main_input_thread);
2084 VERIFY(inp->dlth_ifp == NULL);
2085 VERIFY(current_thread() == inp->dlth_thread);
2086
2087 lck_mtx_lock(&inp->dlth_lock);
2088 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2089 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2090 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2091 /* wake up once to get out of embryonic state */
2092 dlil_input_wakeup(inp);
2093 lck_mtx_unlock(&inp->dlth_lock);
2094 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2095 /* NOTREACHED */
2096 __builtin_unreachable();
2097 }
2098
2099 /*
2100 * Main input thread:
2101 *
2102 * a) handles all inbound packets for lo0
2103 * b) handles all inbound packets for interfaces with no dedicated
2104 * input thread (e.g. anything but Ethernet/PDP or those that support
2105 * opportunistic polling.)
2106 * c) protocol registrations
2107 * d) packet injections
2108 */
2109 __attribute__((noreturn))
2110 static void
2111 dlil_main_input_thread_cont(void *v, wait_result_t wres)
2112 {
2113 struct dlil_main_threading_info *inpm = v;
2114 struct dlil_threading_info *inp = v;
2115
2116 /* main input thread is uninterruptible */
2117 VERIFY(wres != THREAD_INTERRUPTED);
2118 lck_mtx_lock_spin(&inp->dlth_lock);
2119 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE |
2120 DLIL_INPUT_RUNNING)));
2121 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2122
2123 while (1) {
2124 struct mbuf *m = NULL, *m_loop = NULL;
2125 u_int32_t m_cnt, m_cnt_loop;
2126 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2127 boolean_t proto_req;
2128 boolean_t embryonic;
2129
2130 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2131
2132 if (__improbable(embryonic =
2133 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2134 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2135 }
2136
2137 proto_req = (inp->dlth_flags &
2138 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2139
2140 /* Packets for non-dedicated interfaces other than lo0 */
2141 m_cnt = qlen(&inp->dlth_pkts);
2142 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2143 m = pkt.cp_mbuf;
2144
2145 /* Packets exclusive to lo0 */
2146 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2147 _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
2148 m_loop = pkt.cp_mbuf;
2149
2150 inp->dlth_wtot = 0;
2151
2152 lck_mtx_unlock(&inp->dlth_lock);
2153
2154 if (__improbable(embryonic)) {
2155 dlil_decr_pending_thread_count();
2156 }
2157
2158 /*
2159 * NOTE warning %%% attention !!!!
2160 * We should think about putting some thread starvation
2161 * safeguards if we deal with long chains of packets.
2162 */
2163 if (__probable(m_loop != NULL)) {
2164 dlil_input_packet_list_extended(lo_ifp, m_loop,
2165 m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
2166 }
2167
2168 if (__probable(m != NULL)) {
2169 dlil_input_packet_list_extended(NULL, m,
2170 m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
2171 }
2172
2173 if (__improbable(proto_req)) {
2174 proto_input_run();
2175 }
2176
2177 lck_mtx_lock_spin(&inp->dlth_lock);
2178 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2179 /* main input thread cannot be terminated */
2180 VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE));
2181 if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) {
2182 break;
2183 }
2184 }
2185
2186 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2187 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2188 lck_mtx_unlock(&inp->dlth_lock);
2189 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2190
2191 VERIFY(0); /* we should never get here */
2192 /* NOTREACHED */
2193 __builtin_unreachable();
2194 }
2195
2196 /*
2197 * Input thread for interfaces with legacy input model.
2198 */
2199 __attribute__((noreturn))
2200 static void
2201 dlil_input_thread_func(void *v, wait_result_t w)
2202 {
2203 #pragma unused(w)
2204 char thread_name[MAXTHREADNAMESIZE];
2205 struct dlil_threading_info *inp = v;
2206 struct ifnet *ifp = inp->dlth_ifp;
2207
2208 VERIFY(inp != dlil_main_input_thread);
2209 VERIFY(ifp != NULL);
2210 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
2211 !(ifp->if_xflags & IFXF_LEGACY));
2212 VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
2213 !(ifp->if_xflags & IFXF_LEGACY));
2214 VERIFY(current_thread() == inp->dlth_thread);
2215
2216 /* construct the name for this thread, and then apply it */
2217 bzero(thread_name, sizeof(thread_name));
2218 (void) snprintf(thread_name, sizeof(thread_name),
2219 "dlil_input_%s", ifp->if_xname);
2220 thread_set_thread_name(inp->dlth_thread, thread_name);
2221
2222 lck_mtx_lock(&inp->dlth_lock);
2223 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2224 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2225 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2226 /* wake up once to get out of embryonic state */
2227 dlil_input_wakeup(inp);
2228 lck_mtx_unlock(&inp->dlth_lock);
2229 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2230 /* NOTREACHED */
2231 __builtin_unreachable();
2232 }
2233
2234 __attribute__((noreturn))
2235 static void
2236 dlil_input_thread_cont(void *v, wait_result_t wres)
2237 {
2238 struct dlil_threading_info *inp = v;
2239 struct ifnet *ifp = inp->dlth_ifp;
2240
2241 lck_mtx_lock_spin(&inp->dlth_lock);
2242 if (__improbable(wres == THREAD_INTERRUPTED ||
2243 (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
2244 goto terminate;
2245 }
2246
2247 VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2248 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2249
2250 while (1) {
2251 struct mbuf *m = NULL;
2252 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2253 boolean_t notify = FALSE;
2254 boolean_t embryonic;
2255 u_int32_t m_cnt;
2256
2257 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2258
2259 if (__improbable(embryonic =
2260 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2261 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2262 }
2263
2264 /*
2265 * Protocol registration and injection must always use
2266 * the main input thread; in theory the latter could use the
2267 * input thread of the interface on which the packet arrived,
2268 * but that requires knowing the interface in advance
2269 * (and the benefits might not be worth the trouble.)
2270 */
2271 VERIFY(!(inp->dlth_flags &
2272 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2273
2274 /* Packets for this interface */
2275 m_cnt = qlen(&inp->dlth_pkts);
2276 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2277 m = pkt.cp_mbuf;
2278
2279 inp->dlth_wtot = 0;
2280
2281 notify = dlil_input_stats_sync(ifp, inp);
2282
2283 lck_mtx_unlock(&inp->dlth_lock);
2284
2285 if (__improbable(embryonic)) {
2286 ifnet_decr_pending_thread_count(ifp);
2287 }
2288
2289 if (__improbable(notify)) {
2290 ifnet_notify_data_threshold(ifp);
2291 }
2292
2293 /*
2294 * NOTE warning %%% attention !!!!
2295 * We should think about putting some thread starvation
2296 * safeguards if we deal with long chains of packets.
2297 */
2298 if (__probable(m != NULL)) {
2299 dlil_input_packet_list_extended(NULL, m,
2300 m_cnt, ifp->if_poll_mode);
2301 }
2302
2303 lck_mtx_lock_spin(&inp->dlth_lock);
2304 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2305 if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
2306 DLIL_INPUT_TERMINATE))) {
2307 break;
2308 }
2309 }
2310
2311 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2312
2313 if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
2314 terminate:
2315 lck_mtx_unlock(&inp->dlth_lock);
2316 dlil_terminate_input_thread(inp);
2317 /* NOTREACHED */
2318 } else {
2319 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2320 lck_mtx_unlock(&inp->dlth_lock);
2321 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2322 /* NOTREACHED */
2323 }
2324
2325 VERIFY(0); /* we should never get here */
2326 /* NOTREACHED */
2327 __builtin_unreachable();
2328 }
2329
2330 /*
2331 * Input thread for interfaces with opportunistic polling input model.
2332 */
2333 __attribute__((noreturn))
2334 static void
2335 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2336 {
2337 #pragma unused(w)
2338 char thread_name[MAXTHREADNAMESIZE];
2339 struct dlil_threading_info *inp = v;
2340 struct ifnet *ifp = inp->dlth_ifp;
2341
2342 VERIFY(inp != dlil_main_input_thread);
2343 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
2344 (ifp->if_xflags & IFXF_LEGACY));
2345 VERIFY(current_thread() == inp->dlth_thread);
2346
2347 /* construct the name for this thread, and then apply it */
2348 bzero(thread_name, sizeof(thread_name));
2349 (void) snprintf(thread_name, sizeof(thread_name),
2350 "dlil_input_poll_%s", ifp->if_xname);
2351 thread_set_thread_name(inp->dlth_thread, thread_name);
2352
2353 lck_mtx_lock(&inp->dlth_lock);
2354 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2355 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2356 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2357 /* wake up once to get out of embryonic state */
2358 dlil_input_wakeup(inp);
2359 lck_mtx_unlock(&inp->dlth_lock);
2360 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
2361 /* NOTREACHED */
2362 __builtin_unreachable();
2363 }
2364
2365 __attribute__((noreturn))
2366 static void
2367 dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
2368 {
2369 struct dlil_threading_info *inp = v;
2370 struct ifnet *ifp = inp->dlth_ifp;
2371 struct timespec ts;
2372
2373 lck_mtx_lock_spin(&inp->dlth_lock);
2374 if (__improbable(wres == THREAD_INTERRUPTED ||
2375 (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
2376 goto terminate;
2377 }
2378
2379 VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2380 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2381
2382 while (1) {
2383 struct mbuf *m = NULL;
2384 uint32_t m_cnt, poll_req = 0;
2385 uint64_t m_size = 0;
2386 ifnet_model_t mode;
2387 struct timespec now, delta;
2388 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2389 boolean_t notify;
2390 boolean_t embryonic;
2391 uint64_t ival;
2392
2393 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2394
2395 if (__improbable(embryonic =
2396 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2397 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2398 goto skip;
2399 }
2400
2401 if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
2402 ival = IF_RXPOLL_INTERVALTIME_MIN;
2403 }
2404
2405 /* Link parameters changed? */
2406 if (ifp->if_poll_update != 0) {
2407 ifp->if_poll_update = 0;
2408 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2409 }
2410
2411 /* Current operating mode */
2412 mode = ifp->if_poll_mode;
2413
2414 /*
2415 * Protocol registration and injection must always use
2416 * the main input thread; in theory the latter could use the
2417 * input thread of the interface on which the packet arrived,
2418 * but that requires knowing the interface in advance
2419 * (and the benefits might not be worth the trouble.)
2420 */
2421 VERIFY(!(inp->dlth_flags &
2422 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2423
2424 /* Total count of all packets */
2425 m_cnt = qlen(&inp->dlth_pkts);
2426
2427 /* Total bytes of all packets */
2428 m_size = qsize(&inp->dlth_pkts);
2429
2430 /* Packets for this interface */
2431 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2432 m = pkt.cp_mbuf;
2433 VERIFY(m != NULL || m_cnt == 0);
2434
2435 nanouptime(&now);
2436 if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
2437 *(&ifp->if_poll_sample_lasttime) = *(&now);
2438 }
2439
2440 net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
2441 if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
2442 u_int32_t ptot, btot;
2443
2444 /* Accumulate statistics for current sampling */
2445 PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
2446
2447 if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
2448 goto skip;
2449 }
2450
2451 *(&ifp->if_poll_sample_lasttime) = *(&now);
2452
2453 /* Calculate min/max of inbound bytes */
2454 btot = (u_int32_t)ifp->if_poll_sstats.bytes;
2455 if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
2456 ifp->if_rxpoll_bmin = btot;
2457 }
2458 if (btot > ifp->if_rxpoll_bmax) {
2459 ifp->if_rxpoll_bmax = btot;
2460 }
2461
2462 /* Calculate EWMA of inbound bytes */
2463 DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
2464
2465 /* Calculate min/max of inbound packets */
2466 ptot = (u_int32_t)ifp->if_poll_sstats.packets;
2467 if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
2468 ifp->if_rxpoll_pmin = ptot;
2469 }
2470 if (ptot > ifp->if_rxpoll_pmax) {
2471 ifp->if_rxpoll_pmax = ptot;
2472 }
2473
2474 /* Calculate EWMA of inbound packets */
2475 DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
2476
2477 /* Reset sampling statistics */
2478 PKTCNTR_CLEAR(&ifp->if_poll_sstats);
2479
2480 /* Calculate EWMA of wakeup requests */
2481 DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot,
2482 if_rxpoll_decay);
2483 inp->dlth_wtot = 0;
2484
2485 if (dlil_verbose) {
2486 if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
2487 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2488 }
2489 net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
2490 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2491 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2492 DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
2493 "limits [%d/%d], wreq avg %d "
2494 "limits [%d/%d], bytes avg %d "
2495 "limits [%d/%d]\n", if_name(ifp),
2496 (ifp->if_poll_mode ==
2497 IFNET_MODEL_INPUT_POLL_ON) ?
2498 "ON" : "OFF", ifp->if_rxpoll_pavg,
2499 ifp->if_rxpoll_pmax,
2500 ifp->if_rxpoll_plowat,
2501 ifp->if_rxpoll_phiwat,
2502 ifp->if_rxpoll_wavg,
2503 ifp->if_rxpoll_wlowat,
2504 ifp->if_rxpoll_whiwat,
2505 ifp->if_rxpoll_bavg,
2506 ifp->if_rxpoll_blowat,
2507 ifp->if_rxpoll_bhiwat);
2508 }
2509 }
2510
2511 /* Perform mode transition, if necessary */
2512 if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
2513 *(&ifp->if_poll_mode_lasttime) = *(&now);
2514 }
2515
2516 net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
2517 if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
2518 goto skip;
2519 }
2520
2521 if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
2522 ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
2523 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
2524 mode = IFNET_MODEL_INPUT_POLL_OFF;
2525 } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
2526 (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
2527 ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
2528 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
2529 mode = IFNET_MODEL_INPUT_POLL_ON;
2530 }
2531
2532 if (mode != ifp->if_poll_mode) {
2533 ifp->if_poll_mode = mode;
2534 *(&ifp->if_poll_mode_lasttime) = *(&now);
2535 poll_req++;
2536 }
2537 }
2538 skip:
2539 notify = dlil_input_stats_sync(ifp, inp);
2540
2541 lck_mtx_unlock(&inp->dlth_lock);
2542
2543 if (__improbable(embryonic)) {
2544 ifnet_decr_pending_thread_count(ifp);
2545 }
2546
2547 if (__improbable(notify)) {
2548 ifnet_notify_data_threshold(ifp);
2549 }
2550
2551 /*
2552 * If there's a mode change and the interface is still attached,
2553 * perform a downcall to the driver for the new mode. Also
2554 * hold an IO refcnt on the interface to prevent it from
2555 * being detached (will be released below.)
2556 */
2557 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2558 struct ifnet_model_params p = {
2559 .model = mode, .reserved = { 0 }
2560 };
2561 errno_t err;
2562
2563 if (dlil_verbose) {
2564 DLIL_PRINTF("%s: polling is now %s, "
2565 "pkts avg %d max %d limits [%d/%d], "
2566 "wreq avg %d limits [%d/%d], "
2567 "bytes avg %d limits [%d/%d]\n",
2568 if_name(ifp),
2569 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2570 "ON" : "OFF", ifp->if_rxpoll_pavg,
2571 ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
2572 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
2573 ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
2574 ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
2575 ifp->if_rxpoll_bhiwat);
2576 }
2577
2578 if ((err = ((*ifp->if_input_ctl)(ifp,
2579 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
2580 DLIL_PRINTF("%s: error setting polling mode "
2581 "to %s (%d)\n", if_name(ifp),
2582 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2583 "ON" : "OFF", err);
2584 }
2585
2586 switch (mode) {
2587 case IFNET_MODEL_INPUT_POLL_OFF:
2588 ifnet_set_poll_cycle(ifp, NULL);
2589 ifp->if_rxpoll_offreq++;
2590 if (err != 0) {
2591 ifp->if_rxpoll_offerr++;
2592 }
2593 break;
2594
2595 case IFNET_MODEL_INPUT_POLL_ON:
2596 net_nsectimer(&ival, &ts);
2597 ifnet_set_poll_cycle(ifp, &ts);
2598 ifnet_poll(ifp);
2599 ifp->if_rxpoll_onreq++;
2600 if (err != 0) {
2601 ifp->if_rxpoll_onerr++;
2602 }
2603 break;
2604
2605 default:
2606 VERIFY(0);
2607 /* NOTREACHED */
2608 }
2609
2610 /* Release the IO refcnt */
2611 ifnet_decr_iorefcnt(ifp);
2612 }
2613
2614 /*
2615 * NOTE warning %%% attention !!!!
2616 * We should think about putting some thread starvation
2617 * safeguards if we deal with long chains of packets.
2618 */
2619 if (__probable(m != NULL)) {
2620 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2621 }
2622
2623 lck_mtx_lock_spin(&inp->dlth_lock);
2624 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2625 if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
2626 DLIL_INPUT_TERMINATE))) {
2627 break;
2628 }
2629 }
2630
2631 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2632
2633 if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
2634 terminate:
2635 lck_mtx_unlock(&inp->dlth_lock);
2636 dlil_terminate_input_thread(inp);
2637 /* NOTREACHED */
2638 } else {
2639 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2640 lck_mtx_unlock(&inp->dlth_lock);
2641 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
2642 inp);
2643 /* NOTREACHED */
2644 }
2645
2646 VERIFY(0); /* we should never get here */
2647 /* NOTREACHED */
2648 __builtin_unreachable();
2649 }
2650
2651 errno_t
2652 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
2653 {
2654 if (p != NULL) {
2655 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2656 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2657 return EINVAL;
2658 }
2659 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2660 p->packets_lowat >= p->packets_hiwat) {
2661 return EINVAL;
2662 }
2663 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2664 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2665 return EINVAL;
2666 }
2667 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2668 p->bytes_lowat >= p->bytes_hiwat) {
2669 return EINVAL;
2670 }
2671 if (p->interval_time != 0 &&
2672 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
2673 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2674 }
2675 }
2676 return 0;
2677 }
2678
2679 void
2680 dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2681 {
2682 u_int64_t sample_holdtime, inbw;
2683
2684 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2685 sample_holdtime = 0; /* polling is disabled */
2686 ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
2687 ifp->if_rxpoll_blowat = 0;
2688 ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
2689 ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
2690 ifp->if_rxpoll_plim = 0;
2691 ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2692 } else {
2693 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2694 u_int64_t ival;
2695 unsigned int n, i;
2696
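/*
 * Find the highest rxpoll_tbl entry whose link speed does not
 * exceed the current input link rate; its thresholds seed the
 * auto-tuned defaults below.
 */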
2697 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2698 if (inbw < rxpoll_tbl[i].speed) {
2699 break;
2700 }
2701 n = i;
2702 }
2703 /* auto-tune if caller didn't specify a value */
2704 plowat = ((p == NULL || p->packets_lowat == 0) ?
2705 rxpoll_tbl[n].plowat : p->packets_lowat);
2706 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2707 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2708 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2709 rxpoll_tbl[n].blowat : p->bytes_lowat);
2710 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2711 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2712 plim = ((p == NULL || p->packets_limit == 0) ?
2713 if_rxpoll_max : p->packets_limit);
2714 ival = ((p == NULL || p->interval_time == 0) ?
2715 if_rxpoll_interval_time : p->interval_time);
2716
2717 VERIFY(plowat != 0 && phiwat != 0);
2718 VERIFY(blowat != 0 && bhiwat != 0);
2719 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2720
2721 sample_holdtime = if_rxpoll_sample_holdtime;
2722 ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
2723 ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
2724 ifp->if_rxpoll_plowat = plowat;
2725 ifp->if_rxpoll_phiwat = phiwat;
2726 ifp->if_rxpoll_blowat = blowat;
2727 ifp->if_rxpoll_bhiwat = bhiwat;
2728 ifp->if_rxpoll_plim = plim;
2729 ifp->if_rxpoll_ival = ival;
2730 }
2731
2732 net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
2733 net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
2734
2735 if (dlil_verbose) {
2736 DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
2737 "poll interval %llu nsec, pkts per poll %u, "
2738 "pkt limits [%u/%u], wreq limits [%u/%u], "
2739 "bytes limits [%u/%u]\n", if_name(ifp),
2740 inbw, sample_holdtime, ifp->if_rxpoll_ival,
2741 ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
2742 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
2743 ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
2744 ifp->if_rxpoll_bhiwat);
2745 }
2746 }
2747
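/*
 * Illustrative usage of the setter below, assuming an attached ifnet
 * with IFEF_RXPOLL set (values are illustrative only; drivers normally
 * supply parameters through the ifnet KPI):
 *
 *	struct ifnet_poll_params p = { .packets_lowat = 8,
 *	    .packets_hiwat = 64 };
 *	errno_t err = dlil_rxpoll_set_params(ifp, &p, FALSE);
 *
 * Passing NULL for the parameters reverts to the auto-tuned values
 * derived from rxpoll_tbl and the input link rate.
 */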
2748 /*
2749 * Must be called on an attached ifnet (caller is expected to check.)
2750 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2751 */
2752 errno_t
2753 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2754 boolean_t locked)
2755 {
2756 errno_t err;
2757 struct dlil_threading_info *inp;
2758
2759 VERIFY(ifp != NULL);
2760 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2761 return ENXIO;
2762 }
2763 err = dlil_rxpoll_validate_params(p);
2764 if (err != 0) {
2765 return err;
2766 }
2767
2768 if (!locked) {
2769 lck_mtx_lock(&inp->dlth_lock);
2770 }
2771 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2772 /*
2773 * Normally, we'd reset the parameters to the auto-tuned values
2774 * if the input thread detects a change in link rate. If the
2775 * driver provides its own parameters right after the link rate
2776 * changes, but before the input thread gets to run, we want to
2777 * make sure to keep the driver's values. Clearing if_poll_update
2778 * will achieve that.
2779 */
2780 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2781 ifp->if_poll_update = 0;
2782 }
2783 dlil_rxpoll_update_params(ifp, p);
2784 if (!locked) {
2785 lck_mtx_unlock(&inp->dlth_lock);
2786 }
2787 return 0;
2788 }
2789
2790 /*
2791 * Must be called on an attached ifnet (caller is expected to check.)
2792 */
2793 errno_t
2794 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2795 {
2796 struct dlil_threading_info *inp;
2797
2798 VERIFY(ifp != NULL && p != NULL);
2799 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2800 return ENXIO;
2801 }
2802
2803 bzero(p, sizeof(*p));
2804
2805 lck_mtx_lock(&inp->dlth_lock);
2806 p->packets_limit = ifp->if_rxpoll_plim;
2807 p->packets_lowat = ifp->if_rxpoll_plowat;
2808 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2809 p->bytes_lowat = ifp->if_rxpoll_blowat;
2810 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2811 p->interval_time = ifp->if_rxpoll_ival;
2812 lck_mtx_unlock(&inp->dlth_lock);
2813
2814 return 0;
2815 }
2816
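/*
 * Driver-facing inbound entry points.  ifnet_input() takes just a
 * packet chain and lets DLIL count packets/bytes itself;
 * ifnet_input_extended() additionally takes the chain tail and a
 * caller-supplied ifnet_stat_increment_param; ifnet_input_poll() is
 * the variant used from the opportunistic-polling path.  For example
 * (illustrative values only):
 *
 *	struct ifnet_stat_increment_param s = { 0 };
 *	s.packets_in = cnt;
 *	s.bytes_in = len;
 *	errno_t err = ifnet_input_extended(ifp, head, tail, &s);
 *
 * All three funnel into ifnet_input_common() below.
 */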
2817 errno_t
2818 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2819 const struct ifnet_stat_increment_param *s)
2820 {
2821 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2822 }
2823
2824 errno_t
2825 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2826 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2827 {
2828 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2829 }
2830
2831 errno_t
2832 ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2833 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2834 {
2835 return ifnet_input_common(ifp, m_head, m_tail, s,
2836 (m_head != NULL), TRUE);
2837 }
2838
2839 static errno_t
2840 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2841 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2842 {
2843 dlil_input_func input_func;
2844 struct ifnet_stat_increment_param _s;
2845 u_int32_t m_cnt = 0, m_size = 0;
2846 struct mbuf *last;
2847 errno_t err = 0;
2848
2849 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2850 if (m_head != NULL) {
2851 mbuf_freem_list(m_head);
2852 }
2853 return EINVAL;
2854 }
2855
2856 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2857 VERIFY(m_tail == NULL || ext);
2858 VERIFY(s != NULL || !ext);
2859
2860 /*
2861 * Drop the packet(s) if the parameters are invalid, or if the
2862 * interface is no longer attached; else hold an IO refcnt to
2863 * prevent it from being detached (will be released below.)
2864 */
2865 if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
2866 if (m_head != NULL) {
2867 mbuf_freem_list(m_head);
2868 }
2869 return EINVAL;
2870 }
2871
2872 input_func = ifp->if_input_dlil;
2873 VERIFY(input_func != NULL);
2874
2875 if (m_tail == NULL) {
2876 last = m_head;
2877 while (m_head != NULL) {
2878 #if IFNET_INPUT_SANITY_CHK
2879 if (__improbable(dlil_input_sanity_check != 0)) {
2880 DLIL_INPUT_CHECK(last, ifp);
2881 }
2882 #endif /* IFNET_INPUT_SANITY_CHK */
2883 m_cnt++;
2884 m_size += m_length(last);
2885 if (mbuf_nextpkt(last) == NULL) {
2886 break;
2887 }
2888 last = mbuf_nextpkt(last);
2889 }
2890 m_tail = last;
2891 } else {
2892 #if IFNET_INPUT_SANITY_CHK
2893 if (__improbable(dlil_input_sanity_check != 0)) {
2894 last = m_head;
2895 while (1) {
2896 DLIL_INPUT_CHECK(last, ifp);
2897 m_cnt++;
2898 m_size += m_length(last);
2899 if (mbuf_nextpkt(last) == NULL) {
2900 break;
2901 }
2902 last = mbuf_nextpkt(last);
2903 }
2904 } else {
2905 m_cnt = s->packets_in;
2906 m_size = s->bytes_in;
2907 last = m_tail;
2908 }
2909 #else
2910 m_cnt = s->packets_in;
2911 m_size = s->bytes_in;
2912 last = m_tail;
2913 #endif /* IFNET_INPUT_SANITY_CHK */
2914 }
2915
2916 if (last != m_tail) {
2917 panic_plain("%s: invalid input packet chain for %s, "
2918 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2919 m_tail, last);
2920 }
2921
2922 /*
2923 * Assert packet count only for the extended variant, for backwards
2924 * compatibility, since this came directly from the device driver.
2925 * Relax this assertion for input bytes, as the driver may have
2926 * included the link-layer headers in the computation; hence
2927 * m_size is just an approximation.
2928 */
2929 if (ext && s->packets_in != m_cnt) {
2930 panic_plain("%s: input packet count mismatch for %s, "
2931 "%d instead of %d\n", __func__, if_name(ifp),
2932 s->packets_in, m_cnt);
2933 }
2934
2935 if (s == NULL) {
2936 bzero(&_s, sizeof(_s));
2937 s = &_s;
2938 } else {
2939 _s = *s;
2940 }
2941 _s.packets_in = m_cnt;
2942 _s.bytes_in = m_size;
2943
2944 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2945
2946 if (ifp != lo_ifp) {
2947 /* Release the IO refcnt */
2948 ifnet_datamov_end(ifp);
2949 }
2950
2951 return err;
2952 }
2953
2954
2955 errno_t
2956 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2957 {
2958 return ifp->if_output(ifp, m);
2959 }
2960
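/*
 * Inbound dispatch: hand the chain to the input thread bound to the
 * interface (falling back to the main input thread when none exists)
 * using whichever strategy, asynchronous or synchronous, is installed
 * in dlth_strategy.
 */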
2961 errno_t
2962 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2963 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2964 boolean_t poll, struct thread *tp)
2965 {
2966 struct dlil_threading_info *inp = ifp->if_inp;
2967
2968 if (__improbable(inp == NULL)) {
2969 inp = dlil_main_input_thread;
2970 }
2971
2972 return inp->dlth_strategy(inp, ifp, m_head, m_tail, s, poll, tp);
2973 }
2974
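/*
 * Asynchronous input strategy: enqueue the chain on the input thread's
 * queue (lo0 traffic goes on the main thread's dedicated loopback
 * queue) and wake the thread; the packets are processed later in that
 * thread's context.
 */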
2975 static errno_t
2976 dlil_input_async(struct dlil_threading_info *inp,
2977 struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2978 const struct ifnet_stat_increment_param *s, boolean_t poll,
2979 struct thread *tp)
2980 {
2981 u_int32_t m_cnt = s->packets_in;
2982 u_int32_t m_size = s->bytes_in;
2983 boolean_t notify = FALSE;
2984
2985 /*
2986 * If there is a matching DLIL input thread associated with an
2987 * affinity set, associate this thread with the same set. We
2988 * will only do this once.
2989 */
2990 lck_mtx_lock_spin(&inp->dlth_lock);
2991 if (inp != dlil_main_input_thread && inp->dlth_affinity && tp != NULL &&
2992 ((!poll && inp->dlth_driver_thread == THREAD_NULL) ||
2993 (poll && inp->dlth_poller_thread == THREAD_NULL))) {
2994 u_int32_t tag = inp->dlth_affinity_tag;
2995
2996 if (poll) {
2997 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
2998 inp->dlth_poller_thread = tp;
2999 } else {
3000 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
3001 inp->dlth_driver_thread = tp;
3002 }
3003 lck_mtx_unlock(&inp->dlth_lock);
3004
3005 /* Associate the current thread with the new affinity tag */
3006 (void) dlil_affinity_set(tp, tag);
3007
3008 /*
3009 * Take a reference on the current thread; during detach,
3010 * we will need to refer to it in order to tear down its
3011 * affinity.
3012 */
3013 thread_reference(tp);
3014 lck_mtx_lock_spin(&inp->dlth_lock);
3015 }
3016
3017 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
3018
3019 /*
3020 * Because of loopbacked multicast we cannot stuff the ifp in
3021 * the rcvif of the packet header: loopback (lo0) packets use a
3022 * dedicated list so that we can later associate them with lo_ifp
3023 * on their way up the stack. Packets for other interfaces without
3024 * dedicated input threads go to the regular list.
3025 */
3026 if (m_head != NULL) {
3027 classq_pkt_t head, tail;
3028 CLASSQ_PKT_INIT_MBUF(&head, m_head);
3029 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3030 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
3031 struct dlil_main_threading_info *inpm =
3032 (struct dlil_main_threading_info *)inp;
3033 _addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
3034 m_cnt, m_size);
3035 } else {
3036 _addq_multi(&inp->dlth_pkts, &head, &tail,
3037 m_cnt, m_size);
3038 }
3039 }
3040
3041 #if IFNET_INPUT_SANITY_CHK
3042 if (__improbable(dlil_input_sanity_check != 0)) {
3043 u_int32_t count = 0, size = 0;
3044 struct mbuf *m0;
3045
3046 for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
3047 size += m_length(m0);
3048 count++;
3049 }
3050
3051 if (count != m_cnt) {
3052 panic_plain("%s: invalid total packet count %u "
3053 "(expected %u)\n", if_name(ifp), count, m_cnt);
3054 /* NOTREACHED */
3055 __builtin_unreachable();
3056 } else if (size != m_size) {
3057 panic_plain("%s: invalid total packet size %u "
3058 "(expected %u)\n", if_name(ifp), size, m_size);
3059 /* NOTREACHED */
3060 __builtin_unreachable();
3061 }
3062
3063 inp->dlth_pkts_cnt += m_cnt;
3064 }
3065 #endif /* IFNET_INPUT_SANITY_CHK */
3066
3067 dlil_input_stats_add(s, inp, ifp, poll);
3068 /*
3069 * If we're using the main input thread, synchronize the
3070 * stats now since we have the interface context. All
3071 * other cases involving dedicated input threads will
3072 * have their stats synchronized there.
3073 */
3074 if (inp == dlil_main_input_thread) {
3075 notify = dlil_input_stats_sync(ifp, inp);
3076 }
3077
3078 dlil_input_wakeup(inp);
3079 lck_mtx_unlock(&inp->dlth_lock);
3080
3081 if (notify) {
3082 ifnet_notify_data_threshold(ifp);
3083 }
3084
3085 return 0;
3086 }
3087
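/*
 * Synchronous input strategy: stats are still charged under dlth_lock,
 * but the chain is drained and run through
 * dlil_input_packet_list_extended() directly in the caller's context,
 * avoiding a context switch to the input thread.
 */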
3088 static errno_t
3089 dlil_input_sync(struct dlil_threading_info *inp,
3090 struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
3091 const struct ifnet_stat_increment_param *s, boolean_t poll,
3092 struct thread *tp)
3093 {
3094 #pragma unused(tp)
3095 u_int32_t m_cnt = s->packets_in;
3096 u_int32_t m_size = s->bytes_in;
3097 boolean_t notify = FALSE;
3098 classq_pkt_t head, tail;
3099
3100 ASSERT(inp != dlil_main_input_thread);
3101
3102 /* XXX: should we just assert instead? */
3103 if (__improbable(m_head == NULL)) {
3104 return 0;
3105 }
3106
3107 CLASSQ_PKT_INIT_MBUF(&head, m_head);
3108 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3109
3110 lck_mtx_lock_spin(&inp->dlth_lock);
3111 _addq_multi(&inp->dlth_pkts, &head, &tail, m_cnt, m_size);
3112
3113 #if IFNET_INPUT_SANITY_CHK
3114 if (__improbable(dlil_input_sanity_check != 0)) {
3115 u_int32_t count = 0, size = 0;
3116 struct mbuf *m0;
3117
3118 for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
3119 size += m_length(m0);
3120 count++;
3121 }
3122
3123 if (count != m_cnt) {
3124 panic_plain("%s: invalid total packet count %u "
3125 "(expected %u)\n", if_name(ifp), count, m_cnt);
3126 /* NOTREACHED */
3127 __builtin_unreachable();
3128 } else if (size != m_size) {
3129 panic_plain("%s: invalid total packet size %u "
3130 "(expected %u)\n", if_name(ifp), size, m_size);
3131 /* NOTREACHED */
3132 __builtin_unreachable();
3133 }
3134
3135 inp->dlth_pkts_cnt += m_cnt;
3136 }
3137 #endif /* IFNET_INPUT_SANITY_CHK */
3138
3139 dlil_input_stats_add(s, inp, ifp, poll);
3140
3141 m_cnt = qlen(&inp->dlth_pkts);
3142 _getq_all(&inp->dlth_pkts, &head, NULL, NULL, NULL);
3143
3144 notify = dlil_input_stats_sync(ifp, inp);
3145
3146 lck_mtx_unlock(&inp->dlth_lock);
3147
3148 if (notify) {
3149 ifnet_notify_data_threshold(ifp);
3150 }
3151
3152 /*
3153 * NOTE warning %%% attention !!!!
3154 * We should think about putting some thread starvation
3155 * safeguards if we deal with long chains of packets.
3156 */
3157 if (head.cp_mbuf != NULL) {
3158 dlil_input_packet_list_extended(NULL, head.cp_mbuf,
3159 m_cnt, ifp->if_poll_mode);
3160 }
3161
3162 return 0;
3163 }
3164
3165
3166 static void
3167 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
3168 {
3169 if (!(ifp->if_eflags & IFEF_TXSTART)) {
3170 return;
3171 }
3172 /*
3173 * If the starter thread is inactive, signal it to do work,
3174 * unless the interface is being flow controlled from below,
3175 * e.g. a virtual interface being flow controlled by a real
3176 * network interface beneath it, or it's been disabled via
3177 * a call to ifnet_disable_output().
3178 */
3179 lck_mtx_lock_spin(&ifp->if_start_lock);
3180 if (resetfc) {
3181 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
3182 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
3183 lck_mtx_unlock(&ifp->if_start_lock);
3184 return;
3185 }
3186 ifp->if_start_req++;
3187 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
3188 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
3189 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
3190 ifp->if_start_delayed == 0)) {
3191 (void) wakeup_one((caddr_t)&ifp->if_start_thread);
3192 }
3193 lck_mtx_unlock(&ifp->if_start_lock);
3194 }
3195
3196 void
3197 ifnet_start(struct ifnet *ifp)
3198 {
3199 ifnet_start_common(ifp, FALSE);
3200 }
3201
3202 __attribute__((noreturn))
3203 static void
3204 ifnet_start_thread_func(void *v, wait_result_t w)
3205 {
3206 #pragma unused(w)
3207 struct ifnet *ifp = v;
3208 char thread_name[MAXTHREADNAMESIZE];
3209
3210 /* Construct the name for this thread, and then apply it. */
3211 bzero(thread_name, sizeof(thread_name));
3212 (void) snprintf(thread_name, sizeof(thread_name),
3213 "ifnet_start_%s", ifp->if_xname);
3214 ASSERT(ifp->if_start_thread == current_thread());
3215 thread_set_thread_name(current_thread(), thread_name);
3216
3217 /*
3218 * Treat the dedicated starter thread for lo0 as equivalent to
3219 * the driver workloop thread; if net_affinity is enabled for
3220 * the main input thread, associate this starter thread to it
3221 * by binding them with the same affinity tag. This is done
3222 * only once (as we only have one lo_ifp which never goes away.)
3223 */
3224 if (ifp == lo_ifp) {
3225 struct dlil_threading_info *inp = dlil_main_input_thread;
3226 struct thread *tp = current_thread();
3227
3228 lck_mtx_lock(&inp->dlth_lock);
3229 if (inp->dlth_affinity) {
3230 u_int32_t tag = inp->dlth_affinity_tag;
3231
3232 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
3233 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
3234 inp->dlth_driver_thread = tp;
3235 lck_mtx_unlock(&inp->dlth_lock);
3236
3237 /* Associate this thread with the affinity tag */
3238 (void) dlil_affinity_set(tp, tag);
3239 } else {
3240 lck_mtx_unlock(&inp->dlth_lock);
3241 }
3242 }
3243
3244 lck_mtx_lock(&ifp->if_start_lock);
3245 VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
3246 (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
3247 ifp->if_start_embryonic = 1;
3248 /* wake up once to get out of embryonic state */
3249 ifp->if_start_req++;
3250 (void) wakeup_one((caddr_t)&ifp->if_start_thread);
3251 lck_mtx_unlock(&ifp->if_start_lock);
3252 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3253 /* NOTREACHED */
3254 __builtin_unreachable();
3255 }
3256
3257 __attribute__((noreturn))
3258 static void
3259 ifnet_start_thread_cont(void *v, wait_result_t wres)
3260 {
3261 struct ifnet *ifp = v;
3262 struct ifclassq *ifq = &ifp->if_snd;
3263
3264 lck_mtx_lock_spin(&ifp->if_start_lock);
3265 if (__improbable(wres == THREAD_INTERRUPTED ||
3266 ifp->if_start_thread == THREAD_NULL)) {
3267 goto terminate;
3268 }
3269
3270 if (__improbable(ifp->if_start_embryonic)) {
3271 ifp->if_start_embryonic = 0;
3272 lck_mtx_unlock(&ifp->if_start_lock);
3273 ifnet_decr_pending_thread_count(ifp);
3274 lck_mtx_lock_spin(&ifp->if_start_lock);
3275 goto skip;
3276 }
3277
3278 ifp->if_start_active = 1;
3279
3280 /*
3281 * Keep on servicing until there are no more requests.
3282 */
3283 for (;;) {
3284 u_int32_t req = ifp->if_start_req;
3285 if (!IFCQ_IS_EMPTY(ifq) &&
3286 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3287 ifp->if_start_delayed == 0 &&
3288 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
3289 (ifp->if_eflags & IFEF_DELAY_START)) {
3290 ifp->if_start_delayed = 1;
3291 ifnet_start_delayed++;
3292 break;
3293 } else {
3294 ifp->if_start_delayed = 0;
3295 }
3296 lck_mtx_unlock(&ifp->if_start_lock);
3297
3298 /*
3299 * If no longer attached, don't call start because ifp
3300 * is being destroyed; else hold an IO refcnt to
3301 * prevent the interface from being detached (will be
3302 * released below.)
3303 */
3304 if (!ifnet_datamov_begin(ifp)) {
3305 lck_mtx_lock_spin(&ifp->if_start_lock);
3306 break;
3307 }
3308
3309 /* invoke the driver's start routine */
3310 ((*ifp->if_start)(ifp));
3311
3312 /*
3313 * Release the io ref count taken above.
3314 */
3315 ifnet_datamov_end(ifp);
3316
3317 lck_mtx_lock_spin(&ifp->if_start_lock);
3318
3319 /*
3320 * If there's no pending request or if the
3321 * interface has been disabled, we're done.
3322 */
3323 if (req == ifp->if_start_req ||
3324 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
3325 break;
3326 }
3327 }
3328 skip:
3329 ifp->if_start_req = 0;
3330 ifp->if_start_active = 0;
3331
3332
3333 if (__probable(ifp->if_start_thread != THREAD_NULL)) {
3334 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3335 struct timespec delay_start_ts;
3336 struct timespec *ts;
3337
3338 /*
3339 * Wakeup N ns from now if rate-controlled by TBR, and if
3340 * there are still packets in the send queue which haven't
3341 * been dequeued so far; else sleep indefinitely (ts = NULL)
3342 * until ifnet_start() is called again.
3343 */
3344 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
3345 &ifp->if_start_cycle : NULL);
3346
3347 if (ts == NULL && ifp->if_start_delayed == 1) {
3348 delay_start_ts.tv_sec = 0;
3349 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
3350 ts = &delay_start_ts;
3351 }
3352
3353 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
3354 ts = NULL;
3355 }
3356
3357 if (__improbable(ts != NULL)) {
3358 clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
3359 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3360 }
3361
3362 (void) assert_wait_deadline(&ifp->if_start_thread,
3363 THREAD_UNINT, deadline);
3364 lck_mtx_unlock(&ifp->if_start_lock);
3365 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3366 /* NOTREACHED */
3367 } else {
3368 terminate:
3369 /* interface is detached? */
3370 ifnet_set_start_cycle(ifp, NULL);
3371 lck_mtx_unlock(&ifp->if_start_lock);
3372 ifnet_purge(ifp);
3373
3374 if (dlil_verbose) {
3375 DLIL_PRINTF("%s: starter thread terminated\n",
3376 if_name(ifp));
3377 }
3378
3379 /* for the extra refcnt from kernel_thread_start() */
3380 thread_deallocate(current_thread());
3381 /* this is the end */
3382 thread_terminate(current_thread());
3383 /* NOTREACHED */
3384 }
3385
3386 /* must never get here */
3387 VERIFY(0);
3388 /* NOTREACHED */
3389 __builtin_unreachable();
3390 }
3391
3392 void
3393 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
3394 {
3395 if (ts == NULL) {
3396 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
3397 } else {
3398 *(&ifp->if_start_cycle) = *ts;
3399 }
3400
3401 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3402 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
3403 if_name(ifp), ts->tv_nsec);
3404 }
3405 }
3406
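/*
 * Record a poll request and kick the poller thread if it is idle.
 * Caller must hold if_poll_lock.
 */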
3407 static inline void
3408 ifnet_poll_wakeup(struct ifnet *ifp)
3409 {
3410 LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
3411
3412 ifp->if_poll_req++;
3413 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
3414 ifp->if_poll_thread != THREAD_NULL) {
3415 wakeup_one((caddr_t)&ifp->if_poll_thread);
3416 }
3417 }
3418
3419 void
3420 ifnet_poll(struct ifnet *ifp)
3421 {
3422 /*
3423 * If the poller thread is inactive, signal it to do work.
3424 */
3425 lck_mtx_lock_spin(&ifp->if_poll_lock);
3426 ifnet_poll_wakeup(ifp);
3427 lck_mtx_unlock(&ifp->if_poll_lock);
3428 }
3429
3430 __attribute__((noreturn))
3431 static void
3432 ifnet_poll_thread_func(void *v, wait_result_t w)
3433 {
3434 #pragma unused(w)
3435 char thread_name[MAXTHREADNAMESIZE];
3436 struct ifnet *ifp = v;
3437
3438 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3439 VERIFY(current_thread() == ifp->if_poll_thread);
3440
3441 /* construct the name for this thread, and then apply it */
3442 bzero(thread_name, sizeof(thread_name));
3443 (void) snprintf(thread_name, sizeof(thread_name),
3444 "ifnet_poller_%s", ifp->if_xname);
3445 thread_set_thread_name(ifp->if_poll_thread, thread_name);
3446
3447 lck_mtx_lock(&ifp->if_poll_lock);
3448 VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
3449 (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
3450 ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
3451 /* wake up once to get out of embryonic state */
3452 ifnet_poll_wakeup(ifp);
3453 lck_mtx_unlock(&ifp->if_poll_lock);
3454 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3455 /* NOTREACHED */
3456 __builtin_unreachable();
3457 }
3458
3459 __attribute__((noreturn))
3460 static void
3461 ifnet_poll_thread_cont(void *v, wait_result_t wres)
3462 {
3463 struct dlil_threading_info *inp;
3464 struct ifnet *ifp = v;
3465 struct ifnet_stat_increment_param s;
3466 struct timespec start_time;
3467
3468 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3469
3470 bzero(&s, sizeof(s));
3471 net_timerclear(&start_time);
3472
3473 lck_mtx_lock_spin(&ifp->if_poll_lock);
3474 if (__improbable(wres == THREAD_INTERRUPTED ||
3475 ifp->if_poll_thread == THREAD_NULL)) {
3476 goto terminate;
3477 }
3478
3479 inp = ifp->if_inp;
3480 VERIFY(inp != NULL);
3481
3482 if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
3483 ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
3484 lck_mtx_unlock(&ifp->if_poll_lock);
3485 ifnet_decr_pending_thread_count(ifp);
3486 lck_mtx_lock_spin(&ifp->if_poll_lock);
3487 goto skip;
3488 }
3489
3490 ifp->if_poll_flags |= IF_POLLF_RUNNING;
3491
3492 /*
3493 * Keep on servicing until there are no more requests.
3494 */
3495 for (;;) {
3496 struct mbuf *m_head, *m_tail;
3497 u_int32_t m_lim, m_cnt, m_totlen;
3498 u_int16_t req = ifp->if_poll_req;
3499
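/*
 * Cap each poll at the per-interface poll limit when one is configured;
 * otherwise use the larger of the input queue limit and four times the
 * poll high watermark.
 */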
3500 m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
3501 MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
3502 lck_mtx_unlock(&ifp->if_poll_lock);
3503
3504 /*
3505 * If no longer attached, there's nothing to do;
3506 * else hold an IO refcnt to prevent the interface
3507 * from being detached (will be released below.)
3508 */
3509 if (!ifnet_is_attached(ifp, 1)) {
3510 lck_mtx_lock_spin(&ifp->if_poll_lock);
3511 break;
3512 }
3513
3514 if (dlil_verbose > 1) {
3515 DLIL_PRINTF("%s: polling up to %d pkts, "
3516 "pkts avg %d max %d, wreq avg %d, "
3517 "bytes avg %d\n",
3518 if_name(ifp), m_lim,
3519 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3520 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3521 }
3522
3523 /* invoke the driver's input poll routine */
3524 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3525 &m_cnt, &m_totlen));
3526
3527 if (m_head != NULL) {
3528 VERIFY(m_tail != NULL && m_cnt > 0);
3529
3530 if (dlil_verbose > 1) {
3531 DLIL_PRINTF("%s: polled %d pkts, "
3532 "pkts avg %d max %d, wreq avg %d, "
3533 "bytes avg %d\n",
3534 if_name(ifp), m_cnt,
3535 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3536 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3537 }
3538
3539 /* stats are required for extended variant */
3540 s.packets_in = m_cnt;
3541 s.bytes_in = m_totlen;
3542
3543 (void) ifnet_input_common(ifp, m_head, m_tail,
3544 &s, TRUE, TRUE);
3545 } else {
3546 if (dlil_verbose > 1) {
3547 DLIL_PRINTF("%s: no packets, "
3548 "pkts avg %d max %d, wreq avg %d, "
3549 "bytes avg %d\n",
3550 if_name(ifp), ifp->if_rxpoll_pavg,
3551 ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
3552 ifp->if_rxpoll_bavg);
3553 }
3554
3555 (void) ifnet_input_common(ifp, NULL, NULL,
3556 NULL, FALSE, TRUE);
3557 }
3558
3559 /* Release the io ref count */
3560 ifnet_decr_iorefcnt(ifp);
3561
3562 lck_mtx_lock_spin(&ifp->if_poll_lock);
3563
3564 /* if there's no pending request, or the thread is going away, we're done */
3565 if (req == ifp->if_poll_req ||
3566 ifp->if_poll_thread == THREAD_NULL) {
3567 break;
3568 }
3569 }
3570 skip:
3571 ifp->if_poll_req = 0;
3572 ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
3573
3574 if (ifp->if_poll_thread != THREAD_NULL) {
3575 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3576 struct timespec *ts;
3577
3578 /*
3579 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3580 * until ifnet_poll() is called again.
3581 */
3582 ts = &ifp->if_poll_cycle;
3583 if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
3584 ts = NULL;
3585 }
3586
3587 if (ts != NULL) {
3588 clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
3589 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3590 }
3591
3592 (void) assert_wait_deadline(&ifp->if_poll_thread,
3593 THREAD_UNINT, deadline);
3594 lck_mtx_unlock(&ifp->if_poll_lock);
3595 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3596 /* NOTREACHED */
3597 } else {
3598 terminate:
3599 /* interface is detached (maybe while asleep)? */
3600 ifnet_set_poll_cycle(ifp, NULL);
3601 lck_mtx_unlock(&ifp->if_poll_lock);
3602
3603 if (dlil_verbose) {
3604 DLIL_PRINTF("%s: poller thread terminated\n",
3605 if_name(ifp));
3606 }
3607
3608 /* for the extra refcnt from kernel_thread_start() */
3609 thread_deallocate(current_thread());
3610 /* this is the end */
3611 thread_terminate(current_thread());
3612 /* NOTREACHED */
3613 }
3614
3615 /* must never get here */
3616 VERIFY(0);
3617 /* NOTREACHED */
3618 __builtin_unreachable();
3619 }
3620
3621 void
3622 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3623 {
3624 if (ts == NULL) {
3625 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
3626 } else {
3627 *(&ifp->if_poll_cycle) = *ts;
3628 }
3629
3630 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3631 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
3632 if_name(ifp), ts->tv_nsec);
3633 }
3634 }
3635
3636 void
3637 ifnet_purge(struct ifnet *ifp)
3638 {
3639 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
3640 if_qflush(ifp, 0);
3641 }
3642 }
3643
3644 void
3645 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3646 {
3647 IFCQ_LOCK_ASSERT_HELD(ifq);
3648
3649 if (!(IFCQ_IS_READY(ifq))) {
3650 return;
3651 }
3652
3653 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3654 struct tb_profile tb = {
3655 .rate = ifq->ifcq_tbr.tbr_rate_raw,
3656 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
3657 };
3658 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3659 }
3660
3661 ifclassq_update(ifq, ev);
3662 }
3663
3664 void
3665 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3666 {
3667 switch (ev) {
3668 case CLASSQ_EV_LINK_BANDWIDTH:
3669 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
3670 ifp->if_poll_update++;
3671 }
3672 break;
3673
3674 default:
3675 break;
3676 }
3677 }
3678
3679 errno_t
3680 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3681 {
3682 struct ifclassq *ifq;
3683 u_int32_t omodel;
3684 errno_t err;
3685
3686 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
3687 return EINVAL;
3688 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3689 return ENXIO;
3690 }
3691
3692 ifq = &ifp->if_snd;
3693 IFCQ_LOCK(ifq);
3694 omodel = ifp->if_output_sched_model;
3695 ifp->if_output_sched_model = model;
3696 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
3697 ifp->if_output_sched_model = omodel;
3698 }
3699 IFCQ_UNLOCK(ifq);
3700
3701 return err;
3702 }
3703
3704 errno_t
3705 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3706 {
3707 if (ifp == NULL) {
3708 return EINVAL;
3709 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3710 return ENXIO;
3711 }
3712
3713 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3714
3715 return 0;
3716 }
3717
3718 errno_t
3719 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3720 {
3721 if (ifp == NULL || maxqlen == NULL) {
3722 return EINVAL;
3723 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3724 return ENXIO;
3725 }
3726
3727 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3728
3729 return 0;
3730 }
3731
3732 errno_t
3733 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3734 {
3735 errno_t err;
3736
3737 if (ifp == NULL || pkts == NULL) {
3738 err = EINVAL;
3739 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3740 err = ENXIO;
3741 } else {
3742 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3743 pkts, NULL);
3744 }
3745
3746 return err;
3747 }
3748
3749 errno_t
3750 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3751 u_int32_t *pkts, u_int32_t *bytes)
3752 {
3753 errno_t err;
3754
3755 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3756 (pkts == NULL && bytes == NULL)) {
3757 err = EINVAL;
3758 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3759 err = ENXIO;
3760 } else {
3761 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3762 }
3763
3764 return err;
3765 }
3766
3767 errno_t
3768 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3769 {
3770 struct dlil_threading_info *inp;
3771
3772 if (ifp == NULL) {
3773 return EINVAL;
3774 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3775 return ENXIO;
3776 }
3777
3778 if (maxqlen == 0) {
3779 maxqlen = if_rcvq_maxlen;
3780 } else if (maxqlen < IF_RCVQ_MINLEN) {
3781 maxqlen = IF_RCVQ_MINLEN;
3782 }
3783
3784 inp = ifp->if_inp;
3785 lck_mtx_lock(&inp->dlth_lock);
3786 qlimit(&inp->dlth_pkts) = maxqlen;
3787 lck_mtx_unlock(&inp->dlth_lock);
3788
3789 return 0;
3790 }
3791
3792 errno_t
3793 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3794 {
3795 struct dlil_threading_info *inp;
3796
3797 if (ifp == NULL || maxqlen == NULL) {
3798 return EINVAL;
3799 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3800 return ENXIO;
3801 }
3802
3803 inp = ifp->if_inp;
3804 lck_mtx_lock(&inp->dlth_lock);
3805 *maxqlen = qlimit(&inp->dlth_pkts);
3806 lck_mtx_unlock(&inp->dlth_lock);
3807 return 0;
3808 }
3809
3810 void
3811 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3812 uint16_t delay_timeout)
3813 {
3814 if (delay_qlen > 0 && delay_timeout > 0) {
3815 if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
3816 ifp->if_start_delay_qlen = MIN(100, delay_qlen);
3817 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3818 /* convert timeout to nanoseconds */
3819 ifp->if_start_delay_timeout *= 1000;
3820 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3821 ifp->if_xname, (uint32_t)delay_qlen,
3822 (uint32_t)delay_timeout);
3823 } else {
3824 if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
3825 }
3826 }
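
/*
 * Illustrative sketch (hypothetical values): a driver that wants start
 * callbacks coalesced could pass a queue-length trigger of 16 packets and
 * a 10000-microsecond window, which the routine above converts to
 * nanoseconds (10 ms):
 *
 *	ifnet_enqueue_multi_setup(ifp, 16, 10000);
 */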
3827
3828 /*
3829 * This function clears the DSCP bits in the IPv4/IPv6 header pointed to by buf.
3830 * While it's OK for buf not to be 32-bit aligned, the caller must ensure that
3831 * buf holds the full header.
3832 */
3833 static __attribute__((noinline)) void
3834 ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
3835 {
3836 struct ip *ip;
3837 struct ip6_hdr *ip6;
3838 uint8_t lbuf[64] __attribute__((aligned(8)));
3839 uint8_t *p = buf;
3840
3841 if (ip_ver == IPVERSION) {
3842 uint8_t old_tos;
3843 uint32_t sum;
3844
3845 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3846 DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
3847 bcopy(buf, lbuf, sizeof(struct ip));
3848 p = lbuf;
3849 }
3850 ip = (struct ip *)(void *)p;
3851 if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
3852 return;
3853 }
3854
3855 DTRACE_IP1(clear__v4, struct ip *, ip);
3856 old_tos = ip->ip_tos;
3857 ip->ip_tos &= IPTOS_ECN_MASK;
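/*
 * Incrementally adjust the IPv4 header checksum for the TOS change and
 * fold the carry, rather than recomputing the checksum over the whole
 * header.
 */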
3858 sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
3859 sum = (sum >> 16) + (sum & 0xffff);
3860 ip->ip_sum = (uint16_t)(sum & 0xffff);
3861
3862 if (__improbable(p == lbuf)) {
3863 bcopy(lbuf, buf, sizeof(struct ip));
3864 }
3865 } else {
3866 uint32_t flow;
3867 ASSERT(ip_ver == IPV6_VERSION);
3868
3869 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3870 DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
3871 bcopy(buf, lbuf, sizeof(struct ip6_hdr));
3872 p = lbuf;
3873 }
3874 ip6 = (struct ip6_hdr *)(void *)p;
3875 flow = ntohl(ip6->ip6_flow);
3876 if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
3877 return;
3878 }
3879
3880 DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
3881 ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
3882
3883 if (__improbable(p == lbuf)) {
3884 bcopy(lbuf, buf, sizeof(struct ip6_hdr));
3885 }
3886 }
3887 }
3888
3889 static inline errno_t
3890 ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
3891 boolean_t *pdrop)
3892 {
3893 volatile uint64_t *fg_ts = NULL;
3894 volatile uint64_t *rt_ts = NULL;
3895 struct timespec now;
3896 u_int64_t now_nsec = 0;
3897 int error = 0;
3898 uint8_t *mcast_buf = NULL;
3899 uint8_t ip_ver;
3900 uint32_t pktlen;
3901
3902 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3903
3904 /*
3905 * If packet already carries a timestamp, either from dlil_output()
3906 * or from flowswitch, use it here. Otherwise, record timestamp.
3907 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3908 * the timestamp value is used internally there.
3909 */
3910 switch (p->cp_ptype) {
3911 case QP_MBUF:
3912 ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
3913 ASSERT(p->cp_mbuf->m_nextpkt == NULL);
3914
3915 if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3916 p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
3917 nanouptime(&now);
3918 net_timernsec(&now, &now_nsec);
3919 p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
3920 }
3921 p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3922 /*
3923 * If the packet service class is not background,
3924 * update the timestamp to indicate recent activity
3925 * on a foreground socket.
3926 */
3927 if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3928 p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3929 if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
3930 PKTF_SO_BACKGROUND)) {
3931 ifp->if_fg_sendts = (uint32_t)_net_uptime;
3932 if (fg_ts != NULL) {
3933 *fg_ts = (uint32_t)_net_uptime;
3934 }
3935 }
3936 if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3937 ifp->if_rt_sendts = (uint32_t)_net_uptime;
3938 if (rt_ts != NULL) {
3939 *rt_ts = (uint32_t)_net_uptime;
3940 }
3941 }
3942 }
3943 pktlen = m_pktlen(p->cp_mbuf);
3944
3945 /*
3946 * Some Wi-Fi AP implementations do not correctly handle
3947 * multicast IP packets with DSCP bits set (radr://9331522).
3948 * As a workaround we clear the DSCP bits but keep service
3949 * class (rdar://51507725).
3950 */
3951 if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
3952 IFNET_IS_WIFI_INFRA(ifp)) {
3953 size_t len = mbuf_len(p->cp_mbuf), hlen;
3954 struct ether_header *eh;
3955 boolean_t pullup = FALSE;
3956 uint16_t etype;
3957
3958 if (__improbable(len < sizeof(struct ether_header))) {
3959 DTRACE_IP1(small__ether, size_t, len);
3960 if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
3961 sizeof(struct ether_header))) == NULL) {
3962 return ENOMEM;
3963 }
3964 }
3965 eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
3966 etype = ntohs(eh->ether_type);
3967 if (etype == ETHERTYPE_IP) {
3968 hlen = sizeof(struct ether_header) +
3969 sizeof(struct ip);
3970 if (len < hlen) {
3971 DTRACE_IP1(small__v4, size_t, len);
3972 pullup = TRUE;
3973 }
3974 ip_ver = IPVERSION;
3975 } else if (etype == ETHERTYPE_IPV6) {
3976 hlen = sizeof(struct ether_header) +
3977 sizeof(struct ip6_hdr);
3978 if (len < hlen) {
3979 DTRACE_IP1(small__v6, size_t, len);
3980 pullup = TRUE;
3981 }
3982 ip_ver = IPV6_VERSION;
3983 } else {
3984 DTRACE_IP1(invalid__etype, uint16_t, etype);
3985 break;
3986 }
3987 if (pullup) {
3988 if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
3989 NULL) {
3990 return ENOMEM;
3991 }
3992
3993 eh = (struct ether_header *)mbuf_data(
3994 p->cp_mbuf);
3995 }
3996 mcast_buf = (uint8_t *)(eh + 1);
3997 /*
3998 * ifnet_mcast_clear_dscp() will finish the work below.
3999 * Note that the pullups above ensure that mcast_buf
4000 * points to a full IP header.
4001 */
4002 }
4003 break;
4004
4005
4006 default:
4007 VERIFY(0);
4008 /* NOTREACHED */
4009 __builtin_unreachable();
4010 }
4011
4012 if (mcast_buf != NULL) {
4013 ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
4014 }
4015
4016 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
4017 if (now_nsec == 0) {
4018 nanouptime(&now);
4019 net_timernsec(&now, &now_nsec);
4020 }
4021 /*
4022 * If the driver chose to delay the start callback for
4023 * coalescing multiple packets, then use the following
4024 * heuristics to make sure that the start callback will
4025 * be delayed only when bulk data transfer is detected:
4026 * 1. The number of packets enqueued in (delay_win * 2) is
4027 * greater than or equal to the delay qlen.
4028 * 2. If delay_start is enabled, it will stay enabled for
4029 * another 10 idle windows. This is to take into account
4030 * variable RTT and burst traffic.
4031 * 3. If the time elapsed since the last enqueue is more
4032 * than 200ms, we disable delaying the start callback. This
4033 * is to take idle time into account.
4034 */
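/*
 * Illustrative example (hypothetical values): with a 10 ms delay timeout
 * (dwin = 20 ms) and a delay qlen of 16, IFEF_DELAY_START is set once 16
 * or more packets are enqueued within a 20 ms window, and an idle gap of
 * 200 ms or more since the last enqueue disables it again.
 */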
4035 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
4036 if (ifp->if_start_delay_swin > 0) {
4037 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
4038 ifp->if_start_delay_cnt++;
4039 } else if ((now_nsec - ifp->if_start_delay_swin)
4040 >= (200 * 1000 * 1000)) {
4041 ifp->if_start_delay_swin = now_nsec;
4042 ifp->if_start_delay_cnt = 1;
4043 ifp->if_start_delay_idle = 0;
4044 if (ifp->if_eflags & IFEF_DELAY_START) {
4045 if_clear_eflags(ifp, IFEF_DELAY_START);
4046 ifnet_delay_start_disabled_increment();
4047 }
4048 } else {
4049 if (ifp->if_start_delay_cnt >=
4050 ifp->if_start_delay_qlen) {
4051 if_set_eflags(ifp, IFEF_DELAY_START);
4052 ifp->if_start_delay_idle = 0;
4053 } else {
4054 if (ifp->if_start_delay_idle >= 10) {
4055 if_clear_eflags(ifp,
4056 IFEF_DELAY_START);
4057 ifnet_delay_start_disabled_increment();
4058 } else {
4059 ifp->if_start_delay_idle++;
4060 }
4061 }
4062 ifp->if_start_delay_swin = now_nsec;
4063 ifp->if_start_delay_cnt = 1;
4064 }
4065 } else {
4066 ifp->if_start_delay_swin = now_nsec;
4067 ifp->if_start_delay_cnt = 1;
4068 ifp->if_start_delay_idle = 0;
4069 if_clear_eflags(ifp, IFEF_DELAY_START);
4070 }
4071 } else {
4072 if_clear_eflags(ifp, IFEF_DELAY_START);
4073 }
4074
4075 /* enqueue the packet (caller consumes object) */
4076 error = ifclassq_enqueue(&ifp->if_snd, p, p, 1, pktlen, pdrop);
4077
4078 /*
4079 * Tell the driver to start dequeueing; do this even when the queue
4080 * for the packet is suspended (EQSUSPENDED), as the driver could still
4081 * be dequeueing from other unsuspended queues.
4082 */
4083 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
4084 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
4085 ifnet_start(ifp);
4086 }
4087
4088 return error;
4089 }
4090
4091 static inline errno_t
4092 ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, classq_pkt_t *head,
4093 classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
4094 boolean_t *pdrop)
4095 {
4096 int error;
4097
4098 /* enqueue the packet (caller consumes object) */
4099 error = ifclassq_enqueue(&ifp->if_snd, head, tail, cnt, bytes, pdrop);
4100
4101 /*
4102 * Tell the driver to start dequeueing; do this even when the queue
4103 * for the packet is suspended (EQSUSPENDED), as the driver could still
4104 * be dequeueing from other unsuspended queues.
4105 */
4106 if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
4107 ifnet_start(ifp);
4108 }
4109 return error;
4110 }
4111
4112 int
4113 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
4114 {
4115 struct ifnet *ifp = handle;
4116 boolean_t pdrop; /* dummy */
4117 uint32_t i;
4118
4119 ASSERT(n_pkts >= 1);
4120 for (i = 0; i < n_pkts - 1; i++) {
4121 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
4122 FALSE, &pdrop);
4123 }
4124 /* flush with the last packet */
4125 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);
4126
4127 return 0;
4128 }
4129
4130 static inline errno_t
4131 ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
4132 boolean_t *pdrop)
4133 {
4134 if (ifp->if_output_netem != NULL) {
4135 return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
4136 } else {
4137 return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
4138 }
4139 }
4140
4141 errno_t
4142 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
4143 {
4144 boolean_t pdrop;
4145 return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
4146 }
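
/*
 * Minimal usage sketch (hypothetical driver code; mydrv_start and
 * mydrv_tx_submit are illustrative names, assuming an interface using the
 * if_start output model): the stack enqueues with ifnet_enqueue() and the
 * driver's if_start callback drains the send queue with ifnet_dequeue():
 *
 *	static void
 *	mydrv_start(ifnet_t ifp)
 *	{
 *		mbuf_t m;
 *
 *		while (ifnet_dequeue(ifp, &m) == 0) {
 *			mydrv_tx_submit(ifp, m);
 *		}
 *	}
 */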
4147
4148 errno_t
4149 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
4150 boolean_t *pdrop)
4151 {
4152 classq_pkt_t pkt;
4153
4154 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
4155 m->m_nextpkt != NULL) {
4156 if (m != NULL) {
4157 m_freem_list(m);
4158 *pdrop = TRUE;
4159 }
4160 return EINVAL;
4161 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4162 !IF_FULLY_ATTACHED(ifp)) {
4163 /* flag tested without lock for performance */
4164 m_freem(m);
4165 *pdrop = TRUE;
4166 return ENXIO;
4167 } else if (!(ifp->if_flags & IFF_UP)) {
4168 m_freem(m);
4169 *pdrop = TRUE;
4170 return ENETDOWN;
4171 }
4172
4173 CLASSQ_PKT_INIT_MBUF(&pkt, m);
4174 return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
4175 }
4176
4177 errno_t
4178 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
4179 struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
4180 boolean_t *pdrop)
4181 {
4182 classq_pkt_t head, tail;
4183
4184 ASSERT(m_head != NULL);
4185 ASSERT((m_head->m_flags & M_PKTHDR) != 0);
4186 ASSERT(m_tail != NULL);
4187 ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
4188 ASSERT(ifp != NULL);
4189 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
4190
4191 if (!IF_FULLY_ATTACHED(ifp)) {
4192 /* flag tested without lock for performance */
4193 m_freem_list(m_head);
4194 *pdrop = TRUE;
4195 return ENXIO;
4196 } else if (!(ifp->if_flags & IFF_UP)) {
4197 m_freem_list(m_head);
4198 *pdrop = TRUE;
4199 return ENETDOWN;
4200 }
4201
4202 CLASSQ_PKT_INIT_MBUF(&head, m_head);
4203 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
4204 return ifnet_enqueue_ifclassq_chain(ifp, &head, &tail, cnt, bytes,
4205 flush, pdrop);
4206 }
4207
4208
4209 errno_t
4210 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
4211 {
4212 errno_t rc;
4213 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4214
4215 if (ifp == NULL || mp == NULL) {
4216 return EINVAL;
4217 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4218 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4219 return ENXIO;
4220 }
4221 if (!ifnet_is_attached(ifp, 1)) {
4222 return ENXIO;
4223 }
4224
4225 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
4226 &pkt, NULL, NULL, NULL);
4227 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4228 ifnet_decr_iorefcnt(ifp);
4229 *mp = pkt.cp_mbuf;
4230 return rc;
4231 }
4232
4233 errno_t
4234 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
4235 struct mbuf **mp)
4236 {
4237 errno_t rc;
4238 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4239
4240 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
4241 return EINVAL;
4242 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4243 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4244 return ENXIO;
4245 }
4246 if (!ifnet_is_attached(ifp, 1)) {
4247 return ENXIO;
4248 }
4249
4250 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
4251 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
4252 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4253 ifnet_decr_iorefcnt(ifp);
4254 *mp = pkt.cp_mbuf;
4255 return rc;
4256 }
4257
4258 errno_t
4259 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
4260 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4261 {
4262 errno_t rc;
4263 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4264 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4265
4266 if (ifp == NULL || head == NULL || pkt_limit < 1) {
4267 return EINVAL;
4268 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4269 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4270 return ENXIO;
4271 }
4272 if (!ifnet_is_attached(ifp, 1)) {
4273 return ENXIO;
4274 }
4275
4276 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
4277 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
4278 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4279 ifnet_decr_iorefcnt(ifp);
4280 *head = pkt_head.cp_mbuf;
4281 if (tail != NULL) {
4282 *tail = pkt_tail.cp_mbuf;
4283 }
4284 return rc;
4285 }
4286
4287 errno_t
4288 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
4289 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4290 {
4291 errno_t rc;
4292 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4293 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4294
4295 if (ifp == NULL || head == NULL || byte_limit < 1) {
4296 return EINVAL;
4297 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4298 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4299 return ENXIO;
4300 }
4301 if (!ifnet_is_attached(ifp, 1)) {
4302 return ENXIO;
4303 }
4304
4305 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
4306 byte_limit, &pkt_head, &pkt_tail, cnt, len);
4307 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4308 ifnet_decr_iorefcnt(ifp);
4309 *head = pkt_head.cp_mbuf;
4310 if (tail != NULL) {
4311 *tail = pkt_tail.cp_mbuf;
4312 }
4313 return rc;
4314 }
4315
4316 errno_t
4317 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
4318 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
4319 u_int32_t *len)
4320 {
4321 errno_t rc;
4322 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4323 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4324
4325 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
4326 !MBUF_VALID_SC(sc)) {
4327 return EINVAL;
4328 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4329 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4330 return ENXIO;
4331 }
4332 if (!ifnet_is_attached(ifp, 1)) {
4333 return ENXIO;
4334 }
4335
4336 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
4337 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
4338 cnt, len);
4339 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4340 ifnet_decr_iorefcnt(ifp);
4341 *head = pkt_head.cp_mbuf;
4342 if (tail != NULL) {
4343 *tail = pkt_tail.cp_mbuf;
4344 }
4345 return rc;
4346 }
4347
4348 #if XNU_TARGET_OS_OSX
4349 errno_t
4350 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
4351 const struct sockaddr *dest, const char *dest_linkaddr,
4352 const char *frame_type, u_int32_t *pre, u_int32_t *post)
4353 {
4354 if (pre != NULL) {
4355 *pre = 0;
4356 }
4357 if (post != NULL) {
4358 *post = 0;
4359 }
4360
4361 return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
4362 }
4363 #endif /* XNU_TARGET_OS_OSX */
4364
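/*
 * Returns TRUE if the packet carries a real (non-zero) VLAN ID in its
 * out-of-band tag; a priority-only tag (VLAN ID 0) is treated as untagged
 * and its CSUM_VLAN_TAG_VALID bit is cleared as a side effect.
 */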
4365 static boolean_t
4366 packet_has_vlan_tag(struct mbuf * m)
4367 {
4368 u_int tag = 0;
4369
4370 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
4371 tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
4372 if (tag == 0) {
4373 /* the packet is just priority-tagged, clear the bit */
4374 m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
4375 }
4376 }
4377 return tag != 0;
4378 }
4379
4380 static int
4381 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
4382 char **frame_header_p, protocol_family_t protocol_family)
4383 {
4384 boolean_t is_vlan_packet = FALSE;
4385 struct ifnet_filter *filter;
4386 struct mbuf *m = *m_p;
4387
4388 is_vlan_packet = packet_has_vlan_tag(m);
4389
4390 if (TAILQ_EMPTY(&ifp->if_flt_head)) {
4391 return 0;
4392 }
4393
4394 /*
4395 * Pass the inbound packet to the interface filters
4396 */
4397 lck_mtx_lock_spin(&ifp->if_flt_lock);
4398 /* prevent filter list from changing in case we drop the lock */
4399 if_flt_monitor_busy(ifp);
4400 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4401 int result;
4402
4403 /* exclude VLAN packets from external filters PR-3586856 */
4404 if (is_vlan_packet &&
4405 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4406 continue;
4407 }
4408
4409 if (!filter->filt_skip && filter->filt_input != NULL &&
4410 (filter->filt_protocol == 0 ||
4411 filter->filt_protocol == protocol_family)) {
4412 lck_mtx_unlock(&ifp->if_flt_lock);
4413
4414 result = (*filter->filt_input)(filter->filt_cookie,
4415 ifp, protocol_family, m_p, frame_header_p);
4416
4417 lck_mtx_lock_spin(&ifp->if_flt_lock);
4418 if (result != 0) {
4419 /* we're done with the filter list */
4420 if_flt_monitor_unbusy(ifp);
4421 lck_mtx_unlock(&ifp->if_flt_lock);
4422 return result;
4423 }
4424 }
4425 }
4426 /* we're done with the filter list */
4427 if_flt_monitor_unbusy(ifp);
4428 lck_mtx_unlock(&ifp->if_flt_lock);
4429
4430 /*
4431 * Strip away the M_PROTO1 bit prior to sending the packet up the stack,
4432 * as it is meant to be local to a subsystem -- if_bridge in the case of M_PROTO1
4433 */
4434 if (*m_p != NULL) {
4435 (*m_p)->m_flags &= ~M_PROTO1;
4436 }
4437
4438 return 0;
4439 }
4440
4441 static int
4442 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
4443 protocol_family_t protocol_family)
4444 {
4445 boolean_t is_vlan_packet;
4446 struct ifnet_filter *filter;
4447 struct mbuf *m = *m_p;
4448
4449 is_vlan_packet = packet_has_vlan_tag(m);
4450
4451 /*
4452 * Pass the outbound packet to the interface filters
4453 */
4454 lck_mtx_lock_spin(&ifp->if_flt_lock);
4455 /* prevent filter list from changing in case we drop the lock */
4456 if_flt_monitor_busy(ifp);
4457 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4458 int result;
4459
4460 /* exclude VLAN packets from external filters PR-3586856 */
4461 if (is_vlan_packet &&
4462 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4463 continue;
4464 }
4465
4466 if (!filter->filt_skip && filter->filt_output != NULL &&
4467 (filter->filt_protocol == 0 ||
4468 filter->filt_protocol == protocol_family)) {
4469 lck_mtx_unlock(&ifp->if_flt_lock);
4470
4471 result = filter->filt_output(filter->filt_cookie, ifp,
4472 protocol_family, m_p);
4473
4474 lck_mtx_lock_spin(&ifp->if_flt_lock);
4475 if (result != 0) {
4476 /* we're done with the filter list */
4477 if_flt_monitor_unbusy(ifp);
4478 lck_mtx_unlock(&ifp->if_flt_lock);
4479 return result;
4480 }
4481 }
4482 }
4483 /* we're done with the filter list */
4484 if_flt_monitor_unbusy(ifp);
4485 lck_mtx_unlock(&ifp->if_flt_lock);
4486
4487 return 0;
4488 }
4489
4490 static void
4491 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
4492 {
4493 int error;
4494
4495 if (ifproto->proto_kpi == kProtoKPI_v1) {
4496 /* Version 1 protocols get one packet at a time */
4497 while (m != NULL) {
4498 char * frame_header;
4499 mbuf_t next_packet;
4500
4501 next_packet = m->m_nextpkt;
4502 m->m_nextpkt = NULL;
4503 frame_header = m->m_pkthdr.pkt_hdr;
4504 m->m_pkthdr.pkt_hdr = NULL;
4505 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
4506 ifproto->protocol_family, m, frame_header);
4507 if (error != 0 && error != EJUSTRETURN) {
4508 m_freem(m);
4509 }
4510 m = next_packet;
4511 }
4512 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
4513 /* Version 2 protocols support packet lists */
4514 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
4515 ifproto->protocol_family, m);
4516 if (error != 0 && error != EJUSTRETURN) {
4517 m_freem_list(m);
4518 }
4519 }
4520 }
4521
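/*
 * Accumulate per-packet statistics into the input thread's private
 * counters; dlil_input_stats_sync() later folds them into the ifnet's
 * global counters using atomic operations.
 */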
4522 static void
4523 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
4524 struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
4525 {
4526 struct ifnet_stat_increment_param *d = &inp->dlth_stats;
4527
4528 if (s->packets_in != 0) {
4529 d->packets_in += s->packets_in;
4530 }
4531 if (s->bytes_in != 0) {
4532 d->bytes_in += s->bytes_in;
4533 }
4534 if (s->errors_in != 0) {
4535 d->errors_in += s->errors_in;
4536 }
4537
4538 if (s->packets_out != 0) {
4539 d->packets_out += s->packets_out;
4540 }
4541 if (s->bytes_out != 0) {
4542 d->bytes_out += s->bytes_out;
4543 }
4544 if (s->errors_out != 0) {
4545 d->errors_out += s->errors_out;
4546 }
4547
4548 if (s->collisions != 0) {
4549 d->collisions += s->collisions;
4550 }
4551 if (s->dropped != 0) {
4552 d->dropped += s->dropped;
4553 }
4554
4555 if (poll) {
4556 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
4557 }
4558 }
4559
4560 static boolean_t
4561 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
4562 {
4563 struct ifnet_stat_increment_param *s = &inp->dlth_stats;
4564
4565 /*
4566 * Use of atomic operations is unavoidable here because
4567 * these stats may also be incremented elsewhere via KPIs.
4568 */
4569 if (s->packets_in != 0) {
4570 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
4571 s->packets_in = 0;
4572 }
4573 if (s->bytes_in != 0) {
4574 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
4575 s->bytes_in = 0;
4576 }
4577 if (s->errors_in != 0) {
4578 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
4579 s->errors_in = 0;
4580 }
4581
4582 if (s->packets_out != 0) {
4583 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
4584 s->packets_out = 0;
4585 }
4586 if (s->bytes_out != 0) {
4587 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
4588 s->bytes_out = 0;
4589 }
4590 if (s->errors_out != 0) {
4591 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
4592 s->errors_out = 0;
4593 }
4594
4595 if (s->collisions != 0) {
4596 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
4597 s->collisions = 0;
4598 }
4599 if (s->dropped != 0) {
4600 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
4601 s->dropped = 0;
4602 }
4603
4604 /*
4605 * No need for atomic operations as they are modified here
4606 * only from within the DLIL input thread context.
4607 */
4608 if (ifp->if_poll_tstats.packets != 0) {
4609 ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
4610 ifp->if_poll_tstats.packets = 0;
4611 }
4612 if (ifp->if_poll_tstats.bytes != 0) {
4613 ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
4614 ifp->if_poll_tstats.bytes = 0;
4615 }
4616
4617 return ifp->if_data_threshold != 0;
4618 }
4619
4620 __private_extern__ void
4621 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
4622 {
4623 return dlil_input_packet_list_common(ifp, m, 0,
4624 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
4625 }
4626
4627 __private_extern__ void
4628 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
4629 u_int32_t cnt, ifnet_model_t mode)
4630 {
4631 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
4632 }
4633
4634 static void
4635 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
4636 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
4637 {
4638 int error = 0;
4639 protocol_family_t protocol_family;
4640 mbuf_t next_packet;
4641 ifnet_t ifp = ifp_param;
4642 char *frame_header = NULL;
4643 struct if_proto *last_ifproto = NULL;
4644 mbuf_t pkt_first = NULL;
4645 mbuf_t *pkt_next = NULL;
4646 u_int32_t poll_thresh = 0, poll_ival = 0;
4647 int iorefcnt = 0;
4648
4649 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4650
4651 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
4652 (poll_ival = if_rxpoll_interval_pkts) > 0) {
4653 poll_thresh = cnt;
4654 }
4655
4656 while (m != NULL) {
4657 struct if_proto *ifproto = NULL;
4658 uint32_t pktf_mask; /* pkt flags to preserve */
4659
4660 if (ifp_param == NULL) {
4661 ifp = m->m_pkthdr.rcvif;
4662 }
4663
4664 if ((ifp->if_eflags & IFEF_RXPOLL) &&
4665 (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
4666 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
4667 ifnet_poll(ifp);
4668 }
4669
4670 /* Check if this mbuf looks valid */
4671 MBUF_INPUT_CHECK(m, ifp);
4672
4673 next_packet = m->m_nextpkt;
4674 m->m_nextpkt = NULL;
4675 frame_header = m->m_pkthdr.pkt_hdr;
4676 m->m_pkthdr.pkt_hdr = NULL;
4677
4678 /*
4679 * Get an IO reference count if the interface is not
4680 * loopback (lo0) and it is attached; lo0 never goes
4681 * away, so optimize for that.
4682 */
4683 if (ifp != lo_ifp) {
4684 /* iorefcnt is 0 if it hasn't been taken yet */
4685 if (iorefcnt == 0) {
4686 if (!ifnet_datamov_begin(ifp)) {
4687 m_freem(m);
4688 goto next;
4689 }
4690 }
4691 iorefcnt = 1;
4692 /*
4693 * Preserve the timestamp and skip-pktap flags.
4694 */
4695 pktf_mask = PKTF_TS_VALID | PKTF_SKIP_PKTAP;
4696 } else {
4697 /*
4698 * If this arrived on lo0, preserve interface addr
4699 * info to allow for connectivity between loopback
4700 * and local interface addresses.
4701 */
4702 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
4703 }
4704
4705 /* make sure packet comes in clean */
4706 m_classifier_init(m, pktf_mask);
4707
4708 ifp_inc_traffic_class_in(ifp, m);
4709
4710 /* find which protocol family this packet is for */
4711 ifnet_lock_shared(ifp);
4712 error = (*ifp->if_demux)(ifp, m, frame_header,
4713 &protocol_family);
4714 ifnet_lock_done(ifp);
4715 if (error != 0) {
4716 if (error == EJUSTRETURN) {
4717 goto next;
4718 }
4719 protocol_family = 0;
4720 }
4721
4722 pktap_input(ifp, protocol_family, m, frame_header);
4723
4724 /* Drop v4 packets received on CLAT46 enabled interface */
4725 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4726 m_freem(m);
4727 ip6stat.ip6s_clat464_in_v4_drop++;
4728 goto next;
4729 }
4730
4731 /* Translate the packet if it is received on CLAT interface */
4732 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4733 && dlil_is_clat_needed(protocol_family, m)) {
4734 char *data = NULL;
4735 struct ether_header eh;
4736 struct ether_header *ehp = NULL;
4737
4738 if (ifp->if_type == IFT_ETHER) {
4739 ehp = (struct ether_header *)(void *)frame_header;
4740 /* Skip RX Ethernet packets if they are not IPV6 */
4741 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
4742 goto skip_clat;
4743 }
4744
4745 /* Keep a copy of frame_header for Ethernet packets */
4746 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4747 }
4748 error = dlil_clat64(ifp, &protocol_family, &m);
4749 data = (char *) mbuf_data(m);
4750 if (error != 0) {
4751 m_freem(m);
4752 ip6stat.ip6s_clat464_in_drop++;
4753 goto next;
4754 }
4755 /* Native v6 should be a no-op */
4756 if (protocol_family != PF_INET) {
4757 goto skip_clat;
4758 }
4759
4760 /* Do this only for translated v4 packets. */
4761 switch (ifp->if_type) {
4762 case IFT_CELLULAR:
4763 frame_header = data;
4764 break;
4765 case IFT_ETHER:
4766 /*
4767 * Drop if the mbuf doesn't have enough
4768 * space for the Ethernet header
4769 */
4770 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4771 m_free(m);
4772 ip6stat.ip6s_clat464_in_drop++;
4773 goto next;
4774 }
4775 /*
4776 * Set frame_header to point ETHER_HDR_LEN bytes
4777 * preceding the data pointer, and change
4778 * the ether_type too.
4779 */
4780 frame_header = data - ETHER_HDR_LEN;
4781 eh.ether_type = htons(ETHERTYPE_IP);
4782 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4783 break;
4784 }
4785 }
4786 skip_clat:
4787 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
4788 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
4789 dlil_input_cksum_dbg(ifp, m, frame_header,
4790 protocol_family);
4791 }
4792 /*
4793 * For partial checksum offload, we expect the driver to
4794 * set the start offset indicating the start of the span
4795 * that is covered by the hardware-computed checksum;
4796 * adjust this start offset accordingly because the data
4797 * pointer has been advanced beyond the link-layer header.
4798 *
4799 * Virtual LAN types (bridge, vlan, bond) can call
4800 * dlil_input_packet_list() with the same packet, with the
4801 * checksum flags set. Set a flag indicating that the
4802 * adjustment has already been done.
4803 */
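/*
 * For example (illustrative): if frame_header points ETHER_HDR_LEN (14)
 * bytes before m_data, csum_rx_start is reduced by 14 so that it becomes
 * relative to the current data pointer.
 */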
4804 if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
4805 /* adjustment has already been done */
4806 } else if ((m->m_pkthdr.csum_flags &
4807 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4808 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4809 int adj;
4810 if (frame_header == NULL ||
4811 frame_header < (char *)mbuf_datastart(m) ||
4812 frame_header > (char *)m->m_data ||
4813 (adj = (int)(m->m_data - frame_header)) >
4814 m->m_pkthdr.csum_rx_start) {
4815 m->m_pkthdr.csum_data = 0;
4816 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4817 hwcksum_in_invalidated++;
4818 } else {
4819 m->m_pkthdr.csum_rx_start -= adj;
4820 }
4821 /* make sure we don't adjust more than once */
4822 m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
4823 }
4824 if (clat_debug) {
4825 pktap_input(ifp, protocol_family, m, frame_header);
4826 }
4827
4828 if (m->m_flags & (M_BCAST | M_MCAST)) {
4829 atomic_add_64(&ifp->if_imcasts, 1);
4830 }
4831
4832 /* run interface filters */
4833 error = dlil_interface_filters_input(ifp, &m,
4834 &frame_header, protocol_family);
4835 if (error != 0) {
4836 if (error != EJUSTRETURN) {
4837 m_freem(m);
4838 }
4839 goto next;
4840 }
4841 /*
4842 * A VLAN interface receives VLAN-tagged packets by attaching
4843 * its PF_VLAN protocol to a parent interface. When a VLAN
4844 * interface is a member of a bridge, the parent interface
4845 * receives VLAN-tagged M_PROMISC packets. A VLAN-tagged
4846 * M_PROMISC packet must be processed by the VLAN protocol
4847 * so that it can be sent up the stack via
4848 * dlil_input_packet_list(). That allows the bridge interface's
4849 * input filter, attached to the VLAN interface, to process
4850 * the packet.
4851 */
4852 if (protocol_family != PF_VLAN &&
4853 (m->m_flags & M_PROMISC) != 0) {
4854 m_freem(m);
4855 goto next;
4856 }
4857
4858 /* Lookup the protocol attachment to this interface */
4859 if (protocol_family == 0) {
4860 ifproto = NULL;
4861 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4862 (last_ifproto->protocol_family == protocol_family)) {
4863 VERIFY(ifproto == NULL);
4864 ifproto = last_ifproto;
4865 if_proto_ref(last_ifproto);
4866 } else {
4867 VERIFY(ifproto == NULL);
4868 ifnet_lock_shared(ifp);
4869 /* callee holds a proto refcnt upon success */
4870 ifproto = find_attached_proto(ifp, protocol_family);
4871 ifnet_lock_done(ifp);
4872 }
4873 if (ifproto == NULL) {
4874 /* no protocol for this packet, discard */
4875 m_freem(m);
4876 goto next;
4877 }
4878 if (ifproto != last_ifproto) {
4879 if (last_ifproto != NULL) {
4880 /* pass up the list for the previous protocol */
4881 dlil_ifproto_input(last_ifproto, pkt_first);
4882 pkt_first = NULL;
4883 if_proto_free(last_ifproto);
4884 }
4885 last_ifproto = ifproto;
4886 if_proto_ref(ifproto);
4887 }
4888 /* extend the list */
4889 m->m_pkthdr.pkt_hdr = frame_header;
4890 if (pkt_first == NULL) {
4891 pkt_first = m;
4892 } else {
4893 *pkt_next = m;
4894 }
4895 pkt_next = &m->m_nextpkt;
4896
4897 next:
4898 if (next_packet == NULL && last_ifproto != NULL) {
4899 /* pass up the last list of packets */
4900 dlil_ifproto_input(last_ifproto, pkt_first);
4901 if_proto_free(last_ifproto);
4902 last_ifproto = NULL;
4903 }
4904 if (ifproto != NULL) {
4905 if_proto_free(ifproto);
4906 ifproto = NULL;
4907 }
4908
4909 m = next_packet;
4910
4911 /* update the driver's multicast filter, if needed */
4912 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
4913 ifp->if_updatemcasts = 0;
4914 }
4915 if (iorefcnt == 1) {
4916 /* If the next mbuf is on a different interface, unlock data-mov */
4917 if (!m || (ifp != ifp_param && ifp != m->m_pkthdr.rcvif)) {
4918 ifnet_datamov_end(ifp);
4919 iorefcnt = 0;
4920 }
4921 }
4922 }
4923
4924 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4925 }
4926
4927 errno_t
4928 if_mcasts_update(struct ifnet *ifp)
4929 {
4930 errno_t err;
4931
4932 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4933 if (err == EAFNOSUPPORT) {
4934 err = 0;
4935 }
4936 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
4937 "(err=%d)\n", if_name(ifp),
4938 (err == 0 ? "successfully restored" : "failed to restore"),
4939 ifp->if_updatemcasts, err);
4940
4941 /* just return success */
4942 return 0;
4943 }
4944
4945 /* If ifp is set, we will increment the generation for the interface */
4946 int
4947 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4948 {
4949 if (ifp != NULL) {
4950 ifnet_increment_generation(ifp);
4951 }
4952
4953 #if NECP
4954 necp_update_all_clients();
4955 #endif /* NECP */
4956
4957 return kev_post_msg(event);
4958 }
4959
4960 __private_extern__ void
4961 dlil_post_sifflags_msg(struct ifnet * ifp)
4962 {
4963 struct kev_msg ev_msg;
4964 struct net_event_data ev_data;
4965
4966 bzero(&ev_data, sizeof(ev_data));
4967 bzero(&ev_msg, sizeof(ev_msg));
4968 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4969 ev_msg.kev_class = KEV_NETWORK_CLASS;
4970 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4971 ev_msg.event_code = KEV_DL_SIFFLAGS;
4972 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4973 ev_data.if_family = ifp->if_family;
4974 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4975 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4976 ev_msg.dv[0].data_ptr = &ev_data;
4977 ev_msg.dv[1].data_length = 0;
4978 dlil_post_complete_msg(ifp, &ev_msg);
4979 }
4980
4981 #define TMP_IF_PROTO_ARR_SIZE 10
4982 static int
4983 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4984 {
4985 struct ifnet_filter *filter = NULL;
4986 struct if_proto *proto = NULL;
4987 int if_proto_count = 0;
4988 struct if_proto **tmp_ifproto_arr = NULL;
4989 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4990 int tmp_ifproto_arr_idx = 0;
4991 bool tmp_malloc = false;
4992
4993 /*
4994 * Pass the event to the interface filters
4995 */
4996 lck_mtx_lock_spin(&ifp->if_flt_lock);
4997 /* prevent filter list from changing in case we drop the lock */
4998 if_flt_monitor_busy(ifp);
4999 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5000 if (filter->filt_event != NULL) {
5001 lck_mtx_unlock(&ifp->if_flt_lock);
5002
5003 filter->filt_event(filter->filt_cookie, ifp,
5004 filter->filt_protocol, event);
5005
5006 lck_mtx_lock_spin(&ifp->if_flt_lock);
5007 }
5008 }
5009 /* we're done with the filter list */
5010 if_flt_monitor_unbusy(ifp);
5011 lck_mtx_unlock(&ifp->if_flt_lock);
5012
5013 /* Get an io ref count if the interface is attached */
5014 if (!ifnet_is_attached(ifp, 1)) {
5015 goto done;
5016 }
5017
5018 /*
5019 * An embedded tmp_list_entry in if_proto may still get
5020 * overwritten by another thread after giving up the ifnet lock,
5021 * therefore we avoid embedded pointers here.
5022 */
5023 ifnet_lock_shared(ifp);
5024 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
5025 if (if_proto_count) {
5026 int i;
5027 VERIFY(ifp->if_proto_hash != NULL);
5028 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
5029 tmp_ifproto_arr = tmp_ifproto_stack_arr;
5030 } else {
5031 MALLOC(tmp_ifproto_arr, struct if_proto **,
5032 sizeof(*tmp_ifproto_arr) * if_proto_count,
5033 M_TEMP, M_ZERO);
5034 if (tmp_ifproto_arr == NULL) {
5035 ifnet_lock_done(ifp);
5036 goto cleanup;
5037 }
5038 tmp_malloc = true;
5039 }
5040
5041 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5042 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
5043 next_hash) {
5044 if_proto_ref(proto);
5045 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
5046 tmp_ifproto_arr_idx++;
5047 }
5048 }
5049 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
5050 }
5051 ifnet_lock_done(ifp);
5052
5053 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
5054 tmp_ifproto_arr_idx++) {
5055 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
5056 VERIFY(proto != NULL);
5057 proto_media_event eventp =
5058 (proto->proto_kpi == kProtoKPI_v1 ?
5059 proto->kpi.v1.event :
5060 proto->kpi.v2.event);
5061
5062 if (eventp != NULL) {
5063 eventp(ifp, proto->protocol_family,
5064 event);
5065 }
5066 if_proto_free(proto);
5067 }
5068
5069 cleanup:
5070 if (tmp_malloc) {
5071 FREE(tmp_ifproto_arr, M_TEMP);
5072 }
5073
5074 /* Pass the event to the interface */
5075 if (ifp->if_event != NULL) {
5076 ifp->if_event(ifp, event);
5077 }
5078
5079 /* Release the io ref count */
5080 ifnet_decr_iorefcnt(ifp);
5081 done:
5082 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
5083 }
5084
5085 errno_t
5086 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
5087 {
5088 struct kev_msg kev_msg;
5089 int result = 0;
5090
5091 if (ifp == NULL || event == NULL) {
5092 return EINVAL;
5093 }
5094
5095 bzero(&kev_msg, sizeof(kev_msg));
5096 kev_msg.vendor_code = event->vendor_code;
5097 kev_msg.kev_class = event->kev_class;
5098 kev_msg.kev_subclass = event->kev_subclass;
5099 kev_msg.event_code = event->event_code;
5100 kev_msg.dv[0].data_ptr = &event->event_data[0];
5101 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
5102 kev_msg.dv[1].data_length = 0;
5103
5104 result = dlil_event_internal(ifp, &kev_msg, TRUE);
5105
5106 return result;
5107 }
5108
5109 static void
5110 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
5111 {
5112 mbuf_t n = m;
5113 int chainlen = 0;
5114
5115 while (n != NULL) {
5116 chainlen++;
5117 n = n->m_next;
5118 }
5119 switch (chainlen) {
5120 case 0:
5121 break;
5122 case 1:
5123 atomic_add_64(&cls->cls_one, 1);
5124 break;
5125 case 2:
5126 atomic_add_64(&cls->cls_two, 1);
5127 break;
5128 case 3:
5129 atomic_add_64(&cls->cls_three, 1);
5130 break;
5131 case 4:
5132 atomic_add_64(&cls->cls_four, 1);
5133 break;
5134 case 5:
5135 default:
5136 atomic_add_64(&cls->cls_five_or_more, 1);
5137 break;
5138 }
5139 }
5140
5141 /*
5142 * dlil_output
5143 *
5144 * Caller should have a lock on the protocol domain if the protocol
5145 * doesn't support finer grained locking. In most cases, the lock
5146 * will be held from the socket layer and won't be released until
5147 * we return back to the socket layer.
5148 *
5149 * This does mean that we must take a protocol lock before we take
5150 * an interface lock if we're going to take both. This makes sense
5151 * because a protocol is likely to interact with an ifp while it
5152 * is under the protocol lock.
5153 *
5154 * An advisory code will be returned if adv is not null. This
5155 * can be used to provide feedback about interface queues to the
5156 * application.
5157 */
5158 errno_t
5159 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
5160 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
5161 {
5162 char *frame_type = NULL;
5163 char *dst_linkaddr = NULL;
5164 int retval = 0;
5165 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
5166 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
5167 struct if_proto *proto = NULL;
5168 mbuf_t m = NULL;
5169 mbuf_t send_head = NULL;
5170 mbuf_t *send_tail = &send_head;
5171 int iorefcnt = 0;
5172 u_int32_t pre = 0, post = 0;
5173 u_int32_t fpkts = 0, fbytes = 0;
5174 int32_t flen = 0;
5175 struct timespec now;
5176 u_int64_t now_nsec;
5177 boolean_t did_clat46 = FALSE;
5178 protocol_family_t old_proto_family = proto_family;
5179 struct sockaddr_in6 dest6;
5180 struct rtentry *rt = NULL;
5181 u_int32_t m_loop_set = 0;
5182
5183 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
5184
5185 /*
5186 * Get an io refcnt if the interface is attached to prevent ifnet_detach
5187 * from happening while this operation is in progress
5188 */
5189 if (!ifnet_datamov_begin(ifp)) {
5190 retval = ENXIO;
5191 goto cleanup;
5192 }
5193 iorefcnt = 1;
5194
5195 VERIFY(ifp->if_output_dlil != NULL);
5196
5197 /* update the driver's multicast filter, if needed */
5198 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
5199 ifp->if_updatemcasts = 0;
5200 }
5201
5202 frame_type = frame_type_buffer;
5203 dst_linkaddr = dst_linkaddr_buffer;
5204
5205 if (raw == 0) {
5206 ifnet_lock_shared(ifp);
5207 /* callee holds a proto refcnt upon success */
5208 proto = find_attached_proto(ifp, proto_family);
5209 if (proto == NULL) {
5210 ifnet_lock_done(ifp);
5211 retval = ENXIO;
5212 goto cleanup;
5213 }
5214 ifnet_lock_done(ifp);
5215 }
5216
5217 preout_again:
5218 if (packetlist == NULL) {
5219 goto cleanup;
5220 }
5221
5222 m = packetlist;
5223 packetlist = packetlist->m_nextpkt;
5224 m->m_nextpkt = NULL;
5225
5226 /*
5227 * Perform address family translation for the first
5228 * packet outside the loop in order to perform address
5229 * lookup for the translated proto family.
5230 */
5231 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5232 (ifp->if_type == IFT_CELLULAR ||
5233 dlil_is_clat_needed(proto_family, m))) {
5234 retval = dlil_clat46(ifp, &proto_family, &m);
5235 /*
5236 * Go to the next packet if translation fails
5237 */
5238 if (retval != 0) {
5239 m_freem(m);
5240 m = NULL;
5241 ip6stat.ip6s_clat464_out_drop++;
5242 /* Make sure that the proto family is PF_INET */
5243 ASSERT(proto_family == PF_INET);
5244 goto preout_again;
5245 }
5246 /*
5247 * Release the old proto reference and point proto at the IPv6 proto structure.
5248 *
5249 * Change proto the first time we have successfully
5250 * performed address family translation.
5251 */
5252 if (!did_clat46 && proto_family == PF_INET6) {
5253 did_clat46 = TRUE;
5254
5255 if (proto != NULL) {
5256 if_proto_free(proto);
5257 }
5258 ifnet_lock_shared(ifp);
5259 /* callee holds a proto refcnt upon success */
5260 proto = find_attached_proto(ifp, proto_family);
5261 if (proto == NULL) {
5262 ifnet_lock_done(ifp);
5263 retval = ENXIO;
5264 m_freem(m);
5265 m = NULL;
5266 goto cleanup;
5267 }
5268 ifnet_lock_done(ifp);
5269 if (ifp->if_type == IFT_ETHER) {
5270 /* Update the dest to translated v6 address */
5271 dest6.sin6_len = sizeof(struct sockaddr_in6);
5272 dest6.sin6_family = AF_INET6;
5273 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
5274 dest = (const struct sockaddr *)&dest6;
5275
5276 /*
5277 * Lookup route to the translated destination
5278 * Free this route ref during cleanup
5279 */
5280 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
5281 0, 0, ifp->if_index);
5282
5283 route = rt;
5284 }
5285 }
5286 }
5287
5288 /*
5289 * This path handles a packet chain going to the same destination.
5290 * The pre-output routine is used to either trigger resolution of
5291 * the next hop or retrieve the next hop's link-layer addressing.
5292 * For example, the ether_inet(6)_pre_output routine.
5293 *
5294 * If the routine returns EJUSTRETURN, it implies that the packet has
5295 * been queued, and therefore we have to call preout_again for the
5296 * following packet in the chain.
5297 *
5298 * For errors other than EJUSTRETURN, the current packet is freed
5299 * and the rest of the chain (pointed to by packetlist) is freed as
5300 * part of cleanup.
5301 *
5302 * Otherwise, if there is no error, the retrieved information is used
5303 * for all the packets in the chain.
5304 */
5305 if (raw == 0) {
5306 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
5307 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
5308 retval = 0;
5309 if (preoutp != NULL) {
5310 retval = preoutp(ifp, proto_family, &m, dest, route,
5311 frame_type, dst_linkaddr);
5312
5313 if (retval != 0) {
5314 if (retval == EJUSTRETURN) {
5315 goto preout_again;
5316 }
5317 m_freem(m);
5318 m = NULL;
5319 goto cleanup;
5320 }
5321 }
5322 }
5323
5324 do {
5325 /*
5326 * pkt_hdr is set here to point to m_data prior to
5327 * calling into the framer. This value of pkt_hdr is
5328 * used by the netif GSO logic to retrieve the IP header
5329 * of TCP packets offloaded for TSO processing.
5330 */
5331 if ((raw != 0) && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
5332 uint8_t vlan_encap_len = 0;
5333
5334 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_ENCAP_PRESENT) != 0) {
5335 vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
5336 }
5337 m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
5338 } else {
5339 m->m_pkthdr.pkt_hdr = mtod(m, void *);
5340 }
5341
5342 /*
5343 * Perform address family translation if needed.
5344 * For now we only support stateless IPv4-to-IPv6 translation
5345 * on the output path.
5346 *
5347 * The routine below translates the IP header, updates the protocol
5348 * checksum and also translates ICMP.
5349 *
5350 * We skip the first packet as it is already translated and
5351 * the proto family is set to PF_INET6.
5352 */
5353 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5354 (ifp->if_type == IFT_CELLULAR ||
5355 dlil_is_clat_needed(proto_family, m))) {
5356 retval = dlil_clat46(ifp, &proto_family, &m);
5357 /* Goto the next packet if the translation fails */
5358 if (retval != 0) {
5359 m_freem(m);
5360 m = NULL;
5361 ip6stat.ip6s_clat464_out_drop++;
5362 goto next;
5363 }
5364 }
5365
5366 #if CONFIG_DTRACE
5367 if (!raw && proto_family == PF_INET) {
5368 struct ip *ip = mtod(m, struct ip *);
5369 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5370 struct ip *, ip, struct ifnet *, ifp,
5371 struct ip *, ip, struct ip6_hdr *, NULL);
5372 } else if (!raw && proto_family == PF_INET6) {
5373 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
5374 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5375 struct ip6_hdr *, ip6, struct ifnet *, ifp,
5376 struct ip *, NULL, struct ip6_hdr *, ip6);
5377 }
5378 #endif /* CONFIG_DTRACE */
5379
5380 if (raw == 0 && ifp->if_framer != NULL) {
5381 int rcvif_set = 0;
5382
5383 /*
5384 * If this is a broadcast packet that needs to be
5385 * looped back into the system, set the inbound ifp
5386 * to that of the outbound ifp. This will allow
5387 * us to determine that it is a legitimate packet
5388 * for the system. Only set the ifp if it's not
5389 * already set, just to be safe.
5390 */
5391 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
5392 m->m_pkthdr.rcvif == NULL) {
5393 m->m_pkthdr.rcvif = ifp;
5394 rcvif_set = 1;
5395 }
5396 m_loop_set = m->m_flags & M_LOOP;
5397 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
5398 frame_type, &pre, &post);
5399 if (retval != 0) {
5400 if (retval != EJUSTRETURN) {
5401 m_freem(m);
5402 }
5403 goto next;
5404 }
5405
5406 /*
5407 * For partial checksum offload, adjust the start
5408 * and stuff offsets based on the prepended header.
5409 */
5410 if ((m->m_pkthdr.csum_flags &
5411 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5412 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5413 m->m_pkthdr.csum_tx_stuff += pre;
5414 m->m_pkthdr.csum_tx_start += pre;
5415 }
5416
5417 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
5418 dlil_output_cksum_dbg(ifp, m, pre,
5419 proto_family);
5420 }
5421
5422 /*
5423 * Clear the ifp if it was set above, and to be
5424 * safe, only if it is still the same as the
5425 * outbound ifp we have in context. If it was
5426 * looped back, then a copy of it was sent to the
5427 * loopback interface with the rcvif set, and we
5428 * are clearing the one that will go down to the
5429 * layer below.
5430 */
5431 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
5432 m->m_pkthdr.rcvif = NULL;
5433 }
5434 }
5435
5436 /*
5437 * Let interface filters (if any) do their thing ...
5438 */
5439 retval = dlil_interface_filters_output(ifp, &m, proto_family);
5440 if (retval != 0) {
5441 if (retval != EJUSTRETURN) {
5442 m_freem(m);
5443 }
5444 goto next;
5445 }
5446 /*
5447 * Strip away the M_PROTO1 bit prior to sending the packet
5448 * to the driver, as this flag may be used by the driver
5449 */
5450 m->m_flags &= ~M_PROTO1;
5451
5452 /*
5453 * If the underlying interface is not capable of handling a
5454 * packet whose data portion spans across physically disjoint
5455 * pages, we need to "normalize" the packet so that we pass
5456 * down a chain of mbufs where each mbuf points to a span that
5457 * resides within a single system page. If the packet does
5458 * not cross page boundaries, the following is a no-op.
5459 */
5460 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
5461 if ((m = m_normalize(m)) == NULL) {
5462 goto next;
5463 }
5464 }
5465
5466 /*
5467 * If this is a TSO packet, make sure the interface still
5468 * advertises TSO capability.
5469 */
5470 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
5471 retval = EMSGSIZE;
5472 m_freem(m);
5473 goto cleanup;
5474 }
5475
5476 ifp_inc_traffic_class_out(ifp, m);
5477
5478 pktap_output(ifp, proto_family, m, pre, post);
5479
5480 /*
5481 * Count the number of elements in the mbuf chain
5482 */
5483 if (tx_chain_len_count) {
5484 dlil_count_chain_len(m, &tx_chain_len_stats);
5485 }
5486
5487 /*
5488 * Record timestamp; ifnet_enqueue() will use this info
5489 * rather than redoing the work. An optimization could
5490 * involve doing this just once at the top, if there are
5491 * no interface filters attached, but that's probably
5492 * not a big deal.
5493 */
5494 nanouptime(&now);
5495 net_timernsec(&now, &now_nsec);
5496 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
5497
5498 /*
5499 * Discard partial sum information if this packet originated
5500 * from another interface; the packet would already have the
5501 * final checksum and we shouldn't recompute it.
5502 */
5503 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
5504 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5505 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5506 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5507 m->m_pkthdr.csum_data = 0;
5508 }
5509
5510 /*
5511 * Finally, call the driver.
5512 */
5513 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
5514 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5515 flen += (m_pktlen(m) - (pre + post));
5516 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5517 }
5518 *send_tail = m;
5519 send_tail = &m->m_nextpkt;
5520 } else {
5521 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5522 flen = (m_pktlen(m) - (pre + post));
5523 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5524 } else {
5525 flen = 0;
5526 }
5527 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5528 0, 0, 0, 0, 0);
5529 retval = (*ifp->if_output_dlil)(ifp, m);
5530 if (retval == EQFULL || retval == EQSUSPENDED) {
5531 if (adv != NULL && adv->code == FADV_SUCCESS) {
5532 adv->code = (retval == EQFULL ?
5533 FADV_FLOW_CONTROLLED :
5534 FADV_SUSPENDED);
5535 }
5536 retval = 0;
5537 }
5538 if (retval == 0 && flen > 0) {
5539 fbytes += flen;
5540 fpkts++;
5541 }
5542 if (retval != 0 && dlil_verbose) {
5543 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5544 __func__, if_name(ifp),
5545 retval);
5546 }
5547 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
5548 0, 0, 0, 0, 0);
5549 }
5550 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5551
5552 next:
5553 m = packetlist;
5554 if (m != NULL) {
5555 m->m_flags |= m_loop_set;
5556 packetlist = packetlist->m_nextpkt;
5557 m->m_nextpkt = NULL;
5558 }
5559 /* Reset the proto family to old proto family for CLAT */
5560 if (did_clat46) {
5561 proto_family = old_proto_family;
5562 }
5563 } while (m != NULL);
5564
5565 if (send_head != NULL) {
5566 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5567 0, 0, 0, 0, 0);
5568 if (ifp->if_eflags & IFEF_SENDLIST) {
5569 retval = (*ifp->if_output_dlil)(ifp, send_head);
5570 if (retval == EQFULL || retval == EQSUSPENDED) {
5571 if (adv != NULL) {
5572 adv->code = (retval == EQFULL ?
5573 FADV_FLOW_CONTROLLED :
5574 FADV_SUSPENDED);
5575 }
5576 retval = 0;
5577 }
5578 if (retval == 0 && flen > 0) {
5579 fbytes += flen;
5580 fpkts++;
5581 }
5582 if (retval != 0 && dlil_verbose) {
5583 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5584 __func__, if_name(ifp), retval);
5585 }
5586 } else {
5587 struct mbuf *send_m;
5588 int enq_cnt = 0;
5589 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
5590 while (send_head != NULL) {
5591 send_m = send_head;
5592 send_head = send_m->m_nextpkt;
5593 send_m->m_nextpkt = NULL;
5594 retval = (*ifp->if_output_dlil)(ifp, send_m);
5595 if (retval == EQFULL || retval == EQSUSPENDED) {
5596 if (adv != NULL) {
5597 adv->code = (retval == EQFULL ?
5598 FADV_FLOW_CONTROLLED :
5599 FADV_SUSPENDED);
5600 }
5601 retval = 0;
5602 }
5603 if (retval == 0) {
5604 enq_cnt++;
5605 if (flen > 0) {
5606 fpkts++;
5607 }
5608 }
5609 if (retval != 0 && dlil_verbose) {
5610 DLIL_PRINTF("%s: output error on %s "
5611 "retval = %d\n",
5612 __func__, if_name(ifp), retval);
5613 }
5614 }
5615 if (enq_cnt > 0) {
5616 fbytes += flen;
5617 ifnet_start(ifp);
5618 }
5619 }
5620 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5621 }
5622
5623 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5624
5625 cleanup:
5626 if (fbytes > 0) {
5627 ifp->if_fbytes += fbytes;
5628 }
5629 if (fpkts > 0) {
5630 ifp->if_fpackets += fpkts;
5631 }
5632 if (proto != NULL) {
5633 if_proto_free(proto);
5634 }
5635 if (packetlist) { /* if any packets are left, clean up */
5636 mbuf_freem_list(packetlist);
5637 }
5638 if (retval == EJUSTRETURN) {
5639 retval = 0;
5640 }
5641 if (iorefcnt == 1) {
5642 ifnet_datamov_end(ifp);
5643 }
5644 if (rt != NULL) {
5645 rtfree(rt);
5646 rt = NULL;
5647 }
5648
5649 return retval;
5650 }
5651
5652 /*
5653 * This routine checks whether the destination address is not a loopback,
5654 * link-local, multicast or broadcast address (i.e. whether CLAT translation is needed).
5655 */
5656 static int
5657 dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
5658 {
5659 int ret = 0;
5660 switch (proto_family) {
5661 case PF_INET: {
5662 struct ip *iph = mtod(m, struct ip *);
5663 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
5664 ret = 1;
5665 }
5666 break;
5667 }
5668 case PF_INET6: {
5669 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
5670 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
5671 CLAT64_NEEDED(&ip6h->ip6_dst)) {
5672 ret = 1;
5673 }
5674 break;
5675 }
5676 }
5677
5678 return ret;
5679 }
5680 /*
5681 * @brief This routine translates an IPv4 packet to an IPv6 packet,
5682 * updates the protocol checksum and also translates the ICMP type/code
5683 * along with the inner header.
5684 *
5685 * @param ifp Pointer to the interface
5686 * @param proto_family Pointer to the protocol family. It is updated if the
5687 * function performs the translation successfully.
5688 * @param m Pointer to the mbuf pointer. Needed because this
5689 * routine can end up replacing the mbuf with a different one.
5690 *
5691 * @return 0 on success or else a negative value.
5692 */
5693 static errno_t
5694 dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5695 {
5696 VERIFY(*proto_family == PF_INET);
5697 VERIFY(IS_INTF_CLAT46(ifp));
5698
5699 pbuf_t pbuf_store, *pbuf = NULL;
5700 struct ip *iph = NULL;
5701 struct in_addr osrc, odst;
5702 uint8_t proto = 0;
5703 struct in6_ifaddr *ia6_clat_src = NULL;
5704 struct in6_addr *src = NULL;
5705 struct in6_addr dst;
5706 int error = 0;
5707 uint16_t off = 0;
5708 uint16_t tot_len = 0;
5709 uint16_t ip_id_val = 0;
5710 uint16_t ip_frag_off = 0;
5711
5712 boolean_t is_frag = FALSE;
5713 boolean_t is_first_frag = TRUE;
5714 boolean_t is_last_frag = TRUE;
5715
5716 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5717 pbuf = &pbuf_store;
5718 iph = pbuf->pb_data;
5719
5720 osrc = iph->ip_src;
5721 odst = iph->ip_dst;
5722 proto = iph->ip_p;
5723 off = (uint16_t)(iph->ip_hl << 2);
5724 ip_id_val = iph->ip_id;
5725 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5726
5727 tot_len = ntohs(iph->ip_len);
5728
5729 /*
5730 * For packets that are not first fragments
5731 * we only need to adjust the checksum.
5732 * For 4-to-6 translation, the fragmentation header gets appended
5733 * after protocol translation.
5734 */
5735 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5736 is_frag = TRUE;
5737
5738 /* If the offset is not zero, it is not the first fragment */
5739 if (ip_frag_off != 0) {
5740 is_first_frag = FALSE;
5741 }
5742
5743 /* If IP_MF is set, then it is not last frag */
5744 if (ntohs(iph->ip_off) & IP_MF) {
5745 is_last_frag = FALSE;
5746 }
5747 }
5748
5749 /*
5750 * Retrieve the local IPv6 CLAT46 address reserved for stateless
5751 * translation.
5752 */
5753 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5754 if (ia6_clat_src == NULL) {
5755 ip6stat.ip6s_clat464_out_nov6addr_drop++;
5756 error = -1;
5757 goto cleanup;
5758 }
5759
5760 src = &ia6_clat_src->ia_addr.sin6_addr;
5761
5762 /*
5763 * Translate IPv4 destination to IPv6 destination by using the
5764 * prefixes learned through prior PLAT discovery.
5765 */
5766 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5767 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5768 goto cleanup;
5769 }
5770
5771 /* Translate the IP header part first */
5772 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5773 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5774
5775 iph = NULL; /* Invalidate iph as pbuf has been modified */
5776
5777 if (error != 0) {
5778 ip6stat.ip6s_clat464_out_46transfail_drop++;
5779 goto cleanup;
5780 }
5781
5782 /*
5783 * Translate protocol header, update checksum, checksum flags
5784 * and related fields.
5785 */
5786 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5787 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5788
5789 if (error != 0) {
5790 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5791 goto cleanup;
5792 }
5793
5794 /* Now insert the IPv6 fragment header */
5795 if (is_frag) {
5796 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5797
5798 if (error != 0) {
5799 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5800 goto cleanup;
5801 }
5802 }
5803
5804 cleanup:
5805 if (ia6_clat_src != NULL) {
5806 IFA_REMREF(&ia6_clat_src->ia_ifa);
5807 }
5808
5809 if (pbuf_is_valid(pbuf)) {
5810 *m = pbuf->pb_mbuf;
5811 pbuf->pb_mbuf = NULL;
5812 pbuf_destroy(pbuf);
5813 } else {
5814 error = -1;
5815 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5816 }
5817
5818 if (error == 0) {
5819 *proto_family = PF_INET6;
5820 ip6stat.ip6s_clat464_out_success++;
5821 }
5822
5823 return error;
5824 }
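/*
 * Informal sketch of the 4-to-6 translation pipeline implemented above
 * (for orientation only; the authoritative flow is the code itself):
 *
 *	pbuf_init_mbuf(&pbuf_store, *m, ifp);        // wrap the mbuf in a pbuf
 *	in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);     // reserved v6 source address
 *	nat464_synthesize_ipv6(ifp, &odst, &dst);    // v4 dst -> v6 dst (PLAT prefix)
 *	nat464_translate_46(...);                    // rewrite the IP header
 *	nat464_translate_proto(...);                 // fix L4 checksum / ICMP
 *	nat464_insert_frag46(...);                   // only for fragmented packets
 *
 * On success the pbuf's mbuf is handed back through *m and the caller's
 * proto_family is switched to PF_INET6.
 */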
5825
5826 /*
5827 * @brief This routine translates an incoming IPv6 packet to IPv4,
5828 * updates the protocol checksum and also translates the ICMPv6 outer
5829 * and inner headers.
5830 *
5831 * @return 0 on success or else a negative value.
5832 */
5833 static errno_t
5834 dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5835 {
5836 VERIFY(*proto_family == PF_INET6);
5837 VERIFY(IS_INTF_CLAT46(ifp));
5838
5839 struct ip6_hdr *ip6h = NULL;
5840 struct in6_addr osrc, odst;
5841 uint8_t proto = 0;
5842 struct in6_ifaddr *ia6_clat_dst = NULL;
5843 struct in_ifaddr *ia4_clat_dst = NULL;
5844 struct in_addr *dst = NULL;
5845 struct in_addr src;
5846 int error = 0;
5847 uint32_t off = 0;
5848 u_int64_t tot_len = 0;
5849 uint8_t tos = 0;
5850 boolean_t is_first_frag = TRUE;
5851
5852 /* Drop if the incoming mbuf does not contain a valid IPv6 header */
5853 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5854 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5855 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5856 ip6stat.ip6s_clat464_in_tooshort_drop++;
5857 return -1;
5858 }
5859
5860 ip6h = mtod(*m, struct ip6_hdr *);
5861 /* Validate that mbuf contains IP payload equal to ip6_plen */
5862 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5863 ip6stat.ip6s_clat464_in_tooshort_drop++;
5864 return -1;
5865 }
5866
5867 osrc = ip6h->ip6_src;
5868 odst = ip6h->ip6_dst;
5869
5870 /*
5871 * Retrieve the local CLAT46 reserved IPv6 address.
5872 * Let the packet pass if we don't find one, as the flag
5873 * may get set before IPv6 configuration has taken place.
5874 */
5875 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5876 if (ia6_clat_dst == NULL) {
5877 goto done;
5878 }
5879
5880 /*
5881 * Check if the original destination in the packet is the same as the
5882 * reserved CLAT46 IPv6 address
5883 */
5884 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5885 pbuf_t pbuf_store, *pbuf = NULL;
5886 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5887 pbuf = &pbuf_store;
5888
5889 /*
5890 * Retrieve the local CLAT46 IPv4 address reserved for stateless
5891 * translation.
5892 */
5893 ia4_clat_dst = inifa_ifpclatv4(ifp);
5894 if (ia4_clat_dst == NULL) {
5895 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5896 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5897 error = -1;
5898 goto cleanup;
5899 }
5900 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5901
5902 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5903 dst = &ia4_clat_dst->ia_addr.sin_addr;
5904 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5905 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5906 error = -1;
5907 goto cleanup;
5908 }
5909
5910 ip6h = pbuf->pb_data;
5911 off = sizeof(struct ip6_hdr);
5912 proto = ip6h->ip6_nxt;
5913 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5914 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5915
5916 /*
5917 * Translate the IP header and update the fragmentation
5918 * header if needed
5919 */
5920 error = (nat464_translate_64(pbuf, off, tos, &proto,
5921 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5922 0 : -1;
5923
5924 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5925
5926 if (error != 0) {
5927 ip6stat.ip6s_clat464_in_64transfail_drop++;
5928 goto cleanup;
5929 }
5930
5931 /*
5932 * Translate protocol header, update checksum, checksum flags
5933 * and related fields.
5934 */
5935 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5936 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5937 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5938
5939 if (error != 0) {
5940 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5941 goto cleanup;
5942 }
5943
5944 cleanup:
5945 if (ia4_clat_dst != NULL) {
5946 IFA_REMREF(&ia4_clat_dst->ia_ifa);
5947 }
5948
5949 if (pbuf_is_valid(pbuf)) {
5950 *m = pbuf->pb_mbuf;
5951 pbuf->pb_mbuf = NULL;
5952 pbuf_destroy(pbuf);
5953 } else {
5954 error = -1;
5955 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5956 }
5957
5958 if (error == 0) {
5959 *proto_family = PF_INET;
5960 ip6stat.ip6s_clat464_in_success++;
5961 }
5962 } /* CLAT traffic */
5963
5964 done:
5965 return error;
5966 }
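/*
 * Note on the traffic-class extraction used in dlil_clat64() above:
 * after ntohl(), ip6_flow holds the 4-bit version, the 8-bit traffic
 * class and the 20-bit flow label, so
 *
 *	tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
 *
 * picks out the traffic class.  For example ip6_flow == 0x6e012345
 * gives version 6, traffic class 0xe0 and flow label 0x12345; the
 * traffic class is carried over as the IPv4 TOS.
 */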
5967
5968 errno_t
5969 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5970 void *ioctl_arg)
5971 {
5972 struct ifnet_filter *filter;
5973 int retval = EOPNOTSUPP;
5974 int result = 0;
5975
5976 if (ifp == NULL || ioctl_code == 0) {
5977 return EINVAL;
5978 }
5979
5980 /* Get an io ref count if the interface is attached */
5981 if (!ifnet_is_attached(ifp, 1)) {
5982 return EOPNOTSUPP;
5983 }
5984
5985 /*
5986 * Run the interface filters first.
5987 * We want to run all filters before calling the protocol,
5988 * interface family, or interface.
5989 */
5990 lck_mtx_lock_spin(&ifp->if_flt_lock);
5991 /* prevent filter list from changing in case we drop the lock */
5992 if_flt_monitor_busy(ifp);
5993 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5994 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5995 filter->filt_protocol == proto_fam)) {
5996 lck_mtx_unlock(&ifp->if_flt_lock);
5997
5998 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5999 proto_fam, ioctl_code, ioctl_arg);
6000
6001 lck_mtx_lock_spin(&ifp->if_flt_lock);
6002
6003 /* Only update retval if no one has handled the ioctl */
6004 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
6005 if (result == ENOTSUP) {
6006 result = EOPNOTSUPP;
6007 }
6008 retval = result;
6009 if (retval != 0 && retval != EOPNOTSUPP) {
6010 /* we're done with the filter list */
6011 if_flt_monitor_unbusy(ifp);
6012 lck_mtx_unlock(&ifp->if_flt_lock);
6013 goto cleanup;
6014 }
6015 }
6016 }
6017 }
6018 /* we're done with the filter list */
6019 if_flt_monitor_unbusy(ifp);
6020 lck_mtx_unlock(&ifp->if_flt_lock);
6021
6022 /* Allow the protocol to handle the ioctl */
6023 if (proto_fam != 0) {
6024 struct if_proto *proto;
6025
6026 /* callee holds a proto refcnt upon success */
6027 ifnet_lock_shared(ifp);
6028 proto = find_attached_proto(ifp, proto_fam);
6029 ifnet_lock_done(ifp);
6030 if (proto != NULL) {
6031 proto_media_ioctl ioctlp =
6032 (proto->proto_kpi == kProtoKPI_v1 ?
6033 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
6034 result = EOPNOTSUPP;
6035 if (ioctlp != NULL) {
6036 result = ioctlp(ifp, proto_fam, ioctl_code,
6037 ioctl_arg);
6038 }
6039 if_proto_free(proto);
6040
6041 /* Only update retval if no one has handled the ioctl */
6042 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
6043 if (result == ENOTSUP) {
6044 result = EOPNOTSUPP;
6045 }
6046 retval = result;
6047 if (retval && retval != EOPNOTSUPP) {
6048 goto cleanup;
6049 }
6050 }
6051 }
6052 }
6053
6054 /* retval is either 0 or EOPNOTSUPP */
6055
6056 /*
6057 * Let the interface handle this ioctl.
6058 * If it returns EOPNOTSUPP, ignore that, we may have
6059 * already handled this in the protocol or family.
6060 */
6061 if (ifp->if_ioctl) {
6062 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
6063 }
6064
6065 /* Only update retval if no one has handled the ioctl */
6066 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
6067 if (result == ENOTSUP) {
6068 result = EOPNOTSUPP;
6069 }
6070 retval = result;
6071 if (retval && retval != EOPNOTSUPP) {
6072 goto cleanup;
6073 }
6074 }
6075
6076 cleanup:
6077 if (retval == EJUSTRETURN) {
6078 retval = 0;
6079 }
6080
6081 ifnet_decr_iorefcnt(ifp);
6082
6083 return retval;
6084 }
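/*
 * Illustrative sketch (not part of the build): how an interface filter's
 * ioctl handler is expected to interact with the aggregation logic in
 * ifnet_ioctl() above.  MYFILT_GETSTATE and struct my_filter_state are
 * hypothetical names used only for this example.
 *
 *	static errno_t
 *	my_filt_ioctl(void *cookie, ifnet_t ifp, protocol_family_t proto,
 *	    unsigned long cmd, void *arg)
 *	{
 *		struct my_filter_state *st = cookie;
 *
 *		switch (cmd) {
 *		case MYFILT_GETSTATE:
 *			bcopy(&st->pub, arg, sizeof(st->pub));
 *			return 0;		// handled; becomes retval
 *		default:
 *			return EOPNOTSUPP;	// let the protocol/interface try
 *		}
 *	}
 *
 * Returning EJUSTRETURN instead would stop further processing and make
 * ifnet_ioctl() report success to its caller.
 */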
6085
6086 __private_extern__ errno_t
6087 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
6088 {
6089 errno_t error = 0;
6090
6091
6092 if (ifp->if_set_bpf_tap) {
6093 /* Get an io reference on the interface if it is attached */
6094 if (!ifnet_is_attached(ifp, 1)) {
6095 return ENXIO;
6096 }
6097 error = ifp->if_set_bpf_tap(ifp, mode, callback);
6098 ifnet_decr_iorefcnt(ifp);
6099 }
6100 return error;
6101 }
6102
6103 errno_t
6104 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
6105 struct sockaddr *ll_addr, size_t ll_len)
6106 {
6107 errno_t result = EOPNOTSUPP;
6108 struct if_proto *proto;
6109 const struct sockaddr *verify;
6110 proto_media_resolve_multi resolvep;
6111
6112 if (!ifnet_is_attached(ifp, 1)) {
6113 return result;
6114 }
6115
6116 bzero(ll_addr, ll_len);
6117
6118 /* Call the protocol first; callee holds a proto refcnt upon success */
6119 ifnet_lock_shared(ifp);
6120 proto = find_attached_proto(ifp, proto_addr->sa_family);
6121 ifnet_lock_done(ifp);
6122 if (proto != NULL) {
6123 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
6124 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
6125 if (resolvep != NULL) {
6126 result = resolvep(ifp, proto_addr,
6127 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
6128 }
6129 if_proto_free(proto);
6130 }
6131
6132 /* Let the interface verify the multicast address */
6133 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
6134 if (result == 0) {
6135 verify = ll_addr;
6136 } else {
6137 verify = proto_addr;
6138 }
6139 result = ifp->if_check_multi(ifp, verify);
6140 }
6141
6142 ifnet_decr_iorefcnt(ifp);
6143 return result;
6144 }
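/*
 * Illustrative sketch (not part of the build): resolving an IPv4 multicast
 * group to its link-layer mapping via dlil_resolve_multi().  The group
 * 224.0.0.251 is only an example value; the pad is extra room for the
 * returned address bytes.
 *
 *	struct sockaddr_in grp = {
 *		.sin_len = sizeof(grp), .sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(0xe00000fbU),	// 224.0.0.251
 *	};
 *	struct {
 *		struct sockaddr_dl sdl;
 *		char pad[8];
 *	} ll;
 *	errno_t err = dlil_resolve_multi(ifp, (struct sockaddr *)&grp,
 *	    (struct sockaddr *)&ll, sizeof(ll));
 *	// err == 0: ll.sdl holds the mapped link-layer address, e.g.
 *	// 01:00:5e:00:00:fb on Ethernet per the protocol's resolver.
 */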
6145
6146 __private_extern__ errno_t
6147 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
6148 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6149 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6150 {
6151 struct if_proto *proto;
6152 errno_t result = 0;
6153
6154 /* callee holds a proto refcnt upon success */
6155 ifnet_lock_shared(ifp);
6156 proto = find_attached_proto(ifp, target_proto->sa_family);
6157 ifnet_lock_done(ifp);
6158 if (proto == NULL) {
6159 result = ENOTSUP;
6160 } else {
6161 proto_media_send_arp arpp;
6162 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
6163 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
6164 if (arpp == NULL) {
6165 result = ENOTSUP;
6166 } else {
6167 switch (arpop) {
6168 case ARPOP_REQUEST:
6169 arpstat.txrequests++;
6170 if (target_hw != NULL) {
6171 arpstat.txurequests++;
6172 }
6173 break;
6174 case ARPOP_REPLY:
6175 arpstat.txreplies++;
6176 break;
6177 }
6178 result = arpp(ifp, arpop, sender_hw, sender_proto,
6179 target_hw, target_proto);
6180 }
6181 if_proto_free(proto);
6182 }
6183
6184 return result;
6185 }
6186
6187 struct net_thread_marks { };
6188 static const struct net_thread_marks net_thread_marks_base = { };
6189
6190 __private_extern__ const net_thread_marks_t net_thread_marks_none =
6191 &net_thread_marks_base;
6192
6193 __private_extern__ net_thread_marks_t
6194 net_thread_marks_push(u_int32_t push)
6195 {
6196 static const char *const base = (const void*)&net_thread_marks_base;
6197 u_int32_t pop = 0;
6198
6199 if (push != 0) {
6200 struct uthread *uth = get_bsdthread_info(current_thread());
6201
6202 pop = push & ~uth->uu_network_marks;
6203 if (pop != 0) {
6204 uth->uu_network_marks |= pop;
6205 }
6206 }
6207
6208 return (net_thread_marks_t)&base[pop];
6209 }
6210
6211 __private_extern__ net_thread_marks_t
6212 net_thread_unmarks_push(u_int32_t unpush)
6213 {
6214 static const char *const base = (const void*)&net_thread_marks_base;
6215 u_int32_t unpop = 0;
6216
6217 if (unpush != 0) {
6218 struct uthread *uth = get_bsdthread_info(current_thread());
6219
6220 unpop = unpush & uth->uu_network_marks;
6221 if (unpop != 0) {
6222 uth->uu_network_marks &= ~unpop;
6223 }
6224 }
6225
6226 return (net_thread_marks_t)&base[unpop];
6227 }
6228
6229 __private_extern__ void
6230 net_thread_marks_pop(net_thread_marks_t popx)
6231 {
6232 static const char *const base = (const void*)&net_thread_marks_base;
6233 const ptrdiff_t pop = (const char *)popx - (const char *)base;
6234
6235 if (pop != 0) {
6236 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6237 struct uthread *uth = get_bsdthread_info(current_thread());
6238
6239 VERIFY((pop & ones) == pop);
6240 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
6241 uth->uu_network_marks &= ~pop;
6242 }
6243 }
6244
6245 __private_extern__ void
6246 net_thread_unmarks_pop(net_thread_marks_t unpopx)
6247 {
6248 static const char *const base = (const void*)&net_thread_marks_base;
6249 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
6250
6251 if (unpop != 0) {
6252 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6253 struct uthread *uth = get_bsdthread_info(current_thread());
6254
6255 VERIFY((unpop & ones) == unpop);
6256 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
6257 uth->uu_network_marks |= unpop;
6258 }
6259 }
6260
6261 __private_extern__ u_int32_t
6262 net_thread_is_marked(u_int32_t check)
6263 {
6264 if (check != 0) {
6265 struct uthread *uth = get_bsdthread_info(current_thread());
6266 return uth->uu_network_marks & check;
6267 } else {
6268 return 0;
6269 }
6270 }
6271
6272 __private_extern__ u_int32_t
6273 net_thread_is_unmarked(u_int32_t check)
6274 {
6275 if (check != 0) {
6276 struct uthread *uth = get_bsdthread_info(current_thread());
6277 return ~uth->uu_network_marks & check;
6278 } else {
6279 return 0;
6280 }
6281 }
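/*
 * Illustrative sketch (not part of the build): the marks above let a thread
 * record, via per-uthread flag bits, that it already holds some resource so
 * that reentrant paths can detect it.  MY_MARK_BIT is a hypothetical flag
 * bit standing in for the kernel's real mark bits.
 *
 *	net_thread_marks_t marks;
 *
 *	marks = net_thread_marks_push(MY_MARK_BIT);	// set bits not already set
 *	...						// code that may re-enter
 *	if (net_thread_is_marked(MY_MARK_BIT)) {
 *		// re-entered with the resource already held; take the short path
 *	}
 *	...
 *	net_thread_marks_pop(marks);			// clear only the bits we set
 *
 * The returned token encodes which bits this particular push actually set
 * (as an offset from net_thread_marks_base), so nested push/pop pairs
 * compose correctly.
 */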
6282
6283 static __inline__ int
6284 _is_announcement(const struct sockaddr_in * sender_sin,
6285 const struct sockaddr_in * target_sin)
6286 {
6287 if (target_sin == NULL || sender_sin == NULL) {
6288 return FALSE;
6289 }
6290
6291 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
6292 }
6293
6294 __private_extern__ errno_t
6295 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
6296 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
6297 const struct sockaddr *target_proto0, u_int32_t rtflags)
6298 {
6299 errno_t result = 0;
6300 const struct sockaddr_in * sender_sin;
6301 const struct sockaddr_in * target_sin;
6302 struct sockaddr_inarp target_proto_sinarp;
6303 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6304
6305 if (target_proto == NULL || sender_proto == NULL) {
6306 return EINVAL;
6307 }
6308
6309 if (sender_proto->sa_family != target_proto->sa_family) {
6310 return EINVAL;
6311 }
6312
6313 /*
6314 * If the target is a (default) router, provide that
6315 * information to the send_arp callback routine.
6316 */
6317 if (rtflags & RTF_ROUTER) {
6318 bcopy(target_proto, &target_proto_sinarp,
6319 sizeof(struct sockaddr_in));
6320 target_proto_sinarp.sin_other |= SIN_ROUTER;
6321 target_proto = (struct sockaddr *)&target_proto_sinarp;
6322 }
6323
6324 /*
6325 * If this is an ARP request and the target IP is IPv4LL,
6326 * send the request on all interfaces. The exception is
6327 * an announcement, which must only appear on the specific
6328 * interface.
6329 */
6330 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
6331 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6332 if (target_proto->sa_family == AF_INET &&
6333 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
6334 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
6335 !_is_announcement(sender_sin, target_sin)) {
6336 ifnet_t *ifp_list;
6337 u_int32_t count;
6338 u_int32_t ifp_on;
6339
6340 result = ENOTSUP;
6341
6342 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
6343 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6344 errno_t new_result;
6345 ifaddr_t source_hw = NULL;
6346 ifaddr_t source_ip = NULL;
6347 struct sockaddr_in source_ip_copy;
6348 struct ifnet *cur_ifp = ifp_list[ifp_on];
6349
6350 /*
6351 * Only arp on interfaces marked for IPv4LL
6352 * ARPing. This may mean that we don't ARP on
6353 * the interface the subnet route points to.
6354 */
6355 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
6356 continue;
6357 }
6358
6359 /* Find the source IP address */
6360 ifnet_lock_shared(cur_ifp);
6361 source_hw = cur_ifp->if_lladdr;
6362 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
6363 ifa_link) {
6364 IFA_LOCK(source_ip);
6365 if (source_ip->ifa_addr != NULL &&
6366 source_ip->ifa_addr->sa_family ==
6367 AF_INET) {
6368 /* Copy the source IP address */
6369 source_ip_copy =
6370 *(struct sockaddr_in *)
6371 (void *)source_ip->ifa_addr;
6372 IFA_UNLOCK(source_ip);
6373 break;
6374 }
6375 IFA_UNLOCK(source_ip);
6376 }
6377
6378 /* No IP Source, don't arp */
6379 if (source_ip == NULL) {
6380 ifnet_lock_done(cur_ifp);
6381 continue;
6382 }
6383
6384 IFA_ADDREF(source_hw);
6385 ifnet_lock_done(cur_ifp);
6386
6387 /* Send the ARP */
6388 new_result = dlil_send_arp_internal(cur_ifp,
6389 arpop, (struct sockaddr_dl *)(void *)
6390 source_hw->ifa_addr,
6391 (struct sockaddr *)&source_ip_copy, NULL,
6392 target_proto);
6393
6394 IFA_REMREF(source_hw);
6395 if (result == ENOTSUP) {
6396 result = new_result;
6397 }
6398 }
6399 ifnet_list_free(ifp_list);
6400 }
6401 } else {
6402 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
6403 sender_proto, target_hw, target_proto);
6404 }
6405
6406 return result;
6407 }
6408
6409 /*
6410 * Caller must hold ifnet head lock.
6411 */
6412 static int
6413 ifnet_lookup(struct ifnet *ifp)
6414 {
6415 struct ifnet *_ifp;
6416
6417 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
6418 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
6419 if (_ifp == ifp) {
6420 break;
6421 }
6422 }
6423 return _ifp != NULL;
6424 }
6425
6426 /*
6427 * Caller has to pass a non-zero refio argument to get an
6428 * IO reference count. This will prevent ifnet_detach from
6429 * being called when there are outstanding IO reference counts.
6430 */
6431 int
6432 ifnet_is_attached(struct ifnet *ifp, int refio)
6433 {
6434 int ret;
6435
6436 lck_mtx_lock_spin(&ifp->if_ref_lock);
6437 if ((ret = IF_FULLY_ATTACHED(ifp))) {
6438 if (refio > 0) {
6439 ifp->if_refio++;
6440 }
6441 }
6442 lck_mtx_unlock(&ifp->if_ref_lock);
6443
6444 return ret;
6445 }
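/*
 * Illustrative sketch (not part of the build): the usual guard pattern built
 * on ifnet_is_attached()/ifnet_decr_iorefcnt(), as used throughout this file
 * (see e.g. ifnet_ioctl() and dlil_resolve_multi() above).
 *
 *	if (!ifnet_is_attached(ifp, 1)) {
 *		return ENXIO;		// interface gone or detaching
 *	}
 *	// ... safe to use ifp here: the IO refcount taken above keeps
 *	// ifnet_detach() from completing underneath us ...
 *	ifnet_decr_iorefcnt(ifp);	// drop the reference taken above
 */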
6446
6447 void
6448 ifnet_incr_pending_thread_count(struct ifnet *ifp)
6449 {
6450 lck_mtx_lock_spin(&ifp->if_ref_lock);
6451 ifp->if_threads_pending++;
6452 lck_mtx_unlock(&ifp->if_ref_lock);
6453 }
6454
6455 void
6456 ifnet_decr_pending_thread_count(struct ifnet *ifp)
6457 {
6458 lck_mtx_lock_spin(&ifp->if_ref_lock);
6459 VERIFY(ifp->if_threads_pending > 0);
6460 ifp->if_threads_pending--;
6461 if (ifp->if_threads_pending == 0) {
6462 wakeup(&ifp->if_threads_pending);
6463 }
6464 lck_mtx_unlock(&ifp->if_ref_lock);
6465 }
6466
6467 /*
6468 * Caller must ensure the interface is attached; the assumption is that
6469 * there is at least an outstanding IO reference count held already.
6470 * Most callers would call ifnet_is_{attached,data_ready}() instead.
6471 */
6472 void
6473 ifnet_incr_iorefcnt(struct ifnet *ifp)
6474 {
6475 lck_mtx_lock_spin(&ifp->if_ref_lock);
6476 VERIFY(IF_FULLY_ATTACHED(ifp));
6477 VERIFY(ifp->if_refio > 0);
6478 ifp->if_refio++;
6479 lck_mtx_unlock(&ifp->if_ref_lock);
6480 }
6481
6482 __attribute__((always_inline))
6483 static void
6484 ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
6485 {
6486 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
6487
6488 VERIFY(ifp->if_refio > 0);
6489 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6490
6491 ifp->if_refio--;
6492 VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);
6493
6494 /*
6495 * If there are no more outstanding IO references, wake up the
6496 * ifnet_detach thread if the detaching flag is set.
6497 */
6498 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
6499 wakeup(&(ifp->if_refio));
6500 }
6501 }
6502
6503 void
6504 ifnet_decr_iorefcnt(struct ifnet *ifp)
6505 {
6506 lck_mtx_lock_spin(&ifp->if_ref_lock);
6507 ifnet_decr_iorefcnt_locked(ifp);
6508 lck_mtx_unlock(&ifp->if_ref_lock);
6509 }
6510
6511 boolean_t
6512 ifnet_datamov_begin(struct ifnet *ifp)
6513 {
6514 boolean_t ret;
6515
6516 lck_mtx_lock_spin(&ifp->if_ref_lock);
6517 if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
6518 ifp->if_refio++;
6519 ifp->if_datamov++;
6520 }
6521 lck_mtx_unlock(&ifp->if_ref_lock);
6522
6523 return ret;
6524 }
6525
6526 void
6527 ifnet_datamov_end(struct ifnet *ifp)
6528 {
6529 lck_mtx_lock_spin(&ifp->if_ref_lock);
6530 VERIFY(ifp->if_datamov > 0);
6531 /*
6532 * If there are no more threads moving data, wake up any
6533 * drainers that are blocked waiting for this.
6534 */
6535 if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
6536 wakeup(&(ifp->if_datamov));
6537 }
6538 ifnet_decr_iorefcnt_locked(ifp);
6539 lck_mtx_unlock(&ifp->if_ref_lock);
6540 }
6541
6542 void
6543 ifnet_datamov_suspend(struct ifnet *ifp)
6544 {
6545 lck_mtx_lock_spin(&ifp->if_ref_lock);
6546 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6547 ifp->if_refio++;
6548 if (ifp->if_suspend++ == 0) {
6549 VERIFY(ifp->if_refflags & IFRF_READY);
6550 ifp->if_refflags &= ~IFRF_READY;
6551 }
6552 lck_mtx_unlock(&ifp->if_ref_lock);
6553 }
6554
6555 void
6556 ifnet_datamov_drain(struct ifnet *ifp)
6557 {
6558 lck_mtx_lock(&ifp->if_ref_lock);
6559 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6560 /* data movement must already be suspended */
6561 VERIFY(ifp->if_suspend > 0);
6562 VERIFY(!(ifp->if_refflags & IFRF_READY));
6563 ifp->if_drainers++;
6564 while (ifp->if_datamov != 0) {
6565 (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
6566 (PZERO - 1), __func__, NULL);
6567 }
6568 VERIFY(!(ifp->if_refflags & IFRF_READY));
6569 VERIFY(ifp->if_drainers > 0);
6570 ifp->if_drainers--;
6571 lck_mtx_unlock(&ifp->if_ref_lock);
6572
6573 /* purge the interface queues */
6574 if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
6575 if_qflush(ifp, 0);
6576 }
6577 }
6578
6579 void
6580 ifnet_datamov_resume(struct ifnet *ifp)
6581 {
6582 lck_mtx_lock(&ifp->if_ref_lock);
6583 /* data movement must already be suspended */
6584 VERIFY(ifp->if_suspend > 0);
6585 if (--ifp->if_suspend == 0) {
6586 VERIFY(!(ifp->if_refflags & IFRF_READY));
6587 ifp->if_refflags |= IFRF_READY;
6588 }
6589 ifnet_decr_iorefcnt_locked(ifp);
6590 lck_mtx_unlock(&ifp->if_ref_lock);
6591 }
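/*
 * Illustrative sketch (not part of the build): the quiesce sequence built
 * from the primitives above, as a reconfiguration path might use it.
 *
 *	ifnet_datamov_suspend(ifp);	// new ifnet_datamov_begin() calls now fail
 *	ifnet_datamov_drain(ifp);	// wait out in-flight data movers, flush queues
 *	// ... reconfigure the interface while no data is moving ...
 *	ifnet_datamov_resume(ifp);	// mark the interface ready again
 */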
6592
6593 static void
6594 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
6595 {
6596 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
6597 ctrace_t *tr;
6598 u_int32_t idx;
6599 u_int16_t *cnt;
6600
6601 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
6602 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
6603 /* NOTREACHED */
6604 }
6605
6606 if (refhold) {
6607 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
6608 tr = dl_if_dbg->dldbg_if_refhold;
6609 } else {
6610 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
6611 tr = dl_if_dbg->dldbg_if_refrele;
6612 }
6613
6614 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
6615 ctrace_record(&tr[idx]);
6616 }
6617
6618 errno_t
6619 dlil_if_ref(struct ifnet *ifp)
6620 {
6621 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6622
6623 if (dl_if == NULL) {
6624 return EINVAL;
6625 }
6626
6627 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6628 ++dl_if->dl_if_refcnt;
6629 if (dl_if->dl_if_refcnt == 0) {
6630 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
6631 /* NOTREACHED */
6632 }
6633 if (dl_if->dl_if_trace != NULL) {
6634 (*dl_if->dl_if_trace)(dl_if, TRUE);
6635 }
6636 lck_mtx_unlock(&dl_if->dl_if_lock);
6637
6638 return 0;
6639 }
6640
6641 errno_t
6642 dlil_if_free(struct ifnet *ifp)
6643 {
6644 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6645 bool need_release = FALSE;
6646
6647 if (dl_if == NULL) {
6648 return EINVAL;
6649 }
6650
6651 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6652 switch (dl_if->dl_if_refcnt) {
6653 case 0:
6654 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
6655 /* NOTREACHED */
6656 break;
6657 case 1:
6658 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
6659 need_release = TRUE;
6660 }
6661 break;
6662 default:
6663 break;
6664 }
6665 --dl_if->dl_if_refcnt;
6666 if (dl_if->dl_if_trace != NULL) {
6667 (*dl_if->dl_if_trace)(dl_if, FALSE);
6668 }
6669 lck_mtx_unlock(&dl_if->dl_if_lock);
6670 if (need_release) {
6671 dlil_if_release(ifp);
6672 }
6673 return 0;
6674 }
6675
6676 static errno_t
6677 dlil_attach_protocol_internal(struct if_proto *proto,
6678 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
6679 uint32_t * proto_count)
6680 {
6681 struct kev_dl_proto_data ev_pr_data;
6682 struct ifnet *ifp = proto->ifp;
6683 int retval = 0;
6684 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6685 struct if_proto *prev_proto;
6686 struct if_proto *_proto;
6687
6688 /* callee holds a proto refcnt upon success */
6689 ifnet_lock_exclusive(ifp);
6690 _proto = find_attached_proto(ifp, proto->protocol_family);
6691 if (_proto != NULL) {
6692 ifnet_lock_done(ifp);
6693 if_proto_free(_proto);
6694 return EEXIST;
6695 }
6696
6697 /*
6698 * Call family module add_proto routine so it can refine the
6699 * demux descriptors as it wishes.
6700 */
6701 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
6702 demux_count);
6703 if (retval) {
6704 ifnet_lock_done(ifp);
6705 return retval;
6706 }
6707
6708 /*
6709 * Insert the protocol in the hash
6710 */
6711 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
6712 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
6713 prev_proto = SLIST_NEXT(prev_proto, next_hash);
6714 }
6715 if (prev_proto) {
6716 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
6717 } else {
6718 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
6719 proto, next_hash);
6720 }
6721
6722 /* hold a proto refcnt for attach */
6723 if_proto_ref(proto);
6724
6725 /*
6726 * The reserved field carries the number of protocols still attached
6727 * (subject to change)
6728 */
6729 ev_pr_data.proto_family = proto->protocol_family;
6730 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
6731
6732 ifnet_lock_done(ifp);
6733
6734 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
6735 (struct net_event_data *)&ev_pr_data,
6736 sizeof(struct kev_dl_proto_data));
6737 if (proto_count != NULL) {
6738 *proto_count = ev_pr_data.proto_remaining_count;
6739 }
6740 return retval;
6741 }
6742
6743 errno_t
6744 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6745 const struct ifnet_attach_proto_param *proto_details)
6746 {
6747 int retval = 0;
6748 struct if_proto *ifproto = NULL;
6749 uint32_t proto_count = 0;
6750
6751 ifnet_head_lock_shared();
6752 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6753 retval = EINVAL;
6754 goto end;
6755 }
6756 /* Check that the interface is in the global list */
6757 if (!ifnet_lookup(ifp)) {
6758 retval = ENXIO;
6759 goto end;
6760 }
6761
6762 ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO);
6763 if (ifproto == NULL) {
6764 retval = ENOMEM;
6765 goto end;
6766 }
6767
6768 /* refcnt held above during lookup */
6769 ifproto->ifp = ifp;
6770 ifproto->protocol_family = protocol;
6771 ifproto->proto_kpi = kProtoKPI_v1;
6772 ifproto->kpi.v1.input = proto_details->input;
6773 ifproto->kpi.v1.pre_output = proto_details->pre_output;
6774 ifproto->kpi.v1.event = proto_details->event;
6775 ifproto->kpi.v1.ioctl = proto_details->ioctl;
6776 ifproto->kpi.v1.detached = proto_details->detached;
6777 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
6778 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6779
6780 retval = dlil_attach_protocol_internal(ifproto,
6781 proto_details->demux_list, proto_details->demux_count,
6782 &proto_count);
6783
6784 end:
6785 if (retval != 0 && retval != EEXIST) {
6786 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
6787 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6788 } else {
6789 if (dlil_verbose) {
6790 DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
6791 ifp != NULL ? if_name(ifp) : "N/A",
6792 protocol, proto_count);
6793 }
6794 }
6795 ifnet_head_done();
6796 if (retval == 0) {
6797 /*
6798 * A protocol has been attached, mark the interface up.
6799 * This used to be done by configd.KernelEventMonitor, but that
6800 * is inherently prone to races (rdar://problem/30810208).
6801 */
6802 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6803 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6804 dlil_post_sifflags_msg(ifp);
6805 } else if (ifproto != NULL) {
6806 zfree(dlif_proto_zone, ifproto);
6807 }
6808 return retval;
6809 }
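/*
 * Illustrative sketch (not part of the build): how a protocol module might
 * attach itself with the v1 KPI above.  PF_MYPROTO, my_demux, my_demux_count
 * and the my_* callbacks are hypothetical names supplied by the protocol;
 * unneeded callbacks may be left NULL.
 *
 *	struct ifnet_attach_proto_param proto = {
 *		.demux_list  = my_demux,	// demux descriptors for this proto
 *		.demux_count = my_demux_count,
 *		.input       = my_input,	// inbound packets
 *		.pre_output  = my_pre_output,	// next-hop resolution (see above)
 *		.ioctl       = my_ioctl,
 *	};
 *	errno_t err = ifnet_attach_protocol(ifp, PF_MYPROTO, &proto);
 *	// On success the interface is also marked IFF_UP by the code above.
 */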
6810
6811 errno_t
6812 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6813 const struct ifnet_attach_proto_param_v2 *proto_details)
6814 {
6815 int retval = 0;
6816 struct if_proto *ifproto = NULL;
6817 uint32_t proto_count = 0;
6818
6819 ifnet_head_lock_shared();
6820 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6821 retval = EINVAL;
6822 goto end;
6823 }
6824 /* Check that the interface is in the global list */
6825 if (!ifnet_lookup(ifp)) {
6826 retval = ENXIO;
6827 goto end;
6828 }
6829
6830 ifproto = zalloc(dlif_proto_zone);
6831 if (ifproto == NULL) {
6832 retval = ENOMEM;
6833 goto end;
6834 }
6835 bzero(ifproto, sizeof(*ifproto));
6836
6837 /* refcnt held above during lookup */
6838 ifproto->ifp = ifp;
6839 ifproto->protocol_family = protocol;
6840 ifproto->proto_kpi = kProtoKPI_v2;
6841 ifproto->kpi.v2.input = proto_details->input;
6842 ifproto->kpi.v2.pre_output = proto_details->pre_output;
6843 ifproto->kpi.v2.event = proto_details->event;
6844 ifproto->kpi.v2.ioctl = proto_details->ioctl;
6845 ifproto->kpi.v2.detached = proto_details->detached;
6846 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
6847 ifproto->kpi.v2.send_arp = proto_details->send_arp;
6848
6849 retval = dlil_attach_protocol_internal(ifproto,
6850 proto_details->demux_list, proto_details->demux_count,
6851 &proto_count);
6852
6853 end:
6854 if (retval != 0 && retval != EEXIST) {
6855 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
6856 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6857 } else {
6858 if (dlil_verbose) {
6859 DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
6860 ifp != NULL ? if_name(ifp) : "N/A",
6861 protocol, proto_count);
6862 }
6863 }
6864 ifnet_head_done();
6865 if (retval == 0) {
6866 /*
6867 * A protocol has been attached, mark the interface up.
6868 * This used to be done by configd.KernelEventMonitor, but that
6869 * is inherently prone to races (rdar://problem/30810208).
6870 */
6871 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6872 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6873 dlil_post_sifflags_msg(ifp);
6874 } else if (ifproto != NULL) {
6875 zfree(dlif_proto_zone, ifproto);
6876 }
6877 return retval;
6878 }
6879
6880 errno_t
6881 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
6882 {
6883 struct if_proto *proto = NULL;
6884 int retval = 0;
6885
6886 if (ifp == NULL || proto_family == 0) {
6887 retval = EINVAL;
6888 goto end;
6889 }
6890
6891 ifnet_lock_exclusive(ifp);
6892 /* callee holds a proto refcnt upon success */
6893 proto = find_attached_proto(ifp, proto_family);
6894 if (proto == NULL) {
6895 retval = ENXIO;
6896 ifnet_lock_done(ifp);
6897 goto end;
6898 }
6899
6900 /* call family module del_proto */
6901 if (ifp->if_del_proto) {
6902 ifp->if_del_proto(ifp, proto->protocol_family);
6903 }
6904
6905 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
6906 proto, if_proto, next_hash);
6907
6908 if (proto->proto_kpi == kProtoKPI_v1) {
6909 proto->kpi.v1.input = ifproto_media_input_v1;
6910 proto->kpi.v1.pre_output = ifproto_media_preout;
6911 proto->kpi.v1.event = ifproto_media_event;
6912 proto->kpi.v1.ioctl = ifproto_media_ioctl;
6913 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
6914 proto->kpi.v1.send_arp = ifproto_media_send_arp;
6915 } else {
6916 proto->kpi.v2.input = ifproto_media_input_v2;
6917 proto->kpi.v2.pre_output = ifproto_media_preout;
6918 proto->kpi.v2.event = ifproto_media_event;
6919 proto->kpi.v2.ioctl = ifproto_media_ioctl;
6920 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
6921 proto->kpi.v2.send_arp = ifproto_media_send_arp;
6922 }
6923 proto->detached = 1;
6924 ifnet_lock_done(ifp);
6925
6926 if (dlil_verbose) {
6927 DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
6928 (proto->proto_kpi == kProtoKPI_v1) ?
6929 "v1" : "v2", proto_family);
6930 }
6931
6932 /* release proto refcnt held during protocol attach */
6933 if_proto_free(proto);
6934
6935 /*
6936 * Release proto refcnt held during lookup; the rest of
6937 * protocol detach steps will happen when the last proto
6938 * reference is released.
6939 */
6940 if_proto_free(proto);
6941
6942 end:
6943 return retval;
6944 }
6945
6946
6947 static errno_t
6948 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
6949 struct mbuf *packet, char *header)
6950 {
6951 #pragma unused(ifp, protocol, packet, header)
6952 return ENXIO;
6953 }
6954
6955 static errno_t
6956 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
6957 struct mbuf *packet)
6958 {
6959 #pragma unused(ifp, protocol, packet)
6960 return ENXIO;
6961 }
6962
6963 static errno_t
6964 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
6965 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
6966 char *link_layer_dest)
6967 {
6968 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
6969 return ENXIO;
6970 }
6971
6972 static void
6973 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6974 const struct kev_msg *event)
6975 {
6976 #pragma unused(ifp, protocol, event)
6977 }
6978
6979 static errno_t
6980 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6981 unsigned long command, void *argument)
6982 {
6983 #pragma unused(ifp, protocol, command, argument)
6984 return ENXIO;
6985 }
6986
6987 static errno_t
6988 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6989 struct sockaddr_dl *out_ll, size_t ll_len)
6990 {
6991 #pragma unused(ifp, proto_addr, out_ll, ll_len)
6992 return ENXIO;
6993 }
6994
6995 static errno_t
6996 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6997 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6998 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6999 {
7000 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
7001 return ENXIO;
7002 }
7003
7004 extern int if_next_index(void);
7005 extern int tcp_ecn_outbound;
7006
7007 errno_t
7008 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
7009 {
7010 struct ifnet *tmp_if;
7011 struct ifaddr *ifa;
7012 struct if_data_internal if_data_saved;
7013 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7014 struct dlil_threading_info *dl_inp;
7015 thread_continue_t thfunc = NULL;
7016 u_int32_t sflags = 0;
7017 int err;
7018
7019 if (ifp == NULL) {
7020 return EINVAL;
7021 }
7022
7023 /*
7024 * Serialize ifnet attach using dlil_ifnet_lock, in order to
7025 * prevent the interface from being configured while it is
7026 * embryonic, as ifnet_head_lock is dropped and reacquired
7027 * below prior to marking the ifnet with IFRF_ATTACHED.
7028 */
7029 dlil_if_lock();
7030 ifnet_head_lock_exclusive();
7031 /* Verify we aren't already on the list */
7032 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
7033 if (tmp_if == ifp) {
7034 ifnet_head_done();
7035 dlil_if_unlock();
7036 return EEXIST;
7037 }
7038 }
7039
7040 lck_mtx_lock_spin(&ifp->if_ref_lock);
7041 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
7042 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
7043 __func__, ifp);
7044 /* NOTREACHED */
7045 }
7046 lck_mtx_unlock(&ifp->if_ref_lock);
7047
7048 ifnet_lock_exclusive(ifp);
7049
7050 /* Sanity check */
7051 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7052 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
7053 VERIFY(ifp->if_threads_pending == 0);
7054
7055 if (ll_addr != NULL) {
7056 if (ifp->if_addrlen == 0) {
7057 ifp->if_addrlen = ll_addr->sdl_alen;
7058 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
7059 ifnet_lock_done(ifp);
7060 ifnet_head_done();
7061 dlil_if_unlock();
7062 return EINVAL;
7063 }
7064 }
7065
7066 /*
7067 * Allow interfaces without protocol families to attach
7068 * only if they have the necessary fields filled out.
7069 */
7070 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
7071 DLIL_PRINTF("%s: Attempt to attach interface without "
7072 "family module - %d\n", __func__, ifp->if_family);
7073 ifnet_lock_done(ifp);
7074 ifnet_head_done();
7075 dlil_if_unlock();
7076 return ENODEV;
7077 }
7078
7079 /* Allocate protocol hash table */
7080 VERIFY(ifp->if_proto_hash == NULL);
7081 ifp->if_proto_hash = zalloc_flags(dlif_phash_zone, Z_WAITOK | Z_ZERO);
7082 if (ifp->if_proto_hash == NULL) {
7083 ifnet_lock_done(ifp);
7084 ifnet_head_done();
7085 dlil_if_unlock();
7086 return ENOBUFS;
7087 }
7088
7089 lck_mtx_lock_spin(&ifp->if_flt_lock);
7090 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
7091 TAILQ_INIT(&ifp->if_flt_head);
7092 VERIFY(ifp->if_flt_busy == 0);
7093 VERIFY(ifp->if_flt_waiters == 0);
7094 lck_mtx_unlock(&ifp->if_flt_lock);
7095
7096 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
7097 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
7098 LIST_INIT(&ifp->if_multiaddrs);
7099 }
7100
7101 VERIFY(ifp->if_allhostsinm == NULL);
7102 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
7103 TAILQ_INIT(&ifp->if_addrhead);
7104
7105 if (ifp->if_index == 0) {
7106 int idx = if_next_index();
7107
7108 if (idx == -1) {
7109 ifp->if_index = 0;
7110 ifnet_lock_done(ifp);
7111 ifnet_head_done();
7112 dlil_if_unlock();
7113 return ENOBUFS;
7114 }
7115 ifp->if_index = (uint16_t)idx;
7116
7117 /* the lladdr passed at attach time is the permanent address */
7118 if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
7119 ll_addr->sdl_alen == ETHER_ADDR_LEN) {
7120 bcopy(CONST_LLADDR(ll_addr),
7121 dl_if->dl_if_permanent_ether,
7122 ETHER_ADDR_LEN);
7123 dl_if->dl_if_permanent_ether_is_set = 1;
7124 }
7125 }
7126 /* There should not be anything occupying this slot */
7127 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7128
7129 /* allocate (if needed) and initialize a link address */
7130 ifa = dlil_alloc_lladdr(ifp, ll_addr);
7131 if (ifa == NULL) {
7132 ifnet_lock_done(ifp);
7133 ifnet_head_done();
7134 dlil_if_unlock();
7135 return ENOBUFS;
7136 }
7137
7138 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
7139 ifnet_addrs[ifp->if_index - 1] = ifa;
7140
7141 /* make this address the first on the list */
7142 IFA_LOCK(ifa);
7143 /* hold a reference for ifnet_addrs[] */
7144 IFA_ADDREF_LOCKED(ifa);
7145 /* if_attach_link_ifa() holds a reference for ifa_link */
7146 if_attach_link_ifa(ifp, ifa);
7147 IFA_UNLOCK(ifa);
7148
7149 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
7150 ifindex2ifnet[ifp->if_index] = ifp;
7151
7152 /* Hold a reference to the underlying dlil_ifnet */
7153 ifnet_reference(ifp);
7154
7155 /* Clear stats (save and restore other fields that we care about) */
7156 if_data_saved = ifp->if_data;
7157 bzero(&ifp->if_data, sizeof(ifp->if_data));
7158 ifp->if_data.ifi_type = if_data_saved.ifi_type;
7159 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
7160 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
7161 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
7162 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
7163 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
7164 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
7165 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
7166 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
7167 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
7168 ifnet_touch_lastchange(ifp);
7169
7170 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
7171 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
7172 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
7173
7174 /* By default, use SFB and enable flow advisory */
7175 sflags = PKTSCHEDF_QALG_SFB;
7176 if (if_flowadv) {
7177 sflags |= PKTSCHEDF_QALG_FLOWCTL;
7178 }
7179
7180 if (if_delaybased_queue) {
7181 sflags |= PKTSCHEDF_QALG_DELAYBASED;
7182 }
7183
7184 if (ifp->if_output_sched_model ==
7185 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
7186 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
7187 }
7188
7189 /* Initialize transmit queue(s) */
7190 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
7191 if (err != 0) {
7192 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
7193 "err=%d", __func__, ifp, err);
7194 /* NOTREACHED */
7195 }
7196
7197 /* Sanity checks on the input thread storage */
7198 dl_inp = &dl_if->dl_if_inpstorage;
7199 bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
7200 VERIFY(dl_inp->dlth_flags == 0);
7201 VERIFY(dl_inp->dlth_wtot == 0);
7202 VERIFY(dl_inp->dlth_ifp == NULL);
7203 VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
7204 VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
7205 VERIFY(!dl_inp->dlth_affinity);
7206 VERIFY(ifp->if_inp == NULL);
7207 VERIFY(dl_inp->dlth_thread == THREAD_NULL);
7208 VERIFY(dl_inp->dlth_strategy == NULL);
7209 VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
7210 VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
7211 VERIFY(dl_inp->dlth_affinity_tag == 0);
7212
7213 #if IFNET_INPUT_SANITY_CHK
7214 VERIFY(dl_inp->dlth_pkts_cnt == 0);
7215 #endif /* IFNET_INPUT_SANITY_CHK */
7216
7217 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7218 dlil_reset_rxpoll_params(ifp);
7219 /*
7220 * A specific DLIL input thread is created per non-loopback interface.
7221 */
7222 if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
7223 ifp->if_inp = dl_inp;
7224 ifnet_incr_pending_thread_count(ifp);
7225 err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
7226 if (err == ENODEV) {
7227 VERIFY(thfunc == NULL);
7228 ifnet_decr_pending_thread_count(ifp);
7229 } else if (err != 0) {
7230 panic_plain("%s: ifp=%p couldn't get an input thread; "
7231 "err=%d", __func__, ifp, err);
7232 /* NOTREACHED */
7233 }
7234 }
7235 /*
7236 * If the driver supports the new transmit model, calculate flow hash
7237 * and create a workloop starter thread to invoke the if_start callback
7238 * where the packets may be dequeued and transmitted.
7239 */
7240 if (ifp->if_eflags & IFEF_TXSTART) {
7241 thread_precedence_policy_data_t info;
7242 __unused kern_return_t kret;
7243
7244 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
7245 VERIFY(ifp->if_flowhash != 0);
7246 VERIFY(ifp->if_start_thread == THREAD_NULL);
7247
7248 ifnet_set_start_cycle(ifp, NULL);
7249 ifp->if_start_active = 0;
7250 ifp->if_start_req = 0;
7251 ifp->if_start_flags = 0;
7252 VERIFY(ifp->if_start != NULL);
7253 ifnet_incr_pending_thread_count(ifp);
7254 if ((err = kernel_thread_start(ifnet_start_thread_func,
7255 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
7256 panic_plain("%s: "
7257 "ifp=%p couldn't get a start thread; "
7258 "err=%d", __func__, ifp, err);
7259 /* NOTREACHED */
7260 }
7261 bzero(&info, sizeof(info));
7262 info.importance = 1;
7263 kret = thread_policy_set(ifp->if_start_thread,
7264 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
7265 THREAD_PRECEDENCE_POLICY_COUNT);
7266 ASSERT(kret == KERN_SUCCESS);
7267 } else {
7268 ifp->if_flowhash = 0;
7269 }
7270
7271 /* Reset polling parameters */
7272 ifnet_set_poll_cycle(ifp, NULL);
7273 ifp->if_poll_update = 0;
7274 ifp->if_poll_flags = 0;
7275 ifp->if_poll_req = 0;
7276 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7277
7278 /*
7279 * If the driver supports the new receive model, create a poller
7280 * thread to invoke the if_input_poll callback where the packets may
7281 * be dequeued from the driver and processed for reception.
7282 * If the interface is netif compat, then the poller thread is
7283 * managed by netif.
7284 */
7285 if (thfunc == dlil_rxpoll_input_thread_func) {
7286 thread_precedence_policy_data_t info;
7287 __unused kern_return_t kret;
7288 VERIFY(ifp->if_input_poll != NULL);
7289 VERIFY(ifp->if_input_ctl != NULL);
7290 ifnet_incr_pending_thread_count(ifp);
7291 if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
7292 &ifp->if_poll_thread)) != KERN_SUCCESS) {
7293 panic_plain("%s: ifp=%p couldn't get a poll thread; "
7294 "err=%d", __func__, ifp, err);
7295 /* NOTREACHED */
7296 }
7297 bzero(&info, sizeof(info));
7298 info.importance = 1;
7299 kret = thread_policy_set(ifp->if_poll_thread,
7300 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
7301 THREAD_PRECEDENCE_POLICY_COUNT);
7302 ASSERT(kret == KERN_SUCCESS);
7303 }
7304
7305 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7306 VERIFY(ifp->if_desc.ifd_len == 0);
7307 VERIFY(ifp->if_desc.ifd_desc != NULL);
7308
7309 /* Record attach PC stacktrace */
7310 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
7311
7312 ifp->if_updatemcasts = 0;
7313 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
7314 struct ifmultiaddr *ifma;
7315 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
7316 IFMA_LOCK(ifma);
7317 if (ifma->ifma_addr->sa_family == AF_LINK ||
7318 ifma->ifma_addr->sa_family == AF_UNSPEC) {
7319 ifp->if_updatemcasts++;
7320 }
7321 IFMA_UNLOCK(ifma);
7322 }
7323
7324 DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
7325 "membership(s)\n", if_name(ifp),
7326 ifp->if_updatemcasts);
7327 }
7328
7329 /* Clear logging parameters */
7330 bzero(&ifp->if_log, sizeof(ifp->if_log));
7331
7332 /* Clear foreground/realtime activity timestamps */
7333 ifp->if_fg_sendts = 0;
7334 ifp->if_rt_sendts = 0;
7335
7336 VERIFY(ifp->if_delegated.ifp == NULL);
7337 VERIFY(ifp->if_delegated.type == 0);
7338 VERIFY(ifp->if_delegated.family == 0);
7339 VERIFY(ifp->if_delegated.subfamily == 0);
7340 VERIFY(ifp->if_delegated.expensive == 0);
7341 VERIFY(ifp->if_delegated.constrained == 0);
7342
7343 VERIFY(ifp->if_agentids == NULL);
7344 VERIFY(ifp->if_agentcount == 0);
7345
7346 /* Reset interface state */
7347 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7348 ifp->if_interface_state.valid_bitmask |=
7349 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7350 ifp->if_interface_state.interface_availability =
7351 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
7352
7353 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
7354 if (ifp == lo_ifp) {
7355 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
7356 ifp->if_interface_state.valid_bitmask |=
7357 IF_INTERFACE_STATE_LQM_STATE_VALID;
7358 } else {
7359 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
7360 }
7361
7362 /*
7363 * Enable ECN capability on this interface depending on the
7364 * value of the global ECN setting
7365 */
7366 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
7367 if_set_eflags(ifp, IFEF_ECN_ENABLE);
7368 if_clear_eflags(ifp, IFEF_ECN_DISABLE);
7369 }
7370
7371 /*
7372 * Built-in Cyclops always on policy for WiFi infra
7373 */
7374 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
7375 errno_t error;
7376
7377 error = if_set_qosmarking_mode(ifp,
7378 IFRTYPE_QOSMARKING_FASTLANE);
7379 if (error != 0) {
7380 DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
7381 __func__, ifp->if_xname, error);
7382 } else {
7383 if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
7384 #if (DEVELOPMENT || DEBUG)
7385 DLIL_PRINTF("%s fastlane enabled on %s\n",
7386 __func__, ifp->if_xname);
7387 #endif /* (DEVELOPMENT || DEBUG) */
7388 }
7389 }
7390
7391 ifnet_lock_done(ifp);
7392 ifnet_head_done();
7393
7394
7395 lck_mtx_lock(&ifp->if_cached_route_lock);
7396 /* Enable forwarding cached route */
7397 ifp->if_fwd_cacheok = 1;
7398 /* Clean up any existing cached routes */
7399 ROUTE_RELEASE(&ifp->if_fwd_route);
7400 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
7401 ROUTE_RELEASE(&ifp->if_src_route);
7402 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
7403 ROUTE_RELEASE(&ifp->if_src_route6);
7404 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
7405 lck_mtx_unlock(&ifp->if_cached_route_lock);
7406
7407 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
7408
7409 /*
7410 * Allocate and attach IGMPv3/MLDv2 interface specific variables
7411 * and trees; do this before the ifnet is marked as attached.
7412 * The ifnet keeps the reference to the info structures even after
7413 * it is detached, since network-layer records may still refer to
7414 * them at that point. This also makes it possible for the info
7415 * structures to keep functioning after the ifnet is recycled or
7416 * reattached.
7417 */
7418 #if INET
7419 if (IGMP_IFINFO(ifp) == NULL) {
7420 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
7421 VERIFY(IGMP_IFINFO(ifp) != NULL);
7422 } else {
7423 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
7424 igmp_domifreattach(IGMP_IFINFO(ifp));
7425 }
7426 #endif /* INET */
7427 if (MLD_IFINFO(ifp) == NULL) {
7428 MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
7429 VERIFY(MLD_IFINFO(ifp) != NULL);
7430 } else {
7431 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
7432 mld_domifreattach(MLD_IFINFO(ifp));
7433 }
7434
7435 VERIFY(ifp->if_data_threshold == 0);
7436 VERIFY(ifp->if_dt_tcall != NULL);
7437
7438 /*
7439 * Wait for the created kernel threads for I/O to get
7440 * scheduled and run at least once before we proceed
7441 * to mark the interface as attached.
7442 */
7443 lck_mtx_lock(&ifp->if_ref_lock);
7444 while (ifp->if_threads_pending != 0) {
7445 DLIL_PRINTF("%s: Waiting for all kernel threads created for "
7446 "interface %s to get scheduled at least once.\n",
7447 __func__, ifp->if_xname);
7448 (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
7449 __func__, NULL);
7450 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
7451 }
7452 lck_mtx_unlock(&ifp->if_ref_lock);
7453 DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
7454 "at least once. Proceeding.\n", __func__, ifp->if_xname);
7455
7456 /* Finally, mark this ifnet as attached. */
7457 lck_mtx_lock(rnh_lock);
7458 ifnet_lock_exclusive(ifp);
7459 lck_mtx_lock_spin(&ifp->if_ref_lock);
7460 ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
7461 lck_mtx_unlock(&ifp->if_ref_lock);
7462 if (net_rtref) {
7463 /* boot-args override; enable idle notification */
7464 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
7465 IFRF_IDLE_NOTIFY);
7466 } else {
7467 /* apply previous request(s) to set the idle flags, if any */
7468 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
7469 ifp->if_idle_new_flags_mask);
7470 }
7471 ifnet_lock_done(ifp);
7472 lck_mtx_unlock(rnh_lock);
7473 dlil_if_unlock();
7474
7475 #if PF
7476 /*
7477 * Attach packet filter to this interface, if enabled.
7478 */
7479 pf_ifnet_hook(ifp, 1);
7480 #endif /* PF */
7481
7482 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
7483
7484 if (dlil_verbose) {
7485 DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
7486 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
7487 }
7488
7489 return 0;
7490 }
7491
7492 /*
7493 * Prepare the storage for the first/permanent link address, which
7494 * must have the same lifetime as the ifnet itself. Although the link
7495 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
7496 * its location in memory must never change as it may still be referred
7497 * to by some parts of the system afterwards (unfortunate implementation
7498 * artifacts inherited from BSD.)
7499 *
7500 * Caller must hold ifnet lock as writer.
7501 */
7502 static struct ifaddr *
7503 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
7504 {
7505 struct ifaddr *ifa, *oifa;
7506 struct sockaddr_dl *asdl, *msdl;
7507 char workbuf[IFNAMSIZ * 2];
7508 int namelen, masklen, socksize;
7509 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7510
7511 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
7512 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
7513
7514 namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
7515 if_name(ifp));
7516 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
7517 + ((namelen > 0) ? namelen : 0);
7518 socksize = masklen + ifp->if_addrlen;
7519 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
7520 if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
7521 socksize = sizeof(struct sockaddr_dl);
7522 }
7523 socksize = ROUNDUP(socksize);
7524 #undef ROUNDUP
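/*
 * Worked example (illustrative only; the figures assume a typical
 * Ethernet interface named "en0", an 8-byte fixed sockaddr_dl header,
 * sizeof(struct sockaddr_dl) == 20 and a 6-byte hardware address):
 *
 *   namelen  = 3                            ("en0")
 *   masklen  = 8 + 3                 = 11   (header + name)
 *   socksize = 11 + 6                = 17   (+ link-layer address)
 *   socksize = sizeof(sockaddr_dl)   = 20   (raised to the minimum)
 *   socksize = ROUNDUP(20)           = 20   (already 4-byte aligned)
 *
 * In this common case socksize stays well under DLIL_SDLMAXLEN, so the
 * storage embedded in struct dlil_ifnet is used below.
 */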
7525
7526 ifa = ifp->if_lladdr;
7527 if (socksize > DLIL_SDLMAXLEN ||
7528 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
7529 /*
7530 * Rare, but in the event that the link address requires
7531 * more storage space than DLIL_SDLMAXLEN, allocate the
7532 * largest possible storage for address and mask, such
7533 * that we can reuse the same space when if_addrlen grows.
7534 * This same space will be used when if_addrlen shrinks.
7535 */
7536 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
7537 int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
7538 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
7539 if (ifa == NULL) {
7540 return NULL;
7541 }
7542 ifa_lock_init(ifa);
7543 /* Don't set IFD_ALLOC, as this is permanent */
7544 ifa->ifa_debug = IFD_LINK;
7545 }
7546 IFA_LOCK(ifa);
7547 /* address and mask sockaddr_dl locations */
7548 asdl = (struct sockaddr_dl *)(ifa + 1);
7549 bzero(asdl, SOCK_MAXADDRLEN);
7550 msdl = (struct sockaddr_dl *)(void *)
7551 ((char *)asdl + SOCK_MAXADDRLEN);
7552 bzero(msdl, SOCK_MAXADDRLEN);
7553 } else {
7554 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
7555 /*
7556 * Use the storage areas for address and mask within the
7557 * dlil_ifnet structure. This is the most common case.
7558 */
7559 if (ifa == NULL) {
7560 ifa = &dl_if->dl_if_lladdr.ifa;
7561 ifa_lock_init(ifa);
7562 /* Don't set IFD_ALLOC, as this is permanent */
7563 ifa->ifa_debug = IFD_LINK;
7564 }
7565 IFA_LOCK(ifa);
7566 /* address and mask sockaddr_dl locations */
7567 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
7568 bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
7569 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
7570 bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
7571 }
7572
7573 /* hold a permanent reference for the ifnet itself */
7574 IFA_ADDREF_LOCKED(ifa);
7575 oifa = ifp->if_lladdr;
7576 ifp->if_lladdr = ifa;
7577
7578 VERIFY(ifa->ifa_debug == IFD_LINK);
7579 ifa->ifa_ifp = ifp;
7580 ifa->ifa_rtrequest = link_rtrequest;
7581 ifa->ifa_addr = (struct sockaddr *)asdl;
7582 asdl->sdl_len = (u_char)socksize;
7583 asdl->sdl_family = AF_LINK;
7584 if (namelen > 0) {
7585 bcopy(workbuf, asdl->sdl_data, min(namelen,
7586 sizeof(asdl->sdl_data)));
7587 asdl->sdl_nlen = (u_char)namelen;
7588 } else {
7589 asdl->sdl_nlen = 0;
7590 }
7591 asdl->sdl_index = ifp->if_index;
7592 asdl->sdl_type = ifp->if_type;
7593 if (ll_addr != NULL) {
7594 asdl->sdl_alen = ll_addr->sdl_alen;
7595 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
7596 } else {
7597 asdl->sdl_alen = 0;
7598 }
7599 ifa->ifa_netmask = (struct sockaddr *)msdl;
7600 msdl->sdl_len = (u_char)masklen;
7601 while (namelen > 0) {
7602 msdl->sdl_data[--namelen] = 0xff;
7603 }
7604 IFA_UNLOCK(ifa);
7605
7606 if (oifa != NULL) {
7607 IFA_REMREF(oifa);
7608 }
7609
7610 return ifa;
7611 }
7612
7613 static void
7614 if_purgeaddrs(struct ifnet *ifp)
7615 {
7616 #if INET
7617 in_purgeaddrs(ifp);
7618 #endif /* INET */
7619 in6_purgeaddrs(ifp);
7620 }
7621
7622 errno_t
7623 ifnet_detach(ifnet_t ifp)
7624 {
7625 struct ifnet *delegated_ifp;
7626 struct nd_ifinfo *ndi = NULL;
7627
7628 if (ifp == NULL) {
7629 return EINVAL;
7630 }
7631
7632 ndi = ND_IFINFO(ifp);
7633 if (NULL != ndi) {
7634 ndi->cga_initialized = FALSE;
7635 }
7636
7637 lck_mtx_lock(rnh_lock);
7638 ifnet_head_lock_exclusive();
7639 ifnet_lock_exclusive(ifp);
7640
7641 if (ifp->if_output_netem != NULL) {
7642 netem_destroy(ifp->if_output_netem);
7643 ifp->if_output_netem = NULL;
7644 }
7645
7646 /*
7647 * Check to see if this interface has previously triggered
7648 * aggressive protocol draining; if so, decrement the global
7649 * refcnt and clear PR_AGGDRAIN on the route domain if
7650 * no such interfaces remain.
7651 */
7652 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
7653
7654 lck_mtx_lock_spin(&ifp->if_ref_lock);
7655 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
7656 lck_mtx_unlock(&ifp->if_ref_lock);
7657 ifnet_lock_done(ifp);
7658 ifnet_head_done();
7659 lck_mtx_unlock(rnh_lock);
7660 return EINVAL;
7661 } else if (ifp->if_refflags & IFRF_DETACHING) {
7662 /* Interface has already been detached */
7663 lck_mtx_unlock(&ifp->if_ref_lock);
7664 ifnet_lock_done(ifp);
7665 ifnet_head_done();
7666 lck_mtx_unlock(rnh_lock);
7667 return ENXIO;
7668 }
7669 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
7670 /* Indicate this interface is being detached */
7671 ifp->if_refflags &= ~IFRF_ATTACHED;
7672 ifp->if_refflags |= IFRF_DETACHING;
7673 lck_mtx_unlock(&ifp->if_ref_lock);
7674
7675 if (dlil_verbose) {
7676 DLIL_PRINTF("%s: detaching\n", if_name(ifp));
7677 }
7678
7679 /* clean up flow control entry object if there's any */
7680 if (ifp->if_eflags & IFEF_TXSTART) {
7681 ifnet_flowadv(ifp->if_flowhash);
7682 }
7683
7684 /* Reset ECN enable/disable flags */
7685 /* Reset CLAT46 flag */
7686 if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);
7687
7688 /*
7689 * We do not reset the TCP keep-alive counters, in case
7690 * a TCP connection stays connected after the interface
7691 * goes down.
7692 */
7693 if (ifp->if_tcp_kao_cnt > 0) {
7694 os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
7695 __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
7696 }
7697 ifp->if_tcp_kao_max = 0;
7698
7699 /*
7700 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
7701 * no longer be visible during lookups from this point.
7702 */
7703 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
7704 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
7705 ifp->if_link.tqe_next = NULL;
7706 ifp->if_link.tqe_prev = NULL;
7707 if (ifp->if_ordered_link.tqe_next != NULL ||
7708 ifp->if_ordered_link.tqe_prev != NULL) {
7709 ifnet_remove_from_ordered_list(ifp);
7710 }
7711 ifindex2ifnet[ifp->if_index] = NULL;
7712
7713 /* 18717626 - reset router mode */
7714 if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
7715 ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;
7716
7717 /* Record detach PC stacktrace */
7718 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
7719
7720 /* Clear logging parameters */
7721 bzero(&ifp->if_log, sizeof(ifp->if_log));
7722
7723 /* Clear delegated interface info (reference released below) */
7724 delegated_ifp = ifp->if_delegated.ifp;
7725 bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
7726
7727 /* Reset interface state */
7728 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7729
7730 ifnet_lock_done(ifp);
7731 ifnet_head_done();
7732 lck_mtx_unlock(rnh_lock);
7733
7734
7735 /* Release reference held on the delegated interface */
7736 if (delegated_ifp != NULL) {
7737 ifnet_release(delegated_ifp);
7738 }
7739
7740 /* Reset Link Quality Metric (unless loopback [lo0]) */
7741 if (ifp != lo_ifp) {
7742 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
7743 }
7744
7745 /* Reset TCP local statistics */
7746 if (ifp->if_tcp_stat != NULL) {
7747 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
7748 }
7749
7750 /* Reset UDP local statistics */
7751 if (ifp->if_udp_stat != NULL) {
7752 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
7753 }
7754
7755 /* Reset ifnet IPv4 stats */
7756 if (ifp->if_ipv4_stat != NULL) {
7757 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
7758 }
7759
7760 /* Reset ifnet IPv6 stats */
7761 if (ifp->if_ipv6_stat != NULL) {
7762 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
7763 }
7764
7765 /* Release memory held for interface link status report */
7766 if (ifp->if_link_status != NULL) {
7767 FREE(ifp->if_link_status, M_TEMP);
7768 ifp->if_link_status = NULL;
7769 }
7770
7771 /* Clear agent IDs */
7772 if (ifp->if_agentids != NULL) {
7773 FREE(ifp->if_agentids, M_NETAGENT);
7774 ifp->if_agentids = NULL;
7775 }
7776 ifp->if_agentcount = 0;
7777
7778
7779 /* Let BPF know we're detaching */
7780 bpfdetach(ifp);
7781
7782 /* Mark the interface as DOWN */
7783 if_down(ifp);
7784
7785 /* Disable forwarding cached route */
7786 lck_mtx_lock(&ifp->if_cached_route_lock);
7787 ifp->if_fwd_cacheok = 0;
7788 lck_mtx_unlock(&ifp->if_cached_route_lock);
7789
7790 /* Disable data threshold and wait for any pending event posting */
7791 ifp->if_data_threshold = 0;
7792 VERIFY(ifp->if_dt_tcall != NULL);
7793 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
7794
7795 /*
7796 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
7797 * references to the info structures and leave them attached to
7798 * this ifnet.
7799 */
7800 #if INET
7801 igmp_domifdetach(ifp);
7802 #endif /* INET */
7803 mld_domifdetach(ifp);
7804
7805 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
7806
7807 /* Let worker thread take care of the rest, to avoid reentrancy */
7808 dlil_if_lock();
7809 ifnet_detaching_enqueue(ifp);
7810 dlil_if_unlock();
7811
7812 return 0;
7813 }
7814
7815 static void
7816 ifnet_detaching_enqueue(struct ifnet *ifp)
7817 {
7818 dlil_if_lock_assert();
7819
7820 ++ifnet_detaching_cnt;
7821 VERIFY(ifnet_detaching_cnt != 0);
7822 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
7823 wakeup((caddr_t)&ifnet_delayed_run);
7824 }
7825
7826 static struct ifnet *
7827 ifnet_detaching_dequeue(void)
7828 {
7829 struct ifnet *ifp;
7830
7831 dlil_if_lock_assert();
7832
7833 ifp = TAILQ_FIRST(&ifnet_detaching_head);
7834 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
7835 if (ifp != NULL) {
7836 VERIFY(ifnet_detaching_cnt != 0);
7837 --ifnet_detaching_cnt;
7838 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
7839 ifp->if_detaching_link.tqe_next = NULL;
7840 ifp->if_detaching_link.tqe_prev = NULL;
7841 }
7842 return ifp;
7843 }
7844
7845 __attribute__((noreturn))
7846 static void
7847 ifnet_detacher_thread_cont(void *v, wait_result_t wres)
7848 {
7849 #pragma unused(v, wres)
7850 struct ifnet *ifp;
7851
7852 dlil_if_lock();
7853 if (__improbable(ifnet_detaching_embryonic)) {
7854 ifnet_detaching_embryonic = FALSE;
7855 /* there's no lock ordering constraint so OK to do this here */
7856 dlil_decr_pending_thread_count();
7857 }
7858
7859 for (;;) {
7860 dlil_if_lock_assert();
7861
7862 if (ifnet_detaching_cnt == 0) {
7863 break;
7864 }
7865
7866 net_update_uptime();
7867
7868 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
7869
7870 /* Take care of detaching ifnet */
7871 ifp = ifnet_detaching_dequeue();
7872 if (ifp != NULL) {
7873 dlil_if_unlock();
7874 ifnet_detach_final(ifp);
7875 dlil_if_lock();
7876 }
7877 }
7878
7879 (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
7880 dlil_if_unlock();
7881 (void) thread_block(ifnet_detacher_thread_cont);
7882
7883 VERIFY(0); /* we should never get here */
7884 /* NOTREACHED */
7885 __builtin_unreachable();
7886 }
7887
7888 __dead2
7889 static void
7890 ifnet_detacher_thread_func(void *v, wait_result_t w)
7891 {
7892 #pragma unused(v, w)
7893 dlil_if_lock();
7894 (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
7895 ifnet_detaching_embryonic = TRUE;
7896 /* wake up once to get out of embryonic state */
7897 wakeup((caddr_t)&ifnet_delayed_run);
7898 dlil_if_unlock();
7899 (void) thread_block(ifnet_detacher_thread_cont);
7900 VERIFY(0);
7901 /* NOTREACHED */
7902 __builtin_unreachable();
7903 }
7904
7905 static void
7906 ifnet_detach_final(struct ifnet *ifp)
7907 {
7908 struct ifnet_filter *filter, *filter_next;
7909 struct ifnet_filter_head fhead;
7910 struct dlil_threading_info *inp;
7911 struct ifaddr *ifa;
7912 ifnet_detached_func if_free;
7913 int i;
7914
7915 lck_mtx_lock(&ifp->if_ref_lock);
7916 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7917 panic("%s: flags mismatch (detaching not set) ifp=%p",
7918 __func__, ifp);
7919 /* NOTREACHED */
7920 }
7921
7922 /*
7923 * Wait until the existing IO references get released
7924 * before we proceed with ifnet_detach. This is not a
7925 * common case, so block without using a continuation.
7926 */
7927 while (ifp->if_refio > 0) {
7928 DLIL_PRINTF("%s: Waiting for IO references on %s interface "
7929 "to be released\n", __func__, if_name(ifp));
7930 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
7931 (PZERO - 1), "ifnet_ioref_wait", NULL);
7932 }
7933
7934 VERIFY(ifp->if_datamov == 0);
7935 VERIFY(ifp->if_drainers == 0);
7936 VERIFY(ifp->if_suspend == 0);
7937 ifp->if_refflags &= ~IFRF_READY;
7938 lck_mtx_unlock(&ifp->if_ref_lock);
7939
7940 /* Drain and destroy send queue */
7941 ifclassq_teardown(ifp);
7942
7943 /* Detach interface filters */
7944 lck_mtx_lock(&ifp->if_flt_lock);
7945 if_flt_monitor_enter(ifp);
7946
7947 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
7948 fhead = ifp->if_flt_head;
7949 TAILQ_INIT(&ifp->if_flt_head);
7950
7951 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
7952 filter_next = TAILQ_NEXT(filter, filt_next);
7953 lck_mtx_unlock(&ifp->if_flt_lock);
7954
7955 dlil_detach_filter_internal(filter, 1);
7956 lck_mtx_lock(&ifp->if_flt_lock);
7957 }
7958 if_flt_monitor_leave(ifp);
7959 lck_mtx_unlock(&ifp->if_flt_lock);
7960
7961 /* Tell upper layers to drop their network addresses */
7962 if_purgeaddrs(ifp);
7963
7964 ifnet_lock_exclusive(ifp);
7965
7966 /* Unplumb all protocols */
7967 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
7968 struct if_proto *proto;
7969
7970 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7971 while (proto != NULL) {
7972 protocol_family_t family = proto->protocol_family;
7973 ifnet_lock_done(ifp);
7974 proto_unplumb(family, ifp);
7975 ifnet_lock_exclusive(ifp);
7976 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7977 }
7978 /* There should not be any protocols left */
7979 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
7980 }
7981 zfree(dlif_phash_zone, ifp->if_proto_hash);
7982 ifp->if_proto_hash = NULL;
7983
7984 /* Detach (permanent) link address from if_addrhead */
7985 ifa = TAILQ_FIRST(&ifp->if_addrhead);
7986 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
7987 IFA_LOCK(ifa);
7988 if_detach_link_ifa(ifp, ifa);
7989 IFA_UNLOCK(ifa);
7990
7991 /* Remove (permanent) link address from ifnet_addrs[] */
7992 IFA_REMREF(ifa);
7993 ifnet_addrs[ifp->if_index - 1] = NULL;
7994
7995 /* This interface should not be on {ifnet_head,detaching} */
7996 VERIFY(ifp->if_link.tqe_next == NULL);
7997 VERIFY(ifp->if_link.tqe_prev == NULL);
7998 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7999 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
8000 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
8001 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
8002
8003 /* The slot should have been emptied */
8004 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
8005
8006 /* There should not be any addresses left */
8007 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
8008
8009 /*
8010 * Signal the starter thread to terminate itself.
8011 */
8012 if (ifp->if_start_thread != THREAD_NULL) {
8013 lck_mtx_lock_spin(&ifp->if_start_lock);
8014 ifp->if_start_flags = 0;
8015 ifp->if_start_thread = THREAD_NULL;
8016 wakeup_one((caddr_t)&ifp->if_start_thread);
8017 lck_mtx_unlock(&ifp->if_start_lock);
8018 }
8019
8020 /*
8021 * Signal the poller thread to terminate itself.
8022 */
8023 if (ifp->if_poll_thread != THREAD_NULL) {
8024 lck_mtx_lock_spin(&ifp->if_poll_lock);
8025 ifp->if_poll_thread = THREAD_NULL;
8026 wakeup_one((caddr_t)&ifp->if_poll_thread);
8027 lck_mtx_unlock(&ifp->if_poll_lock);
8028 }
8029
8030 /*
8031 * If thread affinity was set for the workloop thread, we will need
8032 * to tear down the affinity and release the extra reference count
8033 * taken at attach time. Does not apply to lo0 or other interfaces
8034 * without dedicated input threads.
8035 */
8036 if ((inp = ifp->if_inp) != NULL) {
8037 VERIFY(inp != dlil_main_input_thread);
8038
8039 if (inp->dlth_affinity) {
8040 struct thread *tp, *wtp, *ptp;
8041
8042 lck_mtx_lock_spin(&inp->dlth_lock);
8043 wtp = inp->dlth_driver_thread;
8044 inp->dlth_driver_thread = THREAD_NULL;
8045 ptp = inp->dlth_poller_thread;
8046 inp->dlth_poller_thread = THREAD_NULL;
8047 ASSERT(inp->dlth_thread != THREAD_NULL);
8048 tp = inp->dlth_thread; /* don't nullify now */
8049 inp->dlth_affinity_tag = 0;
8050 inp->dlth_affinity = FALSE;
8051 lck_mtx_unlock(&inp->dlth_lock);
8052
8053 /* Tear down poll thread affinity */
8054 if (ptp != NULL) {
8055 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
8056 VERIFY(ifp->if_xflags & IFXF_LEGACY);
8057 (void) dlil_affinity_set(ptp,
8058 THREAD_AFFINITY_TAG_NULL);
8059 thread_deallocate(ptp);
8060 }
8061
8062 /* Tear down workloop thread affinity */
8063 if (wtp != NULL) {
8064 (void) dlil_affinity_set(wtp,
8065 THREAD_AFFINITY_TAG_NULL);
8066 thread_deallocate(wtp);
8067 }
8068
8069 /* Tear down DLIL input thread affinity */
8070 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
8071 thread_deallocate(tp);
8072 }
8073
8074 /* disassociate ifp DLIL input thread */
8075 ifp->if_inp = NULL;
8076
8077 /* if the worker thread was created, tell it to terminate */
8078 if (inp->dlth_thread != THREAD_NULL) {
8079 lck_mtx_lock_spin(&inp->dlth_lock);
8080 inp->dlth_flags |= DLIL_INPUT_TERMINATE;
8081 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
8082 wakeup_one((caddr_t)&inp->dlth_flags);
8083 }
8084 lck_mtx_unlock(&inp->dlth_lock);
8085 ifnet_lock_done(ifp);
8086
8087 /* wait for the input thread to terminate */
8088 lck_mtx_lock_spin(&inp->dlth_lock);
8089 while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
8090 == 0) {
8091 (void) msleep(&inp->dlth_flags, &inp->dlth_lock,
8092 (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
8093 }
8094 lck_mtx_unlock(&inp->dlth_lock);
8095 ifnet_lock_exclusive(ifp);
8096 }
8097
8098 /* clean-up input thread state */
8099 dlil_clean_threading_info(inp);
8100 /* clean-up poll parameters */
8101 VERIFY(ifp->if_poll_thread == THREAD_NULL);
8102 dlil_reset_rxpoll_params(ifp);
8103 }
8104
8105 /* The driver might unload, so point these to ourselves */
8106 if_free = ifp->if_free;
8107 ifp->if_output_dlil = ifp_if_output;
8108 ifp->if_output = ifp_if_output;
8109 ifp->if_pre_enqueue = ifp_if_output;
8110 ifp->if_start = ifp_if_start;
8111 ifp->if_output_ctl = ifp_if_ctl;
8112 ifp->if_input_dlil = ifp_if_input;
8113 ifp->if_input_poll = ifp_if_input_poll;
8114 ifp->if_input_ctl = ifp_if_ctl;
8115 ifp->if_ioctl = ifp_if_ioctl;
8116 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
8117 ifp->if_free = ifp_if_free;
8118 ifp->if_demux = ifp_if_demux;
8119 ifp->if_event = ifp_if_event;
8120 ifp->if_framer_legacy = ifp_if_framer;
8121 ifp->if_framer = ifp_if_framer_extended;
8122 ifp->if_add_proto = ifp_if_add_proto;
8123 ifp->if_del_proto = ifp_if_del_proto;
8124 ifp->if_check_multi = ifp_if_check_multi;
8125
8126 /* wipe out interface description */
8127 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
8128 ifp->if_desc.ifd_len = 0;
8129 VERIFY(ifp->if_desc.ifd_desc != NULL);
8130 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
8131
8132 /* there shouldn't be any delegation by now */
8133 VERIFY(ifp->if_delegated.ifp == NULL);
8134 VERIFY(ifp->if_delegated.type == 0);
8135 VERIFY(ifp->if_delegated.family == 0);
8136 VERIFY(ifp->if_delegated.subfamily == 0);
8137 VERIFY(ifp->if_delegated.expensive == 0);
8138 VERIFY(ifp->if_delegated.constrained == 0);
8139
8140 /* QoS marking gets cleared */
8141 if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
8142 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
8143
8144
8145 ifnet_lock_done(ifp);
8146
8147 #if PF
8148 /*
8149 * Detach this interface from packet filter, if enabled.
8150 */
8151 pf_ifnet_hook(ifp, 0);
8152 #endif /* PF */
8153
8154 /* Filter list should be empty */
8155 lck_mtx_lock_spin(&ifp->if_flt_lock);
8156 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
8157 VERIFY(ifp->if_flt_busy == 0);
8158 VERIFY(ifp->if_flt_waiters == 0);
8159 lck_mtx_unlock(&ifp->if_flt_lock);
8160
8161 /* Last chance to drain send queue */
8162 if_qflush(ifp, 0);
8163
8164 /* Last chance to cleanup any cached route */
8165 lck_mtx_lock(&ifp->if_cached_route_lock);
8166 VERIFY(!ifp->if_fwd_cacheok);
8167 ROUTE_RELEASE(&ifp->if_fwd_route);
8168 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
8169 ROUTE_RELEASE(&ifp->if_src_route);
8170 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
8171 ROUTE_RELEASE(&ifp->if_src_route6);
8172 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
8173 lck_mtx_unlock(&ifp->if_cached_route_lock);
8174
8175 VERIFY(ifp->if_data_threshold == 0);
8176 VERIFY(ifp->if_dt_tcall != NULL);
8177 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
8178
8179 ifnet_llreach_ifdetach(ifp);
8180
8181 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
8182
8183 /*
8184 * Finally, mark this ifnet as detached.
8185 */
8186 lck_mtx_lock_spin(&ifp->if_ref_lock);
8187 if (!(ifp->if_refflags & IFRF_DETACHING)) {
8188 panic("%s: flags mismatch (detaching not set) ifp=%p",
8189 __func__, ifp);
8190 /* NOTREACHED */
8191 }
8192 ifp->if_refflags &= ~IFRF_DETACHING;
8193 lck_mtx_unlock(&ifp->if_ref_lock);
8194 if (if_free != NULL) {
8195 if_free(ifp);
8196 }
8197
8198 if (dlil_verbose) {
8199 DLIL_PRINTF("%s: detached\n", if_name(ifp));
8200 }
8201
8202 /* Release reference held during ifnet attach */
8203 ifnet_release(ifp);
8204 }
8205
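/*
 * The ifp_if_* handlers below are inert stubs: they drop packets,
 * report errors, or do nothing. ifnet_detach_final() above points a
 * detached ifnet's callbacks at these so that any straggling callers
 * are handled safely even after the driver's own code has gone away.
 */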
8206 errno_t
8207 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
8208 {
8209 #pragma unused(ifp)
8210 m_freem_list(m);
8211 return 0;
8212 }
8213
8214 void
8215 ifp_if_start(struct ifnet *ifp)
8216 {
8217 ifnet_purge(ifp);
8218 }
8219
8220 static errno_t
8221 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
8222 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
8223 boolean_t poll, struct thread *tp)
8224 {
8225 #pragma unused(ifp, m_tail, s, poll, tp)
8226 m_freem_list(m_head);
8227 return ENXIO;
8228 }
8229
8230 static void
8231 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
8232 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
8233 {
8234 #pragma unused(ifp, flags, max_cnt)
8235 if (m_head != NULL) {
8236 *m_head = NULL;
8237 }
8238 if (m_tail != NULL) {
8239 *m_tail = NULL;
8240 }
8241 if (cnt != NULL) {
8242 *cnt = 0;
8243 }
8244 if (len != NULL) {
8245 *len = 0;
8246 }
8247 }
8248
8249 static errno_t
8250 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
8251 {
8252 #pragma unused(ifp, cmd, arglen, arg)
8253 return EOPNOTSUPP;
8254 }
8255
8256 static errno_t
8257 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
8258 {
8259 #pragma unused(ifp, fh, pf)
8260 m_freem(m);
8261 return EJUSTRETURN;
8262 }
8263
8264 static errno_t
8265 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
8266 const struct ifnet_demux_desc *da, u_int32_t dc)
8267 {
8268 #pragma unused(ifp, pf, da, dc)
8269 return EINVAL;
8270 }
8271
8272 static errno_t
8273 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
8274 {
8275 #pragma unused(ifp, pf)
8276 return EINVAL;
8277 }
8278
8279 static errno_t
8280 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
8281 {
8282 #pragma unused(ifp, sa)
8283 return EOPNOTSUPP;
8284 }
8285
8286 #if !XNU_TARGET_OS_OSX
8287 static errno_t
8288 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8289 const struct sockaddr *sa, const char *ll, const char *t,
8290 u_int32_t *pre, u_int32_t *post)
8291 #else /* XNU_TARGET_OS_OSX */
8292 static errno_t
8293 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8294 const struct sockaddr *sa, const char *ll, const char *t)
8295 #endif /* XNU_TARGET_OS_OSX */
8296 {
8297 #pragma unused(ifp, m, sa, ll, t)
8298 #if !XNU_TARGET_OS_OSX
8299 return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
8300 #else /* XNU_TARGET_OS_OSX */
8301 return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
8302 #endif /* XNU_TARGET_OS_OSX */
8303 }
8304
8305 static errno_t
8306 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
8307 const struct sockaddr *sa, const char *ll, const char *t,
8308 u_int32_t *pre, u_int32_t *post)
8309 {
8310 #pragma unused(ifp, sa, ll, t)
8311 m_freem(*m);
8312 *m = NULL;
8313
8314 if (pre != NULL) {
8315 *pre = 0;
8316 }
8317 if (post != NULL) {
8318 *post = 0;
8319 }
8320
8321 return EJUSTRETURN;
8322 }
8323
8324 errno_t
8325 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
8326 {
8327 #pragma unused(ifp, cmd, arg)
8328 return EOPNOTSUPP;
8329 }
8330
8331 static errno_t
8332 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
8333 {
8334 #pragma unused(ifp, tm, f)
8335 /* XXX not sure what to do here */
8336 return 0;
8337 }
8338
8339 static void
8340 ifp_if_free(struct ifnet *ifp)
8341 {
8342 #pragma unused(ifp)
8343 }
8344
8345 static void
8346 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
8347 {
8348 #pragma unused(ifp, e)
8349 }
8350
8351 int
8352 dlil_if_acquire(u_int32_t family, const void *uniqueid,
8353 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
8354 {
8355 struct ifnet *ifp1 = NULL;
8356 struct dlil_ifnet *dlifp1 = NULL;
8357 struct dlil_ifnet *dlifp1_saved = NULL;
8358 void *buf, *base, **pbuf;
8359 int ret = 0;
8360
8361 VERIFY(*ifp == NULL);
8362 dlil_if_lock();
8363 /*
8364 * We absolutely can't have two in-use interfaces with the
8365 * same name; to make sure of that, the list has to be
8366 * traversed completely.
8367 */
8368 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
8369 ifp1 = (struct ifnet *)dlifp1;
8370
8371 if (ifp1->if_family != family) {
8372 continue;
8373 }
8374
8375 /*
8376 * If the interface is in use, return EBUSY if either the unique
8377 * id or the interface extended name is the same
8378 */
8379 lck_mtx_lock(&dlifp1->dl_if_lock);
8380 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
8381 if (dlifp1->dl_if_flags & DLIF_INUSE) {
8382 lck_mtx_unlock(&dlifp1->dl_if_lock);
8383 ret = EBUSY;
8384 goto end;
8385 }
8386 }
8387
8388 if (uniqueid_len) {
8389 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
8390 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
8391 if (dlifp1->dl_if_flags & DLIF_INUSE) {
8392 lck_mtx_unlock(&dlifp1->dl_if_lock);
8393 ret = EBUSY;
8394 goto end;
8395 } else {
8396 /* Cache the first interface that can be recycled */
8397 if (*ifp == NULL) {
8398 *ifp = ifp1;
8399 dlifp1_saved = dlifp1;
8400 }
8401 /*
8402 * XXX Do not break or jump to end as we have to traverse
8403 * the whole list to ensure there are no name collisions
8404 */
8405 }
8406 }
8407 }
8408 lck_mtx_unlock(&dlifp1->dl_if_lock);
8409 }
8410
8411 /* If there's an interface that can be recycled, use that */
8412 if (*ifp != NULL) {
8413 if (dlifp1_saved != NULL) {
8414 lck_mtx_lock(&dlifp1_saved->dl_if_lock);
8415 dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
8416 lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
8417 dlifp1_saved = NULL;
8418 }
8419 goto end;
8420 }
8421
8422 /* no interface found, allocate a new one */
8423 buf = zalloc_flags(dlif_zone, Z_WAITOK | Z_ZERO);
8424 if (buf == NULL) {
8425 ret = ENOMEM;
8426 goto end;
8427 }
8428
8429 /* Get the 64-bit aligned base address for this object */
8430 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
8431 sizeof(u_int64_t));
8432 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
8433
8434 /*
8435 * Wind back a pointer size from the aligned base and
8436 * save the original address so we can free it later.
8437 */
8438 pbuf = (void **)((intptr_t)base - sizeof(void *));
8439 *pbuf = buf;
8440 dlifp1 = base;
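/*
 * Illustrative layout of the allocation above (not to scale):
 *
 *   buf                          base = P2ROUNDUP(buf + 8, 8)
 *    |                             |
 *    v                             v
 *    +-----------+---------------+---------------------------+
 *    | (padding) | saved pointer |  struct dlil_ifnet ...    |
 *    +-----------+---------------+---------------------------+
 *                       ^
 *                 pbuf = base - sizeof(void *), *pbuf == buf
 *
 * The code that eventually frees this storage can recover the
 * original zalloc'ed pointer from the word immediately preceding
 * the aligned base.
 */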
8441
8442 if (uniqueid_len) {
8443 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
8444 M_NKE, M_WAITOK);
8445 if (dlifp1->dl_if_uniqueid == NULL) {
8446 zfree(dlif_zone, buf);
8447 ret = ENOMEM;
8448 goto end;
8449 }
8450 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
8451 dlifp1->dl_if_uniqueid_len = uniqueid_len;
8452 }
8453
8454 ifp1 = (struct ifnet *)dlifp1;
8455 dlifp1->dl_if_flags = DLIF_INUSE;
8456 if (ifnet_debug) {
8457 dlifp1->dl_if_flags |= DLIF_DEBUG;
8458 dlifp1->dl_if_trace = dlil_if_trace;
8459 }
8460 ifp1->if_name = dlifp1->dl_if_namestorage;
8461 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
8462
8463 /* initialize interface description */
8464 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
8465 ifp1->if_desc.ifd_len = 0;
8466 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
8467
8468
8469 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
8470 DLIL_PRINTF("%s: failed to allocate if local stats, "
8471 "error: %d\n", __func__, ret);
8472 /* This probably shouldn't be fatal */
8473 ret = 0;
8474 }
8475
8476 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
8477 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
8478 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
8479 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
8480 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
8481 ifnet_lock_attr);
8482 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
8483 #if INET
8484 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
8485 ifnet_lock_attr);
8486 ifp1->if_inetdata = NULL;
8487 #endif
8488 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
8489 ifnet_lock_attr);
8490 ifp1->if_inet6data = NULL;
8491 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
8492 ifnet_lock_attr);
8493 ifp1->if_link_status = NULL;
8494
8495 /* for send data paths */
8496 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
8497 ifnet_lock_attr);
8498 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
8499 ifnet_lock_attr);
8500 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
8501 ifnet_lock_attr);
8502
8503 /* for receive data paths */
8504 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
8505 ifnet_lock_attr);
8506
8507 /* thread call allocation is done with sleeping zalloc */
8508 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
8509 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
8510 if (ifp1->if_dt_tcall == NULL) {
8511 panic_plain("%s: couldn't create if_dt_tcall", __func__);
8512 /* NOTREACHED */
8513 }
8514
8515 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
8516
8517 *ifp = ifp1;
8518
8519 end:
8520 dlil_if_unlock();
8521
8522 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
8523 IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
8524
8525 return ret;
8526 }
8527
8528 __private_extern__ void
8529 dlil_if_release(ifnet_t ifp)
8530 {
8531 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
8532
8533 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
8534 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
8535 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
8536 }
8537
8538 ifnet_lock_exclusive(ifp);
8539 lck_mtx_lock(&dlifp->dl_if_lock);
8540 dlifp->dl_if_flags &= ~DLIF_INUSE;
8541 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
8542 ifp->if_name = dlifp->dl_if_namestorage;
8543 /* Reset external name (name + unit) */
8544 ifp->if_xname = dlifp->dl_if_xnamestorage;
8545 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
8546 "%s?", ifp->if_name);
8547 lck_mtx_unlock(&dlifp->dl_if_lock);
8548 ifnet_lock_done(ifp);
8549 }
8550
8551 __private_extern__ void
8552 dlil_if_lock(void)
8553 {
8554 lck_mtx_lock(&dlil_ifnet_lock);
8555 }
8556
8557 __private_extern__ void
8558 dlil_if_unlock(void)
8559 {
8560 lck_mtx_unlock(&dlil_ifnet_lock);
8561 }
8562
8563 __private_extern__ void
8564 dlil_if_lock_assert(void)
8565 {
8566 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
8567 }
8568
8569 __private_extern__ void
8570 dlil_proto_unplumb_all(struct ifnet *ifp)
8571 {
8572 /*
8573 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
8574 * each bucket contains exactly one entry; PF_VLAN does not need an
8575 * explicit unplumb.
8576 *
8577 * if_proto_hash[3] is for other protocols; we expect anything
8578 * in this bucket to respond to the DETACHING event (which would
8579 * have happened by now) and do the unplumb then.
8580 */
8581 (void) proto_unplumb(PF_INET, ifp);
8582 (void) proto_unplumb(PF_INET6, ifp);
8583 }
8584
8585 static void
8586 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
8587 {
8588 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8589 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8590
8591 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
8592
8593 lck_mtx_unlock(&ifp->if_cached_route_lock);
8594 }
8595
8596 static void
8597 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
8598 {
8599 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8600 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8601
8602 if (ifp->if_fwd_cacheok) {
8603 route_copyin(src, &ifp->if_src_route, sizeof(*src));
8604 } else {
8605 ROUTE_RELEASE(src);
8606 }
8607 lck_mtx_unlock(&ifp->if_cached_route_lock);
8608 }
8609
8610 static void
8611 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
8612 {
8613 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8614 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8615
8616 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
8617 sizeof(*dst));
8618
8619 lck_mtx_unlock(&ifp->if_cached_route_lock);
8620 }
8621
8622 static void
8623 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
8624 {
8625 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8626 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8627
8628 if (ifp->if_fwd_cacheok) {
8629 route_copyin((struct route *)src,
8630 (struct route *)&ifp->if_src_route6, sizeof(*src));
8631 } else {
8632 ROUTE_RELEASE(src);
8633 }
8634 lck_mtx_unlock(&ifp->if_cached_route_lock);
8635 }
8636
8637 struct rtentry *
8638 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
8639 {
8640 struct route src_rt;
8641 struct sockaddr_in *dst;
8642
8643 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
8644
8645 ifp_src_route_copyout(ifp, &src_rt);
8646
8647 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
8648 ROUTE_RELEASE(&src_rt);
8649 if (dst->sin_family != AF_INET) {
8650 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8651 dst->sin_len = sizeof(src_rt.ro_dst);
8652 dst->sin_family = AF_INET;
8653 }
8654 dst->sin_addr = src_ip;
8655
8656 VERIFY(src_rt.ro_rt == NULL);
8657 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
8658 0, 0, ifp->if_index);
8659
8660 if (src_rt.ro_rt != NULL) {
8661 /* retain a ref, copyin consumes one */
8662 struct rtentry *rte = src_rt.ro_rt;
8663 RT_ADDREF(rte);
8664 ifp_src_route_copyin(ifp, &src_rt);
8665 src_rt.ro_rt = rte;
8666 }
8667 }
8668
8669 return src_rt.ro_rt;
8670 }
8671
8672 struct rtentry *
8673 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
8674 {
8675 struct route_in6 src_rt;
8676
8677 ifp_src_route6_copyout(ifp, &src_rt);
8678
8679 if (ROUTE_UNUSABLE(&src_rt) ||
8680 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
8681 ROUTE_RELEASE(&src_rt);
8682 if (src_rt.ro_dst.sin6_family != AF_INET6) {
8683 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8684 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
8685 src_rt.ro_dst.sin6_family = AF_INET6;
8686 }
8687 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
8688 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
8689 sizeof(src_rt.ro_dst.sin6_addr));
8690
8691 if (src_rt.ro_rt == NULL) {
8692 src_rt.ro_rt = rtalloc1_scoped(
8693 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
8694 ifp->if_index);
8695
8696 if (src_rt.ro_rt != NULL) {
8697 /* retain a ref, copyin consumes one */
8698 struct rtentry *rte = src_rt.ro_rt;
8699 RT_ADDREF(rte);
8700 ifp_src_route6_copyin(ifp, &src_rt);
8701 src_rt.ro_rt = rte;
8702 }
8703 }
8704 }
8705
8706 return src_rt.ro_rt;
8707 }
8708
8709 void
8710 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
8711 {
8712 struct kev_dl_link_quality_metric_data ev_lqm_data;
8713
8714 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
8715
8716 /* Normalize to edge */
8717 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
8718 lqm = IFNET_LQM_THRESH_ABORT;
8719 atomic_bitset_32(&tcbinfo.ipi_flags,
8720 INPCBINFO_HANDLE_LQM_ABORT);
8721 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
8722 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
8723 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
8724 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
8725 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
8726 lqm <= IFNET_LQM_THRESH_POOR) {
8727 lqm = IFNET_LQM_THRESH_POOR;
8728 } else if (lqm > IFNET_LQM_THRESH_POOR &&
8729 lqm <= IFNET_LQM_THRESH_GOOD) {
8730 lqm = IFNET_LQM_THRESH_GOOD;
8731 }
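/*
 * Note: the chain above snaps a raw, caller-supplied metric to the
 * upper edge of the band it falls in, so the rest of the system only
 * ever sees one of the discrete IFNET_LQM_THRESH_* values; e.g. a raw
 * value just above IFNET_LQM_THRESH_POOR but not exceeding
 * IFNET_LQM_THRESH_GOOD is reported as IFNET_LQM_THRESH_GOOD.
 */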
8732
8733 /*
8734 * Take the lock if needed
8735 */
8736 if (!locked) {
8737 ifnet_lock_exclusive(ifp);
8738 }
8739
8740 if (lqm == ifp->if_interface_state.lqm_state &&
8741 (ifp->if_interface_state.valid_bitmask &
8742 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
8743 /*
8744 * Release the lock if it was not held by the caller
8745 */
8746 if (!locked) {
8747 ifnet_lock_done(ifp);
8748 }
8749 return; /* nothing to update */
8750 }
8751 ifp->if_interface_state.valid_bitmask |=
8752 IF_INTERFACE_STATE_LQM_STATE_VALID;
8753 ifp->if_interface_state.lqm_state = (int8_t)lqm;
8754
8755 /*
8756 * Don't want to hold the lock when issuing kernel events
8757 */
8758 ifnet_lock_done(ifp);
8759
8760 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
8761 ev_lqm_data.link_quality_metric = lqm;
8762
8763 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
8764 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
8765
8766 /*
8767 * Reacquire the lock for the caller
8768 */
8769 if (locked) {
8770 ifnet_lock_exclusive(ifp);
8771 }
8772 }
8773
8774 static void
8775 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
8776 {
8777 struct kev_dl_rrc_state kev;
8778
8779 if (rrc_state == ifp->if_interface_state.rrc_state &&
8780 (ifp->if_interface_state.valid_bitmask &
8781 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8782 return;
8783 }
8784
8785 ifp->if_interface_state.valid_bitmask |=
8786 IF_INTERFACE_STATE_RRC_STATE_VALID;
8787
8788 ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;
8789
8790 /*
8791 * Don't want to hold the lock when issuing kernel events
8792 */
8793 ifnet_lock_done(ifp);
8794
8795 bzero(&kev, sizeof(struct kev_dl_rrc_state));
8796 kev.rrc_state = rrc_state;
8797
8798 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
8799 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
8800
8801 ifnet_lock_exclusive(ifp);
8802 }
8803
8804 errno_t
8805 if_state_update(struct ifnet *ifp,
8806 struct if_interface_state *if_interface_state)
8807 {
8808 u_short if_index_available = 0;
8809
8810 ifnet_lock_exclusive(ifp);
8811
8812 if ((ifp->if_type != IFT_CELLULAR) &&
8813 (if_interface_state->valid_bitmask &
8814 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8815 ifnet_lock_done(ifp);
8816 return ENOTSUP;
8817 }
8818 if ((if_interface_state->valid_bitmask &
8819 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
8820 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
8821 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
8822 ifnet_lock_done(ifp);
8823 return EINVAL;
8824 }
8825 if ((if_interface_state->valid_bitmask &
8826 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
8827 if_interface_state->rrc_state !=
8828 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
8829 if_interface_state->rrc_state !=
8830 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
8831 ifnet_lock_done(ifp);
8832 return EINVAL;
8833 }
8834
8835 if (if_interface_state->valid_bitmask &
8836 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8837 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
8838 }
8839 if (if_interface_state->valid_bitmask &
8840 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8841 if_rrc_state_update(ifp, if_interface_state->rrc_state);
8842 }
8843 if (if_interface_state->valid_bitmask &
8844 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8845 ifp->if_interface_state.valid_bitmask |=
8846 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8847 ifp->if_interface_state.interface_availability =
8848 if_interface_state->interface_availability;
8849
8850 if (ifp->if_interface_state.interface_availability ==
8851 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
8852 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
8853 __func__, if_name(ifp), ifp->if_index);
8854 if_index_available = ifp->if_index;
8855 } else {
8856 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
8857 __func__, if_name(ifp), ifp->if_index);
8858 }
8859 }
8860 ifnet_lock_done(ifp);
8861
8862 /*
8863 * Check if the TCP connections on this interface should be
8864 * forced to send probe packets instead of waiting for TCP timers
8865 * to fire. This is done on an explicit notification such as
8866 * SIOCSIFINTERFACESTATE which marks the interface as available.
8867 */
8868 if (if_index_available > 0) {
8869 tcp_interface_send_probe(if_index_available);
8870 }
8871
8872 return 0;
8873 }
8874
8875 void
8876 if_get_state(struct ifnet *ifp,
8877 struct if_interface_state *if_interface_state)
8878 {
8879 ifnet_lock_shared(ifp);
8880
8881 if_interface_state->valid_bitmask = 0;
8882
8883 if (ifp->if_interface_state.valid_bitmask &
8884 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8885 if_interface_state->valid_bitmask |=
8886 IF_INTERFACE_STATE_RRC_STATE_VALID;
8887 if_interface_state->rrc_state =
8888 ifp->if_interface_state.rrc_state;
8889 }
8890 if (ifp->if_interface_state.valid_bitmask &
8891 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8892 if_interface_state->valid_bitmask |=
8893 IF_INTERFACE_STATE_LQM_STATE_VALID;
8894 if_interface_state->lqm_state =
8895 ifp->if_interface_state.lqm_state;
8896 }
8897 if (ifp->if_interface_state.valid_bitmask &
8898 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8899 if_interface_state->valid_bitmask |=
8900 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8901 if_interface_state->interface_availability =
8902 ifp->if_interface_state.interface_availability;
8903 }
8904
8905 ifnet_lock_done(ifp);
8906 }
8907
8908 errno_t
8909 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8910 {
8911 if (conn_probe > 1) {
8912 return EINVAL;
8913 }
8914 if (conn_probe == 0) {
8915 if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
8916 } else {
8917 if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
8918 }
8919
8920 #if NECP
8921 necp_update_all_clients();
8922 #endif /* NECP */
8923
8924 tcp_probe_connectivity(ifp, conn_probe);
8925 return 0;
8926 }
8927
8928 /* for uuid.c */
8929 static int
8930 get_ether_index(int * ret_other_index)
8931 {
8932 struct ifnet *ifp;
8933 int en0_index = 0;
8934 int other_en_index = 0;
8935 int any_ether_index = 0;
8936 short best_unit = 0;
8937
8938 *ret_other_index = 0;
8939 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
8940 /*
8941 * find en0, or if not en0, the lowest unit en*, and if not
8942 * that, any ethernet
8943 */
8944 ifnet_lock_shared(ifp);
8945 if (strcmp(ifp->if_name, "en") == 0) {
8946 if (ifp->if_unit == 0) {
8947 /* found en0, we're done */
8948 en0_index = ifp->if_index;
8949 ifnet_lock_done(ifp);
8950 break;
8951 }
8952 if (other_en_index == 0 || ifp->if_unit < best_unit) {
8953 other_en_index = ifp->if_index;
8954 best_unit = ifp->if_unit;
8955 }
8956 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
8957 any_ether_index = ifp->if_index;
8958 }
8959 ifnet_lock_done(ifp);
8960 }
8961 if (en0_index == 0) {
8962 if (other_en_index != 0) {
8963 *ret_other_index = other_en_index;
8964 } else if (any_ether_index != 0) {
8965 *ret_other_index = any_ether_index;
8966 }
8967 }
8968 return en0_index;
8969 }
8970
8971 int
8972 uuid_get_ethernet(u_int8_t *node)
8973 {
8974 static int en0_index;
8975 struct ifnet *ifp;
8976 int other_index = 0;
8977 int the_index = 0;
8978 int ret;
8979
8980 ifnet_head_lock_shared();
8981 if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
8982 en0_index = get_ether_index(&other_index);
8983 }
8984 if (en0_index != 0) {
8985 the_index = en0_index;
8986 } else if (other_index != 0) {
8987 the_index = other_index;
8988 }
8989 if (the_index != 0) {
8990 struct dlil_ifnet *dl_if;
8991
8992 ifp = ifindex2ifnet[the_index];
8993 VERIFY(ifp != NULL);
8994 dl_if = (struct dlil_ifnet *)ifp;
8995 if (dl_if->dl_if_permanent_ether_is_set != 0) {
8996 /*
8997 * Use the permanent ethernet address if it is
8998 * available because it will never change.
8999 */
9000 memcpy(node, dl_if->dl_if_permanent_ether,
9001 ETHER_ADDR_LEN);
9002 } else {
9003 memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
9004 }
9005 ret = 0;
9006 } else {
9007 ret = -1;
9008 }
9009 ifnet_head_done();
9010 return ret;
9011 }
9012
9013 static int
9014 sysctl_rxpoll SYSCTL_HANDLER_ARGS
9015 {
9016 #pragma unused(arg1, arg2)
9017 uint32_t i;
9018 int err;
9019
9020 i = if_rxpoll;
9021
9022 err = sysctl_handle_int(oidp, &i, 0, req);
9023 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9024 return err;
9025 }
9026
9027 if (net_rxpoll == 0) {
9028 return ENXIO;
9029 }
9030
9031 if_rxpoll = i;
9032 return err;
9033 }
9034
9035 static int
9036 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
9037 {
9038 #pragma unused(arg1, arg2)
9039 uint64_t q;
9040 int err;
9041
9042 q = if_rxpoll_mode_holdtime;
9043
9044 err = sysctl_handle_quad(oidp, &q, 0, req);
9045 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9046 return err;
9047 }
9048
9049 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
9050 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
9051 }
9052
9053 if_rxpoll_mode_holdtime = q;
9054
9055 return err;
9056 }
9057
9058 static int
9059 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
9060 {
9061 #pragma unused(arg1, arg2)
9062 uint64_t q;
9063 int err;
9064
9065 q = if_rxpoll_sample_holdtime;
9066
9067 err = sysctl_handle_quad(oidp, &q, 0, req);
9068 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9069 return err;
9070 }
9071
9072 if (q < IF_RXPOLL_SAMPLETIME_MIN) {
9073 q = IF_RXPOLL_SAMPLETIME_MIN;
9074 }
9075
9076 if_rxpoll_sample_holdtime = q;
9077
9078 return err;
9079 }
9080
9081 static int
9082 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
9083 {
9084 #pragma unused(arg1, arg2)
9085 uint64_t q;
9086 int err;
9087
9088 q = if_rxpoll_interval_time;
9089
9090 err = sysctl_handle_quad(oidp, &q, 0, req);
9091 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9092 return err;
9093 }
9094
9095 if (q < IF_RXPOLL_INTERVALTIME_MIN) {
9096 q = IF_RXPOLL_INTERVALTIME_MIN;
9097 }
9098
9099 if_rxpoll_interval_time = q;
9100
9101 return err;
9102 }
9103
9104 static int
9105 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
9106 {
9107 #pragma unused(arg1, arg2)
9108 uint32_t i;
9109 int err;
9110
9111 i = if_sysctl_rxpoll_wlowat;
9112
9113 err = sysctl_handle_int(oidp, &i, 0, req);
9114 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9115 return err;
9116 }
9117
9118 if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
9119 return EINVAL;
9120 }
9121
9122 if_sysctl_rxpoll_wlowat = i;
9123 return err;
9124 }
9125
9126 static int
9127 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
9128 {
9129 #pragma unused(arg1, arg2)
9130 uint32_t i;
9131 int err;
9132
9133 i = if_sysctl_rxpoll_whiwat;
9134
9135 err = sysctl_handle_int(oidp, &i, 0, req);
9136 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9137 return err;
9138 }
9139
9140 if (i <= if_sysctl_rxpoll_wlowat) {
9141 return EINVAL;
9142 }
9143
9144 if_sysctl_rxpoll_whiwat = i;
9145 return err;
9146 }
9147
9148 static int
9149 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
9150 {
9151 #pragma unused(arg1, arg2)
9152 int i, err;
9153
9154 i = if_sndq_maxlen;
9155
9156 err = sysctl_handle_int(oidp, &i, 0, req);
9157 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9158 return err;
9159 }
9160
9161 if (i < IF_SNDQ_MINLEN) {
9162 i = IF_SNDQ_MINLEN;
9163 }
9164
9165 if_sndq_maxlen = i;
9166 return err;
9167 }
9168
9169 static int
9170 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
9171 {
9172 #pragma unused(arg1, arg2)
9173 int i, err;
9174
9175 i = if_rcvq_maxlen;
9176
9177 err = sysctl_handle_int(oidp, &i, 0, req);
9178 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9179 return err;
9180 }
9181
9182 if (i < IF_RCVQ_MINLEN) {
9183 i = IF_RCVQ_MINLEN;
9184 }
9185
9186 if_rcvq_maxlen = i;
9187 return err;
9188 }
9189
9190 int
9191 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
9192 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9193 {
9194 struct kev_dl_node_presence kev;
9195 struct sockaddr_dl *sdl;
9196 struct sockaddr_in6 *sin6;
9197 int ret = 0;
9198
9199 VERIFY(ifp);
9200 VERIFY(sa);
9201 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9202
9203 bzero(&kev, sizeof(kev));
9204 sin6 = &kev.sin6_node_address;
9205 sdl = &kev.sdl_node_address;
9206 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
9207 kev.rssi = rssi;
9208 kev.link_quality_metric = lqm;
9209 kev.node_proximity_metric = npm;
9210 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9211
9212 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
9213 if (ret == 0) {
9214 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9215 &kev.link_data, sizeof(kev));
9216 if (err != 0) {
9217 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
9218 "error %d\n", __func__, err);
9219 }
9220 }
9221 return ret;
9222 }
9223
9224 void
9225 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
9226 {
9227 struct kev_dl_node_absence kev = {};
9228 struct sockaddr_in6 *kev_sin6 = NULL;
9229 struct sockaddr_dl *kev_sdl = NULL;
9230
9231 VERIFY(ifp != NULL);
9232 VERIFY(sa != NULL);
9233 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9234
9235 kev_sin6 = &kev.sin6_node_address;
9236 kev_sdl = &kev.sdl_node_address;
9237
9238 if (sa->sa_family == AF_INET6) {
9239 /*
9240 * If an IPv6 address is given, get the link-layer
9241 * address that was cached in the neighbor cache.
9242 */
9243 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9244 bcopy(sa, kev_sin6, sa->sa_len);
9245 nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
9246 } else {
9247 /*
9248 * If the passed address is of AF_LINK type, derive the
9249 * IPv6 address from the link-layer address.
9250 */
9251 nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
9252 nd6_alt_node_absent(ifp, kev_sin6, NULL);
9253 }
9254
9255 kev_sdl->sdl_type = ifp->if_type;
9256 kev_sdl->sdl_index = ifp->if_index;
9257
9258 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
9259 &kev.link_data, sizeof(kev));
9260 }
9261
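/*
 * Variant of dlil_node_present() that takes the IPv6 address and the
 * link-layer address as two separate sockaddrs instead of decomposing a
 * single one, so the caller controls both values carried in the posted
 * event.
 */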
9262 int
9263 dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
9264 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9265 {
9266 struct kev_dl_node_presence kev = {};
9267 struct sockaddr_dl *kev_sdl = NULL;
9268 struct sockaddr_in6 *kev_sin6 = NULL;
9269 int ret = 0;
9270
9271 VERIFY(ifp != NULL);
9272 VERIFY(sa != NULL && sdl != NULL);
9273 VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
9274
9275 kev_sin6 = &kev.sin6_node_address;
9276 kev_sdl = &kev.sdl_node_address;
9277
9278 VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
9279 bcopy(sdl, kev_sdl, sdl->sdl_len);
9280 kev_sdl->sdl_type = ifp->if_type;
9281 kev_sdl->sdl_index = ifp->if_index;
9282
9283 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9284 bcopy(sa, kev_sin6, sa->sa_len);
9285
9286 kev.rssi = rssi;
9287 kev.link_quality_metric = lqm;
9288 kev.node_proximity_metric = npm;
9289 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9290
9291 ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
9292 if (ret == 0) {
9293 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9294 &kev.link_data, sizeof(kev));
9295 if (err != 0) {
9296 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
9297 }
9298 }
9299 return ret;
9300 }
9301
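/*
 * Return the raw link-layer address bytes for an interface address.  When
 * MAC policy requires it (dlil_lladdr_ckreq), Ethernet and FireWire
 * addresses are only revealed to credentials that pass the
 * "net.link.addr" check; callers that fail the check get a fixed
 * "unspecified" EUI-64-sized pattern instead of the real bytes.
 */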
9302 const void *
9303 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
9304 kauth_cred_t *credp)
9305 {
9306 const u_int8_t *bytes;
9307 size_t size;
9308
9309 bytes = CONST_LLADDR(sdl);
9310 size = sdl->sdl_alen;
9311
9312 #if CONFIG_MACF
9313 if (dlil_lladdr_ckreq) {
9314 switch (sdl->sdl_type) {
9315 case IFT_ETHER:
9316 case IFT_IEEE1394:
9317 break;
9318 default:
9319 credp = NULL;
9320 break;
9321 }
9323
9324 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
9325 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
9326 [0] = 2
9327 };
9328
9329 bytes = unspec;
9330 }
9331 }
9332 #else
9333 #pragma unused(credp)
9334 #endif
9335
9336 if (sizep != NULL) {
9337 *sizep = size;
9338 }
9339 return bytes;
9340 }
9341
9342 void
9343 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
9344 u_int8_t info[DLIL_MODARGLEN])
9345 {
9346 struct kev_dl_issues kev;
9347 struct timeval tv;
9348
9349 VERIFY(ifp != NULL);
9350 VERIFY(modid != NULL);
9351 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
9352 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
9353
9354 bzero(&kev, sizeof(kev));
9355
9356 microtime(&tv);
9357 kev.timestamp = tv.tv_sec;
9358 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
9359 if (info != NULL) {
9360 bcopy(info, &kev.info, DLIL_MODARGLEN);
9361 }
9362
9363 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
9364 &kev.link_data, sizeof(kev));
9365 }
9366
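/*
 * ioctl backend for SIOCSIFOPPORTUNISTIC / SIOCGIFOPPORTUNISTIC.  Setting
 * requires super-user credentials and maps the request onto
 * ifnet_set_throttle(); both directions report the number of
 * opportunistic TCP/UDP connections currently using the interface in
 * ifo_inuse.
 *
 * Illustrative user-space sketch (not code from this file; "en0" and the
 * socket setup are assumed, error handling omitted):
 *
 *	struct ifreq ifr = {};
 *	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *	ifr.ifr_opportunistic.ifo_flags = IFRIFOF_BLOCK_OPPORTUNISTIC;
 *	(void) ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr);
 */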
9367 errno_t
9368 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9369 struct proc *p)
9370 {
9371 u_int32_t level = IFNET_THROTTLE_OFF;
9372 errno_t result = 0;
9373
9374 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
9375
9376 if (cmd == SIOCSIFOPPORTUNISTIC) {
9377 /*
9378 * XXX: Use priv_check_cred() instead of root check?
9379 */
9380 if ((result = proc_suser(p)) != 0) {
9381 return result;
9382 }
9383
9384 if (ifr->ifr_opportunistic.ifo_flags ==
9385 IFRIFOF_BLOCK_OPPORTUNISTIC) {
9386 level = IFNET_THROTTLE_OPPORTUNISTIC;
9387 } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
9388 level = IFNET_THROTTLE_OFF;
9389 } else {
9390 result = EINVAL;
9391 }
9392
9393 if (result == 0) {
9394 result = ifnet_set_throttle(ifp, level);
9395 }
9396 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
9397 ifr->ifr_opportunistic.ifo_flags = 0;
9398 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
9399 ifr->ifr_opportunistic.ifo_flags |=
9400 IFRIFOF_BLOCK_OPPORTUNISTIC;
9401 }
9402 }
9403
9404 /*
9405 * Return the count of current opportunistic connections
9406 * over the interface.
9407 */
9408 if (result == 0) {
9409 uint32_t flags = 0;
9410 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
9411 INPCB_OPPORTUNISTIC_SETCMD : 0;
9412 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
9413 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
9414 ifr->ifr_opportunistic.ifo_inuse =
9415 udp_count_opportunistic(ifp->if_index, flags) +
9416 tcp_count_opportunistic(ifp->if_index, flags);
9417 }
9418
9419 if (result == EALREADY) {
9420 result = 0;
9421 }
9422
9423 return result;
9424 }
9425
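/*
 * Interface throttling.  Both the getter and the setter translate the
 * request into a CLASSQRQ_THROTTLE request against the interface's send
 * class queue; only interfaces with IFEF_TXSTART set support it.  When
 * throttling is switched off, the setter kicks ifnet_start() so packets
 * that were held back are flushed.
 */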
9426 int
9427 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
9428 {
9429 struct ifclassq *ifq;
9430 int err = 0;
9431
9432 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9433 return ENXIO;
9434 }
9435
9436 *level = IFNET_THROTTLE_OFF;
9437
9438 ifq = &ifp->if_snd;
9439 IFCQ_LOCK(ifq);
9440 /* Throttling works only for IFCQ, not ALTQ instances */
9441 if (IFCQ_IS_ENABLED(ifq)) {
9442 cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
9443
9444 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
9445 *level = req.level;
9446 }
9447 IFCQ_UNLOCK(ifq);
9448
9449 return err;
9450 }
9451
9452 int
9453 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
9454 {
9455 struct ifclassq *ifq;
9456 int err = 0;
9457
9458 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9459 return ENXIO;
9460 }
9461
9462 ifq = &ifp->if_snd;
9463
9464 switch (level) {
9465 case IFNET_THROTTLE_OFF:
9466 case IFNET_THROTTLE_OPPORTUNISTIC:
9467 break;
9468 default:
9469 return EINVAL;
9470 }
9471
9472 IFCQ_LOCK(ifq);
9473 if (IFCQ_IS_ENABLED(ifq)) {
9474 cqrq_throttle_t req = { 1, level };
9475
9476 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
9477 }
9478 IFCQ_UNLOCK(ifq);
9479
9480 if (err == 0) {
9481 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
9482 level);
9483 #if NECP
9484 necp_update_all_clients();
9485 #endif /* NECP */
9486 if (level == IFNET_THROTTLE_OFF) {
9487 ifnet_start(ifp);
9488 }
9489 }
9490
9491 return err;
9492 }
9493
9494 errno_t
9495 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9496 struct proc *p)
9497 {
9498 #pragma unused(p)
9499 errno_t result = 0;
9500 uint32_t flags;
9501 int level, category, subcategory;
9502
9503 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
9504
9505 if (cmd == SIOCSIFLOG) {
9506 if ((result = priv_check_cred(kauth_cred_get(),
9507 PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
9508 return result;
9509 }
9510
9511 level = ifr->ifr_log.ifl_level;
9512 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
9513 result = EINVAL;
9514 }
9515
9516 flags = ifr->ifr_log.ifl_flags;
9517 if ((flags &= IFNET_LOGF_MASK) == 0) {
9518 result = EINVAL;
9519 }
9520
9521 category = ifr->ifr_log.ifl_category;
9522 subcategory = ifr->ifr_log.ifl_subcategory;
9523
9524 if (result == 0) {
9525 result = ifnet_set_log(ifp, level, flags,
9526 category, subcategory);
9527 }
9528 } else {
9529 result = ifnet_get_log(ifp, &level, &flags, &category,
9530 &subcategory);
9531 if (result == 0) {
9532 ifr->ifr_log.ifl_level = level;
9533 ifr->ifr_log.ifl_flags = flags;
9534 ifr->ifr_log.ifl_category = category;
9535 ifr->ifr_log.ifl_subcategory = subcategory;
9536 }
9537 }
9538
9539 return result;
9540 }
9541
9542 int
9543 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
9544 int32_t category, int32_t subcategory)
9545 {
9546 int err = 0;
9547
9548 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
9549 VERIFY(flags & IFNET_LOGF_MASK);
9550
9551 /*
9552 * The logging level applies to all facilities; make sure to
9553 * update them all with the most current level.
9554 */
9555 flags |= ifp->if_log.flags;
9556
9557 if (ifp->if_output_ctl != NULL) {
9558 struct ifnet_log_params l;
9559
9560 bzero(&l, sizeof(l));
9561 l.level = level;
9562 l.flags = flags;
9563 l.flags &= ~IFNET_LOGF_DLIL;
9564 l.category = category;
9565 l.subcategory = subcategory;
9566
9567 /* Send this request to lower layers */
9568 if (l.flags != 0) {
9569 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
9570 sizeof(l), &l);
9571 }
9572 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
9573 /*
9574 * If targeted to the lower layers without an output
9575 * control callback registered on the interface, just
9576 * silently ignore facilities other than ours.
9577 */
9578 flags &= IFNET_LOGF_DLIL;
9579 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
9580 level = 0;
9581 }
9582 }
9583
9584 if (err == 0) {
9585 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
9586 ifp->if_log.flags = 0;
9587 } else {
9588 ifp->if_log.flags |= flags;
9589 }
9590
9591 log(LOG_INFO, "%s: logging level set to %d flags=%b "
9592 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
9593 ifp->if_log.level, ifp->if_log.flags,
9594 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
9595 category, subcategory);
9596 }
9597
9598 return err;
9599 }
9600
9601 int
9602 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
9603 int32_t *category, int32_t *subcategory)
9604 {
9605 if (level != NULL) {
9606 *level = ifp->if_log.level;
9607 }
9608 if (flags != NULL) {
9609 *flags = ifp->if_log.flags;
9610 }
9611 if (category != NULL) {
9612 *category = ifp->if_log.category;
9613 }
9614 if (subcategory != NULL) {
9615 *subcategory = ifp->if_log.subcategory;
9616 }
9617
9618 return 0;
9619 }
9620
9621 int
9622 ifnet_notify_address(struct ifnet *ifp, int af)
9623 {
9624 struct ifnet_notify_address_params na;
9625
9626 #if PF
9627 (void) pf_ifaddr_hook(ifp);
9628 #endif /* PF */
9629
9630 if (ifp->if_output_ctl == NULL) {
9631 return EOPNOTSUPP;
9632 }
9633
9634 bzero(&na, sizeof(na));
9635 na.address_family = (sa_family_t)af;
9636
9637 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
9638 sizeof(na), &na);
9639 }
9640
9641 errno_t
9642 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
9643 {
9644 if (ifp == NULL || flowid == NULL) {
9645 return EINVAL;
9646 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9647 !IF_FULLY_ATTACHED(ifp)) {
9648 return ENXIO;
9649 }
9650
9651 *flowid = ifp->if_flowhash;
9652
9653 return 0;
9654 }
9655
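/*
 * Transmit flow control.  A driver whose transmit resources are exhausted
 * calls ifnet_disable_output(); this records the interface's flow hash in
 * ifnet_fc_tree and sets IFSF_FLOW_CONTROLLED so the starter thread stops
 * dequeueing.  When the congestion clears, a flow advisory carrying the
 * same hash reaches ifnet_flowadv(), which looks the entry up and calls
 * ifnet_enable_output() to resume transmission.
 *
 * Illustrative driver-side sketch (not code from this file; the callback
 * names are hypothetical):
 *
 *	static void
 *	mydrv_tx_ring_full(struct ifnet *ifp)
 *	{
 *		(void) ifnet_disable_output(ifp);	// pause dequeueing
 *	}
 *
 *	static void
 *	mydrv_tx_ring_drained(struct ifnet *ifp)
 *	{
 *		(void) ifnet_enable_output(ifp);	// resume dequeueing
 *	}
 */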
9656 errno_t
9657 ifnet_disable_output(struct ifnet *ifp)
9658 {
9659 int err;
9660
9661 if (ifp == NULL) {
9662 return EINVAL;
9663 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9664 !IF_FULLY_ATTACHED(ifp)) {
9665 return ENXIO;
9666 }
9667
9668 if ((err = ifnet_fc_add(ifp)) == 0) {
9669 lck_mtx_lock_spin(&ifp->if_start_lock);
9670 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
9671 lck_mtx_unlock(&ifp->if_start_lock);
9672 }
9673 return err;
9674 }
9675
9676 errno_t
9677 ifnet_enable_output(struct ifnet *ifp)
9678 {
9679 if (ifp == NULL) {
9680 return EINVAL;
9681 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9682 !IF_FULLY_ATTACHED(ifp)) {
9683 return ENXIO;
9684 }
9685
9686 ifnet_start_common(ifp, TRUE);
9687 return 0;
9688 }
9689
9690 void
9691 ifnet_flowadv(uint32_t flowhash)
9692 {
9693 struct ifnet_fc_entry *ifce;
9694 struct ifnet *ifp;
9695
9696 ifce = ifnet_fc_get(flowhash);
9697 if (ifce == NULL) {
9698 return;
9699 }
9700
9701 VERIFY(ifce->ifce_ifp != NULL);
9702 ifp = ifce->ifce_ifp;
9703
9704 /* flow hash gets recalculated per attach, so check */
9705 if (ifnet_is_attached(ifp, 1)) {
9706 if (ifp->if_flowhash == flowhash) {
9707 (void) ifnet_enable_output(ifp);
9708 }
9709 ifnet_decr_iorefcnt(ifp);
9710 }
9711 ifnet_fc_entry_free(ifce);
9712 }
9713
9714 /*
9715 * Function to compare ifnet_fc_entries in ifnet flow control tree
9716 */
9717 static inline int
9718 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
9719 {
9720 return fc1->ifce_flowhash - fc2->ifce_flowhash;
9721 }
9722
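/*
 * Flow-control entries live in a red-black tree (ifnet_fc_tree) keyed by
 * the interface flow hash.  ifnet_fc_add() inserts an entry for an
 * interface, returning EAGAIN on the rare hash collision with a different
 * interface; ifnet_fc_get() removes and returns the entry for a given
 * hash, freeing it instead if the interface is no longer attached.
 */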
9723 static int
9724 ifnet_fc_add(struct ifnet *ifp)
9725 {
9726 struct ifnet_fc_entry keyfc, *ifce;
9727 uint32_t flowhash;
9728
9729 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
9730 VERIFY(ifp->if_flowhash != 0);
9731 flowhash = ifp->if_flowhash;
9732
9733 bzero(&keyfc, sizeof(keyfc));
9734 keyfc.ifce_flowhash = flowhash;
9735
9736 lck_mtx_lock_spin(&ifnet_fc_lock);
9737 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9738 if (ifce != NULL && ifce->ifce_ifp == ifp) {
9739 /* Entry is already in ifnet_fc_tree, return */
9740 lck_mtx_unlock(&ifnet_fc_lock);
9741 return 0;
9742 }
9743
9744 if (ifce != NULL) {
9745 /*
9746 * There is a different fc entry with the same flow hash
9747 * but different ifp pointer. There can be a collision
9748 * on flow hash but the probability is low. Let's just
9749 * avoid adding a second one when there is a collision.
9750 */
9751 lck_mtx_unlock(&ifnet_fc_lock);
9752 return EAGAIN;
9753 }
9754
9755 /* become regular mutex */
9756 lck_mtx_convert_spin(&ifnet_fc_lock);
9757
9758 ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
9759 ifce->ifce_flowhash = flowhash;
9760 ifce->ifce_ifp = ifp;
9761
9762 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9763 lck_mtx_unlock(&ifnet_fc_lock);
9764 return 0;
9765 }
9766
9767 static struct ifnet_fc_entry *
9768 ifnet_fc_get(uint32_t flowhash)
9769 {
9770 struct ifnet_fc_entry keyfc, *ifce;
9771 struct ifnet *ifp;
9772
9773 bzero(&keyfc, sizeof(keyfc));
9774 keyfc.ifce_flowhash = flowhash;
9775
9776 lck_mtx_lock_spin(&ifnet_fc_lock);
9777 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9778 if (ifce == NULL) {
9779 /* Entry is not present in ifnet_fc_tree, return */
9780 lck_mtx_unlock(&ifnet_fc_lock);
9781 return NULL;
9782 }
9783
9784 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9785
9786 VERIFY(ifce->ifce_ifp != NULL);
9787 ifp = ifce->ifce_ifp;
9788
9789 /* become regular mutex */
9790 lck_mtx_convert_spin(&ifnet_fc_lock);
9791
9792 if (!ifnet_is_attached(ifp, 0)) {
9793 /*
9794 * This ifp is not attached or in the process of being
9795 * detached; just don't process it.
9796 */
9797 ifnet_fc_entry_free(ifce);
9798 ifce = NULL;
9799 }
9800 lck_mtx_unlock(&ifnet_fc_lock);
9801
9802 return ifce;
9803 }
9804
9805 static void
9806 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
9807 {
9808 zfree(ifnet_fc_zone, ifce);
9809 }
9810
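/*
 * Compute a non-zero 32-bit flow hash for an interface from its name,
 * unit, flag and capability words, output scheduling model and two fresh
 * random values, using a lazily initialized global seed.  A zero result
 * reseeds and retries, since a zero flow hash is treated as invalid
 * elsewhere (see the VERIFY in ifnet_fc_add()).
 */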
9811 static uint32_t
9812 ifnet_calc_flowhash(struct ifnet *ifp)
9813 {
9814 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
9815 uint32_t flowhash = 0;
9816
9817 if (ifnet_flowhash_seed == 0) {
9818 ifnet_flowhash_seed = RandomULong();
9819 }
9820
9821 bzero(&fh, sizeof(fh));
9822
9823 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
9824 fh.ifk_unit = ifp->if_unit;
9825 fh.ifk_flags = ifp->if_flags;
9826 fh.ifk_eflags = ifp->if_eflags;
9827 fh.ifk_capabilities = ifp->if_capabilities;
9828 fh.ifk_capenable = ifp->if_capenable;
9829 fh.ifk_output_sched_model = ifp->if_output_sched_model;
9830 fh.ifk_rand1 = RandomULong();
9831 fh.ifk_rand2 = RandomULong();
9832
9833 try_again:
9834 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
9835 if (flowhash == 0) {
9836 /* try to get a non-zero flowhash */
9837 ifnet_flowhash_seed = RandomULong();
9838 goto try_again;
9839 }
9840
9841 return flowhash;
9842 }
9843
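/*
 * Per-address-family network signature.  The signature is an opaque blob
 * stored in the interface's IPv4 or IPv6 extension area; a zero length
 * clears it, and lengths larger than the backing buffer are rejected with
 * EINVAL.  The getter below copies the stored blob back out, provided the
 * caller's buffer is large enough.
 */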
9844 int
9845 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
9846 uint16_t flags, uint8_t *data)
9847 {
9848 #pragma unused(flags)
9849 int error = 0;
9850
9851 switch (family) {
9852 case AF_INET:
9853 if_inetdata_lock_exclusive(ifp);
9854 if (IN_IFEXTRA(ifp) != NULL) {
9855 if (len == 0) {
9856 /* Allow clearing the signature */
9857 IN_IFEXTRA(ifp)->netsig_len = 0;
9858 bzero(IN_IFEXTRA(ifp)->netsig,
9859 sizeof(IN_IFEXTRA(ifp)->netsig));
9860 if_inetdata_lock_done(ifp);
9861 break;
9862 } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
9863 error = EINVAL;
9864 if_inetdata_lock_done(ifp);
9865 break;
9866 }
9867 IN_IFEXTRA(ifp)->netsig_len = len;
9868 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
9869 } else {
9870 error = ENOMEM;
9871 }
9872 if_inetdata_lock_done(ifp);
9873 break;
9874
9875 case AF_INET6:
9876 if_inet6data_lock_exclusive(ifp);
9877 if (IN6_IFEXTRA(ifp) != NULL) {
9878 if (len == 0) {
9879 /* Allow clearing the signature */
9880 IN6_IFEXTRA(ifp)->netsig_len = 0;
9881 bzero(IN6_IFEXTRA(ifp)->netsig,
9882 sizeof(IN6_IFEXTRA(ifp)->netsig));
9883 if_inet6data_lock_done(ifp);
9884 break;
9885 } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
9886 error = EINVAL;
9887 if_inet6data_lock_done(ifp);
9888 break;
9889 }
9890 IN6_IFEXTRA(ifp)->netsig_len = len;
9891 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
9892 } else {
9893 error = ENOMEM;
9894 }
9895 if_inet6data_lock_done(ifp);
9896 break;
9897
9898 default:
9899 error = EINVAL;
9900 break;
9901 }
9902
9903 return error;
9904 }
9905
9906 int
9907 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
9908 uint16_t *flags, uint8_t *data)
9909 {
9910 int error = 0;
9911
9912 if (ifp == NULL || len == NULL || data == NULL) {
9913 return EINVAL;
9914 }
9915
9916 switch (family) {
9917 case AF_INET:
9918 if_inetdata_lock_shared(ifp);
9919 if (IN_IFEXTRA(ifp) != NULL) {
9920 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
9921 error = EINVAL;
9922 if_inetdata_lock_done(ifp);
9923 break;
9924 }
9925 if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
9926 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
9927 } else {
9928 error = ENOENT;
9929 }
9930 } else {
9931 error = ENOMEM;
9932 }
9933 if_inetdata_lock_done(ifp);
9934 break;
9935
9936 case AF_INET6:
9937 if_inet6data_lock_shared(ifp);
9938 if (IN6_IFEXTRA(ifp) != NULL) {
9939 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
9940 error = EINVAL;
9941 if_inet6data_lock_done(ifp);
9942 break;
9943 }
9944 if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
9945 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
9946 } else {
9947 error = ENOENT;
9948 }
9949 } else {
9950 error = ENOMEM;
9951 }
9952 if_inet6data_lock_done(ifp);
9953 break;
9954
9955 default:
9956 error = EINVAL;
9957 break;
9958 }
9959
9960 if (error == 0 && flags != NULL) {
9961 *flags = 0;
9962 }
9963
9964 return error;
9965 }
9966
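/*
 * Install or clear the set of NAT64 prefixes on an interface.  Each slot
 * may be cleared (prefix length 0) or set to one of the RFC 6052 prefix
 * lengths (32/40/48/56/64/96); prefixes with an embedded interface or
 * link-local scope are rejected.  If at least one prefix was installed,
 * NECP clients are notified so they can re-evaluate their paths.
 */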
9967 int
9968 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9969 {
9970 int i, error = 0, one_set = 0;
9971
9972 if_inet6data_lock_exclusive(ifp);
9973
9974 if (IN6_IFEXTRA(ifp) == NULL) {
9975 error = ENOMEM;
9976 goto out;
9977 }
9978
9979 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9980 uint32_t prefix_len =
9981 prefixes[i].prefix_len;
9982 struct in6_addr *prefix =
9983 &prefixes[i].ipv6_prefix;
9984
9985 if (prefix_len == 0) {
9986 clat_log0((LOG_DEBUG,
9987 "NAT64 prefixes purged from Interface %s\n",
9988 if_name(ifp)));
9989 /* Allow clearing the signature */
9990 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
9991 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9992 sizeof(struct in6_addr));
9993
9994 continue;
9995 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
9996 prefix_len != NAT64_PREFIX_LEN_40 &&
9997 prefix_len != NAT64_PREFIX_LEN_48 &&
9998 prefix_len != NAT64_PREFIX_LEN_56 &&
9999 prefix_len != NAT64_PREFIX_LEN_64 &&
10000 prefix_len != NAT64_PREFIX_LEN_96) {
10001 clat_log0((LOG_DEBUG,
10002 "NAT64 prefixlen is incorrect %d\n", prefix_len));
10003 error = EINVAL;
10004 goto out;
10005 }
10006
10007 if (IN6_IS_SCOPE_EMBED(prefix)) {
10008 clat_log0((LOG_DEBUG,
10009 "NAT64 prefix has interface/link local scope.\n"));
10010 error = EINVAL;
10011 goto out;
10012 }
10013
10014 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
10015 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
10016 sizeof(struct in6_addr));
10017 clat_log0((LOG_DEBUG,
10018 "NAT64 prefix set to %s with prefixlen: %d\n",
10019 ip6_sprintf(prefix), prefix_len));
10020 one_set = 1;
10021 }
10022
10023 out:
10024 if_inet6data_lock_done(ifp);
10025
10026 if (error == 0 && one_set != 0) {
10027 necp_update_all_clients();
10028 }
10029
10030 return error;
10031 }
10032
10033 int
10034 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
10035 {
10036 int i, found_one = 0, error = 0;
10037
10038 if (ifp == NULL) {
10039 return EINVAL;
10040 }
10041
10042 if_inet6data_lock_shared(ifp);
10043
10044 if (IN6_IFEXTRA(ifp) == NULL) {
10045 error = ENOMEM;
10046 goto out;
10047 }
10048
10049 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
10050 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
10051 found_one = 1;
10052 }
10053 }
10054
10055 if (found_one == 0) {
10056 error = ENOENT;
10057 goto out;
10058 }
10059
10060 if (prefixes) {
10061 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
10062 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
10063 }
10064
10065 out:
10066 if_inet6data_lock_done(ifp);
10067
10068 return error;
10069 }
10070
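/*
 * Debug-only checksum hooks.  dlil_output_cksum_dbg() force-finalizes
 * delayed checksums on output when HWCKSUM_DBG_FINALIZE_FORCED is set,
 * and dlil_input_cksum_dbg() either forces partial checksum offload on
 * input or verifies (and optionally re-offsets) the partial checksum
 * reported by the driver, counting any mismatches it finds.
 */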
10071 static void
10072 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
10073 protocol_family_t pf)
10074 {
10075 #pragma unused(ifp)
10076 uint32_t did_sw;
10077
10078 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
10079 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
10080 return;
10081 }
10082
10083 switch (pf) {
10084 case PF_INET:
10085 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
10086 if (did_sw & CSUM_DELAY_IP) {
10087 hwcksum_dbg_finalized_hdr++;
10088 }
10089 if (did_sw & CSUM_DELAY_DATA) {
10090 hwcksum_dbg_finalized_data++;
10091 }
10092 break;
10093 case PF_INET6:
10094 /*
10095 * Checksum offload should not have been enabled when
10096 * extension headers exist; that also means that we
10097 * cannot force-finalize packets with extension headers.
10098 * Indicate to the callee that it should skip such a case
10099 * by setting optlen to -1.
10100 */
10101 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
10102 m->m_pkthdr.csum_flags);
10103 if (did_sw & CSUM_DELAY_IPV6_DATA) {
10104 hwcksum_dbg_finalized_data++;
10105 }
10106 break;
10107 default:
10108 return;
10109 }
10110 }
10111
10112 static void
10113 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
10114 protocol_family_t pf)
10115 {
10116 uint16_t sum = 0;
10117 uint32_t hlen;
10118
10119 if (frame_header == NULL ||
10120 frame_header < (char *)mbuf_datastart(m) ||
10121 frame_header > (char *)m->m_data) {
10122 DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
10123 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
10124 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
10125 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
10126 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
10127 (uint64_t)VM_KERNEL_ADDRPERM(m));
10128 return;
10129 }
10130 hlen = (uint32_t)(m->m_data - frame_header);
10131
10132 switch (pf) {
10133 case PF_INET:
10134 case PF_INET6:
10135 break;
10136 default:
10137 return;
10138 }
10139
10140 /*
10141 * Force partial checksum offload; useful to simulate cases
10142 * where the hardware does not support partial checksum offload,
10143 * in order to validate correctness throughout the layers above.
10144 */
10145 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
10146 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
10147
10148 if (foff > (uint32_t)m->m_pkthdr.len) {
10149 return;
10150 }
10151
10152 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
10153
10154 /* Compute 16-bit 1's complement sum from forced offset */
10155 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
10156
10157 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
10158 m->m_pkthdr.csum_rx_val = sum;
10159 m->m_pkthdr.csum_rx_start = (uint16_t)(foff + hlen);
10160
10161 hwcksum_dbg_partial_forced++;
10162 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
10163 }
10164
10165 /*
10166 * Partial checksum offload verification (and adjustment);
10167 * useful to validate and test cases where the hardware
10168 * supports partial checksum offload.
10169 */
10170 if ((m->m_pkthdr.csum_flags &
10171 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
10172 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
10173 uint32_t rxoff;
10174
10175 /* Start offset must begin after frame header */
10176 rxoff = m->m_pkthdr.csum_rx_start;
10177 if (hlen > rxoff) {
10178 hwcksum_dbg_bad_rxoff++;
10179 if (dlil_verbose) {
10180 DLIL_PRINTF("%s: partial cksum start offset %d "
10181 "is less than frame header length %d for "
10182 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
10183 (uint64_t)VM_KERNEL_ADDRPERM(m));
10184 }
10185 return;
10186 }
10187 rxoff -= hlen;
10188
10189 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10190 /*
10191 * Compute the expected 16-bit 1's complement sum;
10192 * skip this if we've already computed it above
10193 * when partial checksum offload is forced.
10194 */
10195 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
10196
10197 /* Hardware or driver is buggy */
10198 if (sum != m->m_pkthdr.csum_rx_val) {
10199 hwcksum_dbg_bad_cksum++;
10200 if (dlil_verbose) {
10201 DLIL_PRINTF("%s: bad partial cksum value "
10202 "0x%x (expected 0x%x) for mbuf "
10203 "0x%llx [rx_start %d]\n",
10204 if_name(ifp),
10205 m->m_pkthdr.csum_rx_val, sum,
10206 (uint64_t)VM_KERNEL_ADDRPERM(m),
10207 m->m_pkthdr.csum_rx_start);
10208 }
10209 return;
10210 }
10211 }
10212 hwcksum_dbg_verified++;
10213
10214 /*
10215 * This code allows us to emulate hardware implementations that
10216 * perform the 16-bit 1's complement sum beginning at various
10217 * start offset values.
10218 */
10219 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
10220 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
10221
10222 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
10223 return;
10224 }
10225
10226 sum = m_adj_sum16(m, rxoff, aoff,
10227 m_pktlen(m) - aoff, sum);
10228
10229 m->m_pkthdr.csum_rx_val = sum;
10230 m->m_pkthdr.csum_rx_start = (uint16_t)(aoff + hlen);
10231
10232 hwcksum_dbg_adjusted++;
10233 }
10234 }
10235 }
10236
10237 static int
10238 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
10239 {
10240 #pragma unused(arg1, arg2)
10241 u_int32_t i;
10242 int err;
10243
10244 i = hwcksum_dbg_mode;
10245
10246 err = sysctl_handle_int(oidp, &i, 0, req);
10247 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10248 return err;
10249 }
10250
10251 if (hwcksum_dbg == 0) {
10252 return ENODEV;
10253 }
10254
10255 if ((i & ~HWCKSUM_DBG_MASK) != 0) {
10256 return EINVAL;
10257 }
10258
10259 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
10260
10261 return err;
10262 }
10263
10264 static int
10265 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
10266 {
10267 #pragma unused(arg1, arg2)
10268 u_int32_t i;
10269 int err;
10270
10271 i = hwcksum_dbg_partial_rxoff_forced;
10272
10273 err = sysctl_handle_int(oidp, &i, 0, req);
10274 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10275 return err;
10276 }
10277
10278 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10279 return ENODEV;
10280 }
10281
10282 hwcksum_dbg_partial_rxoff_forced = i;
10283
10284 return err;
10285 }
10286
10287 static int
10288 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
10289 {
10290 #pragma unused(arg1, arg2)
10291 u_int32_t i;
10292 int err;
10293
10294 i = hwcksum_dbg_partial_rxoff_adj;
10295
10296 err = sysctl_handle_int(oidp, &i, 0, req);
10297 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10298 return err;
10299 }
10300
10301 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
10302 return ENODEV;
10303 }
10304
10305 hwcksum_dbg_partial_rxoff_adj = i;
10306
10307 return err;
10308 }
10309
10310 static int
10311 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
10312 {
10313 #pragma unused(oidp, arg1, arg2)
10314 int err;
10315
10316 if (req->oldptr == USER_ADDR_NULL) {
10317 }
10318 if (req->newptr != USER_ADDR_NULL) {
10319 return EPERM;
10320 }
10321 err = SYSCTL_OUT(req, &tx_chain_len_stats,
10322 sizeof(struct chain_len_stats));
10323
10324 return err;
10325 }
10326
10327
10328 #if DEBUG || DEVELOPMENT
10329 /* Blob for sum16 verification */
10330 static uint8_t sumdata[] = {
10331 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
10332 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
10333 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
10334 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
10335 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
10336 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
10337 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
10338 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
10339 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
10340 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
10341 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
10342 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
10343 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
10344 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
10345 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
10346 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
10347 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
10348 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
10349 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
10350 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
10351 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
10352 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
10353 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
10354 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
10355 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
10356 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
10357 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
10358 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
10359 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
10360 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
10361 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
10362 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
10363 0xc8, 0x28, 0x02, 0x00, 0x00
10364 };
10365
10366 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
10367 static struct {
10368 boolean_t init;
10369 uint16_t len;
10370 uint16_t sumr; /* reference */
10371 uint16_t sumrp; /* reference, precomputed */
10372 } sumtbl[] = {
10373 { FALSE, 0, 0, 0x0000 },
10374 { FALSE, 1, 0, 0x001f },
10375 { FALSE, 2, 0, 0x8b1f },
10376 { FALSE, 3, 0, 0x8b27 },
10377 { FALSE, 7, 0, 0x790e },
10378 { FALSE, 11, 0, 0xcb6d },
10379 { FALSE, 20, 0, 0x20dd },
10380 { FALSE, 27, 0, 0xbabd },
10381 { FALSE, 32, 0, 0xf3e8 },
10382 { FALSE, 37, 0, 0x197d },
10383 { FALSE, 43, 0, 0x9eae },
10384 { FALSE, 64, 0, 0x4678 },
10385 { FALSE, 127, 0, 0x9399 },
10386 { FALSE, 256, 0, 0xd147 },
10387 { FALSE, 325, 0, 0x0358 },
10388 };
10389 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
10390
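/*
 * Self-test for the 16-bit 1's complement checksum routines: for every
 * entry in sumtbl and every byte alignment within an 8-byte window, copy
 * the test blob into an mbuf cluster and verify that m_sum16() (and
 * b_sum16() on INET builds) agree with the reference in_cksum_mbuf_ref()
 * and with the precomputed values, panicking on any mismatch.
 */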
10391 static void
10392 dlil_verify_sum16(void)
10393 {
10394 struct mbuf *m;
10395 uint8_t *buf;
10396 int n;
10397
10398 /* Make sure test data plus extra room for alignment fits in cluster */
10399 _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
10400
10401 kprintf("DLIL: running SUM16 self-tests ... ");
10402
10403 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
10404 m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
10405
10406 buf = mtod(m, uint8_t *); /* base address */
10407
10408 for (n = 0; n < SUMTBL_MAX; n++) {
10409 uint16_t len = sumtbl[n].len;
10410 int i;
10411
10412 /* Verify for all possible alignments */
10413 for (i = 0; i < (int)sizeof(uint64_t); i++) {
10414 uint16_t sum, sumr;
10415 uint8_t *c;
10416
10417 /* Copy over test data to mbuf */
10418 VERIFY(len <= sizeof(sumdata));
10419 c = buf + i;
10420 bcopy(sumdata, c, len);
10421
10422 /* Zero-offset test (align by data pointer) */
10423 m->m_data = (caddr_t)c;
10424 m->m_len = len;
10425 sum = m_sum16(m, 0, len);
10426
10427 if (!sumtbl[n].init) {
10428 sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
10429 sumtbl[n].sumr = sumr;
10430 sumtbl[n].init = TRUE;
10431 } else {
10432 sumr = sumtbl[n].sumr;
10433 }
10434
10435 /* Something is horribly broken; stop now */
10436 if (sumr != sumtbl[n].sumrp) {
10437 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
10438 "for len=%d align=%d sum=0x%04x "
10439 "[expected=0x%04x]\n", __func__,
10440 len, i, sum, sumr);
10441 /* NOTREACHED */
10442 } else if (sum != sumr) {
10443 panic_plain("\n%s: broken m_sum16() for len=%d "
10444 "align=%d sum=0x%04x [expected=0x%04x]\n",
10445 __func__, len, i, sum, sumr);
10446 /* NOTREACHED */
10447 }
10448
10449 /* Alignment test by offset (fixed data pointer) */
10450 m->m_data = (caddr_t)buf;
10451 m->m_len = i + len;
10452 sum = m_sum16(m, i, len);
10453
10454 /* Something is horribly broken; stop now */
10455 if (sum != sumr) {
10456 panic_plain("\n%s: broken m_sum16() for len=%d "
10457 "offset=%d sum=0x%04x [expected=0x%04x]\n",
10458 __func__, len, i, sum, sumr);
10459 /* NOTREACHED */
10460 }
10461 #if INET
10462 /* Simple sum16 contiguous buffer test by alignment */
10463 sum = b_sum16(c, len);
10464
10465 /* Something is horribly broken; stop now */
10466 if (sum != sumr) {
10467 panic_plain("\n%s: broken b_sum16() for len=%d "
10468 "align=%d sum=0x%04x [expected=0x%04x]\n",
10469 __func__, len, i, sum, sumr);
10470 /* NOTREACHED */
10471 }
10472 #endif /* INET */
10473 }
10474 }
10475 m_freem(m);
10476
10477 kprintf("PASSED\n");
10478 }
10479 #endif /* DEBUG || DEVELOPMENT */
10480
10481 #define CASE_STRINGIFY(x) case x: return #x
10482
10483 __private_extern__ const char *
10484 dlil_kev_dl_code_str(u_int32_t event_code)
10485 {
10486 switch (event_code) {
10487 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
10488 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
10489 CASE_STRINGIFY(KEV_DL_SIFMTU);
10490 CASE_STRINGIFY(KEV_DL_SIFPHYS);
10491 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
10492 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
10493 CASE_STRINGIFY(KEV_DL_ADDMULTI);
10494 CASE_STRINGIFY(KEV_DL_DELMULTI);
10495 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
10496 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
10497 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
10498 CASE_STRINGIFY(KEV_DL_LINK_OFF);
10499 CASE_STRINGIFY(KEV_DL_LINK_ON);
10500 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
10501 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
10502 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
10503 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
10504 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
10505 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
10506 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
10507 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
10508 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
10509 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
10510 CASE_STRINGIFY(KEV_DL_ISSUES);
10511 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
10512 default:
10513 break;
10514 }
10515 return "";
10516 }
10517
10518 static void
10519 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
10520 {
10521 #pragma unused(arg1)
10522 struct ifnet *ifp = arg0;
10523
10524 if (ifnet_is_attached(ifp, 1)) {
10525 nstat_ifnet_threshold_reached(ifp->if_index);
10526 ifnet_decr_iorefcnt(ifp);
10527 }
10528 }
10529
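/*
 * Data-threshold notification.  Once the interface's combined in/out byte
 * count has advanced past if_data_threshold since the last notification,
 * arm (or immediately fire) the thread call that tells the network
 * statistics subsystem the threshold was reached; threshold_interval
 * rate-limits how often the notification can fire.
 */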
10530 void
10531 ifnet_notify_data_threshold(struct ifnet *ifp)
10532 {
10533 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
10534 uint64_t oldbytes = ifp->if_dt_bytes;
10535
10536 ASSERT(ifp->if_dt_tcall != NULL);
10537
10538 /*
10539 * If we went over the threshold, notify NetworkStatistics.
10540 * We rate-limit it based on the threshold interval value.
10541 */
10542 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
10543 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
10544 !thread_call_isactive(ifp->if_dt_tcall)) {
10545 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
10546 uint64_t now = mach_absolute_time(), deadline = now;
10547 uint64_t ival;
10548
10549 if (tival != 0) {
10550 nanoseconds_to_absolutetime(tival, &ival);
10551 clock_deadline_for_periodic_event(ival, now, &deadline);
10552 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
10553 deadline);
10554 } else {
10555 (void) thread_call_enter(ifp->if_dt_tcall);
10556 }
10557 }
10558 }
10559
10560 #if (DEVELOPMENT || DEBUG)
10561 /*
10562 * The sysctl name carries the input parameters of
10563 * ifnet_get_keepalive_offload_frames():
10564 * ifp (interface index): name[0]
10565 * frames_array_count: name[1]
10566 * frame_data_offset: name[2]
10567 * The return length gives used_frames_count
10568 */
10569 static int
10570 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
10571 {
10572 #pragma unused(oidp)
10573 int *name = (int *)arg1;
10574 u_int namelen = arg2;
10575 int idx;
10576 ifnet_t ifp = NULL;
10577 u_int32_t frames_array_count;
10578 size_t frame_data_offset;
10579 u_int32_t used_frames_count;
10580 struct ifnet_keepalive_offload_frame *frames_array = NULL;
10581 int error = 0;
10582 u_int32_t i;
10583
10584 /*
10585 * Only root is allowed to look at other processes' TCP frames
10586 */
10587 error = proc_suser(current_proc());
10588 if (error != 0) {
10589 goto done;
10590 }
10591 /*
10592 * Validate the input parameters
10593 */
10594 if (req->newptr != USER_ADDR_NULL) {
10595 error = EPERM;
10596 goto done;
10597 }
10598 if (namelen != 3) {
10599 error = EINVAL;
10600 goto done;
10601 }
10602 if (req->oldptr == USER_ADDR_NULL) {
10603 error = EINVAL;
10604 goto done;
10605 }
10606 if (req->oldlen == 0) {
10607 error = EINVAL;
10608 goto done;
10609 }
10610 idx = name[0];
10611 frames_array_count = name[1];
10612 frame_data_offset = name[2];
10613
10614 /* Make sure the passed buffer is large enough */
10615 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
10616 req->oldlen) {
10617 error = ENOMEM;
10618 goto done;
10619 }
10620
10621 ifnet_head_lock_shared();
10622 if (!IF_INDEX_IN_RANGE(idx)) {
10623 ifnet_head_done();
10624 error = ENOENT;
10625 goto done;
10626 }
10627 ifp = ifindex2ifnet[idx];
10628 ifnet_head_done();
10629
10630 frames_array = _MALLOC(frames_array_count *
10631 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
10632 if (frames_array == NULL) {
10633 error = ENOMEM;
10634 goto done;
10635 }
10636
10637 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
10638 frames_array_count, frame_data_offset, &used_frames_count);
10639 if (error != 0) {
10640 DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
10641 __func__, error);
10642 goto done;
10643 }
10644
10645 for (i = 0; i < used_frames_count; i++) {
10646 error = SYSCTL_OUT(req, frames_array + i,
10647 sizeof(struct ifnet_keepalive_offload_frame));
10648 if (error != 0) {
10649 goto done;
10650 }
10651 }
10652 done:
10653 if (frames_array != NULL) {
10654 _FREE(frames_array, M_TEMP);
10655 }
10656 return error;
10657 }
10658 #endif /* DEVELOPMENT || DEBUG */
10659
10660 void
10661 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
10662 struct ifnet *ifp)
10663 {
10664 tcp_update_stats_per_flow(ifs, ifp);
10665 }
10666
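/*
 * Atomic helpers for the if_eflags and if_xflags words.  The set helpers
 * return the flag word's value prior to the update (as reported by
 * OSBitOrAtomic); the clear helpers return nothing.
 */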
10667 static inline u_int32_t
10668 _set_flags(u_int32_t *flags_p, u_int32_t set_flags)
10669 {
10670 return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
10671 }
10672
10673 static inline void
10674 _clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
10675 {
10676 OSBitAndAtomic(~clear_flags, flags_p);
10677 }
10678
10679 __private_extern__ u_int32_t
10680 if_set_eflags(ifnet_t interface, u_int32_t set_flags)
10681 {
10682 return _set_flags(&interface->if_eflags, set_flags);
10683 }
10684
10685 __private_extern__ void
10686 if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
10687 {
10688 _clear_flags(&interface->if_eflags, clear_flags);
10689 }
10690
10691 __private_extern__ u_int32_t
10692 if_set_xflags(ifnet_t interface, u_int32_t set_flags)
10693 {
10694 return _set_flags(&interface->if_xflags, set_flags);
10695 }
10696
10697 __private_extern__ void
10698 if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
10699 {
10700 _clear_flags(&interface->if_xflags, clear_flags);
10701 }