1 /*
2 * Copyright (c) 1999-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35 #include <ptrauth.h>
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/if_arp.h>
53 #include <net/iptap.h>
54 #include <net/pktap.h>
55 #include <sys/kern_event.h>
56 #include <sys/kdebug.h>
57 #include <sys/mcache.h>
58 #include <sys/syslog.h>
59 #include <sys/protosw.h>
60 #include <sys/priv.h>
61
62 #include <kern/assert.h>
63 #include <kern/task.h>
64 #include <kern/thread.h>
65 #include <kern/sched_prim.h>
66 #include <kern/locks.h>
67 #include <kern/zalloc.h>
68
69 #include <net/kpi_protocol.h>
70 #include <net/if_types.h>
71 #include <net/if_ipsec.h>
72 #include <net/if_llreach.h>
73 #include <net/if_utun.h>
74 #include <net/kpi_interfacefilter.h>
75 #include <net/classq/classq.h>
76 #include <net/classq/classq_sfb.h>
77 #include <net/flowhash.h>
78 #include <net/ntstat.h>
79 #include <net/if_llatbl.h>
80 #include <net/net_api_stats.h>
81 #include <net/if_ports_used.h>
82 #include <net/if_vlan_var.h>
83 #include <netinet/in.h>
84 #if INET
85 #include <netinet/in_var.h>
86 #include <netinet/igmp_var.h>
87 #include <netinet/ip_var.h>
88 #include <netinet/tcp.h>
89 #include <netinet/tcp_var.h>
90 #include <netinet/udp.h>
91 #include <netinet/udp_var.h>
92 #include <netinet/if_ether.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/in_tclass.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_icmp.h>
97 #include <netinet/icmp_var.h>
98 #endif /* INET */
99
100 #include <net/nat464_utils.h>
101 #include <netinet6/in6_var.h>
102 #include <netinet6/nd6.h>
103 #include <netinet6/mld6_var.h>
104 #include <netinet6/scope6_var.h>
105 #include <netinet/ip6.h>
106 #include <netinet/icmp6.h>
107 #include <net/pf_pbuf.h>
108 #include <libkern/OSAtomic.h>
109 #include <libkern/tree.h>
110
111 #include <dev/random/randomdev.h>
112 #include <machine/machine_routines.h>
113
114 #include <mach/thread_act.h>
115 #include <mach/sdt.h>
116
117 #if CONFIG_MACF
118 #include <sys/kauth.h>
119 #include <security/mac_framework.h>
120 #include <net/ethernet.h>
121 #include <net/firewire.h>
122 #endif
123
124 #if PF
125 #include <net/pfvar.h>
126 #endif /* PF */
127 #include <net/pktsched/pktsched.h>
128 #include <net/pktsched/pktsched_netem.h>
129
130 #if NECP
131 #include <net/necp.h>
132 #endif /* NECP */
133
134
135 #include <os/log.h>
136
137 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
138 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
139 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
140 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
141 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
142
143 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
144 #define MAX_LINKADDR 4 /* LONGWORDS */
145 #define M_NKE M_IFADDR
146
147 #if 1
148 #define DLIL_PRINTF printf
149 #else
150 #define DLIL_PRINTF kprintf
151 #endif
152
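/*
 * Compile-time checks that a given counter field is 64-bit aligned,
 * first within struct if_data_internal and then within the if_data
 * embedded in struct ifnet; the fields checked in dlil_init() below
 * are updated with 64-bit atomic operations and must not be misaligned.
 */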
153 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
154 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
155
156 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
157 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
158
159 enum {
160 kProtoKPI_v1 = 1,
161 kProtoKPI_v2 = 2
162 };
163
164 /*
165 * The list of if_proto structures in if_proto_hash[] is protected by
166 * the ifnet lock. The remaining fields are initialized at protocol
167 * attach time and never change; no lock is needed to access them as
168 * long as a valid reference is held via if_proto_ref().
169 */
170 struct if_proto {
171 SLIST_ENTRY(if_proto) next_hash;
172 u_int32_t refcount;
173 u_int32_t detached;
174 struct ifnet *ifp;
175 protocol_family_t protocol_family;
176 int proto_kpi;
177 union {
178 struct {
179 proto_media_input input;
180 proto_media_preout pre_output;
181 proto_media_event event;
182 proto_media_ioctl ioctl;
183 proto_media_detached detached;
184 proto_media_resolve_multi resolve_multi;
185 proto_media_send_arp send_arp;
186 } v1;
187 struct {
188 proto_media_input_v2 input;
189 proto_media_preout pre_output;
190 proto_media_event event;
191 proto_media_ioctl ioctl;
192 proto_media_detached detached;
193 proto_media_resolve_multi resolve_multi;
194 proto_media_send_arp send_arp;
195 } v2;
196 } kpi;
197 };
198
199 SLIST_HEAD(proto_hash_entry, if_proto);
200
201 #define DLIL_SDLDATALEN \
202 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
203
204 struct dlil_ifnet {
205 struct ifnet dl_if; /* public ifnet */
206 /*
207 * DLIL private fields, protected by dl_if_lock
208 */
209 decl_lck_mtx_data(, dl_if_lock);
210 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
211 u_int32_t dl_if_flags; /* flags (below) */
212 u_int32_t dl_if_refcnt; /* refcnt */
213 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
214 void *dl_if_uniqueid; /* unique interface id */
215 size_t dl_if_uniqueid_len; /* length of the unique id */
216 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
217 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
218 struct {
219 struct ifaddr ifa; /* lladdr ifa */
220 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
221 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
222 } dl_if_lladdr;
223 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
224 u_int8_t dl_if_permanent_ether[ETHER_ADDR_LEN]; /* permanent address */
225 u_int8_t dl_if_permanent_ether_is_set;
226 u_int8_t dl_if_unused;
227 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
228 ctrace_t dl_if_attach; /* attach PC stacktrace */
229 ctrace_t dl_if_detach; /* detach PC stacktrace */
230 };
231
232 /* Values for dl_if_flags (private to DLIL) */
233 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
234 #define DLIF_REUSE 0x2 /* DLIL ifnet recycler, ifnet is not new */
235 #define DLIF_DEBUG 0x4 /* has debugging info */
236
237 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
238
239 /* For gdb */
240 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
241
242 struct dlil_ifnet_dbg {
243 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
244 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
245 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
246 /*
247 * Circular lists of ifnet_{reference,release} callers.
248 */
249 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
250 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
251 };
252
253 #define DLIL_TO_IFP(s) (&s->dl_if)
254 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
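/*
 * Both conversions rely on dl_if being the first member of struct
 * dlil_ifnet: the dlil_ifnet and the ifnet it embeds share the same
 * address, so IFP_TO_DLIL() is a plain cast and DLIL_TO_IFP() simply
 * takes the address of the embedded public ifnet.
 */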
255
256 struct ifnet_filter {
257 TAILQ_ENTRY(ifnet_filter) filt_next;
258 u_int32_t filt_skip;
259 u_int32_t filt_flags;
260 ifnet_t filt_ifp;
261 const char *filt_name;
262 void *filt_cookie;
263 protocol_family_t filt_protocol;
264 iff_input_func filt_input;
265 iff_output_func filt_output;
266 iff_event_func filt_event;
267 iff_ioctl_func filt_ioctl;
268 iff_detached_func filt_detached;
269 };
270
271 struct proto_input_entry;
272
273 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
274 static lck_grp_t *dlil_lock_group;
275 lck_grp_t *ifnet_lock_group;
276 static lck_grp_t *ifnet_head_lock_group;
277 static lck_grp_t *ifnet_snd_lock_group;
278 static lck_grp_t *ifnet_rcv_lock_group;
279 lck_attr_t *ifnet_lock_attr;
280 decl_lck_rw_data(static, ifnet_head_lock);
281 decl_lck_mtx_data(static, dlil_ifnet_lock);
282 u_int32_t dlil_filter_disable_tso_count = 0;
283
284 #if DEBUG
285 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
286 #else
287 static unsigned int ifnet_debug; /* debugging (disabled) */
288 #endif /* !DEBUG */
289 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
290 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
291 static struct zone *dlif_zone; /* zone for dlil_ifnet */
292 #define DLIF_ZONE_NAME "ifnet" /* zone name */
293
294 static ZONE_DECLARE(dlif_filt_zone, "ifnet_filter",
295 sizeof(struct ifnet_filter), ZC_ZFREE_CLEARMEM);
296
297 static ZONE_DECLARE(dlif_phash_zone, "ifnet_proto_hash",
298 sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS, ZC_ZFREE_CLEARMEM);
299
300 static ZONE_DECLARE(dlif_proto_zone, "ifnet_proto",
301 sizeof(struct if_proto), ZC_ZFREE_CLEARMEM);
302
303 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
304 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
305 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
306 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
307
308 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
309 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
310 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
311 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
312
313 static u_int32_t net_rtref;
314
315 static struct dlil_main_threading_info dlil_main_input_thread_info;
316 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
317 (struct dlil_threading_info *)&dlil_main_input_thread_info;
318
319 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
320 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
321 static void dlil_if_trace(struct dlil_ifnet *, int);
322 static void if_proto_ref(struct if_proto *);
323 static void if_proto_free(struct if_proto *);
324 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
325 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
326 u_int32_t list_count);
327 static void if_flt_monitor_busy(struct ifnet *);
328 static void if_flt_monitor_unbusy(struct ifnet *);
329 static void if_flt_monitor_enter(struct ifnet *);
330 static void if_flt_monitor_leave(struct ifnet *);
331 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
332 char **, protocol_family_t);
333 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
334 protocol_family_t);
335 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
336 const struct sockaddr_dl *);
337 static int ifnet_lookup(struct ifnet *);
338 static void if_purgeaddrs(struct ifnet *);
339
340 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
341 struct mbuf *, char *);
342 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
343 struct mbuf *);
344 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
345 mbuf_t *, const struct sockaddr *, void *, char *, char *);
346 static void ifproto_media_event(struct ifnet *, protocol_family_t,
347 const struct kev_msg *);
348 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
349 unsigned long, void *);
350 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
351 struct sockaddr_dl *, size_t);
352 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
353 const struct sockaddr_dl *, const struct sockaddr *,
354 const struct sockaddr_dl *, const struct sockaddr *);
355
356 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
357 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
358 boolean_t poll, struct thread *tp);
359 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
360 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
361 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
362 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
363 protocol_family_t *);
364 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
365 const struct ifnet_demux_desc *, u_int32_t);
366 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
367 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
368 #if !XNU_TARGET_OS_OSX
369 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
370 const struct sockaddr *, const char *, const char *,
371 u_int32_t *, u_int32_t *);
372 #else /* XNU_TARGET_OS_OSX */
373 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
374 const struct sockaddr *, const char *, const char *);
375 #endif /* XNU_TARGET_OS_OSX */
376 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
377 const struct sockaddr *, const char *, const char *,
378 u_int32_t *, u_int32_t *);
379 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
380 static void ifp_if_free(struct ifnet *);
381 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
382 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
383 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
384
385 static errno_t dlil_input_async(struct dlil_threading_info *, struct ifnet *,
386 struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
387 boolean_t, struct thread *);
388 static errno_t dlil_input_sync(struct dlil_threading_info *, struct ifnet *,
389 struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
390 boolean_t, struct thread *);
391
392 static void dlil_main_input_thread_func(void *, wait_result_t);
393 static void dlil_main_input_thread_cont(void *, wait_result_t);
394
395 static void dlil_input_thread_func(void *, wait_result_t);
396 static void dlil_input_thread_cont(void *, wait_result_t);
397
398 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
399 static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);
400
401 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *,
402 thread_continue_t *);
403 static void dlil_terminate_input_thread(struct dlil_threading_info *);
404 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
405 struct dlil_threading_info *, struct ifnet *, boolean_t);
406 static boolean_t dlil_input_stats_sync(struct ifnet *,
407 struct dlil_threading_info *);
408 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
409 u_int32_t, ifnet_model_t, boolean_t);
410 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
411 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
412 static int dlil_is_clat_needed(protocol_family_t, mbuf_t );
413 static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
414 static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
415 #if DEBUG || DEVELOPMENT
416 static void dlil_verify_sum16(void);
417 #endif /* DEBUG || DEVELOPMENT */
418 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
419 protocol_family_t);
420 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
421 protocol_family_t);
422
423 static void dlil_incr_pending_thread_count(void);
424 static void dlil_decr_pending_thread_count(void);
425
426 static void ifnet_detacher_thread_func(void *, wait_result_t);
427 static void ifnet_detacher_thread_cont(void *, wait_result_t);
428 static void ifnet_detach_final(struct ifnet *);
429 static void ifnet_detaching_enqueue(struct ifnet *);
430 static struct ifnet *ifnet_detaching_dequeue(void);
431
432 static void ifnet_start_thread_func(void *, wait_result_t);
433 static void ifnet_start_thread_cont(void *, wait_result_t);
434
435 static void ifnet_poll_thread_func(void *, wait_result_t);
436 static void ifnet_poll_thread_cont(void *, wait_result_t);
437
438 static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
439 boolean_t, boolean_t *);
440
441 static void ifp_src_route_copyout(struct ifnet *, struct route *);
442 static void ifp_src_route_copyin(struct ifnet *, struct route *);
443 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
444 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
445
446 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
447 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
448 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
449 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
450 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
451 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
452 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
453 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
454 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
455 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
456 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
457
458 struct chain_len_stats tx_chain_len_stats;
459 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
460
461 #if TEST_INPUT_THREAD_TERMINATION
462 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
463 #endif /* TEST_INPUT_THREAD_TERMINATION */
464
465 /* The following are protected by dlil_ifnet_lock */
466 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
467 static u_int32_t ifnet_detaching_cnt;
468 static boolean_t ifnet_detaching_embryonic;
469 static void *ifnet_delayed_run; /* wait channel for detaching thread */
470
471 decl_lck_mtx_data(static, ifnet_fc_lock);
472
473 static uint32_t ifnet_flowhash_seed;
474
475 struct ifnet_flowhash_key {
476 char ifk_name[IFNAMSIZ];
477 uint32_t ifk_unit;
478 uint32_t ifk_flags;
479 uint32_t ifk_eflags;
480 uint32_t ifk_capabilities;
481 uint32_t ifk_capenable;
482 uint32_t ifk_output_sched_model;
483 uint32_t ifk_rand1;
484 uint32_t ifk_rand2;
485 };
486
487 /* Flow control entry per interface */
488 struct ifnet_fc_entry {
489 RB_ENTRY(ifnet_fc_entry) ifce_entry;
490 u_int32_t ifce_flowhash;
491 struct ifnet *ifce_ifp;
492 };
493
494 static uint32_t ifnet_calc_flowhash(struct ifnet *);
495 static int ifce_cmp(const struct ifnet_fc_entry *,
496 const struct ifnet_fc_entry *);
497 static int ifnet_fc_add(struct ifnet *);
498 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
499 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
500
501 /* protected by ifnet_fc_lock */
502 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
503 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
504 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
505
506 static ZONE_DECLARE(ifnet_fc_zone, "ifnet_fc_zone",
507 sizeof(struct ifnet_fc_entry), ZC_ZFREE_CLEARMEM);
508
509 extern void bpfdetach(struct ifnet *);
510 extern void proto_input_run(void);
511
512 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
513 u_int32_t flags);
514 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
515 u_int32_t flags);
516
517 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
518
519 #if CONFIG_MACF
520 #if !XNU_TARGET_OS_OSX
521 int dlil_lladdr_ckreq = 1;
522 #else /* XNU_TARGET_OS_OSX */
523 int dlil_lladdr_ckreq = 0;
524 #endif /* XNU_TARGET_OS_OSX */
525 #endif /* CONFIG_MACF */
526
527 #if DEBUG
528 int dlil_verbose = 1;
529 #else
530 int dlil_verbose = 0;
531 #endif /* DEBUG */
532 #if IFNET_INPUT_SANITY_CHK
533 /* sanity checking of input packet lists received */
534 static u_int32_t dlil_input_sanity_check = 0;
535 #endif /* IFNET_INPUT_SANITY_CHK */
536 /* rate limit debug messages */
537 struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
538
539 SYSCTL_DECL(_net_link_generic_system);
540
541 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
542 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
543
544 #define IF_SNDQ_MINLEN 32
545 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
546 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
547 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
548 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
549
550 #define IF_RCVQ_MINLEN 32
551 #define IF_RCVQ_MAXLEN 256
552 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
553 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
554 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
555 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
556
557 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
558 u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
559 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
560 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
561 "ilog2 of EWMA decay rate of avg inbound packets");
562
563 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
564 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
565 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
566 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
567 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
568 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
569 "Q", "input poll mode freeze time");
570
571 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
572 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
573 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
574 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
575 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
576 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
577 "Q", "input poll sampling time");
578
579 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
580 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
581 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
582 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
583 "Q", "input poll interval (time)");
584
585 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
586 u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
587 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
588 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
589 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
590
591 #define IF_RXPOLL_WLOWAT 10
592 static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
593 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
594 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
595 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
596 "I", "input poll wakeup low watermark");
597
598 #define IF_RXPOLL_WHIWAT 100
599 static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
600 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
601 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
602 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
603 "I", "input poll wakeup high watermark");
604
605 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
606 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
607 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
608 "max packets per poll call");
609
610 u_int32_t if_rxpoll = 1;
611 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
612 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
613 sysctl_rxpoll, "I", "enable opportunistic input polling");
614
615 #if TEST_INPUT_THREAD_TERMINATION
616 static u_int32_t if_input_thread_termination_spin = 0;
617 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
618 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
619 &if_input_thread_termination_spin, 0,
620 sysctl_input_thread_termination_spin,
621 "I", "input thread termination spin limit");
622 #endif /* TEST_INPUT_THREAD_TERMINATION */
623
624 static u_int32_t cur_dlil_input_threads = 0;
625 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
626 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
627 "Current number of DLIL input threads");
628
629 #if IFNET_INPUT_SANITY_CHK
630 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
631 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
632 "Turn on sanity checking in DLIL input");
633 #endif /* IFNET_INPUT_SANITY_CHK */
634
635 static u_int32_t if_flowadv = 1;
636 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
637 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
638 "enable flow-advisory mechanism");
639
640 static u_int32_t if_delaybased_queue = 1;
641 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
642 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
643 "enable delay based dynamic queue sizing");
644
645 static uint64_t hwcksum_in_invalidated = 0;
646 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
647 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
648 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
649
650 uint32_t hwcksum_dbg = 0;
651 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
652 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
653 "enable hardware cksum debugging");
654
655 u_int32_t ifnet_start_delayed = 0;
656 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
657 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
658 "number of times start was delayed");
659
660 u_int32_t ifnet_delay_start_disabled = 0;
661 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
662 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
663 "number of times start was delayed");
664
665 static inline void
666 ifnet_delay_start_disabled_increment(void)
667 {
668 OSIncrementAtomic(&ifnet_delay_start_disabled);
669 }
670
671 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
672 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
673 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
674 #define HWCKSUM_DBG_MASK \
675 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
676 HWCKSUM_DBG_FINALIZE_FORCED)
677
678 static uint32_t hwcksum_dbg_mode = 0;
679 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
680 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
681 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
682
683 static uint64_t hwcksum_dbg_partial_forced = 0;
684 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
685 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
686 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
687
688 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
689 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
690 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
691 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
692
693 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
694 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
695 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
696 &hwcksum_dbg_partial_rxoff_forced, 0,
697 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
698 "forced partial cksum rx offset");
699
700 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
701 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
702 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
703 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
704 "adjusted partial cksum rx offset");
705
706 static uint64_t hwcksum_dbg_verified = 0;
707 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
708 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
709 &hwcksum_dbg_verified, "packets verified for having good checksum");
710
711 static uint64_t hwcksum_dbg_bad_cksum = 0;
712 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
713 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
714 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
715
716 static uint64_t hwcksum_dbg_bad_rxoff = 0;
717 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
718 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
719 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
720
721 static uint64_t hwcksum_dbg_adjusted = 0;
722 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
723 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
724 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
725
726 static uint64_t hwcksum_dbg_finalized_hdr = 0;
727 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
728 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
729 &hwcksum_dbg_finalized_hdr, "finalized headers");
730
731 static uint64_t hwcksum_dbg_finalized_data = 0;
732 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
733 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
734 &hwcksum_dbg_finalized_data, "finalized payloads");
735
736 uint32_t hwcksum_tx = 1;
737 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
738 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
739 "enable transmit hardware checksum offload");
740
741 uint32_t hwcksum_rx = 1;
742 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
743 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
744 "enable receive hardware checksum offload");
745
746 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
747 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
748 sysctl_tx_chain_len_stats, "S", "");
749
750 uint32_t tx_chain_len_count = 0;
751 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
752 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
753
754 static uint32_t threshold_notify = 1; /* enable/disable */
755 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
756 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
757
758 static uint32_t threshold_interval = 2; /* in seconds */
759 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
760 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
761
762 #if (DEVELOPMENT || DEBUG)
763 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
764 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
765 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
766 #endif /* DEVELOPMENT || DEBUG */
767
768 struct net_api_stats net_api_stats;
769 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
770 &net_api_stats, net_api_stats, "");
771
772 unsigned int net_rxpoll = 1;
773 unsigned int net_affinity = 1;
774 unsigned int net_async = 1; /* 0: synchronous, 1: asynchronous */
775
776 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
777
778 extern u_int32_t inject_buckets;
779
780 static lck_grp_attr_t *dlil_grp_attributes = NULL;
781 static lck_attr_t *dlil_lck_attributes = NULL;
782
783 /* DLIL data threshold thread call */
784 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
785
786 void
787 ifnet_filter_update_tso(boolean_t filter_enable)
788 {
789 /*
790 * update filter count and route_generation ID to let TCP
791 * know it should reevaluate doing TSO or not
792 */
793 OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
794 routegenid_update();
795 }
796
797
798 #define DLIL_INPUT_CHECK(m, ifp) { \
799 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
800 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
801 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
802 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
803 /* NOTREACHED */ \
804 } \
805 }
806
807 #define DLIL_EWMA(old, new, decay) do { \
808 u_int32_t _avg; \
809 if ((_avg = (old)) > 0) \
810 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
811 else \
812 _avg = (new); \
813 (old) = _avg; \
814 } while (0)
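/*
 * Worked example: with decay = 2 (the IF_RXPOLL_DECAY default) the
 * macro computes (((old << 2) - old) + new) >> 2, i.e. it keeps 3/4
 * of the previous average and blends in 1/4 of the new sample; for
 * old = 100 and new = 20 that is ((400 - 100) + 20) >> 2 = 80.  A
 * zero average is simply seeded with the new sample.
 */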
815
816 #define MBPS (1ULL * 1000 * 1000)
817 #define GBPS (MBPS * 1000)
818
819 struct rxpoll_time_tbl {
820 u_int64_t speed; /* downlink speed */
821 u_int32_t plowat; /* packets low watermark */
822 u_int32_t phiwat; /* packets high watermark */
823 u_int32_t blowat; /* bytes low watermark */
824 u_int32_t bhiwat; /* bytes high watermark */
825 };
826
827 static struct rxpoll_time_tbl rxpoll_tbl[] = {
828 { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
829 { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
830 { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
831 { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
832 { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
833 { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
834 };
835
836 decl_lck_mtx_data(static, dlil_thread_sync_lock);
837 static uint32_t dlil_pending_thread_cnt = 0;
838
839 static void
840 dlil_incr_pending_thread_count(void)
841 {
842 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
843 lck_mtx_lock(&dlil_thread_sync_lock);
844 dlil_pending_thread_cnt++;
845 lck_mtx_unlock(&dlil_thread_sync_lock);
846 }
847
848 static void
849 dlil_decr_pending_thread_count(void)
850 {
851 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
852 lck_mtx_lock(&dlil_thread_sync_lock);
853 VERIFY(dlil_pending_thread_cnt > 0);
854 dlil_pending_thread_cnt--;
855 if (dlil_pending_thread_cnt == 0) {
856 wakeup(&dlil_pending_thread_cnt);
857 }
858 lck_mtx_unlock(&dlil_thread_sync_lock);
859 }
860
861 int
862 proto_hash_value(u_int32_t protocol_family)
863 {
864 /*
865 * dlil_proto_unplumb_all() depends on the mapping between
866 * the hash bucket index and the protocol family defined
867 * here; future changes must be applied there as well.
868 */
869 switch (protocol_family) {
870 case PF_INET:
871 return 0;
872 case PF_INET6:
873 return 1;
874 case PF_VLAN:
875 return 2;
876 case PF_802154:
877 return 3;
878 case PF_UNSPEC:
879 default:
880 return 4;
881 }
882 }
883
884 /*
885 * Caller must already be holding ifnet lock.
886 */
887 static struct if_proto *
888 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
889 {
890 struct if_proto *proto = NULL;
891 u_int32_t i = proto_hash_value(protocol_family);
892
893 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
894
895 if (ifp->if_proto_hash != NULL) {
896 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
897 }
898
899 while (proto != NULL && proto->protocol_family != protocol_family) {
900 proto = SLIST_NEXT(proto, next_hash);
901 }
902
903 if (proto != NULL) {
904 if_proto_ref(proto);
905 }
906
907 return proto;
908 }
909
910 static void
911 if_proto_ref(struct if_proto *proto)
912 {
913 atomic_add_32(&proto->refcount, 1);
914 }
915
916 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
917
918 static void
919 if_proto_free(struct if_proto *proto)
920 {
921 u_int32_t oldval;
922 struct ifnet *ifp = proto->ifp;
923 u_int32_t proto_family = proto->protocol_family;
924 struct kev_dl_proto_data ev_pr_data;
925
926 oldval = atomic_add_32_ov(&proto->refcount, -1);
927 if (oldval > 1) {
928 return;
929 }
930
931 if (proto->proto_kpi == kProtoKPI_v1) {
932 if (proto->kpi.v1.detached) {
933 proto->kpi.v1.detached(ifp, proto->protocol_family);
934 }
935 }
936 if (proto->proto_kpi == kProtoKPI_v2) {
937 if (proto->kpi.v2.detached) {
938 proto->kpi.v2.detached(ifp, proto->protocol_family);
939 }
940 }
941
942 /*
943 * Cleanup routes that may still be in the routing table for that
944 * interface/protocol pair.
945 */
946 if_rtproto_del(ifp, proto_family);
947
948 ifnet_lock_shared(ifp);
949
950 /* No more references on this; the protocol must have been detached */
951 VERIFY(proto->detached);
952
953 /*
954 * The reserved field carries the number of protocols still attached
955 * (subject to change)
956 */
957 ev_pr_data.proto_family = proto_family;
958 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
959
960 ifnet_lock_done(ifp);
961
962 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
963 (struct net_event_data *)&ev_pr_data,
964 sizeof(struct kev_dl_proto_data));
965
966 if (ev_pr_data.proto_remaining_count == 0) {
967 /*
968 * The protocol count has gone to zero, mark the interface down.
969 * This used to be done by configd.KernelEventMonitor, but that
970 * is inherently prone to races (rdar://problem/30810208).
971 */
972 (void) ifnet_set_flags(ifp, 0, IFF_UP);
973 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
974 dlil_post_sifflags_msg(ifp);
975 }
976
977 zfree(dlif_proto_zone, proto);
978 }
979
980 __private_extern__ void
981 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
982 {
983 #if !MACH_ASSERT
984 #pragma unused(ifp)
985 #endif
986 unsigned int type = 0;
987 int ass = 1;
988
989 switch (what) {
990 case IFNET_LCK_ASSERT_EXCLUSIVE:
991 type = LCK_RW_ASSERT_EXCLUSIVE;
992 break;
993
994 case IFNET_LCK_ASSERT_SHARED:
995 type = LCK_RW_ASSERT_SHARED;
996 break;
997
998 case IFNET_LCK_ASSERT_OWNED:
999 type = LCK_RW_ASSERT_HELD;
1000 break;
1001
1002 case IFNET_LCK_ASSERT_NOTOWNED:
1003 /* nothing to do here for RW lock; bypass assert */
1004 ass = 0;
1005 break;
1006
1007 default:
1008 panic("bad ifnet assert type: %d", what);
1009 /* NOTREACHED */
1010 }
1011 if (ass) {
1012 LCK_RW_ASSERT(&ifp->if_lock, type);
1013 }
1014 }
1015
1016 __private_extern__ void
1017 ifnet_lock_shared(struct ifnet *ifp)
1018 {
1019 lck_rw_lock_shared(&ifp->if_lock);
1020 }
1021
1022 __private_extern__ void
1023 ifnet_lock_exclusive(struct ifnet *ifp)
1024 {
1025 lck_rw_lock_exclusive(&ifp->if_lock);
1026 }
1027
1028 __private_extern__ void
1029 ifnet_lock_done(struct ifnet *ifp)
1030 {
1031 lck_rw_done(&ifp->if_lock);
1032 }
1033
1034 #if INET
1035 __private_extern__ void
1036 if_inetdata_lock_shared(struct ifnet *ifp)
1037 {
1038 lck_rw_lock_shared(&ifp->if_inetdata_lock);
1039 }
1040
1041 __private_extern__ void
1042 if_inetdata_lock_exclusive(struct ifnet *ifp)
1043 {
1044 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1045 }
1046
1047 __private_extern__ void
1048 if_inetdata_lock_done(struct ifnet *ifp)
1049 {
1050 lck_rw_done(&ifp->if_inetdata_lock);
1051 }
1052 #endif
1053
1054 __private_extern__ void
1055 if_inet6data_lock_shared(struct ifnet *ifp)
1056 {
1057 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1058 }
1059
1060 __private_extern__ void
1061 if_inet6data_lock_exclusive(struct ifnet *ifp)
1062 {
1063 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1064 }
1065
1066 __private_extern__ void
1067 if_inet6data_lock_done(struct ifnet *ifp)
1068 {
1069 lck_rw_done(&ifp->if_inet6data_lock);
1070 }
1071
1072 __private_extern__ void
1073 ifnet_head_lock_shared(void)
1074 {
1075 lck_rw_lock_shared(&ifnet_head_lock);
1076 }
1077
1078 __private_extern__ void
1079 ifnet_head_lock_exclusive(void)
1080 {
1081 lck_rw_lock_exclusive(&ifnet_head_lock);
1082 }
1083
1084 __private_extern__ void
1085 ifnet_head_done(void)
1086 {
1087 lck_rw_done(&ifnet_head_lock);
1088 }
1089
1090 __private_extern__ void
1091 ifnet_head_assert_exclusive(void)
1092 {
1093 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1094 }
1095
1096 /*
1097 * dlil_ifp_protolist
1098 * - get the list of protocols attached to the interface, or just the number
1099 * of attached protocols
1100 * - if the number returned is greater than 'list_count', truncation occurred
1101 *
1102 * Note:
1103 * - caller must already be holding ifnet lock.
1104 */
1105 static u_int32_t
1106 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1107 u_int32_t list_count)
1108 {
1109 u_int32_t count = 0;
1110 int i;
1111
1112 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1113
1114 if (ifp->if_proto_hash == NULL) {
1115 goto done;
1116 }
1117
1118 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1119 struct if_proto *proto;
1120 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1121 if (list != NULL && count < list_count) {
1122 list[count] = proto->protocol_family;
1123 }
1124 count++;
1125 }
1126 }
1127 done:
1128 return count;
1129 }
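/*
 * Callers that only need the count can pass a NULL list and a
 * list_count of 0 (if_proto_free() above does exactly that); a caller
 * that wants the actual list can then allocate an array of that size
 * and call again to fill it.
 */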
1130
1131 __private_extern__ u_int32_t
1132 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1133 {
1134 ifnet_lock_shared(ifp);
1135 count = dlil_ifp_protolist(ifp, protolist, count);
1136 ifnet_lock_done(ifp);
1137 return count;
1138 }
1139
1140 __private_extern__ void
1141 if_free_protolist(u_int32_t *list)
1142 {
1143 _FREE(list, M_TEMP);
1144 }
1145
1146 __private_extern__ int
1147 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1148 u_int32_t event_code, struct net_event_data *event_data,
1149 u_int32_t event_data_len)
1150 {
1151 struct net_event_data ev_data;
1152 struct kev_msg ev_msg;
1153
1154 bzero(&ev_msg, sizeof(ev_msg));
1155 bzero(&ev_data, sizeof(ev_data));
1156 /*
1157 * a net event always starts with a net_event_data structure
1158 * but the caller can generate a simple net event or
1159 * provide a longer event structure to post
1160 */
1161 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1162 ev_msg.kev_class = KEV_NETWORK_CLASS;
1163 ev_msg.kev_subclass = event_subclass;
1164 ev_msg.event_code = event_code;
1165
1166 if (event_data == NULL) {
1167 event_data = &ev_data;
1168 event_data_len = sizeof(struct net_event_data);
1169 }
1170
1171 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1172 event_data->if_family = ifp->if_family;
1173 event_data->if_unit = (u_int32_t)ifp->if_unit;
1174
1175 ev_msg.dv[0].data_length = event_data_len;
1176 ev_msg.dv[0].data_ptr = event_data;
1177 ev_msg.dv[1].data_length = 0;
1178
1179 bool update_generation = true;
1180 if (event_subclass == KEV_DL_SUBCLASS) {
1181 /* Don't update interface generation for frequent link quality and state changes */
1182 switch (event_code) {
1183 case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1184 case KEV_DL_RRC_STATE_CHANGED:
1185 case KEV_DL_NODE_PRESENCE:
1186 case KEV_DL_NODE_ABSENCE:
1187 case KEV_DL_MASTER_ELECTED:
1188 update_generation = false;
1189 break;
1190 default:
1191 break;
1192 }
1193 }
1194
1195 return dlil_event_internal(ifp, &ev_msg, update_generation);
1196 }
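/*
 * Usage sketch: an event with no payload beyond the common
 * net_event_data header can be posted by passing NULL/0, e.g.
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * in which case dlil_post_msg() supplies the interface name, family
 * and unit in the header itself.
 */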
1197
1198 __private_extern__ int
1199 dlil_alloc_local_stats(struct ifnet *ifp)
1200 {
1201 int ret = EINVAL;
1202 void *buf, *base, **pbuf;
1203
1204 if (ifp == NULL) {
1205 goto end;
1206 }
1207
1208 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1209 /* allocate tcpstat_local structure */
1210 buf = zalloc_flags(dlif_tcpstat_zone, Z_WAITOK | Z_ZERO);
1211 if (buf == NULL) {
1212 ret = ENOMEM;
1213 goto end;
1214 }
1215
1216 /* Get the 64-bit aligned base address for this object */
1217 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1218 sizeof(u_int64_t));
1219 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1220 ((intptr_t)buf + dlif_tcpstat_bufsize));
1221
1222 /*
1223 * Wind back a pointer size from the aligned base and
1224 * save the original address so we can free it later.
1225 */
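/*
 * (base is at least sizeof (u_int64_t) bytes past buf, so the
 * pointer stashed at base - sizeof (void *) always stays within
 * the allocation.)
 */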
1226 pbuf = (void **)((intptr_t)base - sizeof(void *));
1227 *pbuf = buf;
1228 ifp->if_tcp_stat = base;
1229
1230 /* allocate udpstat_local structure */
1231 buf = zalloc_flags(dlif_udpstat_zone, Z_WAITOK | Z_ZERO);
1232 if (buf == NULL) {
1233 ret = ENOMEM;
1234 goto end;
1235 }
1236
1237 /* Get the 64-bit aligned base address for this object */
1238 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1239 sizeof(u_int64_t));
1240 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1241 ((intptr_t)buf + dlif_udpstat_bufsize));
1242
1243 /*
1244 * Wind back a pointer size from the aligned base and
1245 * save the original address so we can free it later.
1246 */
1247 pbuf = (void **)((intptr_t)base - sizeof(void *));
1248 *pbuf = buf;
1249 ifp->if_udp_stat = base;
1250
1251 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
1252 IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
1253
1254 ret = 0;
1255 }
1256
1257 if (ifp->if_ipv4_stat == NULL) {
1258 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1259 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1260 if (ifp->if_ipv4_stat == NULL) {
1261 ret = ENOMEM;
1262 goto end;
1263 }
1264 }
1265
1266 if (ifp->if_ipv6_stat == NULL) {
1267 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1268 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1269 if (ifp->if_ipv6_stat == NULL) {
1270 ret = ENOMEM;
1271 goto end;
1272 }
1273 }
1274 end:
1275 if (ifp != NULL && ret != 0) {
1276 if (ifp->if_tcp_stat != NULL) {
1277 pbuf = (void **)
1278 ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
1279 zfree(dlif_tcpstat_zone, *pbuf);
1280 ifp->if_tcp_stat = NULL;
1281 }
1282 if (ifp->if_udp_stat != NULL) {
1283 pbuf = (void **)
1284 ((intptr_t)ifp->if_udp_stat - sizeof(void *));
1285 zfree(dlif_udpstat_zone, *pbuf);
1286 ifp->if_udp_stat = NULL;
1287 }
1288 if (ifp->if_ipv4_stat != NULL) {
1289 FREE(ifp->if_ipv4_stat, M_TEMP);
1290 ifp->if_ipv4_stat = NULL;
1291 }
1292 if (ifp->if_ipv6_stat != NULL) {
1293 FREE(ifp->if_ipv6_stat, M_TEMP);
1294 ifp->if_ipv6_stat = NULL;
1295 }
1296 }
1297
1298 return ret;
1299 }
1300
1301 static void
1302 dlil_reset_rxpoll_params(ifnet_t ifp)
1303 {
1304 ASSERT(ifp != NULL);
1305 ifnet_set_poll_cycle(ifp, NULL);
1306 ifp->if_poll_update = 0;
1307 ifp->if_poll_flags = 0;
1308 ifp->if_poll_req = 0;
1309 ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
1310 bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
1311 bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
1312 bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
1313 net_timerclear(&ifp->if_poll_mode_holdtime);
1314 net_timerclear(&ifp->if_poll_mode_lasttime);
1315 net_timerclear(&ifp->if_poll_sample_holdtime);
1316 net_timerclear(&ifp->if_poll_sample_lasttime);
1317 net_timerclear(&ifp->if_poll_dbg_lasttime);
1318 }
1319
1320 static int
1321 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp,
1322 thread_continue_t *thfunc)
1323 {
1324 boolean_t dlil_rxpoll_input;
1325 thread_continue_t func = NULL;
1326 u_int32_t limit;
1327 int error = 0;
1328
1329 dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
1330 (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));
1331
1332 /* default strategy utilizes the DLIL worker thread */
1333 inp->dlth_strategy = dlil_input_async;
1334
1335 /* NULL ifp indicates the main input thread, called at dlil_init time */
1336 if (ifp == NULL) {
1337 /*
1338 * Main input thread only.
1339 */
1340 func = dlil_main_input_thread_func;
1341 VERIFY(inp == dlil_main_input_thread);
1342 (void) strlcat(inp->dlth_name,
1343 "main_input", DLIL_THREADNAME_LEN);
1344 } else if (dlil_rxpoll_input) {
1345 /*
1346 * Legacy (non-netif) hybrid polling.
1347 */
1348 func = dlil_rxpoll_input_thread_func;
1349 VERIFY(inp != dlil_main_input_thread);
1350 (void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
1351 "%s_input_poll", if_name(ifp));
1352 } else if (net_async || (ifp->if_xflags & IFXF_LEGACY)) {
1353 /*
1354 * Asynchronous strategy.
1355 */
1356 func = dlil_input_thread_func;
1357 VERIFY(inp != dlil_main_input_thread);
1358 (void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
1359 "%s_input", if_name(ifp));
1360 } else {
1361 /*
1362 * Synchronous strategy if there's a netif below and
1363 * the device isn't capable of hybrid polling.
1364 */
1365 ASSERT(func == NULL);
1366 ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
1367 VERIFY(inp != dlil_main_input_thread);
1368 ASSERT(!inp->dlth_affinity);
1369 inp->dlth_strategy = dlil_input_sync;
1370 }
1371 VERIFY(inp->dlth_thread == THREAD_NULL);
1372
1373 /* let caller know */
1374 if (thfunc != NULL) {
1375 *thfunc = func;
1376 }
1377
1378 inp->dlth_lock_grp = lck_grp_alloc_init(inp->dlth_name,
1379 dlil_grp_attributes);
1380 lck_mtx_init(&inp->dlth_lock, inp->dlth_lock_grp, dlil_lck_attributes);
1381
1382 inp->dlth_ifp = ifp; /* NULL for main input thread */
1383 /*
1384 * For interfaces that support opportunistic polling, set the
1385 * low and high watermarks for outstanding inbound packets/bytes.
1386 * Also define freeze times for transitioning between modes
1387 * and updating the average.
1388 */
1389 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1390 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1391 if (ifp->if_xflags & IFXF_LEGACY) {
1392 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1393 }
1394 } else {
1395 limit = (u_int32_t)-1;
1396 }
1397
1398 _qinit(&inp->dlth_pkts, Q_DROPTAIL, limit, QP_MBUF);
1399 if (inp == dlil_main_input_thread) {
1400 struct dlil_main_threading_info *inpm =
1401 (struct dlil_main_threading_info *)inp;
1402 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1403 }
1404
1405 if (func == NULL) {
1406 ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
1407 ASSERT(error == 0);
1408 error = ENODEV;
1409 goto done;
1410 }
1411
1412 error = kernel_thread_start(func, inp, &inp->dlth_thread);
1413 if (error == KERN_SUCCESS) {
1414 thread_precedence_policy_data_t info;
1415 __unused kern_return_t kret;
1416
1417 bzero(&info, sizeof(info));
1418 info.importance = 0;
1419 kret = thread_policy_set(inp->dlth_thread,
1420 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
1421 THREAD_PRECEDENCE_POLICY_COUNT);
1422 ASSERT(kret == KERN_SUCCESS);
1423 /*
1424 * We create an affinity set so that the matching workloop
1425 * thread or the starter thread (for loopback) can be
1426 * scheduled on the same processor set as the input thread.
1427 */
1428 if (net_affinity) {
1429 struct thread *tp = inp->dlth_thread;
1430 u_int32_t tag;
1431 /*
1432 * Randomize to reduce the probability
1433 * of affinity tag namespace collision.
1434 */
1435 read_frandom(&tag, sizeof(tag));
1436 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1437 thread_reference(tp);
1438 inp->dlth_affinity_tag = tag;
1439 inp->dlth_affinity = TRUE;
1440 }
1441 }
1442 } else if (inp == dlil_main_input_thread) {
1443 panic_plain("%s: couldn't create main input thread", __func__);
1444 /* NOTREACHED */
1445 } else {
1446 panic_plain("%s: couldn't create %s input thread", __func__,
1447 if_name(ifp));
1448 /* NOTREACHED */
1449 }
1450 OSAddAtomic(1, &cur_dlil_input_threads);
1451
1452 done:
1453 return error;
1454 }
1455
1456 #if TEST_INPUT_THREAD_TERMINATION
1457 static int
1458 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1459 {
1460 #pragma unused(arg1, arg2)
1461 uint32_t i;
1462 int err;
1463
1464 i = if_input_thread_termination_spin;
1465
1466 err = sysctl_handle_int(oidp, &i, 0, req);
1467 if (err != 0 || req->newptr == USER_ADDR_NULL) {
1468 return err;
1469 }
1470
1471 if (net_rxpoll == 0) {
1472 return ENXIO;
1473 }
1474
1475 if_input_thread_termination_spin = i;
1476 return err;
1477 }
1478 #endif /* TEST_INPUT_THREAD_TERMINATION */
1479
1480 static void
1481 dlil_clean_threading_info(struct dlil_threading_info *inp)
1482 {
1483 lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
1484 lck_grp_free(inp->dlth_lock_grp);
1485 inp->dlth_lock_grp = NULL;
1486
1487 inp->dlth_flags = 0;
1488 inp->dlth_wtot = 0;
1489 bzero(inp->dlth_name, sizeof(inp->dlth_name));
1490 inp->dlth_ifp = NULL;
1491 VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
1492 qlimit(&inp->dlth_pkts) = 0;
1493 bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));
1494
1495 VERIFY(!inp->dlth_affinity);
1496 inp->dlth_thread = THREAD_NULL;
1497 inp->dlth_strategy = NULL;
1498 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
1499 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
1500 VERIFY(inp->dlth_affinity_tag == 0);
1501 #if IFNET_INPUT_SANITY_CHK
1502 inp->dlth_pkts_cnt = 0;
1503 #endif /* IFNET_INPUT_SANITY_CHK */
1504 }
1505
1506 static void
1507 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1508 {
1509 struct ifnet *ifp = inp->dlth_ifp;
1510 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
1511
1512 VERIFY(current_thread() == inp->dlth_thread);
1513 VERIFY(inp != dlil_main_input_thread);
1514
1515 OSAddAtomic(-1, &cur_dlil_input_threads);
1516
1517 #if TEST_INPUT_THREAD_TERMINATION
1518 { /* do something useless that won't get optimized away */
1519 uint32_t v = 1;
1520 for (uint32_t i = 0;
1521 i < if_input_thread_termination_spin;
1522 i++) {
1523 v = (i + 1) * v;
1524 }
1525 DLIL_PRINTF("the value is %d\n", v);
1526 }
1527 #endif /* TEST_INPUT_THREAD_TERMINATION */
1528
1529 lck_mtx_lock_spin(&inp->dlth_lock);
1530 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
1531 VERIFY((inp->dlth_flags & DLIL_INPUT_TERMINATE) != 0);
1532 inp->dlth_flags |= DLIL_INPUT_TERMINATE_COMPLETE;
1533 wakeup_one((caddr_t)&inp->dlth_flags);
1534 lck_mtx_unlock(&inp->dlth_lock);
1535
1536 /* free up pending packets */
1537 if (pkt.cp_mbuf != NULL) {
1538 mbuf_freem_list(pkt.cp_mbuf);
1539 }
1540
1541 /* for the extra refcnt from kernel_thread_start() */
1542 thread_deallocate(current_thread());
1543
1544 if (dlil_verbose) {
1545 DLIL_PRINTF("%s: input thread terminated\n",
1546 if_name(ifp));
1547 }
1548
1549 /* this is the end */
1550 thread_terminate(current_thread());
1551 /* NOTREACHED */
1552 }
1553
1554 static kern_return_t
1555 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1556 {
1557 thread_affinity_policy_data_t policy;
1558
1559 bzero(&policy, sizeof(policy));
1560 policy.affinity_tag = tag;
1561 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1562 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
1563 }
1564
1565 void
1566 dlil_init(void)
1567 {
1568 thread_t thread = THREAD_NULL;
1569
1570 /*
1571 * The following fields must be 64-bit aligned for atomic operations.
1572 */
1573 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1574 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1575 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1576 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1577 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1578 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1579 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1580 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1581 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1582 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1583 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1584 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1585 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1586 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1587 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1588
1589 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1590 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1591 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1592 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1593 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1594 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1595 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1596 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1597 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1598 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1599 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1600 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1601 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1602 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1603 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1604
1605 /*
1606 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1607 */
1608 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1609 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1610 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1611 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1612 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1613 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1614 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1615 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1616 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1617 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1618 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1619 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1620 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1621 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1622
1623 /*
1624 * ... as well as the mbuf checksum flags counterparts.
1625 */
1626 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1627 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1628 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1629 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1630 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1631 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1632 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1633 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1634 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1635 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1636 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1637
1638 /*
1639 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1640 */
1641 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1642 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1643
1644 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1645 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1646 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1647 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1648
1649 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1650 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1651 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1652
1653 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1654 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1655 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1656 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1657 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1658 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1659 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1660 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1661 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1662 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1663 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1664 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1665 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1666 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1667 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1668 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1669 _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
1670 _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
1671 _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
1672
1673 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1674 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1675 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1676 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1677 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1678 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1679 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1680 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
1681 _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);
1682
1683 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1684 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1685
1686 PE_parse_boot_argn("net_affinity", &net_affinity,
1687 sizeof(net_affinity));
1688
1689 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
1690
1691 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
1692
1693 PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));
1694
1695 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
1696
1697 VERIFY(dlil_pending_thread_cnt == 0);
1698 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1699 sizeof(struct dlil_ifnet_dbg);
1700 /* Enforce 64-bit alignment for dlil_ifnet structure */
1701 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
1702 dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
1703 dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);
1704
1705 dlif_tcpstat_size = sizeof(struct tcpstat_local);
1706 /* Enforce 64-bit alignment for tcpstat_local structure */
1707 dlif_tcpstat_bufsize =
1708 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
1709 dlif_tcpstat_bufsize = (uint32_t)
1710 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
1711 dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
1712 dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);
1713
1714 dlif_udpstat_size = sizeof(struct udpstat_local);
1715 /* Enforce 64-bit alignment for udpstat_local structure */
1716 dlif_udpstat_bufsize =
1717 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
1718 dlif_udpstat_bufsize = (uint32_t)
1719 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
1720 dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
1721 dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);
1722
1723 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1724
1725 TAILQ_INIT(&dlil_ifnet_head);
1726 TAILQ_INIT(&ifnet_head);
1727 TAILQ_INIT(&ifnet_detaching_head);
1728 TAILQ_INIT(&ifnet_ordered_head);
1729
1730 /* Setup the lock groups we will use */
1731 dlil_grp_attributes = lck_grp_attr_alloc_init();
1732
1733 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1734 dlil_grp_attributes);
1735 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1736 dlil_grp_attributes);
1737 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1738 dlil_grp_attributes);
1739 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1740 dlil_grp_attributes);
1741 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1742 dlil_grp_attributes);
1743
1744 /* Setup the lock attributes we will use */
1745 dlil_lck_attributes = lck_attr_alloc_init();
1746
1747 ifnet_lock_attr = lck_attr_alloc_init();
1748
1749 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1750 dlil_lck_attributes);
1751 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1752 lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);
1753
1754 /* Setup interface flow control related items */
1755 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1756
1757 /* Initialize interface address subsystem */
1758 ifa_init();
1759
1760 #if PF
1761 /* Initialize the packet filter */
1762 pfinit();
1763 #endif /* PF */
1764
1765 /* Initialize queue algorithms */
1766 classq_init();
1767
1768 /* Initialize packet schedulers */
1769 pktsched_init();
1770
1771 /* Initialize flow advisory subsystem */
1772 flowadv_init();
1773
1774 /* Initialize the pktap virtual interface */
1775 pktap_init();
1776
1777 /* Initialize the service class to dscp map */
1778 net_qos_map_init();
1779
1780 /* Initialize the interface port list */
1781 if_ports_used_init();
1782
1783 /* Initialize the interface low power mode event handler */
1784 if_low_power_evhdlr_init();
1785
1786 #if DEBUG || DEVELOPMENT
1787 /* Run self-tests */
1788 dlil_verify_sum16();
1789 #endif /* DEBUG || DEVELOPMENT */
1790
1791 /* Initialize link layer table */
1792 lltable_glbl_init();
1793
1794 /*
1795 * Create and start up the main DLIL input thread and the interface
1796 * detacher thread once everything is initialized.
1797 */
1798 dlil_incr_pending_thread_count();
1799 (void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);
1800
1801 /*
1802 * Create ifnet detacher thread.
1803 * When an interface gets detached, part of the detach processing
1804 * is delayed. The interface is added to the delayed detach list
1805 * and this thread is woken up to call ifnet_detach_final
1806 * on these interfaces.
1807 */
1808 dlil_incr_pending_thread_count();
1809 if (kernel_thread_start(ifnet_detacher_thread_func,
1810 NULL, &thread) != KERN_SUCCESS) {
1811 panic_plain("%s: couldn't create detacher thread", __func__);
1812 /* NOTREACHED */
1813 }
1814 thread_deallocate(thread);
1815
1816 /*
1817 * Wait for the kernel threads created for dlil to get
1818 * scheduled and run at least once before we proceed.
1819 */
1820 lck_mtx_lock(&dlil_thread_sync_lock);
1821 while (dlil_pending_thread_cnt != 0) {
1822 DLIL_PRINTF("%s: Waiting for all the created dlil kernel "
1823 "threads to get scheduled at least once.\n", __func__);
1824 (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
1825 (PZERO - 1), __func__, NULL);
1826 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
1827 }
1828 lck_mtx_unlock(&dlil_thread_sync_lock);
1829 DLIL_PRINTF("%s: All the created dlil kernel threads have been "
1830 "scheduled at least once. Proceeding.\n", __func__);
1831 }
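
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * the startup handshake above, reduced to its two halves.  The creator
 * bumps dlil_pending_thread_cnt once per kernel thread it spawns and
 * sleeps until the count drains; each created thread drops the count
 * once it has run (see the "embryonic" paths in the input thread
 * functions below).
 */
#if 0
	/* creator side (dlil_init) */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);

	/* created-thread side, on its first pass out of the embryonic state */
	dlil_decr_pending_thread_count();
#endif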
1832
1833 static void
1834 if_flt_monitor_busy(struct ifnet *ifp)
1835 {
1836 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1837
1838 ++ifp->if_flt_busy;
1839 VERIFY(ifp->if_flt_busy != 0);
1840 }
1841
1842 static void
1843 if_flt_monitor_unbusy(struct ifnet *ifp)
1844 {
1845 if_flt_monitor_leave(ifp);
1846 }
1847
1848 static void
1849 if_flt_monitor_enter(struct ifnet *ifp)
1850 {
1851 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1852
1853 while (ifp->if_flt_busy) {
1854 ++ifp->if_flt_waiters;
1855 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1856 (PZERO - 1), "if_flt_monitor", NULL);
1857 }
1858 if_flt_monitor_busy(ifp);
1859 }
1860
1861 static void
1862 if_flt_monitor_leave(struct ifnet *ifp)
1863 {
1864 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1865
1866 VERIFY(ifp->if_flt_busy != 0);
1867 --ifp->if_flt_busy;
1868
1869 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1870 ifp->if_flt_waiters = 0;
1871 wakeup(&ifp->if_flt_head);
1872 }
1873 }
1874
1875 __private_extern__ int
1876 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1877 interface_filter_t *filter_ref, u_int32_t flags)
1878 {
1879 int retval = 0;
1880 struct ifnet_filter *filter = NULL;
1881
1882 ifnet_head_lock_shared();
1883 /* Check that the interface is in the global list */
1884 if (!ifnet_lookup(ifp)) {
1885 retval = ENXIO;
1886 goto done;
1887 }
1888
1889 filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO);
1890 if (filter == NULL) {
1891 retval = ENOMEM;
1892 goto done;
1893 }
1894
1895 /* refcnt held above during lookup */
1896 filter->filt_flags = flags;
1897 filter->filt_ifp = ifp;
1898 filter->filt_cookie = if_filter->iff_cookie;
1899 filter->filt_name = if_filter->iff_name;
1900 filter->filt_protocol = if_filter->iff_protocol;
1901 /*
1902 * Do not install filter callbacks for internal coproc interface
1903 */
1904 if (!IFNET_IS_INTCOPROC(ifp)) {
1905 filter->filt_input = if_filter->iff_input;
1906 filter->filt_output = if_filter->iff_output;
1907 filter->filt_event = if_filter->iff_event;
1908 filter->filt_ioctl = if_filter->iff_ioctl;
1909 }
1910 filter->filt_detached = if_filter->iff_detached;
1911
1912 lck_mtx_lock(&ifp->if_flt_lock);
1913 if_flt_monitor_enter(ifp);
1914
1915 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1916 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1917
1918 if_flt_monitor_leave(ifp);
1919 lck_mtx_unlock(&ifp->if_flt_lock);
1920
1921 *filter_ref = filter;
1922
1923 /*
1924 * Bump filter count and route_generation ID to let TCP
1925 * know it shouldn't do TSO on this connection
1926 */
1927 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1928 ifnet_filter_update_tso(TRUE);
1929 }
1930 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1931 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1932 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1933 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1934 }
1935 if (dlil_verbose) {
1936 DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
1937 if_filter->iff_name);
1938 }
1939 done:
1940 ifnet_head_done();
1941 if (retval != 0 && ifp != NULL) {
1942 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1943 if_name(ifp), if_filter->iff_name, retval);
1944 }
1945 if (retval != 0 && filter != NULL) {
1946 zfree(dlif_filt_zone, filter);
1947 }
1948
1949 return retval;
1950 }
1951
1952 static int
1953 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1954 {
1955 int retval = 0;
1956
1957 if (detached == 0) {
1958 ifnet_t ifp = NULL;
1959
1960 ifnet_head_lock_shared();
1961 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1962 interface_filter_t entry = NULL;
1963
1964 lck_mtx_lock(&ifp->if_flt_lock);
1965 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1966 if (entry != filter || entry->filt_skip) {
1967 continue;
1968 }
1969 /*
1970 * We've found a match; since it's possible
1971 * that the thread gets blocked in the monitor,
1972 * we do the lock dance. Interface should
1973 * not be detached since we still have a use
1974 * count held during filter attach.
1975 */
1976 entry->filt_skip = 1; /* skip input/output */
1977 lck_mtx_unlock(&ifp->if_flt_lock);
1978 ifnet_head_done();
1979
1980 lck_mtx_lock(&ifp->if_flt_lock);
1981 if_flt_monitor_enter(ifp);
1982 LCK_MTX_ASSERT(&ifp->if_flt_lock,
1983 LCK_MTX_ASSERT_OWNED);
1984
1985 /* Remove the filter from the list */
1986 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1987 filt_next);
1988
1989 if_flt_monitor_leave(ifp);
1990 lck_mtx_unlock(&ifp->if_flt_lock);
1991 if (dlil_verbose) {
1992 DLIL_PRINTF("%s: %s filter detached\n",
1993 if_name(ifp), filter->filt_name);
1994 }
1995 goto destroy;
1996 }
1997 lck_mtx_unlock(&ifp->if_flt_lock);
1998 }
1999 ifnet_head_done();
2000
2001 /* filter parameter is not a valid filter ref */
2002 retval = EINVAL;
2003 goto done;
2004 }
2005
2006 if (dlil_verbose) {
2007 DLIL_PRINTF("%s filter detached\n", filter->filt_name);
2008 }
2009
2010 destroy:
2011
2012 /* Call the detached function if there is one */
2013 if (filter->filt_detached) {
2014 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
2015 }
2016
2017 /*
2018 * Decrease the filter count and bump the route_generation ID to let
2019 * TCP know it should reevaluate whether to do TSO
2020 */
2021 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
2022 ifnet_filter_update_tso(FALSE);
2023 }
2024
2025 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
2026
2027 /* Free the filter */
2028 zfree(dlif_filt_zone, filter);
2029 filter = NULL;
2030 done:
2031 if (retval != 0 && filter != NULL) {
2032 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
2033 filter->filt_name, retval);
2034 }
2035
2036 return retval;
2037 }
2038
2039 __private_extern__ void
2040 dlil_detach_filter(interface_filter_t filter)
2041 {
2042 if (filter == NULL) {
2043 return;
2044 }
2045 dlil_detach_filter_internal(filter, 0);
2046 }
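
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * a minimal lifecycle for the interface-filter KPI above.  The detached
 * callback signature mirrors the call made from
 * dlil_detach_filter_internal(); "example_iff_detached" and
 * "example_filter_lifecycle" are hypothetical names used only for
 * illustration.
 */
#if 0
static void
example_iff_detached(void *cookie, struct ifnet *ifp)
{
#pragma unused(cookie, ifp)
	/* last chance to release any per-filter state */
}

static void
example_filter_lifecycle(struct ifnet *ifp)
{
	struct iff_filter flt;
	interface_filter_t ref = NULL;

	bzero(&flt, sizeof(flt));
	flt.iff_name = "example";
	flt.iff_detached = example_iff_detached;

	if (dlil_attach_filter(ifp, &flt, &ref, 0) == 0) {
		/* ... filter is live until detached ... */
		dlil_detach_filter(ref);
	}
}
#endif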
2047
2048 __private_extern__ boolean_t
2049 dlil_has_ip_filter(void)
2050 {
2051 boolean_t has_filter = (net_api_stats.nas_ipf_add_count > 0);
2052 DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
2053 return has_filter;
2054 }
2055
2056 __private_extern__ boolean_t
2057 dlil_has_if_filter(struct ifnet *ifp)
2058 {
2059 boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
2060 DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
2061 return has_filter;
2062 }
2063
2064 static inline void
2065 dlil_input_wakeup(struct dlil_threading_info *inp)
2066 {
2067 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2068
2069 inp->dlth_flags |= DLIL_INPUT_WAITING;
2070 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
2071 inp->dlth_wtot++;
2072 wakeup_one((caddr_t)&inp->dlth_flags);
2073 }
2074 }
2075
2076 __attribute__((noreturn))
2077 static void
2078 dlil_main_input_thread_func(void *v, wait_result_t w)
2079 {
2080 #pragma unused(w)
2081 struct dlil_threading_info *inp = v;
2082
2083 VERIFY(inp == dlil_main_input_thread);
2084 VERIFY(inp->dlth_ifp == NULL);
2085 VERIFY(current_thread() == inp->dlth_thread);
2086
2087 lck_mtx_lock(&inp->dlth_lock);
2088 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2089 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2090 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2091 /* wake up once to get out of embryonic state */
2092 dlil_input_wakeup(inp);
2093 lck_mtx_unlock(&inp->dlth_lock);
2094 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2095 /* NOTREACHED */
2096 __builtin_unreachable();
2097 }
2098
2099 /*
2100 * Main input thread:
2101 *
2102 * a) handles all inbound packets for lo0
2103 * b) handles all inbound packets for interfaces with no dedicated
2104 * input thread (e.g. anything but Ethernet/PDP or those that support
2105 * opportunistic polling.)
2106 * c) protocol registrations
2107 * d) packet injections
2108 */
2109 __attribute__((noreturn))
2110 static void
2111 dlil_main_input_thread_cont(void *v, wait_result_t wres)
2112 {
2113 struct dlil_main_threading_info *inpm = v;
2114 struct dlil_threading_info *inp = v;
2115
2116 /* main input thread is uninterruptible */
2117 VERIFY(wres != THREAD_INTERRUPTED);
2118 lck_mtx_lock_spin(&inp->dlth_lock);
2119 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE |
2120 DLIL_INPUT_RUNNING)));
2121 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2122
2123 while (1) {
2124 struct mbuf *m = NULL, *m_loop = NULL;
2125 u_int32_t m_cnt, m_cnt_loop;
2126 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2127 boolean_t proto_req;
2128 boolean_t embryonic;
2129
2130 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2131
2132 if (__improbable(embryonic =
2133 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2134 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2135 }
2136
2137 proto_req = (inp->dlth_flags &
2138 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2139
2140 /* Packets for non-dedicated interfaces other than lo0 */
2141 m_cnt = qlen(&inp->dlth_pkts);
2142 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2143 m = pkt.cp_mbuf;
2144
2145 /* Packets exclusive to lo0 */
2146 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2147 _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
2148 m_loop = pkt.cp_mbuf;
2149
2150 inp->dlth_wtot = 0;
2151
2152 lck_mtx_unlock(&inp->dlth_lock);
2153
2154 if (__improbable(embryonic)) {
2155 dlil_decr_pending_thread_count();
2156 }
2157
2158 /*
2159 * NOTE warning %%% attention !!!!
2160 * We should think about putting in some thread-starvation
2161 * safeguards in case we deal with long chains of packets.
2162 */
2163 if (__probable(m_loop != NULL)) {
2164 dlil_input_packet_list_extended(lo_ifp, m_loop,
2165 m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
2166 }
2167
2168 if (__probable(m != NULL)) {
2169 dlil_input_packet_list_extended(NULL, m,
2170 m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
2171 }
2172
2173 if (__improbable(proto_req)) {
2174 proto_input_run();
2175 }
2176
2177 lck_mtx_lock_spin(&inp->dlth_lock);
2178 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2179 /* main input thread cannot be terminated */
2180 VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE));
2181 if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) {
2182 break;
2183 }
2184 }
2185
2186 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2187 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2188 lck_mtx_unlock(&inp->dlth_lock);
2189 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2190
2191 VERIFY(0); /* we should never get here */
2192 /* NOTREACHED */
2193 __builtin_unreachable();
2194 }
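
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * the continuation-style loop shared by all of the input threads here.
 * Work is drained under dlth_lock, then the thread asserts a wait on
 * dlth_flags and blocks with a continuation so no kernel stack is held
 * across the sleep.  "example_thread_cont" is a hypothetical name.
 */
#if 0
static void
example_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(wres)
	struct dlil_threading_info *inp = v;

	lck_mtx_lock_spin(&inp->dlth_lock);
	inp->dlth_flags |= DLIL_INPUT_RUNNING;
	/* ... dequeue from inp->dlth_pkts and process the chain ... */
	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(example_thread_cont, inp);
	/* NOTREACHED */
}
#endif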
2195
2196 /*
2197 * Input thread for interfaces with legacy input model.
2198 */
2199 __attribute__((noreturn))
2200 static void
2201 dlil_input_thread_func(void *v, wait_result_t w)
2202 {
2203 #pragma unused(w)
2204 char thread_name[MAXTHREADNAMESIZE];
2205 struct dlil_threading_info *inp = v;
2206 struct ifnet *ifp = inp->dlth_ifp;
2207
2208 VERIFY(inp != dlil_main_input_thread);
2209 VERIFY(ifp != NULL);
2210 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
2211 !(ifp->if_xflags & IFXF_LEGACY));
2212 VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
2213 !(ifp->if_xflags & IFXF_LEGACY));
2214 VERIFY(current_thread() == inp->dlth_thread);
2215
2216 /* construct the name for this thread, and then apply it */
2217 bzero(thread_name, sizeof(thread_name));
2218 (void) snprintf(thread_name, sizeof(thread_name),
2219 "dlil_input_%s", ifp->if_xname);
2220 thread_set_thread_name(inp->dlth_thread, thread_name);
2221
2222 lck_mtx_lock(&inp->dlth_lock);
2223 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2224 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2225 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2226 /* wake up once to get out of embryonic state */
2227 dlil_input_wakeup(inp);
2228 lck_mtx_unlock(&inp->dlth_lock);
2229 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2230 /* NOTREACHED */
2231 __builtin_unreachable();
2232 }
2233
2234 __attribute__((noreturn))
2235 static void
2236 dlil_input_thread_cont(void *v, wait_result_t wres)
2237 {
2238 struct dlil_threading_info *inp = v;
2239 struct ifnet *ifp = inp->dlth_ifp;
2240
2241 lck_mtx_lock_spin(&inp->dlth_lock);
2242 if (__improbable(wres == THREAD_INTERRUPTED ||
2243 (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
2244 goto terminate;
2245 }
2246
2247 VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2248 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2249
2250 while (1) {
2251 struct mbuf *m = NULL;
2252 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2253 boolean_t notify = FALSE;
2254 boolean_t embryonic;
2255 u_int32_t m_cnt;
2256
2257 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2258
2259 if (__improbable(embryonic =
2260 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2261 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2262 }
2263
2264 /*
2265 * Protocol registration and injection must always use
2266 * the main input thread; in theory the latter can utilize
2267 * the corresponding input thread that the packet arrived
2268 * on, but that requires knowing the interface in advance
2269 * (and the benefits might not be worth the trouble.)
2270 */
2271 VERIFY(!(inp->dlth_flags &
2272 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2273
2274 /* Packets for this interface */
2275 m_cnt = qlen(&inp->dlth_pkts);
2276 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2277 m = pkt.cp_mbuf;
2278
2279 inp->dlth_wtot = 0;
2280
2281 notify = dlil_input_stats_sync(ifp, inp);
2282
2283 lck_mtx_unlock(&inp->dlth_lock);
2284
2285 if (__improbable(embryonic)) {
2286 ifnet_decr_pending_thread_count(ifp);
2287 }
2288
2289 if (__improbable(notify)) {
2290 ifnet_notify_data_threshold(ifp);
2291 }
2292
2293 /*
2294 * NOTE warning %%% attention !!!!
2295 * We should think about putting in some thread-starvation
2296 * safeguards in case we deal with long chains of packets.
2297 */
2298 if (__probable(m != NULL)) {
2299 dlil_input_packet_list_extended(NULL, m,
2300 m_cnt, ifp->if_poll_mode);
2301 }
2302
2303 lck_mtx_lock_spin(&inp->dlth_lock);
2304 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2305 if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
2306 DLIL_INPUT_TERMINATE))) {
2307 break;
2308 }
2309 }
2310
2311 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2312
2313 if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
2314 terminate:
2315 lck_mtx_unlock(&inp->dlth_lock);
2316 dlil_terminate_input_thread(inp);
2317 /* NOTREACHED */
2318 } else {
2319 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2320 lck_mtx_unlock(&inp->dlth_lock);
2321 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2322 /* NOTREACHED */
2323 }
2324
2325 VERIFY(0); /* we should never get here */
2326 /* NOTREACHED */
2327 __builtin_unreachable();
2328 }
2329
2330 /*
2331 * Input thread for interfaces with opportunistic polling input model.
2332 */
2333 __attribute__((noreturn))
2334 static void
2335 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2336 {
2337 #pragma unused(w)
2338 char thread_name[MAXTHREADNAMESIZE];
2339 struct dlil_threading_info *inp = v;
2340 struct ifnet *ifp = inp->dlth_ifp;
2341
2342 VERIFY(inp != dlil_main_input_thread);
2343 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
2344 (ifp->if_xflags & IFXF_LEGACY));
2345 VERIFY(current_thread() == inp->dlth_thread);
2346
2347 /* construct the name for this thread, and then apply it */
2348 bzero(thread_name, sizeof(thread_name));
2349 (void) snprintf(thread_name, sizeof(thread_name),
2350 "dlil_input_poll_%s", ifp->if_xname);
2351 thread_set_thread_name(inp->dlth_thread, thread_name);
2352
2353 lck_mtx_lock(&inp->dlth_lock);
2354 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2355 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2356 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2357 /* wake up once to get out of embryonic state */
2358 dlil_input_wakeup(inp);
2359 lck_mtx_unlock(&inp->dlth_lock);
2360 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
2361 /* NOTREACHED */
2362 __builtin_unreachable();
2363 }
2364
2365 __attribute__((noreturn))
2366 static void
2367 dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
2368 {
2369 struct dlil_threading_info *inp = v;
2370 struct ifnet *ifp = inp->dlth_ifp;
2371 struct timespec ts;
2372
2373 lck_mtx_lock_spin(&inp->dlth_lock);
2374 if (__improbable(wres == THREAD_INTERRUPTED ||
2375 (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
2376 goto terminate;
2377 }
2378
2379 VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2380 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2381
2382 while (1) {
2383 struct mbuf *m = NULL;
2384 uint32_t m_cnt, poll_req = 0;
2385 uint64_t m_size = 0;
2386 ifnet_model_t mode;
2387 struct timespec now, delta;
2388 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2389 boolean_t notify;
2390 boolean_t embryonic;
2391 uint64_t ival;
2392
2393 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2394
2395 if (__improbable(embryonic =
2396 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2397 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2398 goto skip;
2399 }
2400
2401 if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
2402 ival = IF_RXPOLL_INTERVALTIME_MIN;
2403 }
2404
2405 /* Link parameters changed? */
2406 if (ifp->if_poll_update != 0) {
2407 ifp->if_poll_update = 0;
2408 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2409 }
2410
2411 /* Current operating mode */
2412 mode = ifp->if_poll_mode;
2413
2414 /*
2415 * Protocol registration and injection must always use
2416 * the main input thread; in theory the latter can utilize
2417 * the corresponding input thread that the packet arrived
2418 * on, but that requires knowing the interface in advance
2419 * (and the benefits might not be worth the trouble.)
2420 */
2421 VERIFY(!(inp->dlth_flags &
2422 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2423
2424 /* Total count of all packets */
2425 m_cnt = qlen(&inp->dlth_pkts);
2426
2427 /* Total bytes of all packets */
2428 m_size = qsize(&inp->dlth_pkts);
2429
2430 /* Packets for this interface */
2431 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2432 m = pkt.cp_mbuf;
2433 VERIFY(m != NULL || m_cnt == 0);
2434
2435 nanouptime(&now);
2436 if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
2437 *(&ifp->if_poll_sample_lasttime) = *(&now);
2438 }
2439
2440 net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
2441 if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
2442 u_int32_t ptot, btot;
2443
2444 /* Accumulate statistics for current sampling */
2445 PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
2446
2447 if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
2448 goto skip;
2449 }
2450
2451 *(&ifp->if_poll_sample_lasttime) = *(&now);
2452
2453 /* Calculate min/max of inbound bytes */
2454 btot = (u_int32_t)ifp->if_poll_sstats.bytes;
2455 if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
2456 ifp->if_rxpoll_bmin = btot;
2457 }
2458 if (btot > ifp->if_rxpoll_bmax) {
2459 ifp->if_rxpoll_bmax = btot;
2460 }
2461
2462 /* Calculate EWMA of inbound bytes */
2463 DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
2464
2465 /* Calculate min/max of inbound packets */
2466 ptot = (u_int32_t)ifp->if_poll_sstats.packets;
2467 if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
2468 ifp->if_rxpoll_pmin = ptot;
2469 }
2470 if (ptot > ifp->if_rxpoll_pmax) {
2471 ifp->if_rxpoll_pmax = ptot;
2472 }
2473
2474 /* Calculate EWMA of inbound packets */
2475 DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
2476
2477 /* Reset sampling statistics */
2478 PKTCNTR_CLEAR(&ifp->if_poll_sstats);
2479
2480 /* Calculate EWMA of wakeup requests */
2481 DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot,
2482 if_rxpoll_decay);
2483 inp->dlth_wtot = 0;
2484
2485 if (dlil_verbose) {
2486 if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
2487 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2488 }
2489 net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
2490 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2491 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2492 DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
2493 "limits [%d/%d], wreq avg %d "
2494 "limits [%d/%d], bytes avg %d "
2495 "limits [%d/%d]\n", if_name(ifp),
2496 (ifp->if_poll_mode ==
2497 IFNET_MODEL_INPUT_POLL_ON) ?
2498 "ON" : "OFF", ifp->if_rxpoll_pavg,
2499 ifp->if_rxpoll_pmax,
2500 ifp->if_rxpoll_plowat,
2501 ifp->if_rxpoll_phiwat,
2502 ifp->if_rxpoll_wavg,
2503 ifp->if_rxpoll_wlowat,
2504 ifp->if_rxpoll_whiwat,
2505 ifp->if_rxpoll_bavg,
2506 ifp->if_rxpoll_blowat,
2507 ifp->if_rxpoll_bhiwat);
2508 }
2509 }
2510
2511 /* Perform mode transition, if necessary */
2512 if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
2513 *(&ifp->if_poll_mode_lasttime) = *(&now);
2514 }
2515
2516 net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
2517 if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
2518 goto skip;
2519 }
2520
2521 if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
2522 ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
2523 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
2524 mode = IFNET_MODEL_INPUT_POLL_OFF;
2525 } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
2526 (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
2527 ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
2528 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
2529 mode = IFNET_MODEL_INPUT_POLL_ON;
2530 }
2531
2532 if (mode != ifp->if_poll_mode) {
2533 ifp->if_poll_mode = mode;
2534 *(&ifp->if_poll_mode_lasttime) = *(&now);
2535 poll_req++;
2536 }
2537 }
2538 skip:
2539 notify = dlil_input_stats_sync(ifp, inp);
2540
2541 lck_mtx_unlock(&inp->dlth_lock);
2542
2543 if (__improbable(embryonic)) {
2544 ifnet_decr_pending_thread_count(ifp);
2545 }
2546
2547 if (__improbable(notify)) {
2548 ifnet_notify_data_threshold(ifp);
2549 }
2550
2551 /*
2552 * If there's a mode change and the interface is still attached,
2553 * perform a downcall to the driver for the new mode. Also
2554 * hold an IO refcnt on the interface to prevent it from
2555 * being detached (it will be released below.)
2556 */
2557 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2558 struct ifnet_model_params p = {
2559 .model = mode, .reserved = { 0 }
2560 };
2561 errno_t err;
2562
2563 if (dlil_verbose) {
2564 DLIL_PRINTF("%s: polling is now %s, "
2565 "pkts avg %d max %d limits [%d/%d], "
2566 "wreq avg %d limits [%d/%d], "
2567 "bytes avg %d limits [%d/%d]\n",
2568 if_name(ifp),
2569 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2570 "ON" : "OFF", ifp->if_rxpoll_pavg,
2571 ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
2572 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
2573 ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
2574 ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
2575 ifp->if_rxpoll_bhiwat);
2576 }
2577
2578 if ((err = ((*ifp->if_input_ctl)(ifp,
2579 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
2580 DLIL_PRINTF("%s: error setting polling mode "
2581 "to %s (%d)\n", if_name(ifp),
2582 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2583 "ON" : "OFF", err);
2584 }
2585
2586 switch (mode) {
2587 case IFNET_MODEL_INPUT_POLL_OFF:
2588 ifnet_set_poll_cycle(ifp, NULL);
2589 ifp->if_rxpoll_offreq++;
2590 if (err != 0) {
2591 ifp->if_rxpoll_offerr++;
2592 }
2593 break;
2594
2595 case IFNET_MODEL_INPUT_POLL_ON:
2596 net_nsectimer(&ival, &ts);
2597 ifnet_set_poll_cycle(ifp, &ts);
2598 ifnet_poll(ifp);
2599 ifp->if_rxpoll_onreq++;
2600 if (err != 0) {
2601 ifp->if_rxpoll_onerr++;
2602 }
2603 break;
2604
2605 default:
2606 VERIFY(0);
2607 /* NOTREACHED */
2608 }
2609
2610 /* Release the IO refcnt */
2611 ifnet_decr_iorefcnt(ifp);
2612 }
2613
2614 /*
2615 * NOTE warning %%% attention !!!!
2616 * We should think about putting in some thread-starvation
2617 * safeguards in case we deal with long chains of packets.
2618 */
2619 if (__probable(m != NULL)) {
2620 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2621 }
2622
2623 lck_mtx_lock_spin(&inp->dlth_lock);
2624 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2625 if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
2626 DLIL_INPUT_TERMINATE))) {
2627 break;
2628 }
2629 }
2630
2631 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2632
2633 if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
2634 terminate:
2635 lck_mtx_unlock(&inp->dlth_lock);
2636 dlil_terminate_input_thread(inp);
2637 /* NOTREACHED */
2638 } else {
2639 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2640 lck_mtx_unlock(&inp->dlth_lock);
2641 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
2642 inp);
2643 /* NOTREACHED */
2644 }
2645
2646 VERIFY(0); /* we should never get here */
2647 /* NOTREACHED */
2648 __builtin_unreachable();
2649 }
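
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * the poll-mode decision from the loop above, in isolation.  Polling is
 * turned off when both the packet and byte EWMAs fall to their low
 * watermarks, and turned on when the packet EWMA reaches its high
 * watermark along with either the byte or wakeup-request EWMA.
 * "example_poll_mode" is a hypothetical helper name.
 */
#if 0
static ifnet_model_t
example_poll_mode(struct ifnet *ifp)
{
	if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
	    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat) {
		return IFNET_MODEL_INPUT_POLL_OFF;
	}
	if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
	    (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
	    ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat)) {
		return IFNET_MODEL_INPUT_POLL_ON;
	}
	return ifp->if_poll_mode;	/* no transition */
}
#endif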
2650
2651 errno_t
2652 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
2653 {
2654 if (p != NULL) {
2655 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2656 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2657 return EINVAL;
2658 }
2659 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2660 p->packets_lowat >= p->packets_hiwat) {
2661 return EINVAL;
2662 }
2663 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2664 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2665 return EINVAL;
2666 }
2667 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2668 p->bytes_lowat >= p->bytes_hiwat) {
2669 return EINVAL;
2670 }
2671 if (p->interval_time != 0 &&
2672 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
2673 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2674 }
2675 }
2676 return 0;
2677 }
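
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * a self-consistent parameter set that passes the validation above.
 * Each non-zero low watermark is strictly below its high watermark, and
 * a zero field means "auto-tune" (see dlil_rxpoll_update_params()).
 * The values themselves are arbitrary examples.
 */
#if 0
	struct ifnet_poll_params example_params = {
		.packets_lowat = 8,
		.packets_hiwat = 64,
		.bytes_lowat = 2 * 1024,
		.bytes_hiwat = 64 * 1024,
		.packets_limit = 0,	/* auto-tune */
		.interval_time = 0,	/* auto-tune */
	};
#endif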
2678
2679 void
2680 dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2681 {
2682 u_int64_t sample_holdtime, inbw;
2683
2684 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2685 sample_holdtime = 0; /* polling is disabled */
2686 ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
2687 ifp->if_rxpoll_blowat = 0;
2688 ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
2689 ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
2690 ifp->if_rxpoll_plim = 0;
2691 ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2692 } else {
2693 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2694 u_int64_t ival;
2695 unsigned int n, i;
2696
2697 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2698 if (inbw < rxpoll_tbl[i].speed) {
2699 break;
2700 }
2701 n = i;
2702 }
2703 /* auto-tune if caller didn't specify a value */
2704 plowat = ((p == NULL || p->packets_lowat == 0) ?
2705 rxpoll_tbl[n].plowat : p->packets_lowat);
2706 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2707 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2708 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2709 rxpoll_tbl[n].blowat : p->bytes_lowat);
2710 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2711 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2712 plim = ((p == NULL || p->packets_limit == 0) ?
2713 if_rxpoll_max : p->packets_limit);
2714 ival = ((p == NULL || p->interval_time == 0) ?
2715 if_rxpoll_interval_time : p->interval_time);
2716
2717 VERIFY(plowat != 0 && phiwat != 0);
2718 VERIFY(blowat != 0 && bhiwat != 0);
2719 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2720
2721 sample_holdtime = if_rxpoll_sample_holdtime;
2722 ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
2723 ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
2724 ifp->if_rxpoll_plowat = plowat;
2725 ifp->if_rxpoll_phiwat = phiwat;
2726 ifp->if_rxpoll_blowat = blowat;
2727 ifp->if_rxpoll_bhiwat = bhiwat;
2728 ifp->if_rxpoll_plim = plim;
2729 ifp->if_rxpoll_ival = ival;
2730 }
2731
2732 net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
2733 net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
2734
2735 if (dlil_verbose) {
2736 DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
2737 "poll interval %llu nsec, pkts per poll %u, "
2738 "pkt limits [%u/%u], wreq limits [%u/%u], "
2739 "bytes limits [%u/%u]\n", if_name(ifp),
2740 inbw, sample_holdtime, ifp->if_rxpoll_ival,
2741 ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
2742 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
2743 ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
2744 ifp->if_rxpoll_bhiwat);
2745 }
2746 }
2747
2748 /*
2749 * Must be called on an attached ifnet (caller is expected to check.)
2750 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2751 */
2752 errno_t
2753 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2754 boolean_t locked)
2755 {
2756 errno_t err;
2757 struct dlil_threading_info *inp;
2758
2759 VERIFY(ifp != NULL);
2760 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2761 return ENXIO;
2762 }
2763 err = dlil_rxpoll_validate_params(p);
2764 if (err != 0) {
2765 return err;
2766 }
2767
2768 if (!locked) {
2769 lck_mtx_lock(&inp->dlth_lock);
2770 }
2771 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2772 /*
2773 * Normally, we'd reset the parameters to the auto-tuned values
2774 * if the input thread detects a change in link rate. If the
2775 * driver provides its own parameters right after the link rate
2776 * changes, but before the input thread gets to run, we want to
2777 * make sure to keep the driver's values. Clearing if_poll_update
2778 * will achieve that.
2779 */
2780 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2781 ifp->if_poll_update = 0;
2782 }
2783 dlil_rxpoll_update_params(ifp, p);
2784 if (!locked) {
2785 lck_mtx_unlock(&inp->dlth_lock);
2786 }
2787 return 0;
2788 }
2789
2790 /*
2791 * Must be called on an attached ifnet (caller is expected to check.)
2792 */
2793 errno_t
2794 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2795 {
2796 struct dlil_threading_info *inp;
2797
2798 VERIFY(ifp != NULL && p != NULL);
2799 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2800 return ENXIO;
2801 }
2802
2803 bzero(p, sizeof(*p));
2804
2805 lck_mtx_lock(&inp->dlth_lock);
2806 p->packets_limit = ifp->if_rxpoll_plim;
2807 p->packets_lowat = ifp->if_rxpoll_plowat;
2808 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2809 p->bytes_lowat = ifp->if_rxpoll_blowat;
2810 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2811 p->interval_time = ifp->if_rxpoll_ival;
2812 lck_mtx_unlock(&inp->dlth_lock);
2813
2814 return 0;
2815 }
2816
2817 errno_t
2818 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2819 const struct ifnet_stat_increment_param *s)
2820 {
2821 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2822 }
2823
2824 errno_t
2825 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2826 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2827 {
2828 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2829 }
2830
2831 errno_t
2832 ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2833 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2834 {
2835 return ifnet_input_common(ifp, m_head, m_tail, s,
2836 (m_head != NULL), TRUE);
2837 }
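
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * how a hypothetical driver receive path might hand a chain to the
 * stack via the extended variant, which requires the caller to supply
 * the chain tail and the packet/byte counts.  "example_driver_rx" and
 * its arguments are assumed names for illustration only.
 */
#if 0
static void
example_driver_rx(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof(s));
	s.packets_in = cnt;
	s.bytes_in = bytes;
	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
}
#endif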
2838
2839 static errno_t
2840 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2841 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2842 {
2843 dlil_input_func input_func;
2844 struct ifnet_stat_increment_param _s;
2845 u_int32_t m_cnt = 0, m_size = 0;
2846 struct mbuf *last;
2847 errno_t err = 0;
2848
2849 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2850 if (m_head != NULL) {
2851 mbuf_freem_list(m_head);
2852 }
2853 return EINVAL;
2854 }
2855
2856 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2857 VERIFY(m_tail == NULL || ext);
2858 VERIFY(s != NULL || !ext);
2859
2860 /*
2861 * Drop the packet(s) if the parameters are invalid, or if the
2862 * interface is no longer attached; else hold an IO refcnt to
2863 * prevent it from being detached (will be released below.)
2864 */
2865 if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
2866 if (m_head != NULL) {
2867 mbuf_freem_list(m_head);
2868 }
2869 return EINVAL;
2870 }
2871
2872 input_func = ifp->if_input_dlil;
2873 VERIFY(input_func != NULL);
2874
2875 if (m_tail == NULL) {
2876 last = m_head;
2877 while (m_head != NULL) {
2878 #if IFNET_INPUT_SANITY_CHK
2879 if (__improbable(dlil_input_sanity_check != 0)) {
2880 DLIL_INPUT_CHECK(last, ifp);
2881 }
2882 #endif /* IFNET_INPUT_SANITY_CHK */
2883 m_cnt++;
2884 m_size += m_length(last);
2885 if (mbuf_nextpkt(last) == NULL) {
2886 break;
2887 }
2888 last = mbuf_nextpkt(last);
2889 }
2890 m_tail = last;
2891 } else {
2892 #if IFNET_INPUT_SANITY_CHK
2893 if (__improbable(dlil_input_sanity_check != 0)) {
2894 last = m_head;
2895 while (1) {
2896 DLIL_INPUT_CHECK(last, ifp);
2897 m_cnt++;
2898 m_size += m_length(last);
2899 if (mbuf_nextpkt(last) == NULL) {
2900 break;
2901 }
2902 last = mbuf_nextpkt(last);
2903 }
2904 } else {
2905 m_cnt = s->packets_in;
2906 m_size = s->bytes_in;
2907 last = m_tail;
2908 }
2909 #else
2910 m_cnt = s->packets_in;
2911 m_size = s->bytes_in;
2912 last = m_tail;
2913 #endif /* IFNET_INPUT_SANITY_CHK */
2914 }
2915
2916 if (last != m_tail) {
2917 panic_plain("%s: invalid input packet chain for %s, "
2918 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2919 m_tail, last);
2920 }
2921
2922 /*
2923 * Assert packet count only for the extended variant, for backwards
2924 * compatibility, since this came directly from the device driver.
2925 * Relax this assertion for input bytes, as the driver may have
2926 * included the link-layer headers in the computation; hence
2927 * m_size is just an approximation.
2928 */
2929 if (ext && s->packets_in != m_cnt) {
2930 panic_plain("%s: input packet count mismatch for %s, "
2931 "%d instead of %d\n", __func__, if_name(ifp),
2932 s->packets_in, m_cnt);
2933 }
2934
2935 if (s == NULL) {
2936 bzero(&_s, sizeof(_s));
2937 s = &_s;
2938 } else {
2939 _s = *s;
2940 }
2941 _s.packets_in = m_cnt;
2942 _s.bytes_in = m_size;
2943
2944 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2945
2946 if (ifp != lo_ifp) {
2947 /* Release the IO refcnt */
2948 ifnet_datamov_end(ifp);
2949 }
2950
2951 return err;
2952 }
2953
2954
2955 errno_t
2956 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2957 {
2958 return ifp->if_output(ifp, m);
2959 }
2960
2961 errno_t
2962 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2963 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2964 boolean_t poll, struct thread *tp)
2965 {
2966 struct dlil_threading_info *inp = ifp->if_inp;
2967
2968 if (__improbable(inp == NULL)) {
2969 inp = dlil_main_input_thread;
2970 }
2971
2972 return inp->dlth_strategy(inp, ifp, m_head, m_tail, s, poll, tp);
2973 }
2974
2975 static errno_t
2976 dlil_input_async(struct dlil_threading_info *inp,
2977 struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2978 const struct ifnet_stat_increment_param *s, boolean_t poll,
2979 struct thread *tp)
2980 {
2981 u_int32_t m_cnt = s->packets_in;
2982 u_int32_t m_size = s->bytes_in;
2983 boolean_t notify = FALSE;
2984
2985 /*
2986 * If there is a matching DLIL input thread associated with an
2987 * affinity set, associate this thread with the same set. We
2988 * will only do this once.
2989 */
2990 lck_mtx_lock_spin(&inp->dlth_lock);
2991 if (inp != dlil_main_input_thread && inp->dlth_affinity && tp != NULL &&
2992 ((!poll && inp->dlth_driver_thread == THREAD_NULL) ||
2993 (poll && inp->dlth_poller_thread == THREAD_NULL))) {
2994 u_int32_t tag = inp->dlth_affinity_tag;
2995
2996 if (poll) {
2997 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
2998 inp->dlth_poller_thread = tp;
2999 } else {
3000 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
3001 inp->dlth_driver_thread = tp;
3002 }
3003 lck_mtx_unlock(&inp->dlth_lock);
3004
3005 /* Associate the current thread with the new affinity tag */
3006 (void) dlil_affinity_set(tp, tag);
3007
3008 /*
3009 * Take a reference on the current thread; during detach,
3010 * we will need to refer to it in order to tear down its
3011 * affinity.
3012 */
3013 thread_reference(tp);
3014 lck_mtx_lock_spin(&inp->dlth_lock);
3015 }
3016
3017 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
3018
3019 /*
3020 * Because of loopbacked multicast we cannot stuff the ifp in
3021 * the rcvif of the packet header: loopback (lo0) packets use a
3022 * dedicated list so that we can later associate them with lo_ifp
3023 * on their way up the stack. Packets for other interfaces without
3024 * dedicated input threads go to the regular list.
3025 */
3026 if (m_head != NULL) {
3027 classq_pkt_t head, tail;
3028 CLASSQ_PKT_INIT_MBUF(&head, m_head);
3029 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3030 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
3031 struct dlil_main_threading_info *inpm =
3032 (struct dlil_main_threading_info *)inp;
3033 _addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
3034 m_cnt, m_size);
3035 } else {
3036 _addq_multi(&inp->dlth_pkts, &head, &tail,
3037 m_cnt, m_size);
3038 }
3039 }
3040
3041 #if IFNET_INPUT_SANITY_CHK
3042 if (__improbable(dlil_input_sanity_check != 0)) {
3043 u_int32_t count = 0, size = 0;
3044 struct mbuf *m0;
3045
3046 for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
3047 size += m_length(m0);
3048 count++;
3049 }
3050
3051 if (count != m_cnt) {
3052 panic_plain("%s: invalid total packet count %u "
3053 "(expected %u)\n", if_name(ifp), count, m_cnt);
3054 /* NOTREACHED */
3055 __builtin_unreachable();
3056 } else if (size != m_size) {
3057 panic_plain("%s: invalid total packet size %u "
3058 "(expected %u)\n", if_name(ifp), size, m_size);
3059 /* NOTREACHED */
3060 __builtin_unreachable();
3061 }
3062
3063 inp->dlth_pkts_cnt += m_cnt;
3064 }
3065 #endif /* IFNET_INPUT_SANITY_CHK */
3066
3067 dlil_input_stats_add(s, inp, ifp, poll);
3068 /*
3069 * If we're using the main input thread, synchronize the
3070 * stats now since we have the interface context. All
3071 * other cases involving dedicated input threads will
3072 * have their stats synchronized there.
3073 */
3074 if (inp == dlil_main_input_thread) {
3075 notify = dlil_input_stats_sync(ifp, inp);
3076 }
3077
3078 dlil_input_wakeup(inp);
3079 lck_mtx_unlock(&inp->dlth_lock);
3080
3081 if (notify) {
3082 ifnet_notify_data_threshold(ifp);
3083 }
3084
3085 return 0;
3086 }
3087
3088 static errno_t
3089 dlil_input_sync(struct dlil_threading_info *inp,
3090 struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
3091 const struct ifnet_stat_increment_param *s, boolean_t poll,
3092 struct thread *tp)
3093 {
3094 #pragma unused(tp)
3095 u_int32_t m_cnt = s->packets_in;
3096 u_int32_t m_size = s->bytes_in;
3097 boolean_t notify = FALSE;
3098 classq_pkt_t head, tail;
3099
3100 ASSERT(inp != dlil_main_input_thread);
3101
3102 /* XXX: should we just assert instead? */
3103 if (__improbable(m_head == NULL)) {
3104 return 0;
3105 }
3106
3107 CLASSQ_PKT_INIT_MBUF(&head, m_head);
3108 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3109
3110 lck_mtx_lock_spin(&inp->dlth_lock);
3111 _addq_multi(&inp->dlth_pkts, &head, &tail, m_cnt, m_size);
3112
3113 #if IFNET_INPUT_SANITY_CHK
3114 if (__improbable(dlil_input_sanity_check != 0)) {
3115 u_int32_t count = 0, size = 0;
3116 struct mbuf *m0;
3117
3118 for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
3119 size += m_length(m0);
3120 count++;
3121 }
3122
3123 if (count != m_cnt) {
3124 panic_plain("%s: invalid total packet count %u "
3125 "(expected %u)\n", if_name(ifp), count, m_cnt);
3126 /* NOTREACHED */
3127 __builtin_unreachable();
3128 } else if (size != m_size) {
3129 panic_plain("%s: invalid total packet size %u "
3130 "(expected %u)\n", if_name(ifp), size, m_size);
3131 /* NOTREACHED */
3132 __builtin_unreachable();
3133 }
3134
3135 inp->dlth_pkts_cnt += m_cnt;
3136 }
3137 #endif /* IFNET_INPUT_SANITY_CHK */
3138
3139 dlil_input_stats_add(s, inp, ifp, poll);
3140
3141 m_cnt = qlen(&inp->dlth_pkts);
3142 _getq_all(&inp->dlth_pkts, &head, NULL, NULL, NULL);
3143
3144 notify = dlil_input_stats_sync(ifp, inp);
3145
3146 lck_mtx_unlock(&inp->dlth_lock);
3147
3148 if (notify) {
3149 ifnet_notify_data_threshold(ifp);
3150 }
3151
3152 /*
3153 * NOTE warning %%% attention !!!!
3154 * We should think about putting in some thread-starvation
3155 * safeguards in case we deal with long chains of packets.
3156 */
3157 if (head.cp_mbuf != NULL) {
3158 dlil_input_packet_list_extended(NULL, head.cp_mbuf,
3159 m_cnt, ifp->if_poll_mode);
3160 }
3161
3162 return 0;
3163 }
3164
3165
3166 static void
3167 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
3168 {
3169 if (!(ifp->if_eflags & IFEF_TXSTART)) {
3170 return;
3171 }
3172 /*
3173 * If the starter thread is inactive, signal it to do work,
3174 * unless the interface is being flow controlled from below,
3175 * e.g. a virtual interface being flow controlled by a real
3176 * network interface beneath it, or it's been disabled via
3177 * a call to ifnet_disable_output().
3178 */
3179 lck_mtx_lock_spin(&ifp->if_start_lock);
3180 if (resetfc) {
3181 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
3182 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
3183 lck_mtx_unlock(&ifp->if_start_lock);
3184 return;
3185 }
3186 ifp->if_start_req++;
3187 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
3188 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
3189 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
3190 ifp->if_start_delayed == 0)) {
3191 (void) wakeup_one((caddr_t)&ifp->if_start_thread);
3192 }
3193 lck_mtx_unlock(&ifp->if_start_lock);
3194 }
3195
3196 void
3197 ifnet_start(struct ifnet *ifp)
3198 {
3199 ifnet_start_common(ifp, FALSE);
3200 }
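
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * a minimal if_start callback for an IFEF_TXSTART interface, i.e. the
 * routine that the starter thread below invokes.  ifnet_dequeue() is
 * assumed here to be the usual KPI such drivers use to pull packets off
 * the interface send queue; "example_if_start" is a hypothetical name.
 */
#if 0
static void
example_if_start(struct ifnet *ifp)
{
	struct mbuf *m = NULL;

	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0) {
			break;		/* queue empty or output disabled */
		}
		/* ... hand "m" to the hardware ... */
	}
}
#endif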
3201
3202 __attribute__((noreturn))
3203 static void
3204 ifnet_start_thread_func(void *v, wait_result_t w)
3205 {
3206 #pragma unused(w)
3207 struct ifnet *ifp = v;
3208 char thread_name[MAXTHREADNAMESIZE];
3209
3210 /* Construct the name for this thread, and then apply it. */
3211 bzero(thread_name, sizeof(thread_name));
3212 (void) snprintf(thread_name, sizeof(thread_name),
3213 "ifnet_start_%s", ifp->if_xname);
3214 ASSERT(ifp->if_start_thread == current_thread());
3215 thread_set_thread_name(current_thread(), thread_name);
3216
3217 /*
3218 * Treat the dedicated starter thread for lo0 as equivalent to
3219 * the driver workloop thread; if net_affinity is enabled for
3220 * the main input thread, associate this starter thread with it
3221 * by binding them with the same affinity tag. This is done
3222 * only once (as we only have one lo_ifp which never goes away.)
3223 */
3224 if (ifp == lo_ifp) {
3225 struct dlil_threading_info *inp = dlil_main_input_thread;
3226 struct thread *tp = current_thread();
3227
3228 lck_mtx_lock(&inp->dlth_lock);
3229 if (inp->dlth_affinity) {
3230 u_int32_t tag = inp->dlth_affinity_tag;
3231
3232 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
3233 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
3234 inp->dlth_driver_thread = tp;
3235 lck_mtx_unlock(&inp->dlth_lock);
3236
3237 /* Associate this thread with the affinity tag */
3238 (void) dlil_affinity_set(tp, tag);
3239 } else {
3240 lck_mtx_unlock(&inp->dlth_lock);
3241 }
3242 }
3243
3244 lck_mtx_lock(&ifp->if_start_lock);
3245 VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
3246 (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
3247 ifp->if_start_embryonic = 1;
3248 /* wake up once to get out of embryonic state */
3249 ifp->if_start_req++;
3250 (void) wakeup_one((caddr_t)&ifp->if_start_thread);
3251 lck_mtx_unlock(&ifp->if_start_lock);
3252 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3253 /* NOTREACHED */
3254 __builtin_unreachable();
3255 }
3256
3257 __attribute__((noreturn))
3258 static void
3259 ifnet_start_thread_cont(void *v, wait_result_t wres)
3260 {
3261 struct ifnet *ifp = v;
3262 struct ifclassq *ifq = &ifp->if_snd;
3263
3264 lck_mtx_lock_spin(&ifp->if_start_lock);
3265 if (__improbable(wres == THREAD_INTERRUPTED ||
3266 ifp->if_start_thread == THREAD_NULL)) {
3267 goto terminate;
3268 }
3269
3270 if (__improbable(ifp->if_start_embryonic)) {
3271 ifp->if_start_embryonic = 0;
3272 lck_mtx_unlock(&ifp->if_start_lock);
3273 ifnet_decr_pending_thread_count(ifp);
3274 lck_mtx_lock_spin(&ifp->if_start_lock);
3275 goto skip;
3276 }
3277
3278 ifp->if_start_active = 1;
3279
3280 /*
3281 * Keep on servicing until there are no more requests.
3282 */
3283 for (;;) {
3284 u_int32_t req = ifp->if_start_req;
3285 if (!IFCQ_IS_EMPTY(ifq) &&
3286 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3287 ifp->if_start_delayed == 0 &&
3288 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
3289 (ifp->if_eflags & IFEF_DELAY_START)) {
3290 ifp->if_start_delayed = 1;
3291 ifnet_start_delayed++;
3292 break;
3293 } else {
3294 ifp->if_start_delayed = 0;
3295 }
3296 lck_mtx_unlock(&ifp->if_start_lock);
3297
3298 /*
3299 * If no longer attached, don't call start because ifp
3300 * is being destroyed; else hold an IO refcnt to
3301 * prevent the interface from being detached (will be
3302 * released below.)
3303 */
3304 if (!ifnet_datamov_begin(ifp)) {
3305 lck_mtx_lock_spin(&ifp->if_start_lock);
3306 break;
3307 }
3308
3309 /* invoke the driver's start routine */
3310 ((*ifp->if_start)(ifp));
3311
3312 /*
3313 * Release the io ref count taken above.
3314 */
3315 ifnet_datamov_end(ifp);
3316
3317 lck_mtx_lock_spin(&ifp->if_start_lock);
3318
3319 /*
3320 * If there's no pending request or if the
3321 * interface has been disabled, we're done.
3322 */
3323 if (req == ifp->if_start_req ||
3324 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
3325 break;
3326 }
3327 }
3328 skip:
3329 ifp->if_start_req = 0;
3330 ifp->if_start_active = 0;
3331
3332
3333 if (__probable(ifp->if_start_thread != THREAD_NULL)) {
3334 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3335 struct timespec delay_start_ts;
3336 struct timespec *ts;
3337
3338 /*
3339 * Wake up N ns from now if rate-controlled by TBR, and if
3340 * there are still packets in the send queue which haven't
3341 * been dequeued so far; else sleep indefinitely (ts = NULL)
3342 * until ifnet_start() is called again.
3343 */
3344 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
3345 &ifp->if_start_cycle : NULL);
3346
3347 if (ts == NULL && ifp->if_start_delayed == 1) {
3348 delay_start_ts.tv_sec = 0;
3349 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
3350 ts = &delay_start_ts;
3351 }
3352
3353 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
3354 ts = NULL;
3355 }
3356
3357 if (__improbable(ts != NULL)) {
3358 clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
3359 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3360 }
3361
3362 (void) assert_wait_deadline(&ifp->if_start_thread,
3363 THREAD_UNINT, deadline);
3364 lck_mtx_unlock(&ifp->if_start_lock);
3365 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3366 /* NOTREACHED */
3367 } else {
3368 terminate:
3369 /* interface is detached? */
3370 ifnet_set_start_cycle(ifp, NULL);
3371 lck_mtx_unlock(&ifp->if_start_lock);
3372 ifnet_purge(ifp);
3373
3374 if (dlil_verbose) {
3375 DLIL_PRINTF("%s: starter thread terminated\n",
3376 if_name(ifp));
3377 }
3378
3379 /* for the extra refcnt from kernel_thread_start() */
3380 thread_deallocate(current_thread());
3381 /* this is the end */
3382 thread_terminate(current_thread());
3383 /* NOTREACHED */
3384 }
3385
3386 /* must never get here */
3387 VERIFY(0);
3388 /* NOTREACHED */
3389 __builtin_unreachable();
3390 }
3391
3392 void
3393 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
3394 {
3395 if (ts == NULL) {
3396 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
3397 } else {
3398 *(&ifp->if_start_cycle) = *ts;
3399 }
3400
3401 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3402 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
3403 if_name(ifp), ts->tv_nsec);
3404 }
3405 }
3406
3407 static inline void
3408 ifnet_poll_wakeup(struct ifnet *ifp)
3409 {
3410 LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
3411
3412 ifp->if_poll_req++;
3413 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
3414 ifp->if_poll_thread != THREAD_NULL) {
3415 wakeup_one((caddr_t)&ifp->if_poll_thread);
3416 }
3417 }
3418
3419 void
3420 ifnet_poll(struct ifnet *ifp)
3421 {
3422 /*
3423 * If the poller thread is inactive, signal it to do work.
3424 */
3425 lck_mtx_lock_spin(&ifp->if_poll_lock);
3426 ifnet_poll_wakeup(ifp);
3427 lck_mtx_unlock(&ifp->if_poll_lock);
3428 }
3429
3430 __attribute__((noreturn))
3431 static void
3432 ifnet_poll_thread_func(void *v, wait_result_t w)
3433 {
3434 #pragma unused(w)
3435 char thread_name[MAXTHREADNAMESIZE];
3436 struct ifnet *ifp = v;
3437
3438 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3439 VERIFY(current_thread() == ifp->if_poll_thread);
3440
3441 /* construct the name for this thread, and then apply it */
3442 bzero(thread_name, sizeof(thread_name));
3443 (void) snprintf(thread_name, sizeof(thread_name),
3444 "ifnet_poller_%s", ifp->if_xname);
3445 thread_set_thread_name(ifp->if_poll_thread, thread_name);
3446
3447 lck_mtx_lock(&ifp->if_poll_lock);
3448 VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
3449 (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
3450 ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
3451 /* wake up once to get out of embryonic state */
3452 ifnet_poll_wakeup(ifp);
3453 lck_mtx_unlock(&ifp->if_poll_lock);
3454 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3455 /* NOTREACHED */
3456 __builtin_unreachable();
3457 }
3458
3459 __attribute__((noreturn))
3460 static void
3461 ifnet_poll_thread_cont(void *v, wait_result_t wres)
3462 {
3463 struct dlil_threading_info *inp;
3464 struct ifnet *ifp = v;
3465 struct ifnet_stat_increment_param s;
3466 struct timespec start_time;
3467
3468 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3469
3470 bzero(&s, sizeof(s));
3471 net_timerclear(&start_time);
3472
3473 lck_mtx_lock_spin(&ifp->if_poll_lock);
3474 if (__improbable(wres == THREAD_INTERRUPTED ||
3475 ifp->if_poll_thread == THREAD_NULL)) {
3476 goto terminate;
3477 }
3478
3479 inp = ifp->if_inp;
3480 VERIFY(inp != NULL);
3481
3482 if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
3483 ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
3484 lck_mtx_unlock(&ifp->if_poll_lock);
3485 ifnet_decr_pending_thread_count(ifp);
3486 lck_mtx_lock_spin(&ifp->if_poll_lock);
3487 goto skip;
3488 }
3489
3490 ifp->if_poll_flags |= IF_POLLF_RUNNING;
3491
3492 /*
3493 * Keep on servicing until there are no more requests.
3494 */
3495 for (;;) {
3496 struct mbuf *m_head, *m_tail;
3497 u_int32_t m_lim, m_cnt, m_totlen;
3498 u_int16_t req = ifp->if_poll_req;
3499
3500 m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
3501 MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
3502 lck_mtx_unlock(&ifp->if_poll_lock);
3503
3504 /*
3505 * If no longer attached, there's nothing to do;
3506 * else hold an IO refcnt to prevent the interface
3507 * from being detached (will be released below.)
3508 */
3509 if (!ifnet_is_attached(ifp, 1)) {
3510 lck_mtx_lock_spin(&ifp->if_poll_lock);
3511 break;
3512 }
3513
3514 if (dlil_verbose > 1) {
3515 DLIL_PRINTF("%s: polling up to %d pkts, "
3516 "pkts avg %d max %d, wreq avg %d, "
3517 "bytes avg %d\n",
3518 if_name(ifp), m_lim,
3519 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3520 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3521 }
3522
3523 /* invoke the driver's input poll routine */
3524 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3525 &m_cnt, &m_totlen));
3526
3527 if (m_head != NULL) {
3528 VERIFY(m_tail != NULL && m_cnt > 0);
3529
3530 if (dlil_verbose > 1) {
3531 DLIL_PRINTF("%s: polled %d pkts, "
3532 "pkts avg %d max %d, wreq avg %d, "
3533 "bytes avg %d\n",
3534 if_name(ifp), m_cnt,
3535 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3536 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3537 }
3538
3539 /* stats are required for extended variant */
3540 s.packets_in = m_cnt;
3541 s.bytes_in = m_totlen;
3542
3543 (void) ifnet_input_common(ifp, m_head, m_tail,
3544 &s, TRUE, TRUE);
3545 } else {
3546 if (dlil_verbose > 1) {
3547 DLIL_PRINTF("%s: no packets, "
3548 "pkts avg %d max %d, wreq avg %d, "
3549 "bytes avg %d\n",
3550 if_name(ifp), ifp->if_rxpoll_pavg,
3551 ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
3552 ifp->if_rxpoll_bavg);
3553 }
3554
3555 (void) ifnet_input_common(ifp, NULL, NULL,
3556 NULL, FALSE, TRUE);
3557 }
3558
3559 /* Release the io ref count */
3560 ifnet_decr_iorefcnt(ifp);
3561
3562 lck_mtx_lock_spin(&ifp->if_poll_lock);
3563
3564 /* if there's no pending request, we're done */
3565 if (req == ifp->if_poll_req ||
3566 ifp->if_poll_thread == THREAD_NULL) {
3567 break;
3568 }
3569 }
3570 skip:
3571 ifp->if_poll_req = 0;
3572 ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
3573
3574 if (ifp->if_poll_thread != THREAD_NULL) {
3575 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3576 struct timespec *ts;
3577
3578 /*
3579 * Wake up N ns from now, else sleep indefinitely (ts = NULL)
3580 * until ifnet_poll() is called again.
3581 */
3582 ts = &ifp->if_poll_cycle;
3583 if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
3584 ts = NULL;
3585 }
3586
3587 if (ts != NULL) {
3588 clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
3589 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3590 }
3591
3592 (void) assert_wait_deadline(&ifp->if_poll_thread,
3593 THREAD_UNINT, deadline);
3594 lck_mtx_unlock(&ifp->if_poll_lock);
3595 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3596 /* NOTREACHED */
3597 } else {
3598 terminate:
3599 /* interface is detached (maybe while asleep)? */
3600 ifnet_set_poll_cycle(ifp, NULL);
3601 lck_mtx_unlock(&ifp->if_poll_lock);
3602
3603 if (dlil_verbose) {
3604 DLIL_PRINTF("%s: poller thread terminated\n",
3605 if_name(ifp));
3606 }
3607
3608 /* for the extra refcnt from kernel_thread_start() */
3609 thread_deallocate(current_thread());
3610 /* this is the end */
3611 thread_terminate(current_thread());
3612 /* NOTREACHED */
3613 }
3614
3615 /* must never get here */
3616 VERIFY(0);
3617 /* NOTREACHED */
3618 __builtin_unreachable();
3619 }
3620
3621 void
3622 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3623 {
3624 if (ts == NULL) {
3625 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
3626 } else {
3627 *(&ifp->if_poll_cycle) = *ts;
3628 }
3629
3630 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3631 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
3632 if_name(ifp), ts->tv_nsec);
3633 }
3634 }
3635
3636 void
3637 ifnet_purge(struct ifnet *ifp)
3638 {
3639 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
3640 if_qflush(ifp, 0);
3641 }
3642 }
3643
3644 void
3645 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3646 {
3647 IFCQ_LOCK_ASSERT_HELD(ifq);
3648
3649 if (!(IFCQ_IS_READY(ifq))) {
3650 return;
3651 }
3652
3653 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3654 struct tb_profile tb = {
3655 .rate = ifq->ifcq_tbr.tbr_rate_raw,
3656 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
3657 };
3658 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3659 }
3660
3661 ifclassq_update(ifq, ev);
3662 }
3663
3664 void
3665 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3666 {
3667 switch (ev) {
3668 case CLASSQ_EV_LINK_BANDWIDTH:
3669 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
3670 ifp->if_poll_update++;
3671 }
3672 break;
3673
3674 default:
3675 break;
3676 }
3677 }
3678
3679 errno_t
3680 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3681 {
3682 struct ifclassq *ifq;
3683 u_int32_t omodel;
3684 errno_t err;
3685
3686 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
3687 return EINVAL;
3688 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3689 return ENXIO;
3690 }
3691
3692 ifq = &ifp->if_snd;
3693 IFCQ_LOCK(ifq);
3694 omodel = ifp->if_output_sched_model;
3695 ifp->if_output_sched_model = model;
3696 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
3697 ifp->if_output_sched_model = omodel;
3698 }
3699 IFCQ_UNLOCK(ifq);
3700
3701 return err;
3702 }
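/*
* Illustrative use (hypothetical caller, not from this file): a driver that
* manages its own per-service-class queueing could switch models right
* after attach:
*
*	errno_t err;
*
*	err = ifnet_set_output_sched_model(ifp, IFNET_SCHED_MODEL_DRIVER_MANAGED);
*	if (err != 0)
*		printf("%s: sched model change failed (%d)\n", if_name(ifp), err);
*
* As the checks above show, the call returns ENXIO unless the interface was
* created with IFEF_TXSTART.
*/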
3703
3704 errno_t
3705 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3706 {
3707 if (ifp == NULL) {
3708 return EINVAL;
3709 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3710 return ENXIO;
3711 }
3712
3713 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3714
3715 return 0;
3716 }
3717
3718 errno_t
3719 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3720 {
3721 if (ifp == NULL || maxqlen == NULL) {
3722 return EINVAL;
3723 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3724 return ENXIO;
3725 }
3726
3727 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3728
3729 return 0;
3730 }
3731
3732 errno_t
3733 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3734 {
3735 errno_t err;
3736
3737 if (ifp == NULL || pkts == NULL) {
3738 err = EINVAL;
3739 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3740 err = ENXIO;
3741 } else {
3742 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3743 pkts, NULL);
3744 }
3745
3746 return err;
3747 }
3748
3749 errno_t
3750 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3751 u_int32_t *pkts, u_int32_t *bytes)
3752 {
3753 errno_t err;
3754
3755 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3756 (pkts == NULL && bytes == NULL)) {
3757 err = EINVAL;
3758 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3759 err = ENXIO;
3760 } else {
3761 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3762 }
3763
3764 return err;
3765 }
3766
3767 errno_t
3768 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3769 {
3770 struct dlil_threading_info *inp;
3771
3772 if (ifp == NULL) {
3773 return EINVAL;
3774 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3775 return ENXIO;
3776 }
3777
3778 if (maxqlen == 0) {
3779 maxqlen = if_rcvq_maxlen;
3780 } else if (maxqlen < IF_RCVQ_MINLEN) {
3781 maxqlen = IF_RCVQ_MINLEN;
3782 }
3783
3784 inp = ifp->if_inp;
3785 lck_mtx_lock(&inp->dlth_lock);
3786 qlimit(&inp->dlth_pkts) = maxqlen;
3787 lck_mtx_unlock(&inp->dlth_lock);
3788
3789 return 0;
3790 }
3791
3792 errno_t
3793 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3794 {
3795 struct dlil_threading_info *inp;
3796
3797 if (ifp == NULL || maxqlen == NULL) {
3798 return EINVAL;
3799 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3800 return ENXIO;
3801 }
3802
3803 inp = ifp->if_inp;
3804 lck_mtx_lock(&inp->dlth_lock);
3805 *maxqlen = qlimit(&inp->dlth_pkts);
3806 lck_mtx_unlock(&inp->dlth_lock);
3807 return 0;
3808 }
3809
3810 void
3811 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3812 uint16_t delay_timeout)
3813 {
3814 if (delay_qlen > 0 && delay_timeout > 0) {
3815 if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
3816 ifp->if_start_delay_qlen = MIN(100, delay_qlen);
3817 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3818 /* convert timeout to nanoseconds */
3819 ifp->if_start_delay_timeout *= 1000;
3820 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3821 ifp->if_xname, (uint32_t)delay_qlen,
3822 (uint32_t)delay_timeout);
3823 } else {
3824 if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
3825 }
3826 }
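/*
* Example (illustrative numbers): ifnet_enqueue_multi_setup(ifp, 16, 10000)
* enables IFEF_ENQUEUE_MULTI with a delay qlen of 16 packets and a start
* delay of 10000 usec; the arguments are clamped to at most 100 packets and
* 20000 usec, and the timeout is stored as 10,000,000 ns in
* if_start_delay_timeout.  Passing 0 for either argument disables the
* multi-enqueue behavior.
*/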
3827
3828 /*
3829 * This function clears the DSCP bits in the IPv4/IPv6 header pointed to by buf.
3830 * While it's OK for buf not to be 32-bit aligned, the caller must ensure that
3831 * buf holds the full header.
3832 */
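/*
* Note on the IPv4 path below: the header checksum is patched incrementally
* rather than recomputed over the whole header -- the old TOS value is added
* to the stored checksum, the new (DSCP-cleared) TOS is subtracted, and the
* carry is folded back into the low 16 bits.  This is the usual incremental
* checksum-update technique (cf. RFC 1624).  IPv6 needs no such fixup since
* it has no header checksum.
*/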
3833 static __attribute__((noinline)) void
3834 ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
3835 {
3836 struct ip *ip;
3837 struct ip6_hdr *ip6;
3838 uint8_t lbuf[64] __attribute__((aligned(8)));
3839 uint8_t *p = buf;
3840
3841 if (ip_ver == IPVERSION) {
3842 uint8_t old_tos;
3843 uint32_t sum;
3844
3845 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3846 DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
3847 bcopy(buf, lbuf, sizeof(struct ip));
3848 p = lbuf;
3849 }
3850 ip = (struct ip *)(void *)p;
3851 if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
3852 return;
3853 }
3854
3855 DTRACE_IP1(clear__v4, struct ip *, ip);
3856 old_tos = ip->ip_tos;
3857 ip->ip_tos &= IPTOS_ECN_MASK;
3858 sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
3859 sum = (sum >> 16) + (sum & 0xffff);
3860 ip->ip_sum = (uint16_t)(sum & 0xffff);
3861
3862 if (__improbable(p == lbuf)) {
3863 bcopy(lbuf, buf, sizeof(struct ip));
3864 }
3865 } else {
3866 uint32_t flow;
3867 ASSERT(ip_ver == IPV6_VERSION);
3868
3869 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3870 DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
3871 bcopy(buf, lbuf, sizeof(struct ip6_hdr));
3872 p = lbuf;
3873 }
3874 ip6 = (struct ip6_hdr *)(void *)p;
3875 flow = ntohl(ip6->ip6_flow);
3876 if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
3877 return;
3878 }
3879
3880 DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
3881 ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
3882
3883 if (__improbable(p == lbuf)) {
3884 bcopy(lbuf, buf, sizeof(struct ip6_hdr));
3885 }
3886 }
3887 }
3888
3889 static inline errno_t
3890 ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
3891 boolean_t *pdrop)
3892 {
3893 volatile uint64_t *fg_ts = NULL;
3894 volatile uint64_t *rt_ts = NULL;
3895 struct timespec now;
3896 u_int64_t now_nsec = 0;
3897 int error = 0;
3898 uint8_t *mcast_buf = NULL;
3899 uint8_t ip_ver;
3900 uint32_t pktlen;
3901
3902 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3903
3904 /*
3905 * If packet already carries a timestamp, either from dlil_output()
3906 * or from flowswitch, use it here. Otherwise, record timestamp.
3907 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3908 * the timestamp value is used internally there.
3909 */
3910 switch (p->cp_ptype) {
3911 case QP_MBUF:
3912 ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
3913 ASSERT(p->cp_mbuf->m_nextpkt == NULL);
3914
3915 if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3916 p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
3917 nanouptime(&now);
3918 net_timernsec(&now, &now_nsec);
3919 p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
3920 }
3921 p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3922 /*
3923 * If the packet service class is not background,
3924 * update the timestamp to indicate recent activity
3925 * on a foreground socket.
3926 */
3927 if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3928 p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3929 if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
3930 PKTF_SO_BACKGROUND)) {
3931 ifp->if_fg_sendts = (uint32_t)_net_uptime;
3932 if (fg_ts != NULL) {
3933 *fg_ts = (uint32_t)_net_uptime;
3934 }
3935 }
3936 if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3937 ifp->if_rt_sendts = (uint32_t)_net_uptime;
3938 if (rt_ts != NULL) {
3939 *rt_ts = (uint32_t)_net_uptime;
3940 }
3941 }
3942 }
3943 pktlen = m_pktlen(p->cp_mbuf);
3944
3945 /*
3946 * Some Wi-Fi AP implementations do not correctly handle
3947 * multicast IP packets with DSCP bits set (rdar://9331522).
3948 * As a workaround we clear the DSCP bits but keep the service
3949 * class (rdar://51507725).
3950 */
3951 if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
3952 IFNET_IS_WIFI_INFRA(ifp)) {
3953 size_t len = mbuf_len(p->cp_mbuf), hlen;
3954 struct ether_header *eh;
3955 boolean_t pullup = FALSE;
3956 uint16_t etype;
3957
3958 if (__improbable(len < sizeof(struct ether_header))) {
3959 DTRACE_IP1(small__ether, size_t, len);
3960 if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
3961 sizeof(struct ether_header))) == NULL) {
3962 return ENOMEM;
3963 }
3964 }
3965 eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
3966 etype = ntohs(eh->ether_type);
3967 if (etype == ETHERTYPE_IP) {
3968 hlen = sizeof(struct ether_header) +
3969 sizeof(struct ip);
3970 if (len < hlen) {
3971 DTRACE_IP1(small__v4, size_t, len);
3972 pullup = TRUE;
3973 }
3974 ip_ver = IPVERSION;
3975 } else if (etype == ETHERTYPE_IPV6) {
3976 hlen = sizeof(struct ether_header) +
3977 sizeof(struct ip6_hdr);
3978 if (len < hlen) {
3979 DTRACE_IP1(small__v6, size_t, len);
3980 pullup = TRUE;
3981 }
3982 ip_ver = IPV6_VERSION;
3983 } else {
3984 DTRACE_IP1(invalid__etype, uint16_t, etype);
3985 break;
3986 }
3987 if (pullup) {
3988 if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
3989 NULL) {
3990 return ENOMEM;
3991 }
3992
3993 eh = (struct ether_header *)mbuf_data(
3994 p->cp_mbuf);
3995 }
3996 mcast_buf = (uint8_t *)(eh + 1);
3997 /*
3998 * ifnet_mcast_clear_dscp() will finish the work below.
3999 * Note that the pullups above ensure that mcast_buf
4000 * points to a full IP header.
4001 */
4002 }
4003 break;
4004
4005
4006 default:
4007 VERIFY(0);
4008 /* NOTREACHED */
4009 __builtin_unreachable();
4010 }
4011
4012 if (mcast_buf != NULL) {
4013 ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
4014 }
4015
4016 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
4017 if (now_nsec == 0) {
4018 nanouptime(&now);
4019 net_timernsec(&now, &now_nsec);
4020 }
4021 /*
4022 * If the driver chose to delay the start callback for
4023 * coalescing multiple packets, then use the following
4024 * heuristics to make sure that the start callback will
4025 * be delayed only when bulk data transfer is detected.
4026 * 1. The number of packets enqueued in (delay_win * 2) is
4027 * greater than or equal to the delay qlen.
4028 * 2. If delay_start is enabled, it will stay enabled for
4029 * another 10 idle windows. This is to take into account
4030 * variable RTT and burst traffic.
4031 * 3. If the time elapsed since the last enqueue is more
4032 * than 200ms, we disable delaying the start callback. This
4033 * is to take idle time into account.
4034 */
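/*
* Worked example (illustrative numbers, assuming if_start_delay_qlen = 16
* and if_start_delay_timeout = 10 ms): dwin is 20 ms, so enqueueing 16 or
* more packets within a 20 ms window sets IFEF_DELAY_START and the start
* callback is deferred; if a window passes with fewer packets 10 times in
* a row, or more than 200 ms elapse between enqueues, the flag is cleared
* and the start callback is no longer deferred.
*/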
4035 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
4036 if (ifp->if_start_delay_swin > 0) {
4037 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
4038 ifp->if_start_delay_cnt++;
4039 } else if ((now_nsec - ifp->if_start_delay_swin)
4040 >= (200 * 1000 * 1000)) {
4041 ifp->if_start_delay_swin = now_nsec;
4042 ifp->if_start_delay_cnt = 1;
4043 ifp->if_start_delay_idle = 0;
4044 if (ifp->if_eflags & IFEF_DELAY_START) {
4045 if_clear_eflags(ifp, IFEF_DELAY_START);
4046 ifnet_delay_start_disabled_increment();
4047 }
4048 } else {
4049 if (ifp->if_start_delay_cnt >=
4050 ifp->if_start_delay_qlen) {
4051 if_set_eflags(ifp, IFEF_DELAY_START);
4052 ifp->if_start_delay_idle = 0;
4053 } else {
4054 if (ifp->if_start_delay_idle >= 10) {
4055 if_clear_eflags(ifp,
4056 IFEF_DELAY_START);
4057 ifnet_delay_start_disabled_increment();
4058 } else {
4059 ifp->if_start_delay_idle++;
4060 }
4061 }
4062 ifp->if_start_delay_swin = now_nsec;
4063 ifp->if_start_delay_cnt = 1;
4064 }
4065 } else {
4066 ifp->if_start_delay_swin = now_nsec;
4067 ifp->if_start_delay_cnt = 1;
4068 ifp->if_start_delay_idle = 0;
4069 if_clear_eflags(ifp, IFEF_DELAY_START);
4070 }
4071 } else {
4072 if_clear_eflags(ifp, IFEF_DELAY_START);
4073 }
4074
4075 /* enqueue the packet (caller consumes object) */
4076 error = ifclassq_enqueue(&ifp->if_snd, p, p, 1, pktlen, pdrop);
4077
4078 /*
4079 * Tell the driver to start dequeueing; do this even when the queue
4080 * for the packet is suspended (EQSUSPENDED), as the driver could still
4081 * be dequeueing from other unsuspended queues.
4082 */
4083 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
4084 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
4085 ifnet_start(ifp);
4086 }
4087
4088 return error;
4089 }
4090
4091 static inline errno_t
4092 ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, classq_pkt_t *head,
4093 classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
4094 boolean_t *pdrop)
4095 {
4096 int error;
4097
4098 /* enqueue the packet (caller consumes object) */
4099 error = ifclassq_enqueue(&ifp->if_snd, head, tail, cnt, bytes, pdrop);
4100
4101 /*
4102 * Tell the driver to start dequeueing; do this even when the queue
4103 * for the packet is suspended (EQSUSPENDED), as the driver could still
4104 * be dequeueing from other unsuspended queues.
4105 */
4106 if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
4107 ifnet_start(ifp);
4108 }
4109 return error;
4110 }
4111
4112 int
4113 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
4114 {
4115 struct ifnet *ifp = handle;
4116 boolean_t pdrop; /* dummy */
4117 uint32_t i;
4118
4119 ASSERT(n_pkts >= 1);
4120 for (i = 0; i < n_pkts - 1; i++) {
4121 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
4122 FALSE, &pdrop);
4123 }
4124 /* flush with the last packet */
4125 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);
4126
4127 return 0;
4128 }
4129
4130 static inline errno_t
4131 ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
4132 boolean_t *pdrop)
4133 {
4134 if (ifp->if_output_netem != NULL) {
4135 return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
4136 } else {
4137 return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
4138 }
4139 }
4140
4141 errno_t
4142 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
4143 {
4144 boolean_t pdrop;
4145 return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
4146 }
4147
4148 errno_t
4149 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
4150 boolean_t *pdrop)
4151 {
4152 classq_pkt_t pkt;
4153
4154 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
4155 m->m_nextpkt != NULL) {
4156 if (m != NULL) {
4157 m_freem_list(m);
4158 *pdrop = TRUE;
4159 }
4160 return EINVAL;
4161 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4162 !IF_FULLY_ATTACHED(ifp)) {
4163 /* flag tested without lock for performance */
4164 m_freem(m);
4165 *pdrop = TRUE;
4166 return ENXIO;
4167 } else if (!(ifp->if_flags & IFF_UP)) {
4168 m_freem(m);
4169 *pdrop = TRUE;
4170 return ENETDOWN;
4171 }
4172
4173 CLASSQ_PKT_INIT_MBUF(&pkt, m);
4174 return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
4175 }
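/*
* Note on ownership (summarizing the paths above): ifnet_enqueue_mbuf()
* always consumes the mbuf.  On EINVAL, ENXIO or ENETDOWN the packet is
* freed right here and *pdrop is set to TRUE; otherwise it is handed to the
* classq (or to netem, when configured on the interface), which may itself
* drop it and report that through *pdrop.
*/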
4176
4177 errno_t
4178 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
4179 struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
4180 boolean_t *pdrop)
4181 {
4182 classq_pkt_t head, tail;
4183
4184 ASSERT(m_head != NULL);
4185 ASSERT((m_head->m_flags & M_PKTHDR) != 0);
4186 ASSERT(m_tail != NULL);
4187 ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
4188 ASSERT(ifp != NULL);
4189 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
4190
4191 if (!IF_FULLY_ATTACHED(ifp)) {
4192 /* flag tested without lock for performance */
4193 m_freem_list(m_head);
4194 *pdrop = TRUE;
4195 return ENXIO;
4196 } else if (!(ifp->if_flags & IFF_UP)) {
4197 m_freem_list(m_head);
4198 *pdrop = TRUE;
4199 return ENETDOWN;
4200 }
4201
4202 CLASSQ_PKT_INIT_MBUF(&head, m_head);
4203 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
4204 return ifnet_enqueue_ifclassq_chain(ifp, &head, &tail, cnt, bytes,
4205 flush, pdrop);
4206 }
4207
4208
4209 errno_t
4210 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
4211 {
4212 errno_t rc;
4213 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4214
4215 if (ifp == NULL || mp == NULL) {
4216 return EINVAL;
4217 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4218 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4219 return ENXIO;
4220 }
4221 if (!ifnet_is_attached(ifp, 1)) {
4222 return ENXIO;
4223 }
4224
4225 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
4226 &pkt, NULL, NULL, NULL);
4227 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4228 ifnet_decr_iorefcnt(ifp);
4229 *mp = pkt.cp_mbuf;
4230 return rc;
4231 }
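/*
* Sketch of a typical caller (hypothetical driver, not part of this file):
* a legacy IFEF_TXSTART driver using the normal scheduling model would loop
* on ifnet_dequeue() from its if_start callback until the queue runs dry or
* its hardware ring fills up:
*
*	static void
*	foo_start(struct ifnet *ifp)		// hypothetical callback
*	{
*		struct mbuf *m;
*
*		while (foo_tx_ring_avail(ifp) &&	// hypothetical helper
*		    ifnet_dequeue(ifp, &m) == 0) {
*			foo_tx_submit(ifp, m);		// hypothetical helper
*		}
*	}
*/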
4232
4233 errno_t
4234 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
4235 struct mbuf **mp)
4236 {
4237 errno_t rc;
4238 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4239
4240 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
4241 return EINVAL;
4242 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4243 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4244 return ENXIO;
4245 }
4246 if (!ifnet_is_attached(ifp, 1)) {
4247 return ENXIO;
4248 }
4249
4250 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
4251 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
4252 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4253 ifnet_decr_iorefcnt(ifp);
4254 *mp = pkt.cp_mbuf;
4255 return rc;
4256 }
4257
4258 errno_t
4259 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
4260 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4261 {
4262 errno_t rc;
4263 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4264 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4265
4266 if (ifp == NULL || head == NULL || pkt_limit < 1) {
4267 return EINVAL;
4268 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4269 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4270 return ENXIO;
4271 }
4272 if (!ifnet_is_attached(ifp, 1)) {
4273 return ENXIO;
4274 }
4275
4276 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
4277 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
4278 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4279 ifnet_decr_iorefcnt(ifp);
4280 *head = pkt_head.cp_mbuf;
4281 if (tail != NULL) {
4282 *tail = pkt_tail.cp_mbuf;
4283 }
4284 return rc;
4285 }
4286
4287 errno_t
4288 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
4289 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4290 {
4291 errno_t rc;
4292 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4293 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4294
4295 if (ifp == NULL || head == NULL || byte_limit < 1) {
4296 return EINVAL;
4297 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4298 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4299 return ENXIO;
4300 }
4301 if (!ifnet_is_attached(ifp, 1)) {
4302 return ENXIO;
4303 }
4304
4305 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
4306 byte_limit, &pkt_head, &pkt_tail, cnt, len);
4307 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4308 ifnet_decr_iorefcnt(ifp);
4309 *head = pkt_head.cp_mbuf;
4310 if (tail != NULL) {
4311 *tail = pkt_tail.cp_mbuf;
4312 }
4313 return rc;
4314 }
4315
4316 errno_t
4317 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
4318 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
4319 u_int32_t *len)
4320 {
4321 errno_t rc;
4322 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4323 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4324
4325 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
4326 !MBUF_VALID_SC(sc)) {
4327 return EINVAL;
4328 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4329 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4330 return ENXIO;
4331 }
4332 if (!ifnet_is_attached(ifp, 1)) {
4333 return ENXIO;
4334 }
4335
4336 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
4337 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
4338 cnt, len);
4339 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4340 ifnet_decr_iorefcnt(ifp);
4341 *head = pkt_head.cp_mbuf;
4342 if (tail != NULL) {
4343 *tail = pkt_tail.cp_mbuf;
4344 }
4345 return rc;
4346 }
4347
4348 #if XNU_TARGET_OS_OSX
4349 errno_t
4350 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
4351 const struct sockaddr *dest, const char *dest_linkaddr,
4352 const char *frame_type, u_int32_t *pre, u_int32_t *post)
4353 {
4354 if (pre != NULL) {
4355 *pre = 0;
4356 }
4357 if (post != NULL) {
4358 *post = 0;
4359 }
4360
4361 return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
4362 }
4363 #endif /* XNU_TARGET_OS_OSX */
4364
4365 static boolean_t
4366 packet_has_vlan_tag(struct mbuf * m)
4367 {
4368 u_int tag = 0;
4369
4370 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
4371 tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
4372 if (tag == 0) {
4373 /* the packet is just priority-tagged, clear the bit */
4374 m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
4375 }
4376 }
4377 return tag != 0;
4378 }
4379
4380 static int
4381 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
4382 char **frame_header_p, protocol_family_t protocol_family)
4383 {
4384 boolean_t is_vlan_packet = FALSE;
4385 struct ifnet_filter *filter;
4386 struct mbuf *m = *m_p;
4387
4388 is_vlan_packet = packet_has_vlan_tag(m);
4389
4390 if (TAILQ_EMPTY(&ifp->if_flt_head)) {
4391 return 0;
4392 }
4393
4394 /*
4395 * Pass the inbound packet to the interface filters
4396 */
4397 lck_mtx_lock_spin(&ifp->if_flt_lock);
4398 /* prevent filter list from changing in case we drop the lock */
4399 if_flt_monitor_busy(ifp);
4400 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4401 int result;
4402
4403 /* exclude VLAN packets from external filters PR-3586856 */
4404 if (is_vlan_packet &&
4405 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4406 continue;
4407 }
4408
4409 if (!filter->filt_skip && filter->filt_input != NULL &&
4410 (filter->filt_protocol == 0 ||
4411 filter->filt_protocol == protocol_family)) {
4412 lck_mtx_unlock(&ifp->if_flt_lock);
4413
4414 result = (*filter->filt_input)(filter->filt_cookie,
4415 ifp, protocol_family, m_p, frame_header_p);
4416
4417 lck_mtx_lock_spin(&ifp->if_flt_lock);
4418 if (result != 0) {
4419 /* we're done with the filter list */
4420 if_flt_monitor_unbusy(ifp);
4421 lck_mtx_unlock(&ifp->if_flt_lock);
4422 return result;
4423 }
4424 }
4425 }
4426 /* we're done with the filter list */
4427 if_flt_monitor_unbusy(ifp);
4428 lck_mtx_unlock(&ifp->if_flt_lock);
4429
4430 /*
4431 * Strip away the M_PROTO1 bit prior to sending the packet up the stack,
4432 * as it is meant to be local to a subsystem (e.g. if_bridge uses M_PROTO1).
4433 */
4434 if (*m_p != NULL) {
4435 (*m_p)->m_flags &= ~M_PROTO1;
4436 }
4437
4438 return 0;
4439 }
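/*
* Return-value convention for the filter callbacks invoked above (and in
* dlil_interface_filters_output below): a filter returns 0 to let the packet
* continue through the chain and a non-zero value to stop processing.  As
* the callers in this file show, EJUSTRETURN means the filter took ownership
* of (or already freed) the packet, while any other error causes DLIL to
* free it on the filter's behalf.
*/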
4440
4441 static int
4442 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
4443 protocol_family_t protocol_family)
4444 {
4445 boolean_t is_vlan_packet;
4446 struct ifnet_filter *filter;
4447 struct mbuf *m = *m_p;
4448
4449 is_vlan_packet = packet_has_vlan_tag(m);
4450
4451 /*
4452 * Pass the outbound packet to the interface filters
4453 */
4454 lck_mtx_lock_spin(&ifp->if_flt_lock);
4455 /* prevent filter list from changing in case we drop the lock */
4456 if_flt_monitor_busy(ifp);
4457 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4458 int result;
4459
4460 /* exclude VLAN packets from external filters PR-3586856 */
4461 if (is_vlan_packet &&
4462 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4463 continue;
4464 }
4465
4466 if (!filter->filt_skip && filter->filt_output != NULL &&
4467 (filter->filt_protocol == 0 ||
4468 filter->filt_protocol == protocol_family)) {
4469 lck_mtx_unlock(&ifp->if_flt_lock);
4470
4471 result = filter->filt_output(filter->filt_cookie, ifp,
4472 protocol_family, m_p);
4473
4474 lck_mtx_lock_spin(&ifp->if_flt_lock);
4475 if (result != 0) {
4476 /* we're done with the filter list */
4477 if_flt_monitor_unbusy(ifp);
4478 lck_mtx_unlock(&ifp->if_flt_lock);
4479 return result;
4480 }
4481 }
4482 }
4483 /* we're done with the filter list */
4484 if_flt_monitor_unbusy(ifp);
4485 lck_mtx_unlock(&ifp->if_flt_lock);
4486
4487 return 0;
4488 }
4489
4490 static void
4491 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
4492 {
4493 int error;
4494
4495 if (ifproto->proto_kpi == kProtoKPI_v1) {
4496 /* Version 1 protocols get one packet at a time */
4497 while (m != NULL) {
4498 char * frame_header;
4499 mbuf_t next_packet;
4500
4501 next_packet = m->m_nextpkt;
4502 m->m_nextpkt = NULL;
4503 frame_header = m->m_pkthdr.pkt_hdr;
4504 m->m_pkthdr.pkt_hdr = NULL;
4505 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
4506 ifproto->protocol_family, m, frame_header);
4507 if (error != 0 && error != EJUSTRETURN) {
4508 m_freem(m);
4509 }
4510 m = next_packet;
4511 }
4512 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
4513 /* Version 2 protocols support packet lists */
4514 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
4515 ifproto->protocol_family, m);
4516 if (error != 0 && error != EJUSTRETURN) {
4517 m_freem_list(m);
4518 }
4519 }
4520 }
4521
4522 static void
4523 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
4524 struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
4525 {
4526 struct ifnet_stat_increment_param *d = &inp->dlth_stats;
4527
4528 if (s->packets_in != 0) {
4529 d->packets_in += s->packets_in;
4530 }
4531 if (s->bytes_in != 0) {
4532 d->bytes_in += s->bytes_in;
4533 }
4534 if (s->errors_in != 0) {
4535 d->errors_in += s->errors_in;
4536 }
4537
4538 if (s->packets_out != 0) {
4539 d->packets_out += s->packets_out;
4540 }
4541 if (s->bytes_out != 0) {
4542 d->bytes_out += s->bytes_out;
4543 }
4544 if (s->errors_out != 0) {
4545 d->errors_out += s->errors_out;
4546 }
4547
4548 if (s->collisions != 0) {
4549 d->collisions += s->collisions;
4550 }
4551 if (s->dropped != 0) {
4552 d->dropped += s->dropped;
4553 }
4554
4555 if (poll) {
4556 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
4557 }
4558 }
4559
4560 static boolean_t
4561 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
4562 {
4563 struct ifnet_stat_increment_param *s = &inp->dlth_stats;
4564
4565 /*
4566 * Use of atomic operations is unavoidable here because
4567 * these stats may also be incremented elsewhere via KPIs.
4568 */
4569 if (s->packets_in != 0) {
4570 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
4571 s->packets_in = 0;
4572 }
4573 if (s->bytes_in != 0) {
4574 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
4575 s->bytes_in = 0;
4576 }
4577 if (s->errors_in != 0) {
4578 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
4579 s->errors_in = 0;
4580 }
4581
4582 if (s->packets_out != 0) {
4583 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
4584 s->packets_out = 0;
4585 }
4586 if (s->bytes_out != 0) {
4587 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
4588 s->bytes_out = 0;
4589 }
4590 if (s->errors_out != 0) {
4591 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
4592 s->errors_out = 0;
4593 }
4594
4595 if (s->collisions != 0) {
4596 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
4597 s->collisions = 0;
4598 }
4599 if (s->dropped != 0) {
4600 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
4601 s->dropped = 0;
4602 }
4603
4604 /*
4605 * No need for atomic operations as they are modified here
4606 * only from within the DLIL input thread context.
4607 */
4608 if (ifp->if_poll_tstats.packets != 0) {
4609 ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
4610 ifp->if_poll_tstats.packets = 0;
4611 }
4612 if (ifp->if_poll_tstats.bytes != 0) {
4613 ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
4614 ifp->if_poll_tstats.bytes = 0;
4615 }
4616
4617 return ifp->if_data_threshold != 0;
4618 }
4619
4620 __private_extern__ void
4621 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
4622 {
4623 return dlil_input_packet_list_common(ifp, m, 0,
4624 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
4625 }
4626
4627 __private_extern__ void
4628 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
4629 u_int32_t cnt, ifnet_model_t mode)
4630 {
4631 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
4632 }
4633
4634 static void
4635 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
4636 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
4637 {
4638 int error = 0;
4639 protocol_family_t protocol_family;
4640 mbuf_t next_packet;
4641 ifnet_t ifp = ifp_param;
4642 char *frame_header = NULL;
4643 struct if_proto *last_ifproto = NULL;
4644 mbuf_t pkt_first = NULL;
4645 mbuf_t *pkt_next = NULL;
4646 u_int32_t poll_thresh = 0, poll_ival = 0;
4647 int iorefcnt = 0;
4648
4649 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4650
4651 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
4652 (poll_ival = if_rxpoll_interval_pkts) > 0) {
4653 poll_thresh = cnt;
4654 }
4655
4656 while (m != NULL) {
4657 struct if_proto *ifproto = NULL;
4658 uint32_t pktf_mask; /* pkt flags to preserve */
4659
4660 if (ifp_param == NULL) {
4661 ifp = m->m_pkthdr.rcvif;
4662 }
4663
4664 if ((ifp->if_eflags & IFEF_RXPOLL) &&
4665 (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
4666 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
4667 ifnet_poll(ifp);
4668 }
4669
4670 /* Check if this mbuf looks valid */
4671 MBUF_INPUT_CHECK(m, ifp);
4672
4673 next_packet = m->m_nextpkt;
4674 m->m_nextpkt = NULL;
4675 frame_header = m->m_pkthdr.pkt_hdr;
4676 m->m_pkthdr.pkt_hdr = NULL;
4677
4678 /*
4679 * Get an IO reference count if the interface is not
4680 * loopback (lo0) and it is attached; lo0 never goes
4681 * away, so optimize for that.
4682 */
4683 if (ifp != lo_ifp) {
4684 /* iorefcnt is 0 if it hasn't been taken yet */
4685 if (iorefcnt == 0) {
4686 if (!ifnet_datamov_begin(ifp)) {
4687 m_freem(m);
4688 goto next;
4689 }
4690 }
4691 iorefcnt = 1;
4692 /*
4693 * Preserve the time stamp and skip pktap flags.
4694 */
4695 pktf_mask = PKTF_TS_VALID | PKTF_SKIP_PKTAP;
4696 } else {
4697 /*
4698 * If this arrived on lo0, preserve interface addr
4699 * info to allow for connectivity between loopback
4700 * and local interface addresses.
4701 */
4702 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
4703 }
4704
4705 /* make sure packet comes in clean */
4706 m_classifier_init(m, pktf_mask);
4707
4708 ifp_inc_traffic_class_in(ifp, m);
4709
4710 /* find which protocol family this packet is for */
4711 ifnet_lock_shared(ifp);
4712 error = (*ifp->if_demux)(ifp, m, frame_header,
4713 &protocol_family);
4714 ifnet_lock_done(ifp);
4715 if (error != 0) {
4716 if (error == EJUSTRETURN) {
4717 goto next;
4718 }
4719 protocol_family = 0;
4720 }
4721
4722 pktap_input(ifp, protocol_family, m, frame_header);
4723
4724 /* Drop v4 packets received on CLAT46 enabled interface */
4725 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4726 m_freem(m);
4727 ip6stat.ip6s_clat464_in_v4_drop++;
4728 goto next;
4729 }
4730
4731 /* Translate the packet if it is received on CLAT interface */
4732 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4733 && dlil_is_clat_needed(protocol_family, m)) {
4734 char *data = NULL;
4735 struct ether_header eh;
4736 struct ether_header *ehp = NULL;
4737
4738 if (ifp->if_type == IFT_ETHER) {
4739 ehp = (struct ether_header *)(void *)frame_header;
4740 /* Skip RX Ethernet packets if they are not IPV6 */
4741 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
4742 goto skip_clat;
4743 }
4744
4745 /* Keep a copy of frame_header for Ethernet packets */
4746 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4747 }
4748 error = dlil_clat64(ifp, &protocol_family, &m);
4749 data = (char *) mbuf_data(m);
4750 if (error != 0) {
4751 m_freem(m);
4752 ip6stat.ip6s_clat464_in_drop++;
4753 goto next;
4754 }
4755 /* Native v6 should be No-op */
4756 if (protocol_family != PF_INET) {
4757 goto skip_clat;
4758 }
4759
4760 /* Do this only for translated v4 packets. */
4761 switch (ifp->if_type) {
4762 case IFT_CELLULAR:
4763 frame_header = data;
4764 break;
4765 case IFT_ETHER:
4766 /*
4767 * Drop if the mbuf doesn't have enough
4768 * space for Ethernet header
4769 */
4770 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4771 m_free(m);
4772 ip6stat.ip6s_clat464_in_drop++;
4773 goto next;
4774 }
4775 /*
4776 * Set the frame_header ETHER_HDR_LEN bytes
4777 * preceding the data pointer. Change
4778 * the ether_type too.
4779 */
4780 frame_header = data - ETHER_HDR_LEN;
4781 eh.ether_type = htons(ETHERTYPE_IP);
4782 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4783 break;
4784 }
4785 }
4786 skip_clat:
4787 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
4788 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
4789 dlil_input_cksum_dbg(ifp, m, frame_header,
4790 protocol_family);
4791 }
4792 /*
4793 * For partial checksum offload, we expect the driver to
4794 * set the start offset indicating the start of the span
4795 * that is covered by the hardware-computed checksum;
4796 * adjust this start offset accordingly because the data
4797 * pointer has been advanced beyond the link-layer header.
4798 *
4799 * Virtual LAN types (bridge, vlan, bond) can call
4800 * dlil_input_packet_list() with the same packet with the
4801 * checksum flags set. Set a flag indicating that the
4802 * adjustment has already been done.
4803 */
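/*
* Worked example (illustrative): for a plain Ethernet frame the driver
* reports csum_rx_start relative to frame_header, so with m_data advanced
* ETHER_HDR_LEN (14) bytes past frame_header, adj is 14 and csum_rx_start
* is reduced by 14 to make it relative to m_data.  If frame_header is
* missing or adj would exceed csum_rx_start, the partial checksum is simply
* invalidated below instead.
*/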
4804 if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
4805 /* adjustment has already been done */
4806 } else if ((m->m_pkthdr.csum_flags &
4807 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4808 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4809 int adj;
4810 if (frame_header == NULL ||
4811 frame_header < (char *)mbuf_datastart(m) ||
4812 frame_header > (char *)m->m_data ||
4813 (adj = (int)(m->m_data - frame_header)) >
4814 m->m_pkthdr.csum_rx_start) {
4815 m->m_pkthdr.csum_data = 0;
4816 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4817 hwcksum_in_invalidated++;
4818 } else {
4819 m->m_pkthdr.csum_rx_start -= adj;
4820 }
4821 /* make sure we don't adjust more than once */
4822 m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
4823 }
4824 if (clat_debug) {
4825 pktap_input(ifp, protocol_family, m, frame_header);
4826 }
4827
4828 if (m->m_flags & (M_BCAST | M_MCAST)) {
4829 atomic_add_64(&ifp->if_imcasts, 1);
4830 }
4831
4832 /* run interface filters */
4833 error = dlil_interface_filters_input(ifp, &m,
4834 &frame_header, protocol_family);
4835 if (error != 0) {
4836 if (error != EJUSTRETURN) {
4837 m_freem(m);
4838 }
4839 goto next;
4840 }
4841 if ((m->m_flags & M_PROMISC) != 0) {
4842 m_freem(m);
4843 goto next;
4844 }
4845
4846 /* Lookup the protocol attachment to this interface */
4847 if (protocol_family == 0) {
4848 ifproto = NULL;
4849 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4850 (last_ifproto->protocol_family == protocol_family)) {
4851 VERIFY(ifproto == NULL);
4852 ifproto = last_ifproto;
4853 if_proto_ref(last_ifproto);
4854 } else {
4855 VERIFY(ifproto == NULL);
4856 ifnet_lock_shared(ifp);
4857 /* callee holds a proto refcnt upon success */
4858 ifproto = find_attached_proto(ifp, protocol_family);
4859 ifnet_lock_done(ifp);
4860 }
4861 if (ifproto == NULL) {
4862 /* no protocol for this packet, discard */
4863 m_freem(m);
4864 goto next;
4865 }
4866 if (ifproto != last_ifproto) {
4867 if (last_ifproto != NULL) {
4868 /* pass up the list for the previous protocol */
4869 dlil_ifproto_input(last_ifproto, pkt_first);
4870 pkt_first = NULL;
4871 if_proto_free(last_ifproto);
4872 }
4873 last_ifproto = ifproto;
4874 if_proto_ref(ifproto);
4875 }
4876 /* extend the list */
4877 m->m_pkthdr.pkt_hdr = frame_header;
4878 if (pkt_first == NULL) {
4879 pkt_first = m;
4880 } else {
4881 *pkt_next = m;
4882 }
4883 pkt_next = &m->m_nextpkt;
4884
4885 next:
4886 if (next_packet == NULL && last_ifproto != NULL) {
4887 /* pass up the last list of packets */
4888 dlil_ifproto_input(last_ifproto, pkt_first);
4889 if_proto_free(last_ifproto);
4890 last_ifproto = NULL;
4891 }
4892 if (ifproto != NULL) {
4893 if_proto_free(ifproto);
4894 ifproto = NULL;
4895 }
4896
4897 m = next_packet;
4898
4899 /* update the driver's multicast filter, if needed */
4900 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
4901 ifp->if_updatemcasts = 0;
4902 }
4903 if (iorefcnt == 1) {
4904 /* If the next mbuf is on a different interface, unlock data-mov */
4905 if (!m || (ifp != ifp_param && ifp != m->m_pkthdr.rcvif)) {
4906 ifnet_datamov_end(ifp);
4907 iorefcnt = 0;
4908 }
4909 }
4910 }
4911
4912 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4913 }
4914
4915 errno_t
4916 if_mcasts_update(struct ifnet *ifp)
4917 {
4918 errno_t err;
4919
4920 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4921 if (err == EAFNOSUPPORT) {
4922 err = 0;
4923 }
4924 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
4925 "(err=%d)\n", if_name(ifp),
4926 (err == 0 ? "successfully restored" : "failed to restore"),
4927 ifp->if_updatemcasts, err);
4928
4929 /* just return success */
4930 return 0;
4931 }
4932
4933 /* If ifp is set, we will increment the generation for the interface */
4934 int
4935 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4936 {
4937 if (ifp != NULL) {
4938 ifnet_increment_generation(ifp);
4939 }
4940
4941 #if NECP
4942 necp_update_all_clients();
4943 #endif /* NECP */
4944
4945 return kev_post_msg(event);
4946 }
4947
4948 __private_extern__ void
4949 dlil_post_sifflags_msg(struct ifnet * ifp)
4950 {
4951 struct kev_msg ev_msg;
4952 struct net_event_data ev_data;
4953
4954 bzero(&ev_data, sizeof(ev_data));
4955 bzero(&ev_msg, sizeof(ev_msg));
4956 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4957 ev_msg.kev_class = KEV_NETWORK_CLASS;
4958 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4959 ev_msg.event_code = KEV_DL_SIFFLAGS;
4960 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4961 ev_data.if_family = ifp->if_family;
4962 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4963 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4964 ev_msg.dv[0].data_ptr = &ev_data;
4965 ev_msg.dv[1].data_length = 0;
4966 dlil_post_complete_msg(ifp, &ev_msg);
4967 }
4968
4969 #define TMP_IF_PROTO_ARR_SIZE 10
4970 static int
4971 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4972 {
4973 struct ifnet_filter *filter = NULL;
4974 struct if_proto *proto = NULL;
4975 int if_proto_count = 0;
4976 struct if_proto **tmp_ifproto_arr = NULL;
4977 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4978 int tmp_ifproto_arr_idx = 0;
4979 bool tmp_malloc = false;
4980
4981 /*
4982 * Pass the event to the interface filters
4983 */
4984 lck_mtx_lock_spin(&ifp->if_flt_lock);
4985 /* prevent filter list from changing in case we drop the lock */
4986 if_flt_monitor_busy(ifp);
4987 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4988 if (filter->filt_event != NULL) {
4989 lck_mtx_unlock(&ifp->if_flt_lock);
4990
4991 filter->filt_event(filter->filt_cookie, ifp,
4992 filter->filt_protocol, event);
4993
4994 lck_mtx_lock_spin(&ifp->if_flt_lock);
4995 }
4996 }
4997 /* we're done with the filter list */
4998 if_flt_monitor_unbusy(ifp);
4999 lck_mtx_unlock(&ifp->if_flt_lock);
5000
5001 /* Get an io ref count if the interface is attached */
5002 if (!ifnet_is_attached(ifp, 1)) {
5003 goto done;
5004 }
5005
5006 /*
5007 * An embedded tmp_list_entry in if_proto may still get
5008 * overwritten by another thread after giving up the ifnet lock,
5009 * so we avoid embedded pointers here.
5010 */
5011 ifnet_lock_shared(ifp);
5012 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
5013 if (if_proto_count) {
5014 int i;
5015 VERIFY(ifp->if_proto_hash != NULL);
5016 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
5017 tmp_ifproto_arr = tmp_ifproto_stack_arr;
5018 } else {
5019 MALLOC(tmp_ifproto_arr, struct if_proto **,
5020 sizeof(*tmp_ifproto_arr) * if_proto_count,
5021 M_TEMP, M_ZERO);
5022 if (tmp_ifproto_arr == NULL) {
5023 ifnet_lock_done(ifp);
5024 goto cleanup;
5025 }
5026 tmp_malloc = true;
5027 }
5028
5029 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5030 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
5031 next_hash) {
5032 if_proto_ref(proto);
5033 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
5034 tmp_ifproto_arr_idx++;
5035 }
5036 }
5037 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
5038 }
5039 ifnet_lock_done(ifp);
5040
5041 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
5042 tmp_ifproto_arr_idx++) {
5043 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
5044 VERIFY(proto != NULL);
5045 proto_media_event eventp =
5046 (proto->proto_kpi == kProtoKPI_v1 ?
5047 proto->kpi.v1.event :
5048 proto->kpi.v2.event);
5049
5050 if (eventp != NULL) {
5051 eventp(ifp, proto->protocol_family,
5052 event);
5053 }
5054 if_proto_free(proto);
5055 }
5056
5057 cleanup:
5058 if (tmp_malloc) {
5059 FREE(tmp_ifproto_arr, M_TEMP);
5060 }
5061
5062 /* Pass the event to the interface */
5063 if (ifp->if_event != NULL) {
5064 ifp->if_event(ifp, event);
5065 }
5066
5067 /* Release the io ref count */
5068 ifnet_decr_iorefcnt(ifp);
5069 done:
5070 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
5071 }
5072
5073 errno_t
5074 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
5075 {
5076 struct kev_msg kev_msg;
5077 int result = 0;
5078
5079 if (ifp == NULL || event == NULL) {
5080 return EINVAL;
5081 }
5082
5083 bzero(&kev_msg, sizeof(kev_msg));
5084 kev_msg.vendor_code = event->vendor_code;
5085 kev_msg.kev_class = event->kev_class;
5086 kev_msg.kev_subclass = event->kev_subclass;
5087 kev_msg.event_code = event->event_code;
5088 kev_msg.dv[0].data_ptr = &event->event_data[0];
5089 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
5090 kev_msg.dv[1].data_length = 0;
5091
5092 result = dlil_event_internal(ifp, &kev_msg, TRUE);
5093
5094 return result;
5095 }
5096
5097 static void
5098 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
5099 {
5100 mbuf_t n = m;
5101 int chainlen = 0;
5102
5103 while (n != NULL) {
5104 chainlen++;
5105 n = n->m_next;
5106 }
5107 switch (chainlen) {
5108 case 0:
5109 break;
5110 case 1:
5111 atomic_add_64(&cls->cls_one, 1);
5112 break;
5113 case 2:
5114 atomic_add_64(&cls->cls_two, 1);
5115 break;
5116 case 3:
5117 atomic_add_64(&cls->cls_three, 1);
5118 break;
5119 case 4:
5120 atomic_add_64(&cls->cls_four, 1);
5121 break;
5122 case 5:
5123 default:
5124 atomic_add_64(&cls->cls_five_or_more, 1);
5125 break;
5126 }
5127 }
5128
5129 /*
5130 * dlil_output
5131 *
5132 * Caller should have a lock on the protocol domain if the protocol
5133 * doesn't support finer grained locking. In most cases, the lock
5134 * will be held from the socket layer and won't be released until
5135 * we return back to the socket layer.
5136 *
5137 * This does mean that we must take a protocol lock before we take
5138 * an interface lock if we're going to take both. This makes sense
5139 * because a protocol is likely to interact with an ifp while it
5140 * is under the protocol lock.
5141 *
5142 * An advisory code will be returned if adv is not null. This
5143 * can be used to provide feedback about interface queues to the
5144 * application.
5145 */
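/*
* Minimal sketch of how a caller might consume the advisory (assuming the
* FADV_* codes from <net/flowadv.h>; this is not a call site in this file):
*
*	struct flowadv adv = { .code = FADV_SUCCESS };
*
*	error = dlil_output(ifp, PF_INET, m, NULL, dest, 0, &adv);
*	if (error == 0 && adv.code == FADV_FLOW_CONTROLLED)
*		;	// have the flow back off until it is resumed
*/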
5146 errno_t
5147 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
5148 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
5149 {
5150 char *frame_type = NULL;
5151 char *dst_linkaddr = NULL;
5152 int retval = 0;
5153 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
5154 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
5155 struct if_proto *proto = NULL;
5156 mbuf_t m = NULL;
5157 mbuf_t send_head = NULL;
5158 mbuf_t *send_tail = &send_head;
5159 int iorefcnt = 0;
5160 u_int32_t pre = 0, post = 0;
5161 u_int32_t fpkts = 0, fbytes = 0;
5162 int32_t flen = 0;
5163 struct timespec now;
5164 u_int64_t now_nsec;
5165 boolean_t did_clat46 = FALSE;
5166 protocol_family_t old_proto_family = proto_family;
5167 struct sockaddr_in6 dest6;
5168 struct rtentry *rt = NULL;
5169 u_int32_t m_loop_set = 0;
5170
5171 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
5172
5173 /*
5174 * Get an io refcnt if the interface is attached to prevent ifnet_detach
5175 * from happening while this operation is in progress
5176 */
5177 if (!ifnet_datamov_begin(ifp)) {
5178 retval = ENXIO;
5179 goto cleanup;
5180 }
5181 iorefcnt = 1;
5182
5183 VERIFY(ifp->if_output_dlil != NULL);
5184
5185 /* update the driver's multicast filter, if needed */
5186 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
5187 ifp->if_updatemcasts = 0;
5188 }
5189
5190 frame_type = frame_type_buffer;
5191 dst_linkaddr = dst_linkaddr_buffer;
5192
5193 if (raw == 0) {
5194 ifnet_lock_shared(ifp);
5195 /* callee holds a proto refcnt upon success */
5196 proto = find_attached_proto(ifp, proto_family);
5197 if (proto == NULL) {
5198 ifnet_lock_done(ifp);
5199 retval = ENXIO;
5200 goto cleanup;
5201 }
5202 ifnet_lock_done(ifp);
5203 }
5204
5205 preout_again:
5206 if (packetlist == NULL) {
5207 goto cleanup;
5208 }
5209
5210 m = packetlist;
5211 packetlist = packetlist->m_nextpkt;
5212 m->m_nextpkt = NULL;
5213
5214 /*
5215 * Perform address family translation for the first
5216 * packet outside the loop in order to perform address
5217 * lookup for the translated proto family.
5218 */
5219 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5220 (ifp->if_type == IFT_CELLULAR ||
5221 dlil_is_clat_needed(proto_family, m))) {
5222 retval = dlil_clat46(ifp, &proto_family, &m);
5223 /*
5224 * Go to the next packet if translation fails
5225 */
5226 if (retval != 0) {
5227 m_freem(m);
5228 m = NULL;
5229 ip6stat.ip6s_clat464_out_drop++;
5230 /* Make sure that the proto family is PF_INET */
5231 ASSERT(proto_family == PF_INET);
5232 goto preout_again;
5233 }
5234 /*
5235 * Free the old one and make it point to the IPv6 proto structure.
5236 *
5237 * Change proto for the first time we have successfully
5238 * performed address family translation.
5239 */
5240 if (!did_clat46 && proto_family == PF_INET6) {
5241 did_clat46 = TRUE;
5242
5243 if (proto != NULL) {
5244 if_proto_free(proto);
5245 }
5246 ifnet_lock_shared(ifp);
5247 /* callee holds a proto refcnt upon success */
5248 proto = find_attached_proto(ifp, proto_family);
5249 if (proto == NULL) {
5250 ifnet_lock_done(ifp);
5251 retval = ENXIO;
5252 m_freem(m);
5253 m = NULL;
5254 goto cleanup;
5255 }
5256 ifnet_lock_done(ifp);
5257 if (ifp->if_type == IFT_ETHER) {
5258 /* Update the dest to translated v6 address */
5259 dest6.sin6_len = sizeof(struct sockaddr_in6);
5260 dest6.sin6_family = AF_INET6;
5261 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
5262 dest = (const struct sockaddr *)&dest6;
5263
5264 /*
5265 * Lookup route to the translated destination
5266 * Free this route ref during cleanup
5267 */
5268 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
5269 0, 0, ifp->if_index);
5270
5271 route = rt;
5272 }
5273 }
5274 }
5275
5276 /*
5277 * This path handles a packet chain going to the same destination.
5278 * The pre-output routine is used to either trigger resolution of
5279 * the next hop or retrieve the next hop's link-layer addressing,
5280 * e.g. the ether_inet(6)_pre_output routine.
5281 *
5282 * If the routine returns EJUSTRETURN, it implies that the packet has
5283 * been queued, and therefore we have to call preout_again for the
5284 * following packet in the chain.
5285 *
5286 * For errors other than EJUSTRETURN, the current packet is freed
5287 * and the rest of the chain (pointed to by packetlist) is freed as
5288 * part of cleanup.
5289 *
5290 * Otherwise, if there is no error, the retrieved information is used
5291 * for all the packets in the chain.
5292 */
5293 if (raw == 0) {
5294 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
5295 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
5296 retval = 0;
5297 if (preoutp != NULL) {
5298 retval = preoutp(ifp, proto_family, &m, dest, route,
5299 frame_type, dst_linkaddr);
5300
5301 if (retval != 0) {
5302 if (retval == EJUSTRETURN) {
5303 goto preout_again;
5304 }
5305 m_freem(m);
5306 m = NULL;
5307 goto cleanup;
5308 }
5309 }
5310 }
5311
5312 do {
5313 /*
5314 * pkt_hdr is set here to point to m_data prior to
5315 * calling into the framer. This value of pkt_hdr is
5316 * used by the netif GSO logic to retrieve the IP header
5317 * of TCP packets that are offloaded for TSO processing.
5318 */
5319 if ((raw != 0) && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
5320 uint8_t vlan_encap_len = 0;
5321
5322 if ((old_proto_family == PF_VLAN) &&
5323 ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0)) {
5324 vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
5325 }
5326 m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
5327 } else {
5328 m->m_pkthdr.pkt_hdr = mtod(m, void *);
5329 }
5330
5331 /*
5332 * Perform address family translation if needed.
5333 * For now we only support stateless 4 to 6 translation
5334 * on the out path.
5335 *
5336 * The routine below translates IP header, updates protocol
5337 * checksum and also translates ICMP.
5338 *
5339 * We skip the first packet as it is already translated and
5340 * the proto family is set to PF_INET6.
5341 */
5342 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5343 (ifp->if_type == IFT_CELLULAR ||
5344 dlil_is_clat_needed(proto_family, m))) {
5345 retval = dlil_clat46(ifp, &proto_family, &m);
5346 /* Goto the next packet if the translation fails */
5347 if (retval != 0) {
5348 m_freem(m);
5349 m = NULL;
5350 ip6stat.ip6s_clat464_out_drop++;
5351 goto next;
5352 }
5353 }
5354
5355 #if CONFIG_DTRACE
5356 if (!raw && proto_family == PF_INET) {
5357 struct ip *ip = mtod(m, struct ip *);
5358 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5359 struct ip *, ip, struct ifnet *, ifp,
5360 struct ip *, ip, struct ip6_hdr *, NULL);
5361 } else if (!raw && proto_family == PF_INET6) {
5362 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
5363 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5364 struct ip6_hdr *, ip6, struct ifnet *, ifp,
5365 struct ip *, NULL, struct ip6_hdr *, ip6);
5366 }
5367 #endif /* CONFIG_DTRACE */
5368
5369 if (raw == 0 && ifp->if_framer != NULL) {
5370 int rcvif_set = 0;
5371
5372 /*
5373 * If this is a broadcast packet that needs to be
5374 * looped back into the system, set the inbound ifp
5375 * to that of the outbound ifp. This will allow
5376 * us to determine that it is a legitimate packet
5377 * for the system. Only set the ifp if it's not
5378 * already set, just to be safe.
5379 */
5380 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
5381 m->m_pkthdr.rcvif == NULL) {
5382 m->m_pkthdr.rcvif = ifp;
5383 rcvif_set = 1;
5384 }
5385 m_loop_set = m->m_flags & M_LOOP;
5386 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
5387 frame_type, &pre, &post);
5388 if (retval != 0) {
5389 if (retval != EJUSTRETURN) {
5390 m_freem(m);
5391 }
5392 goto next;
5393 }
5394
5395 /*
5396 * For partial checksum offload, adjust the start
5397 * and stuff offsets based on the prepended header.
5398 */
5399 if ((m->m_pkthdr.csum_flags &
5400 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5401 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5402 m->m_pkthdr.csum_tx_stuff += pre;
5403 m->m_pkthdr.csum_tx_start += pre;
5404 }
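			/*
			 * Worked example (annotation, not part of the original
			 * source): assuming an Ethernet framer that prepends
			 * ETHER_HDR_LEN (14) bytes, a packet whose checksum
			 * region previously started at csum_tx_start = 20 with
			 * the checksum field stuffed at csum_tx_stuff = 36 is
			 * adjusted to 34 and 50 respectively, keeping both
			 * offsets relative to the new start of the frame.
			 */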
5405
5406 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
5407 dlil_output_cksum_dbg(ifp, m, pre,
5408 proto_family);
5409 }
5410
5411 /*
5412 * Clear the ifp if it was set above, and to be
5413 * safe, only if it is still the same as the
5414 * outbound ifp we have in context. If it was
5415 * looped back, then a copy of it was sent to the
5416 * loopback interface with the rcvif set, and we
5417 * are clearing the one that will go down to the
5418 * layer below.
5419 */
5420 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
5421 m->m_pkthdr.rcvif = NULL;
5422 }
5423 }
5424
5425 /*
5426 * Let interface filters (if any) do their thing ...
5427 */
5428 retval = dlil_interface_filters_output(ifp, &m, proto_family);
5429 if (retval != 0) {
5430 if (retval != EJUSTRETURN) {
5431 m_freem(m);
5432 }
5433 goto next;
5434 }
5435 /*
5436 * Strip away the M_PROTO1 bit prior to sending the packet
5437 * to the driver, as this bit may be used by the driver.
5438 */
5439 m->m_flags &= ~M_PROTO1;
5440
5441 /*
5442 * If the underlying interface is not capable of handling a
5443 * packet whose data portion spans across physically disjoint
5444 * pages, we need to "normalize" the packet so that we pass
5445 * down a chain of mbufs where each mbuf points to a span that
5446 * resides within a single system page. If the packet does
5447 * not cross page(s), the following is a no-op.
5448 */
5449 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
5450 if ((m = m_normalize(m)) == NULL) {
5451 goto next;
5452 }
5453 }
5454
5455 /*
5456 * If this is a TSO packet, make sure the interface still
5457 * advertises TSO capability.
5458 */
5459 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
5460 retval = EMSGSIZE;
5461 m_freem(m);
5462 goto cleanup;
5463 }
5464
5465 ifp_inc_traffic_class_out(ifp, m);
5466
5467 pktap_output(ifp, proto_family, m, pre, post);
5468
5469 /*
5470 * Count the number of elements in the mbuf chain
5471 */
5472 if (tx_chain_len_count) {
5473 dlil_count_chain_len(m, &tx_chain_len_stats);
5474 }
5475
5476 /*
5477 * Record timestamp; ifnet_enqueue() will use this info
5478 * rather than redoing the work. An optimization could
5479 * involve doing this just once at the top, if there are
5480 * no interface filters attached, but that's probably
5481 * not a big deal.
5482 */
5483 nanouptime(&now);
5484 net_timernsec(&now, &now_nsec);
5485 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
5486
5487 /*
5488 * Discard partial sum information if this packet originated
5489 * from another interface; the packet would already have the
5490 * final checksum and we shouldn't recompute it.
5491 */
5492 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
5493 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5494 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5495 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5496 m->m_pkthdr.csum_data = 0;
5497 }
5498
5499 /*
5500 * Finally, call the driver.
5501 */
5502 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
5503 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5504 flen += (m_pktlen(m) - (pre + post));
5505 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5506 }
5507 *send_tail = m;
5508 send_tail = &m->m_nextpkt;
5509 } else {
5510 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5511 flen = (m_pktlen(m) - (pre + post));
5512 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5513 } else {
5514 flen = 0;
5515 }
5516 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5517 0, 0, 0, 0, 0);
5518 retval = (*ifp->if_output_dlil)(ifp, m);
5519 if (retval == EQFULL || retval == EQSUSPENDED) {
5520 if (adv != NULL && adv->code == FADV_SUCCESS) {
5521 adv->code = (retval == EQFULL ?
5522 FADV_FLOW_CONTROLLED :
5523 FADV_SUSPENDED);
5524 }
5525 retval = 0;
5526 }
5527 if (retval == 0 && flen > 0) {
5528 fbytes += flen;
5529 fpkts++;
5530 }
5531 if (retval != 0 && dlil_verbose) {
5532 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5533 __func__, if_name(ifp),
5534 retval);
5535 }
5536 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
5537 0, 0, 0, 0, 0);
5538 }
5539 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5540
5541 next:
5542 m = packetlist;
5543 if (m != NULL) {
5544 m->m_flags |= m_loop_set;
5545 packetlist = packetlist->m_nextpkt;
5546 m->m_nextpkt = NULL;
5547 }
5548 /* Reset the proto family to old proto family for CLAT */
5549 if (did_clat46) {
5550 proto_family = old_proto_family;
5551 }
5552 } while (m != NULL);
5553
5554 if (send_head != NULL) {
5555 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5556 0, 0, 0, 0, 0);
5557 if (ifp->if_eflags & IFEF_SENDLIST) {
5558 retval = (*ifp->if_output_dlil)(ifp, send_head);
5559 if (retval == EQFULL || retval == EQSUSPENDED) {
5560 if (adv != NULL) {
5561 adv->code = (retval == EQFULL ?
5562 FADV_FLOW_CONTROLLED :
5563 FADV_SUSPENDED);
5564 }
5565 retval = 0;
5566 }
5567 if (retval == 0 && flen > 0) {
5568 fbytes += flen;
5569 fpkts++;
5570 }
5571 if (retval != 0 && dlil_verbose) {
5572 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5573 __func__, if_name(ifp), retval);
5574 }
5575 } else {
5576 struct mbuf *send_m;
5577 int enq_cnt = 0;
5578 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
5579 while (send_head != NULL) {
5580 send_m = send_head;
5581 send_head = send_m->m_nextpkt;
5582 send_m->m_nextpkt = NULL;
5583 retval = (*ifp->if_output_dlil)(ifp, send_m);
5584 if (retval == EQFULL || retval == EQSUSPENDED) {
5585 if (adv != NULL) {
5586 adv->code = (retval == EQFULL ?
5587 FADV_FLOW_CONTROLLED :
5588 FADV_SUSPENDED);
5589 }
5590 retval = 0;
5591 }
5592 if (retval == 0) {
5593 enq_cnt++;
5594 if (flen > 0) {
5595 fpkts++;
5596 }
5597 }
5598 if (retval != 0 && dlil_verbose) {
5599 DLIL_PRINTF("%s: output error on %s "
5600 "retval = %d\n",
5601 __func__, if_name(ifp), retval);
5602 }
5603 }
5604 if (enq_cnt > 0) {
5605 fbytes += flen;
5606 ifnet_start(ifp);
5607 }
5608 }
5609 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5610 }
5611
5612 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5613
5614 cleanup:
5615 if (fbytes > 0) {
5616 ifp->if_fbytes += fbytes;
5617 }
5618 if (fpkts > 0) {
5619 ifp->if_fpackets += fpkts;
5620 }
5621 if (proto != NULL) {
5622 if_proto_free(proto);
5623 }
5624 if (packetlist) { /* if any packets are left, clean up */
5625 mbuf_freem_list(packetlist);
5626 }
5627 if (retval == EJUSTRETURN) {
5628 retval = 0;
5629 }
5630 if (iorefcnt == 1) {
5631 ifnet_datamov_end(ifp);
5632 }
5633 if (rt != NULL) {
5634 rtfree(rt);
5635 rt = NULL;
5636 }
5637
5638 return retval;
5639 }
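/*
 * Illustrative sketch (annotation, not part of the original source): a
 * hypothetical proto_media_preout callback following the contract relied
 * upon by dlil_output() above -- fill in frame_type/link_layer_dest and
 * return 0 on success, return EJUSTRETURN if the packet was consumed
 * (e.g. queued pending address resolution), or any other errno to have
 * the caller drop the packet and the remainder of the chain.  The helpers
 * example_lookup_lladdr() and example_hold_for_resolution() are made-up
 * names.
 */
#if 0
static errno_t
example_pre_output(ifnet_t ifp, protocol_family_t protocol, mbuf_t *packet,
    const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
	struct sockaddr_dl lladdr;

	if (example_lookup_lladdr(ifp, dest, route, &lladdr) == 0) {
		/* Next hop is resolved: hand the framing info back */
		bcopy(LLADDR(&lladdr), link_layer_dest, lladdr.sdl_alen);
		*(u_int16_t *)(void *)frame_type = htons(ETHERTYPE_IP);
		return 0;
	}
	/* Not resolved yet: take ownership of the mbuf and queue it */
	example_hold_for_resolution(ifp, dest, *packet);
	return EJUSTRETURN;
}
#endif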
5640
5641 /*
5642 * This routine checks whether the destination address requires CLAT translation,
5643 * i.e. it is not a loopback, link-local, multicast or broadcast address.
5644 */
5645 static int
5646 dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
5647 {
5648 int ret = 0;
5649 switch (proto_family) {
5650 case PF_INET: {
5651 struct ip *iph = mtod(m, struct ip *);
5652 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
5653 ret = 1;
5654 }
5655 break;
5656 }
5657 case PF_INET6: {
5658 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
5659 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
5660 CLAT64_NEEDED(&ip6h->ip6_dst)) {
5661 ret = 1;
5662 }
5663 break;
5664 }
5665 }
5666
5667 return ret;
5668 }
5669 /*
5670 * @brief This routine translates an IPv4 packet to IPv6, updates the
5671 * protocol checksum and also translates ICMP type/code along with
5672 * the inner header.
5673 *
5674 * @param ifp Pointer to the interface
5675 * @param proto_family pointer to protocol family. It is updated if function
5676 * performs the translation successfully.
5677 * @param m Pointer to the pointer pointing to the packet. Needed because this
5678 * routine can end up changing the mbuf to a different one.
5679 *
5680 * @return 0 on success or else a negative value.
5681 */
5682 static errno_t
5683 dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5684 {
5685 VERIFY(*proto_family == PF_INET);
5686 VERIFY(IS_INTF_CLAT46(ifp));
5687
5688 pbuf_t pbuf_store, *pbuf = NULL;
5689 struct ip *iph = NULL;
5690 struct in_addr osrc, odst;
5691 uint8_t proto = 0;
5692 struct in6_ifaddr *ia6_clat_src = NULL;
5693 struct in6_addr *src = NULL;
5694 struct in6_addr dst;
5695 int error = 0;
5696 uint16_t off = 0;
5697 uint16_t tot_len = 0;
5698 uint16_t ip_id_val = 0;
5699 uint16_t ip_frag_off = 0;
5700
5701 boolean_t is_frag = FALSE;
5702 boolean_t is_first_frag = TRUE;
5703 boolean_t is_last_frag = TRUE;
5704
5705 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5706 pbuf = &pbuf_store;
5707 iph = pbuf->pb_data;
5708
5709 osrc = iph->ip_src;
5710 odst = iph->ip_dst;
5711 proto = iph->ip_p;
5712 off = (uint16_t)(iph->ip_hl << 2);
5713 ip_id_val = iph->ip_id;
5714 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5715
5716 tot_len = ntohs(iph->ip_len);
5717
5718 /*
5719 * For packets that are not first fragments
5720 * we only need to adjust the checksum.
5721 * For 4-to-6, the fragmentation header gets appended
5722 * after the protocol translation.
5723 */
5724 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5725 is_frag = TRUE;
5726
5727 /* If the offset is not zero, it is not first frag */
5728 if (ip_frag_off != 0) {
5729 is_first_frag = FALSE;
5730 }
5731
5732 /* If IP_MF is set, then it is not last frag */
5733 if (ntohs(iph->ip_off) & IP_MF) {
5734 is_last_frag = FALSE;
5735 }
5736 }
5737
5738 /*
5739 * Retrieve the local IPv6 CLAT46 address reserved for stateless
5740 * translation.
5741 */
5742 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5743 if (ia6_clat_src == NULL) {
5744 ip6stat.ip6s_clat464_out_nov6addr_drop++;
5745 error = -1;
5746 goto cleanup;
5747 }
5748
5749 src = &ia6_clat_src->ia_addr.sin6_addr;
5750
5751 /*
5752 * Translate IPv4 destination to IPv6 destination by using the
5753 * prefixes learned through prior PLAT discovery.
5754 */
5755 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5756 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5757 goto cleanup;
5758 }
5759
5760 /* Translate the IP header part first */
5761 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5762 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5763
5764 iph = NULL; /* Invalidate iph as pbuf has been modified */
5765
5766 if (error != 0) {
5767 ip6stat.ip6s_clat464_out_46transfail_drop++;
5768 goto cleanup;
5769 }
5770
5771 /*
5772 * Translate protocol header, update checksum, checksum flags
5773 * and related fields.
5774 */
5775 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5776 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5777
5778 if (error != 0) {
5779 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5780 goto cleanup;
5781 }
5782
5783 /* Now insert the IPv6 fragment header */
5784 if (is_frag) {
5785 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5786
5787 if (error != 0) {
5788 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5789 goto cleanup;
5790 }
5791 }
5792
5793 cleanup:
5794 if (ia6_clat_src != NULL) {
5795 IFA_REMREF(&ia6_clat_src->ia_ifa);
5796 }
5797
5798 if (pbuf_is_valid(pbuf)) {
5799 *m = pbuf->pb_mbuf;
5800 pbuf->pb_mbuf = NULL;
5801 pbuf_destroy(pbuf);
5802 } else {
5803 error = -1;
5804 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5805 }
5806
5807 if (error == 0) {
5808 *proto_family = PF_INET6;
5809 ip6stat.ip6s_clat464_out_success++;
5810 }
5811
5812 return error;
5813 }
5814
5815 /*
5816 * @brief This routine translates an incoming IPv6 packet to IPv4,
5817 * updates the protocol checksum and also translates the ICMPv6 outer
5818 * and inner headers.
5819 *
5820 * @return 0 on success or else a negative value.
5821 */
5822 static errno_t
5823 dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5824 {
5825 VERIFY(*proto_family == PF_INET6);
5826 VERIFY(IS_INTF_CLAT46(ifp));
5827
5828 struct ip6_hdr *ip6h = NULL;
5829 struct in6_addr osrc, odst;
5830 uint8_t proto = 0;
5831 struct in6_ifaddr *ia6_clat_dst = NULL;
5832 struct in_ifaddr *ia4_clat_dst = NULL;
5833 struct in_addr *dst = NULL;
5834 struct in_addr src;
5835 int error = 0;
5836 uint32_t off = 0;
5837 u_int64_t tot_len = 0;
5838 uint8_t tos = 0;
5839 boolean_t is_first_frag = TRUE;
5840
5841 /* Incoming mbuf does not contain a valid IPv6 header */
5842 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5843 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5844 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5845 ip6stat.ip6s_clat464_in_tooshort_drop++;
5846 return -1;
5847 }
5848
5849 ip6h = mtod(*m, struct ip6_hdr *);
5850 /* Validate that mbuf contains IP payload equal to ip6_plen */
5851 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5852 ip6stat.ip6s_clat464_in_tooshort_drop++;
5853 return -1;
5854 }
5855
5856 osrc = ip6h->ip6_src;
5857 odst = ip6h->ip6_dst;
5858
5859 /*
5860 * Retrieve the local CLAT46 reserved IPv6 address.
5861 * Let the packet pass if we don't find one, as the flag
5862 * may get set before IPv6 configuration has taken place.
5863 */
5864 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5865 if (ia6_clat_dst == NULL) {
5866 goto done;
5867 }
5868
5869 /*
5870 * Check if the original destination in the packet is the same as the reserved
5871 * CLAT46 IPv6 address
5872 */
5873 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5874 pbuf_t pbuf_store, *pbuf = NULL;
5875 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5876 pbuf = &pbuf_store;
5877
5878 /*
5879 * Retrieve the local CLAT46 IPv4 address reserved for stateless
5880 * translation.
5881 */
5882 ia4_clat_dst = inifa_ifpclatv4(ifp);
5883 if (ia4_clat_dst == NULL) {
5884 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5885 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5886 error = -1;
5887 goto cleanup;
5888 }
5889 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5890
5891 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5892 dst = &ia4_clat_dst->ia_addr.sin_addr;
5893 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5894 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5895 error = -1;
5896 goto cleanup;
5897 }
5898
5899 ip6h = pbuf->pb_data;
5900 off = sizeof(struct ip6_hdr);
5901 proto = ip6h->ip6_nxt;
5902 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5903 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5904
5905 /*
5906 * Translate the IP header and update the fragmentation
5907 * header if needed
5908 */
5909 error = (nat464_translate_64(pbuf, off, tos, &proto,
5910 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5911 0 : -1;
5912
5913 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5914
5915 if (error != 0) {
5916 ip6stat.ip6s_clat464_in_64transfail_drop++;
5917 goto cleanup;
5918 }
5919
5920 /*
5921 * Translate protocol header, update checksum, checksum flags
5922 * and related fields.
5923 */
5924 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5925 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5926 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5927
5928 if (error != 0) {
5929 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5930 goto cleanup;
5931 }
5932
5933 cleanup:
5934 if (ia4_clat_dst != NULL) {
5935 IFA_REMREF(&ia4_clat_dst->ia_ifa);
5936 }
5937
5938 if (pbuf_is_valid(pbuf)) {
5939 *m = pbuf->pb_mbuf;
5940 pbuf->pb_mbuf = NULL;
5941 pbuf_destroy(pbuf);
5942 } else {
5943 error = -1;
5944 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5945 }
5946
5947 if (error == 0) {
5948 *proto_family = PF_INET;
5949 ip6stat.ip6s_clat464_in_success++;
5950 }
5951 } /* CLAT traffic */
5952
5953 done:
5954 return error;
5955 }
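/*
 * Note (annotation, not part of the original source): the tos extraction
 * in dlil_clat64() above relies on the layout of the IPv6 ip6_flow word:
 * 4 bits of version, 8 bits of traffic class, then 20 bits of flow label.
 * Shifting the host-order word right by 20 and masking with 0xff therefore
 * recovers the traffic class that becomes the IPv4 TOS byte.  For example,
 * ip6_flow = 0x6e000000 (version 6, traffic class 0xe0, flow label 0)
 * yields (0x6e000000 >> 20) & 0xff == 0xe0.
 */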
5956
5957 errno_t
5958 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5959 void *ioctl_arg)
5960 {
5961 struct ifnet_filter *filter;
5962 int retval = EOPNOTSUPP;
5963 int result = 0;
5964
5965 if (ifp == NULL || ioctl_code == 0) {
5966 return EINVAL;
5967 }
5968
5969 /* Get an io ref count if the interface is attached */
5970 if (!ifnet_is_attached(ifp, 1)) {
5971 return EOPNOTSUPP;
5972 }
5973
5974 /*
5975 * Run the interface filters first.
5976 * We want to run all filters before calling the protocol,
5977 * interface family, or interface.
5978 */
5979 lck_mtx_lock_spin(&ifp->if_flt_lock);
5980 /* prevent filter list from changing in case we drop the lock */
5981 if_flt_monitor_busy(ifp);
5982 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5983 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5984 filter->filt_protocol == proto_fam)) {
5985 lck_mtx_unlock(&ifp->if_flt_lock);
5986
5987 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5988 proto_fam, ioctl_code, ioctl_arg);
5989
5990 lck_mtx_lock_spin(&ifp->if_flt_lock);
5991
5992 /* Only update retval if no one has handled the ioctl */
5993 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5994 if (result == ENOTSUP) {
5995 result = EOPNOTSUPP;
5996 }
5997 retval = result;
5998 if (retval != 0 && retval != EOPNOTSUPP) {
5999 /* we're done with the filter list */
6000 if_flt_monitor_unbusy(ifp);
6001 lck_mtx_unlock(&ifp->if_flt_lock);
6002 goto cleanup;
6003 }
6004 }
6005 }
6006 }
6007 /* we're done with the filter list */
6008 if_flt_monitor_unbusy(ifp);
6009 lck_mtx_unlock(&ifp->if_flt_lock);
6010
6011 /* Allow the protocol to handle the ioctl */
6012 if (proto_fam != 0) {
6013 struct if_proto *proto;
6014
6015 /* callee holds a proto refcnt upon success */
6016 ifnet_lock_shared(ifp);
6017 proto = find_attached_proto(ifp, proto_fam);
6018 ifnet_lock_done(ifp);
6019 if (proto != NULL) {
6020 proto_media_ioctl ioctlp =
6021 (proto->proto_kpi == kProtoKPI_v1 ?
6022 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
6023 result = EOPNOTSUPP;
6024 if (ioctlp != NULL) {
6025 result = ioctlp(ifp, proto_fam, ioctl_code,
6026 ioctl_arg);
6027 }
6028 if_proto_free(proto);
6029
6030 /* Only update retval if no one has handled the ioctl */
6031 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
6032 if (result == ENOTSUP) {
6033 result = EOPNOTSUPP;
6034 }
6035 retval = result;
6036 if (retval && retval != EOPNOTSUPP) {
6037 goto cleanup;
6038 }
6039 }
6040 }
6041 }
6042
6043 /* retval is either 0 or EOPNOTSUPP */
6044
6045 /*
6046 * Let the interface handle this ioctl.
6047 * If it returns EOPNOTSUPP, ignore that, we may have
6048 * already handled this in the protocol or family.
6049 */
6050 if (ifp->if_ioctl) {
6051 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
6052 }
6053
6054 /* Only update retval if no one has handled the ioctl */
6055 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
6056 if (result == ENOTSUP) {
6057 result = EOPNOTSUPP;
6058 }
6059 retval = result;
6060 if (retval && retval != EOPNOTSUPP) {
6061 goto cleanup;
6062 }
6063 }
6064
6065 cleanup:
6066 if (retval == EJUSTRETURN) {
6067 retval = 0;
6068 }
6069
6070 ifnet_decr_iorefcnt(ifp);
6071
6072 return retval;
6073 }
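/*
 * Illustrative sketch (annotation, not part of the original source): a
 * hypothetical interface filter ioctl handler cooperating with the
 * dispatch order implemented by ifnet_ioctl() above.  Returning EOPNOTSUPP
 * means "not handled" and dispatch continues; returning 0 records success
 * but still lets the remaining filters, the protocol and the interface see
 * the ioctl; returning EJUSTRETURN (or a hard error) ends the dispatch.
 * SIOCEXAMPLE and example_handle_ioctl() are made-up names.
 */
#if 0
static errno_t
example_filt_ioctl(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    unsigned long ioctl_code, void *ioctl_arg)
{
#pragma unused(cookie, protocol)
	if (ioctl_code != SIOCEXAMPLE) {
		return EOPNOTSUPP;	/* not ours; keep dispatching */
	}
	example_handle_ioctl(ifp, ioctl_arg);
	return EJUSTRETURN;		/* handled; stop dispatch, report success */
}
#endif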
6074
6075 __private_extern__ errno_t
6076 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
6077 {
6078 errno_t error = 0;
6079
6080
6081 if (ifp->if_set_bpf_tap) {
6082 /* Get an io reference on the interface if it is attached */
6083 if (!ifnet_is_attached(ifp, 1)) {
6084 return ENXIO;
6085 }
6086 error = ifp->if_set_bpf_tap(ifp, mode, callback);
6087 ifnet_decr_iorefcnt(ifp);
6088 }
6089 return error;
6090 }
6091
6092 errno_t
6093 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
6094 struct sockaddr *ll_addr, size_t ll_len)
6095 {
6096 errno_t result = EOPNOTSUPP;
6097 struct if_proto *proto;
6098 const struct sockaddr *verify;
6099 proto_media_resolve_multi resolvep;
6100
6101 if (!ifnet_is_attached(ifp, 1)) {
6102 return result;
6103 }
6104
6105 bzero(ll_addr, ll_len);
6106
6107 /* Call the protocol first; callee holds a proto refcnt upon success */
6108 ifnet_lock_shared(ifp);
6109 proto = find_attached_proto(ifp, proto_addr->sa_family);
6110 ifnet_lock_done(ifp);
6111 if (proto != NULL) {
6112 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
6113 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
6114 if (resolvep != NULL) {
6115 result = resolvep(ifp, proto_addr,
6116 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
6117 }
6118 if_proto_free(proto);
6119 }
6120
6121 /* Let the interface verify the multicast address */
6122 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
6123 if (result == 0) {
6124 verify = ll_addr;
6125 } else {
6126 verify = proto_addr;
6127 }
6128 result = ifp->if_check_multi(ifp, verify);
6129 }
6130
6131 ifnet_decr_iorefcnt(ifp);
6132 return result;
6133 }
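/*
 * Illustrative sketch (annotation, not part of the original source): what
 * a proto_media_resolve_multi callback reached above might look like for
 * IPv4 multicast over Ethernet, using the standard RFC 1112 mapping of
 * 01:00:5e plus the low 23 bits of the group address.  This is a
 * simplified stand-in, not the actual ether_inet_resolve_multi()
 * implementation.
 */
#if 0
static errno_t
example_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp)
	const struct sockaddr_in *sin =
	    (const struct sockaddr_in *)(const void *)proto_addr;
	u_int32_t group = ntohl(sin->sin_addr.s_addr);
	u_char *e;

	if (ll_len < sizeof(struct sockaddr_dl) || !IN_MULTICAST(group)) {
		return EINVAL;
	}
	out_ll->sdl_len = sizeof(struct sockaddr_dl);
	out_ll->sdl_family = AF_LINK;
	out_ll->sdl_type = IFT_ETHER;
	out_ll->sdl_nlen = 0;
	out_ll->sdl_alen = ETHER_ADDR_LEN;
	e = (u_char *)LLADDR(out_ll);
	e[0] = 0x01;
	e[1] = 0x00;
	e[2] = 0x5e;
	e[3] = (group >> 16) & 0x7f;
	e[4] = (group >> 8) & 0xff;
	e[5] = group & 0xff;
	return 0;
}
#endif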
6134
6135 __private_extern__ errno_t
6136 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
6137 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6138 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6139 {
6140 struct if_proto *proto;
6141 errno_t result = 0;
6142
6143 /* callee holds a proto refcnt upon success */
6144 ifnet_lock_shared(ifp);
6145 proto = find_attached_proto(ifp, target_proto->sa_family);
6146 ifnet_lock_done(ifp);
6147 if (proto == NULL) {
6148 result = ENOTSUP;
6149 } else {
6150 proto_media_send_arp arpp;
6151 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
6152 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
6153 if (arpp == NULL) {
6154 result = ENOTSUP;
6155 } else {
6156 switch (arpop) {
6157 case ARPOP_REQUEST:
6158 arpstat.txrequests++;
6159 if (target_hw != NULL) {
6160 arpstat.txurequests++;
6161 }
6162 break;
6163 case ARPOP_REPLY:
6164 arpstat.txreplies++;
6165 break;
6166 }
6167 result = arpp(ifp, arpop, sender_hw, sender_proto,
6168 target_hw, target_proto);
6169 }
6170 if_proto_free(proto);
6171 }
6172
6173 return result;
6174 }
6175
6176 struct net_thread_marks { };
6177 static const struct net_thread_marks net_thread_marks_base = { };
6178
6179 __private_extern__ const net_thread_marks_t net_thread_marks_none =
6180 &net_thread_marks_base;
6181
6182 __private_extern__ net_thread_marks_t
6183 net_thread_marks_push(u_int32_t push)
6184 {
6185 static const char *const base = (const void*)&net_thread_marks_base;
6186 u_int32_t pop = 0;
6187
6188 if (push != 0) {
6189 struct uthread *uth = get_bsdthread_info(current_thread());
6190
6191 pop = push & ~uth->uu_network_marks;
6192 if (pop != 0) {
6193 uth->uu_network_marks |= pop;
6194 }
6195 }
6196
6197 return (net_thread_marks_t)&base[pop];
6198 }
6199
6200 __private_extern__ net_thread_marks_t
6201 net_thread_unmarks_push(u_int32_t unpush)
6202 {
6203 static const char *const base = (const void*)&net_thread_marks_base;
6204 u_int32_t unpop = 0;
6205
6206 if (unpush != 0) {
6207 struct uthread *uth = get_bsdthread_info(current_thread());
6208
6209 unpop = unpush & uth->uu_network_marks;
6210 if (unpop != 0) {
6211 uth->uu_network_marks &= ~unpop;
6212 }
6213 }
6214
6215 return (net_thread_marks_t)&base[unpop];
6216 }
6217
6218 __private_extern__ void
6219 net_thread_marks_pop(net_thread_marks_t popx)
6220 {
6221 static const char *const base = (const void*)&net_thread_marks_base;
6222 const ptrdiff_t pop = (const char *)popx - (const char *)base;
6223
6224 if (pop != 0) {
6225 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6226 struct uthread *uth = get_bsdthread_info(current_thread());
6227
6228 VERIFY((pop & ones) == pop);
6229 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
6230 uth->uu_network_marks &= ~pop;
6231 }
6232 }
6233
6234 __private_extern__ void
6235 net_thread_unmarks_pop(net_thread_marks_t unpopx)
6236 {
6237 static const char *const base = (const void*)&net_thread_marks_base;
6238 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
6239
6240 if (unpop != 0) {
6241 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6242 struct uthread *uth = get_bsdthread_info(current_thread());
6243
6244 VERIFY((unpop & ones) == unpop);
6245 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
6246 uth->uu_network_marks |= unpop;
6247 }
6248 }
6249
6250 __private_extern__ u_int32_t
6251 net_thread_is_marked(u_int32_t check)
6252 {
6253 if (check != 0) {
6254 struct uthread *uth = get_bsdthread_info(current_thread());
6255 return uth->uu_network_marks & check;
6256 } else {
6257 return 0;
6258 }
6259 }
6260
6261 __private_extern__ u_int32_t
6262 net_thread_is_unmarked(u_int32_t check)
6263 {
6264 if (check != 0) {
6265 struct uthread *uth = get_bsdthread_info(current_thread());
6266 return ~uth->uu_network_marks & check;
6267 } else {
6268 return 0;
6269 }
6270 }
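/*
 * Illustrative sketch (annotation, not part of the original source): the
 * push/pop pairs above encode, as a byte offset from net_thread_marks_base,
 * exactly the bits that the push call newly set on the current uthread, so
 * a balanced pop clears only what the matching push set.  A typical
 * (hypothetical) use, with NET_THREAD_EXAMPLE standing in for one of the
 * real mark bits:
 */
#if 0
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_EXAMPLE);
	/* ... code on this thread that other layers can detect via ... */
	if (net_thread_is_marked(NET_THREAD_EXAMPLE)) {
		/* take the marked / re-entrant path */
	}
	net_thread_marks_pop(marks);	/* clears only the bits we set */
#endif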
6271
6272 static __inline__ int
6273 _is_announcement(const struct sockaddr_in * sender_sin,
6274 const struct sockaddr_in * target_sin)
6275 {
6276 if (target_sin == NULL || sender_sin == NULL) {
6277 return FALSE;
6278 }
6279
6280 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
6281 }
6282
6283 __private_extern__ errno_t
6284 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
6285 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
6286 const struct sockaddr *target_proto0, u_int32_t rtflags)
6287 {
6288 errno_t result = 0;
6289 const struct sockaddr_in * sender_sin;
6290 const struct sockaddr_in * target_sin;
6291 struct sockaddr_inarp target_proto_sinarp;
6292 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6293
6294 if (target_proto == NULL || sender_proto == NULL) {
6295 return EINVAL;
6296 }
6297
6298 if (sender_proto->sa_family != target_proto->sa_family) {
6299 return EINVAL;
6300 }
6301
6302 /*
6303 * If the target is a (default) router, provide that
6304 * information to the send_arp callback routine.
6305 */
6306 if (rtflags & RTF_ROUTER) {
6307 bcopy(target_proto, &target_proto_sinarp,
6308 sizeof(struct sockaddr_in));
6309 target_proto_sinarp.sin_other |= SIN_ROUTER;
6310 target_proto = (struct sockaddr *)&target_proto_sinarp;
6311 }
6312
6313 /*
6314 * If this is an ARP request and the target IP is IPv4LL,
6315 * send the request on all interfaces. The exception is
6316 * an announcement, which must only appear on the specific
6317 * interface.
6318 */
6319 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
6320 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6321 if (target_proto->sa_family == AF_INET &&
6322 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
6323 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
6324 !_is_announcement(sender_sin, target_sin)) {
6325 ifnet_t *ifp_list;
6326 u_int32_t count;
6327 u_int32_t ifp_on;
6328
6329 result = ENOTSUP;
6330
6331 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
6332 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6333 errno_t new_result;
6334 ifaddr_t source_hw = NULL;
6335 ifaddr_t source_ip = NULL;
6336 struct sockaddr_in source_ip_copy;
6337 struct ifnet *cur_ifp = ifp_list[ifp_on];
6338
6339 /*
6340 * Only arp on interfaces marked for IPv4LL
6341 * ARPing. This may mean that we don't ARP on
6342 * the interface the subnet route points to.
6343 */
6344 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
6345 continue;
6346 }
6347
6348 /* Find the source IP address */
6349 ifnet_lock_shared(cur_ifp);
6350 source_hw = cur_ifp->if_lladdr;
6351 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
6352 ifa_link) {
6353 IFA_LOCK(source_ip);
6354 if (source_ip->ifa_addr != NULL &&
6355 source_ip->ifa_addr->sa_family ==
6356 AF_INET) {
6357 /* Copy the source IP address */
6358 source_ip_copy =
6359 *(struct sockaddr_in *)
6360 (void *)source_ip->ifa_addr;
6361 IFA_UNLOCK(source_ip);
6362 break;
6363 }
6364 IFA_UNLOCK(source_ip);
6365 }
6366
6367 /* No IP Source, don't arp */
6368 if (source_ip == NULL) {
6369 ifnet_lock_done(cur_ifp);
6370 continue;
6371 }
6372
6373 IFA_ADDREF(source_hw);
6374 ifnet_lock_done(cur_ifp);
6375
6376 /* Send the ARP */
6377 new_result = dlil_send_arp_internal(cur_ifp,
6378 arpop, (struct sockaddr_dl *)(void *)
6379 source_hw->ifa_addr,
6380 (struct sockaddr *)&source_ip_copy, NULL,
6381 target_proto);
6382
6383 IFA_REMREF(source_hw);
6384 if (result == ENOTSUP) {
6385 result = new_result;
6386 }
6387 }
6388 ifnet_list_free(ifp_list);
6389 }
6390 } else {
6391 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
6392 sender_proto, target_hw, target_proto);
6393 }
6394
6395 return result;
6396 }
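/*
 * Note (annotation, not part of the original source): the IPv4LL special
 * case above matches targets in 169.254.0.0/16.  For example, a target of
 * 169.254.1.2 (0xa9fe0102 in host order) satisfies IN_LINKLOCAL(), so an
 * ARPOP_REQUEST for it -- unless it is an announcement -- is probed on
 * every interface flagged IFEF_ARPLL rather than only on the interface
 * passed in.
 */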
6397
6398 /*
6399 * Caller must hold ifnet head lock.
6400 */
6401 static int
6402 ifnet_lookup(struct ifnet *ifp)
6403 {
6404 struct ifnet *_ifp;
6405
6406 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
6407 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
6408 if (_ifp == ifp) {
6409 break;
6410 }
6411 }
6412 return _ifp != NULL;
6413 }
6414
6415 /*
6416 * Caller has to pass a non-zero refio argument to get an
6417 * IO reference count. This will prevent ifnet_detach from
6418 * being called when there are outstanding io reference counts.
6419 */
6420 int
6421 ifnet_is_attached(struct ifnet *ifp, int refio)
6422 {
6423 int ret;
6424
6425 lck_mtx_lock_spin(&ifp->if_ref_lock);
6426 if ((ret = IF_FULLY_ATTACHED(ifp))) {
6427 if (refio > 0) {
6428 ifp->if_refio++;
6429 }
6430 }
6431 lck_mtx_unlock(&ifp->if_ref_lock);
6432
6433 return ret;
6434 }
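/*
 * Illustrative sketch (annotation, not part of the original source): the
 * usual pattern for the refio argument, as used by several KPIs in this
 * file -- take an io reference for the duration of the work so that
 * ifnet_detach() waits for us, then drop it.
 */
#if 0
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;		/* detached or detaching */
	}
	/* ... safely use ifp here ... */
	ifnet_decr_iorefcnt(ifp);	/* balance the io reference */
#endif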
6435
6436 void
6437 ifnet_incr_pending_thread_count(struct ifnet *ifp)
6438 {
6439 lck_mtx_lock_spin(&ifp->if_ref_lock);
6440 ifp->if_threads_pending++;
6441 lck_mtx_unlock(&ifp->if_ref_lock);
6442 }
6443
6444 void
6445 ifnet_decr_pending_thread_count(struct ifnet *ifp)
6446 {
6447 lck_mtx_lock_spin(&ifp->if_ref_lock);
6448 VERIFY(ifp->if_threads_pending > 0);
6449 ifp->if_threads_pending--;
6450 if (ifp->if_threads_pending == 0) {
6451 wakeup(&ifp->if_threads_pending);
6452 }
6453 lck_mtx_unlock(&ifp->if_ref_lock);
6454 }
6455
6456 /*
6457 * Caller must ensure the interface is attached; the assumption is that
6458 * there is at least an outstanding IO reference count held already.
6459 * Most callers would call ifnet_is_{attached,data_ready}() instead.
6460 */
6461 void
6462 ifnet_incr_iorefcnt(struct ifnet *ifp)
6463 {
6464 lck_mtx_lock_spin(&ifp->if_ref_lock);
6465 VERIFY(IF_FULLY_ATTACHED(ifp));
6466 VERIFY(ifp->if_refio > 0);
6467 ifp->if_refio++;
6468 lck_mtx_unlock(&ifp->if_ref_lock);
6469 }
6470
6471 __attribute__((always_inline))
6472 static void
6473 ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
6474 {
6475 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
6476
6477 VERIFY(ifp->if_refio > 0);
6478 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6479
6480 ifp->if_refio--;
6481 VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);
6482
6483 /*
6484 * If there are no more outstanding io references, wake up the
6485 * ifnet_detach thread if the detaching flag is set.
6486 */
6487 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
6488 wakeup(&(ifp->if_refio));
6489 }
6490 }
6491
6492 void
6493 ifnet_decr_iorefcnt(struct ifnet *ifp)
6494 {
6495 lck_mtx_lock_spin(&ifp->if_ref_lock);
6496 ifnet_decr_iorefcnt_locked(ifp);
6497 lck_mtx_unlock(&ifp->if_ref_lock);
6498 }
6499
6500 boolean_t
6501 ifnet_datamov_begin(struct ifnet *ifp)
6502 {
6503 boolean_t ret;
6504
6505 lck_mtx_lock_spin(&ifp->if_ref_lock);
6506 if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
6507 ifp->if_refio++;
6508 ifp->if_datamov++;
6509 }
6510 lck_mtx_unlock(&ifp->if_ref_lock);
6511
6512 return ret;
6513 }
6514
6515 void
6516 ifnet_datamov_end(struct ifnet *ifp)
6517 {
6518 lck_mtx_lock_spin(&ifp->if_ref_lock);
6519 VERIFY(ifp->if_datamov > 0);
6520 /*
6521 * if there are no more threads moving data, wake up any
6522 * drainers blocked waiting for this.
6523 */
6524 if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
6525 wakeup(&(ifp->if_datamov));
6526 }
6527 ifnet_decr_iorefcnt_locked(ifp);
6528 lck_mtx_unlock(&ifp->if_ref_lock);
6529 }
6530
6531 void
6532 ifnet_datamov_suspend(struct ifnet *ifp)
6533 {
6534 lck_mtx_lock_spin(&ifp->if_ref_lock);
6535 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6536 ifp->if_refio++;
6537 if (ifp->if_suspend++ == 0) {
6538 VERIFY(ifp->if_refflags & IFRF_READY);
6539 ifp->if_refflags &= ~IFRF_READY;
6540 }
6541 lck_mtx_unlock(&ifp->if_ref_lock);
6542 }
6543
6544 void
6545 ifnet_datamov_drain(struct ifnet *ifp)
6546 {
6547 lck_mtx_lock(&ifp->if_ref_lock);
6548 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6549 /* data movement must already be suspended */
6550 VERIFY(ifp->if_suspend > 0);
6551 VERIFY(!(ifp->if_refflags & IFRF_READY));
6552 ifp->if_drainers++;
6553 while (ifp->if_datamov != 0) {
6554 (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
6555 (PZERO - 1), __func__, NULL);
6556 }
6557 VERIFY(!(ifp->if_refflags & IFRF_READY));
6558 VERIFY(ifp->if_drainers > 0);
6559 ifp->if_drainers--;
6560 lck_mtx_unlock(&ifp->if_ref_lock);
6561
6562 /* purge the interface queues */
6563 if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
6564 if_qflush(ifp, 0);
6565 }
6566 }
6567
6568 void
6569 ifnet_datamov_resume(struct ifnet *ifp)
6570 {
6571 lck_mtx_lock(&ifp->if_ref_lock);
6572 /* data movement must already be suspended */
6573 VERIFY(ifp->if_suspend > 0);
6574 if (--ifp->if_suspend == 0) {
6575 VERIFY(!(ifp->if_refflags & IFRF_READY));
6576 ifp->if_refflags |= IFRF_READY;
6577 }
6578 ifnet_decr_iorefcnt_locked(ifp);
6579 lck_mtx_unlock(&ifp->if_ref_lock);
6580 }
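/*
 * Illustrative sketch (annotation, not part of the original source): how
 * the datamov calls above compose.  A control path that needs the data
 * path quiesced first suspends new data movement, drains the threads that
 * are already between ifnet_datamov_begin() and ifnet_datamov_end(), does
 * its work, and then resumes:
 */
#if 0
	ifnet_datamov_suspend(ifp);	/* clears IFRF_READY; blocks new movers */
	ifnet_datamov_drain(ifp);	/* waits for if_datamov to drop to 0 */
	/* ... reconfigure the interface ... */
	ifnet_datamov_resume(ifp);	/* restores IFRF_READY on last resume */
#endif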
6581
6582 static void
6583 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
6584 {
6585 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
6586 ctrace_t *tr;
6587 u_int32_t idx;
6588 u_int16_t *cnt;
6589
6590 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
6591 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
6592 /* NOTREACHED */
6593 }
6594
6595 if (refhold) {
6596 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
6597 tr = dl_if_dbg->dldbg_if_refhold;
6598 } else {
6599 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
6600 tr = dl_if_dbg->dldbg_if_refrele;
6601 }
6602
6603 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
6604 ctrace_record(&tr[idx]);
6605 }
6606
6607 errno_t
6608 dlil_if_ref(struct ifnet *ifp)
6609 {
6610 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6611
6612 if (dl_if == NULL) {
6613 return EINVAL;
6614 }
6615
6616 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6617 ++dl_if->dl_if_refcnt;
6618 if (dl_if->dl_if_refcnt == 0) {
6619 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
6620 /* NOTREACHED */
6621 }
6622 if (dl_if->dl_if_trace != NULL) {
6623 (*dl_if->dl_if_trace)(dl_if, TRUE);
6624 }
6625 lck_mtx_unlock(&dl_if->dl_if_lock);
6626
6627 return 0;
6628 }
6629
6630 errno_t
6631 dlil_if_free(struct ifnet *ifp)
6632 {
6633 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6634 bool need_release = FALSE;
6635
6636 if (dl_if == NULL) {
6637 return EINVAL;
6638 }
6639
6640 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6641 switch (dl_if->dl_if_refcnt) {
6642 case 0:
6643 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
6644 /* NOTREACHED */
6645 break;
6646 case 1:
6647 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
6648 need_release = TRUE;
6649 }
6650 break;
6651 default:
6652 break;
6653 }
6654 --dl_if->dl_if_refcnt;
6655 if (dl_if->dl_if_trace != NULL) {
6656 (*dl_if->dl_if_trace)(dl_if, FALSE);
6657 }
6658 lck_mtx_unlock(&dl_if->dl_if_lock);
6659 if (need_release) {
6660 dlil_if_release(ifp);
6661 }
6662 return 0;
6663 }
6664
6665 static errno_t
6666 dlil_attach_protocol_internal(struct if_proto *proto,
6667 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
6668 uint32_t * proto_count)
6669 {
6670 struct kev_dl_proto_data ev_pr_data;
6671 struct ifnet *ifp = proto->ifp;
6672 int retval = 0;
6673 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6674 struct if_proto *prev_proto;
6675 struct if_proto *_proto;
6676
6677 /* callee holds a proto refcnt upon success */
6678 ifnet_lock_exclusive(ifp);
6679 _proto = find_attached_proto(ifp, proto->protocol_family);
6680 if (_proto != NULL) {
6681 ifnet_lock_done(ifp);
6682 if_proto_free(_proto);
6683 return EEXIST;
6684 }
6685
6686 /*
6687 * Call family module add_proto routine so it can refine the
6688 * demux descriptors as it wishes.
6689 */
6690 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
6691 demux_count);
6692 if (retval) {
6693 ifnet_lock_done(ifp);
6694 return retval;
6695 }
6696
6697 /*
6698 * Insert the protocol in the hash
6699 */
6700 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
6701 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
6702 prev_proto = SLIST_NEXT(prev_proto, next_hash);
6703 }
6704 if (prev_proto) {
6705 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
6706 } else {
6707 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
6708 proto, next_hash);
6709 }
6710
6711 /* hold a proto refcnt for attach */
6712 if_proto_ref(proto);
6713
6714 /*
6715 * The reserved field carries the number of protocols still attached
6716 * (subject to change)
6717 */
6718 ev_pr_data.proto_family = proto->protocol_family;
6719 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
6720
6721 ifnet_lock_done(ifp);
6722
6723 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
6724 (struct net_event_data *)&ev_pr_data,
6725 sizeof(struct kev_dl_proto_data));
6726 if (proto_count != NULL) {
6727 *proto_count = ev_pr_data.proto_remaining_count;
6728 }
6729 return retval;
6730 }
6731
6732 errno_t
6733 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6734 const struct ifnet_attach_proto_param *proto_details)
6735 {
6736 int retval = 0;
6737 struct if_proto *ifproto = NULL;
6738 uint32_t proto_count = 0;
6739
6740 ifnet_head_lock_shared();
6741 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6742 retval = EINVAL;
6743 goto end;
6744 }
6745 /* Check that the interface is in the global list */
6746 if (!ifnet_lookup(ifp)) {
6747 retval = ENXIO;
6748 goto end;
6749 }
6750
6751 ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO);
6752 if (ifproto == NULL) {
6753 retval = ENOMEM;
6754 goto end;
6755 }
6756
6757 /* refcnt held above during lookup */
6758 ifproto->ifp = ifp;
6759 ifproto->protocol_family = protocol;
6760 ifproto->proto_kpi = kProtoKPI_v1;
6761 ifproto->kpi.v1.input = proto_details->input;
6762 ifproto->kpi.v1.pre_output = proto_details->pre_output;
6763 ifproto->kpi.v1.event = proto_details->event;
6764 ifproto->kpi.v1.ioctl = proto_details->ioctl;
6765 ifproto->kpi.v1.detached = proto_details->detached;
6766 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
6767 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6768
6769 retval = dlil_attach_protocol_internal(ifproto,
6770 proto_details->demux_list, proto_details->demux_count,
6771 &proto_count);
6772
6773 end:
6774 if (retval != 0 && retval != EEXIST) {
6775 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
6776 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6777 } else {
6778 if (dlil_verbose) {
6779 DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
6780 ifp != NULL ? if_name(ifp) : "N/A",
6781 protocol, proto_count);
6782 }
6783 }
6784 ifnet_head_done();
6785 if (retval == 0) {
6786 /*
6787 * A protocol has been attached, mark the interface up.
6788 * This used to be done by configd.KernelEventMonitor, but that
6789 * is inherently prone to races (rdar://problem/30810208).
6790 */
6791 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6792 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6793 dlil_post_sifflags_msg(ifp);
6794 } else if (ifproto != NULL) {
6795 zfree(dlif_proto_zone, ifproto);
6796 }
6797 return retval;
6798 }
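/*
 * Illustrative sketch (annotation, not part of the original source): a
 * minimal v1 protocol attach.  PF_EXAMPLE, example_ethertype and the
 * example_proto_* callbacks are made-up names with the proto_media_*
 * signatures used elsewhere in this file; callbacks the protocol does not
 * need can be left NULL, since dlil checks for NULL before invoking them.
 * The demux descriptor assumes an Ethernet-family interface keyed by
 * ethertype (DLIL_DESC_ETYPE2).
 */
#if 0
	struct ifnet_attach_proto_param param;
	struct ifnet_demux_desc demux = {
		.type = DLIL_DESC_ETYPE2,
		.data = &example_ethertype,	/* e.g. htons()'ed ethertype */
		.datalen = sizeof(example_ethertype),
	};
	errno_t err;

	bzero(&param, sizeof(param));
	param.demux_list = &demux;
	param.demux_count = 1;
	param.input = example_proto_input;
	param.pre_output = example_proto_pre_output;
	param.ioctl = example_proto_ioctl;
	err = ifnet_attach_protocol(ifp, PF_EXAMPLE, &param);
#endif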
6799
6800 errno_t
6801 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6802 const struct ifnet_attach_proto_param_v2 *proto_details)
6803 {
6804 int retval = 0;
6805 struct if_proto *ifproto = NULL;
6806 uint32_t proto_count = 0;
6807
6808 ifnet_head_lock_shared();
6809 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6810 retval = EINVAL;
6811 goto end;
6812 }
6813 /* Check that the interface is in the global list */
6814 if (!ifnet_lookup(ifp)) {
6815 retval = ENXIO;
6816 goto end;
6817 }
6818
6819 ifproto = zalloc(dlif_proto_zone);
6820 if (ifproto == NULL) {
6821 retval = ENOMEM;
6822 goto end;
6823 }
6824 bzero(ifproto, sizeof(*ifproto));
6825
6826 /* refcnt held above during lookup */
6827 ifproto->ifp = ifp;
6828 ifproto->protocol_family = protocol;
6829 ifproto->proto_kpi = kProtoKPI_v2;
6830 ifproto->kpi.v2.input = proto_details->input;
6831 ifproto->kpi.v2.pre_output = proto_details->pre_output;
6832 ifproto->kpi.v2.event = proto_details->event;
6833 ifproto->kpi.v2.ioctl = proto_details->ioctl;
6834 ifproto->kpi.v2.detached = proto_details->detached;
6835 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
6836 ifproto->kpi.v2.send_arp = proto_details->send_arp;
6837
6838 retval = dlil_attach_protocol_internal(ifproto,
6839 proto_details->demux_list, proto_details->demux_count,
6840 &proto_count);
6841
6842 end:
6843 if (retval != 0 && retval != EEXIST) {
6844 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
6845 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6846 } else {
6847 if (dlil_verbose) {
6848 DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
6849 ifp != NULL ? if_name(ifp) : "N/A",
6850 protocol, proto_count);
6851 }
6852 }
6853 ifnet_head_done();
6854 if (retval == 0) {
6855 /*
6856 * A protocol has been attached, mark the interface up.
6857 * This used to be done by configd.KernelEventMonitor, but that
6858 * is inherently prone to races (rdar://problem/30810208).
6859 */
6860 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6861 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6862 dlil_post_sifflags_msg(ifp);
6863 } else if (ifproto != NULL) {
6864 zfree(dlif_proto_zone, ifproto);
6865 }
6866 return retval;
6867 }
6868
6869 errno_t
6870 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
6871 {
6872 struct if_proto *proto = NULL;
6873 int retval = 0;
6874
6875 if (ifp == NULL || proto_family == 0) {
6876 retval = EINVAL;
6877 goto end;
6878 }
6879
6880 ifnet_lock_exclusive(ifp);
6881 /* callee holds a proto refcnt upon success */
6882 proto = find_attached_proto(ifp, proto_family);
6883 if (proto == NULL) {
6884 retval = ENXIO;
6885 ifnet_lock_done(ifp);
6886 goto end;
6887 }
6888
6889 /* call family module del_proto */
6890 if (ifp->if_del_proto) {
6891 ifp->if_del_proto(ifp, proto->protocol_family);
6892 }
6893
6894 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
6895 proto, if_proto, next_hash);
6896
6897 if (proto->proto_kpi == kProtoKPI_v1) {
6898 proto->kpi.v1.input = ifproto_media_input_v1;
6899 proto->kpi.v1.pre_output = ifproto_media_preout;
6900 proto->kpi.v1.event = ifproto_media_event;
6901 proto->kpi.v1.ioctl = ifproto_media_ioctl;
6902 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
6903 proto->kpi.v1.send_arp = ifproto_media_send_arp;
6904 } else {
6905 proto->kpi.v2.input = ifproto_media_input_v2;
6906 proto->kpi.v2.pre_output = ifproto_media_preout;
6907 proto->kpi.v2.event = ifproto_media_event;
6908 proto->kpi.v2.ioctl = ifproto_media_ioctl;
6909 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
6910 proto->kpi.v2.send_arp = ifproto_media_send_arp;
6911 }
6912 proto->detached = 1;
6913 ifnet_lock_done(ifp);
6914
6915 if (dlil_verbose) {
6916 DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
6917 (proto->proto_kpi == kProtoKPI_v1) ?
6918 "v1" : "v2", proto_family);
6919 }
6920
6921 /* release proto refcnt held during protocol attach */
6922 if_proto_free(proto);
6923
6924 /*
6925 * Release proto refcnt held during lookup; the rest of
6926 * protocol detach steps will happen when the last proto
6927 * reference is released.
6928 */
6929 if_proto_free(proto);
6930
6931 end:
6932 return retval;
6933 }
6934
6935
6936 static errno_t
6937 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
6938 struct mbuf *packet, char *header)
6939 {
6940 #pragma unused(ifp, protocol, packet, header)
6941 return ENXIO;
6942 }
6943
6944 static errno_t
6945 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
6946 struct mbuf *packet)
6947 {
6948 #pragma unused(ifp, protocol, packet)
6949 return ENXIO;
6950 }
6951
6952 static errno_t
6953 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
6954 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
6955 char *link_layer_dest)
6956 {
6957 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
6958 return ENXIO;
6959 }
6960
6961 static void
6962 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6963 const struct kev_msg *event)
6964 {
6965 #pragma unused(ifp, protocol, event)
6966 }
6967
6968 static errno_t
6969 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6970 unsigned long command, void *argument)
6971 {
6972 #pragma unused(ifp, protocol, command, argument)
6973 return ENXIO;
6974 }
6975
6976 static errno_t
6977 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6978 struct sockaddr_dl *out_ll, size_t ll_len)
6979 {
6980 #pragma unused(ifp, proto_addr, out_ll, ll_len)
6981 return ENXIO;
6982 }
6983
6984 static errno_t
6985 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6986 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6987 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6988 {
6989 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
6990 return ENXIO;
6991 }
6992
6993 extern int if_next_index(void);
6994 extern int tcp_ecn_outbound;
6995
6996 errno_t
6997 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
6998 {
6999 struct ifnet *tmp_if;
7000 struct ifaddr *ifa;
7001 struct if_data_internal if_data_saved;
7002 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7003 struct dlil_threading_info *dl_inp;
7004 thread_continue_t thfunc = NULL;
7005 u_int32_t sflags = 0;
7006 int err;
7007
7008 if (ifp == NULL) {
7009 return EINVAL;
7010 }
7011
7012 /*
7013 * Serialize ifnet attach using dlil_ifnet_lock, in order to
7014 * prevent the interface from being configured while it is
7015 * embryonic, as ifnet_head_lock is dropped and reacquired
7016 * below prior to marking the ifnet with IFRF_ATTACHED.
7017 */
7018 dlil_if_lock();
7019 ifnet_head_lock_exclusive();
7020 /* Verify we aren't already on the list */
7021 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
7022 if (tmp_if == ifp) {
7023 ifnet_head_done();
7024 dlil_if_unlock();
7025 return EEXIST;
7026 }
7027 }
7028
7029 lck_mtx_lock_spin(&ifp->if_ref_lock);
7030 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
7031 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
7032 __func__, ifp);
7033 /* NOTREACHED */
7034 }
7035 lck_mtx_unlock(&ifp->if_ref_lock);
7036
7037 ifnet_lock_exclusive(ifp);
7038
7039 /* Sanity check */
7040 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7041 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
7042 VERIFY(ifp->if_threads_pending == 0);
7043
7044 if (ll_addr != NULL) {
7045 if (ifp->if_addrlen == 0) {
7046 ifp->if_addrlen = ll_addr->sdl_alen;
7047 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
7048 ifnet_lock_done(ifp);
7049 ifnet_head_done();
7050 dlil_if_unlock();
7051 return EINVAL;
7052 }
7053 }
7054
7055 /*
7056 * Allow interfaces without protocol families to attach
7057 * only if they have the necessary fields filled out.
7058 */
7059 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
7060 DLIL_PRINTF("%s: Attempt to attach interface without "
7061 "family module - %d\n", __func__, ifp->if_family);
7062 ifnet_lock_done(ifp);
7063 ifnet_head_done();
7064 dlil_if_unlock();
7065 return ENODEV;
7066 }
7067
7068 /* Allocate protocol hash table */
7069 VERIFY(ifp->if_proto_hash == NULL);
7070 ifp->if_proto_hash = zalloc_flags(dlif_phash_zone, Z_WAITOK | Z_ZERO);
7071 if (ifp->if_proto_hash == NULL) {
7072 ifnet_lock_done(ifp);
7073 ifnet_head_done();
7074 dlil_if_unlock();
7075 return ENOBUFS;
7076 }
7077
7078 lck_mtx_lock_spin(&ifp->if_flt_lock);
7079 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
7080 TAILQ_INIT(&ifp->if_flt_head);
7081 VERIFY(ifp->if_flt_busy == 0);
7082 VERIFY(ifp->if_flt_waiters == 0);
7083 lck_mtx_unlock(&ifp->if_flt_lock);
7084
7085 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
7086 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
7087 LIST_INIT(&ifp->if_multiaddrs);
7088 }
7089
7090 VERIFY(ifp->if_allhostsinm == NULL);
7091 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
7092 TAILQ_INIT(&ifp->if_addrhead);
7093
7094 if (ifp->if_index == 0) {
7095 int idx = if_next_index();
7096
7097 if (idx == -1) {
7098 ifp->if_index = 0;
7099 ifnet_lock_done(ifp);
7100 ifnet_head_done();
7101 dlil_if_unlock();
7102 return ENOBUFS;
7103 }
7104 ifp->if_index = (uint16_t)idx;
7105
7106 /* the lladdr passed at attach time is the permanent address */
7107 if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
7108 ll_addr->sdl_alen == ETHER_ADDR_LEN) {
7109 bcopy(CONST_LLADDR(ll_addr),
7110 dl_if->dl_if_permanent_ether,
7111 ETHER_ADDR_LEN);
7112 dl_if->dl_if_permanent_ether_is_set = 1;
7113 }
7114 }
7115 /* There should not be anything occupying this slot */
7116 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7117
7118 /* allocate (if needed) and initialize a link address */
7119 ifa = dlil_alloc_lladdr(ifp, ll_addr);
7120 if (ifa == NULL) {
7121 ifnet_lock_done(ifp);
7122 ifnet_head_done();
7123 dlil_if_unlock();
7124 return ENOBUFS;
7125 }
7126
7127 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
7128 ifnet_addrs[ifp->if_index - 1] = ifa;
7129
7130 /* make this address the first on the list */
7131 IFA_LOCK(ifa);
7132 /* hold a reference for ifnet_addrs[] */
7133 IFA_ADDREF_LOCKED(ifa);
7134 /* if_attach_link_ifa() holds a reference for ifa_link */
7135 if_attach_link_ifa(ifp, ifa);
7136 IFA_UNLOCK(ifa);
7137
7138 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
7139 ifindex2ifnet[ifp->if_index] = ifp;
7140
7141 /* Hold a reference to the underlying dlil_ifnet */
7142 ifnet_reference(ifp);
7143
7144 /* Clear stats (save and restore other fields that we care about) */
7145 if_data_saved = ifp->if_data;
7146 bzero(&ifp->if_data, sizeof(ifp->if_data));
7147 ifp->if_data.ifi_type = if_data_saved.ifi_type;
7148 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
7149 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
7150 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
7151 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
7152 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
7153 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
7154 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
7155 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
7156 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
7157 ifnet_touch_lastchange(ifp);
7158
7159 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
7160 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
7161 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
7162
7163 /* By default, use SFB and enable flow advisory */
7164 sflags = PKTSCHEDF_QALG_SFB;
7165 if (if_flowadv) {
7166 sflags |= PKTSCHEDF_QALG_FLOWCTL;
7167 }
7168
7169 if (if_delaybased_queue) {
7170 sflags |= PKTSCHEDF_QALG_DELAYBASED;
7171 }
7172
7173 if (ifp->if_output_sched_model ==
7174 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
7175 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
7176 }
7177
7178 /* Initialize transmit queue(s) */
7179 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
7180 if (err != 0) {
7181 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
7182 "err=%d", __func__, ifp, err);
7183 /* NOTREACHED */
7184 }
7185
7186 /* Sanity checks on the input thread storage */
7187 dl_inp = &dl_if->dl_if_inpstorage;
7188 bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
7189 VERIFY(dl_inp->dlth_flags == 0);
7190 VERIFY(dl_inp->dlth_wtot == 0);
7191 VERIFY(dl_inp->dlth_ifp == NULL);
7192 VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
7193 VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
7194 VERIFY(!dl_inp->dlth_affinity);
7195 VERIFY(ifp->if_inp == NULL);
7196 VERIFY(dl_inp->dlth_thread == THREAD_NULL);
7197 VERIFY(dl_inp->dlth_strategy == NULL);
7198 VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
7199 VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
7200 VERIFY(dl_inp->dlth_affinity_tag == 0);
7201
7202 #if IFNET_INPUT_SANITY_CHK
7203 VERIFY(dl_inp->dlth_pkts_cnt == 0);
7204 #endif /* IFNET_INPUT_SANITY_CHK */
7205
7206 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7207 dlil_reset_rxpoll_params(ifp);
7208 /*
7209 * A specific DLIL input thread is created per non-loopback interface.
7210 */
7211 if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
7212 ifp->if_inp = dl_inp;
7213 ifnet_incr_pending_thread_count(ifp);
7214 err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
7215 if (err == ENODEV) {
7216 VERIFY(thfunc == NULL);
7217 ifnet_decr_pending_thread_count(ifp);
7218 } else if (err != 0) {
7219 panic_plain("%s: ifp=%p couldn't get an input thread; "
7220 "err=%d", __func__, ifp, err);
7221 /* NOTREACHED */
7222 }
7223 }
7224 /*
7225 * If the driver supports the new transmit model, calculate flow hash
7226 * and create a workloop starter thread to invoke the if_start callback
7227 * where the packets may be dequeued and transmitted.
7228 */
7229 if (ifp->if_eflags & IFEF_TXSTART) {
7230 thread_precedence_policy_data_t info;
7231 __unused kern_return_t kret;
7232
7233 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
7234 VERIFY(ifp->if_flowhash != 0);
7235 VERIFY(ifp->if_start_thread == THREAD_NULL);
7236
7237 ifnet_set_start_cycle(ifp, NULL);
7238 ifp->if_start_active = 0;
7239 ifp->if_start_req = 0;
7240 ifp->if_start_flags = 0;
7241 VERIFY(ifp->if_start != NULL);
7242 ifnet_incr_pending_thread_count(ifp);
7243 if ((err = kernel_thread_start(ifnet_start_thread_func,
7244 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
7245 panic_plain("%s: "
7246 "ifp=%p couldn't get a start thread; "
7247 "err=%d", __func__, ifp, err);
7248 /* NOTREACHED */
7249 }
7250 bzero(&info, sizeof(info));
7251 info.importance = 1;
7252 kret = thread_policy_set(ifp->if_start_thread,
7253 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
7254 THREAD_PRECEDENCE_POLICY_COUNT);
7255 ASSERT(kret == KERN_SUCCESS);
7256 } else {
7257 ifp->if_flowhash = 0;
7258 }
7259
7260 /* Reset polling parameters */
7261 ifnet_set_poll_cycle(ifp, NULL);
7262 ifp->if_poll_update = 0;
7263 ifp->if_poll_flags = 0;
7264 ifp->if_poll_req = 0;
7265 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7266
7267 /*
7268 * If the driver supports the new receive model, create a poller
7269 * thread to invoke the if_input_poll callback where packets may
7270 * be dequeued from the driver and processed for reception.
7271 * If the interface is netif-compat, the poller thread is
7272 * managed by netif.
7273 */
7274 if (thfunc == dlil_rxpoll_input_thread_func) {
7275 thread_precedence_policy_data_t info;
7276 __unused kern_return_t kret;
7277 VERIFY(ifp->if_input_poll != NULL);
7278 VERIFY(ifp->if_input_ctl != NULL);
7279 ifnet_incr_pending_thread_count(ifp);
7280 if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
7281 &ifp->if_poll_thread)) != KERN_SUCCESS) {
7282 panic_plain("%s: ifp=%p couldn't get a poll thread; "
7283 "err=%d", __func__, ifp, err);
7284 /* NOTREACHED */
7285 }
7286 bzero(&info, sizeof(info));
7287 info.importance = 1;
7288 kret = thread_policy_set(ifp->if_poll_thread,
7289 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
7290 THREAD_PRECEDENCE_POLICY_COUNT);
7291 ASSERT(kret == KERN_SUCCESS);
7292 }
7293
7294 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7295 VERIFY(ifp->if_desc.ifd_len == 0);
7296 VERIFY(ifp->if_desc.ifd_desc != NULL);
7297
7298 /* Record attach PC stacktrace */
7299 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
7300
7301 ifp->if_updatemcasts = 0;
7302 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
7303 struct ifmultiaddr *ifma;
7304 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
7305 IFMA_LOCK(ifma);
7306 if (ifma->ifma_addr->sa_family == AF_LINK ||
7307 ifma->ifma_addr->sa_family == AF_UNSPEC) {
7308 ifp->if_updatemcasts++;
7309 }
7310 IFMA_UNLOCK(ifma);
7311 }
7312
7313 DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
7314 "membership(s)\n", if_name(ifp),
7315 ifp->if_updatemcasts);
7316 }
7317
7318 /* Clear logging parameters */
7319 bzero(&ifp->if_log, sizeof(ifp->if_log));
7320
7321 /* Clear foreground/realtime activity timestamps */
7322 ifp->if_fg_sendts = 0;
7323 ifp->if_rt_sendts = 0;
7324
7325 VERIFY(ifp->if_delegated.ifp == NULL);
7326 VERIFY(ifp->if_delegated.type == 0);
7327 VERIFY(ifp->if_delegated.family == 0);
7328 VERIFY(ifp->if_delegated.subfamily == 0);
7329 VERIFY(ifp->if_delegated.expensive == 0);
7330 VERIFY(ifp->if_delegated.constrained == 0);
7331
7332 VERIFY(ifp->if_agentids == NULL);
7333 VERIFY(ifp->if_agentcount == 0);
7334
7335 /* Reset interface state */
7336 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7337 ifp->if_interface_state.valid_bitmask |=
7338 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7339 ifp->if_interface_state.interface_availability =
7340 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
7341
7342 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
7343 if (ifp == lo_ifp) {
7344 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
7345 ifp->if_interface_state.valid_bitmask |=
7346 IF_INTERFACE_STATE_LQM_STATE_VALID;
7347 } else {
7348 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
7349 }
7350
7351 /*
7352 * Enable ECN capability on this interface depending on the
7353 * value of the global ECN setting.
7354 */
7355 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
7356 if_set_eflags(ifp, IFEF_ECN_ENABLE);
7357 if_clear_eflags(ifp, IFEF_ECN_DISABLE);
7358 }
7359
7360 /*
7361 * Built-in Cyclops always on policy for WiFi infra
7362 */
7363 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
7364 errno_t error;
7365
7366 error = if_set_qosmarking_mode(ifp,
7367 IFRTYPE_QOSMARKING_FASTLANE);
7368 if (error != 0) {
7369 DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
7370 __func__, ifp->if_xname, error);
7371 } else {
7372 if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
7373 #if (DEVELOPMENT || DEBUG)
7374 DLIL_PRINTF("%s fastlane enabled on %s\n",
7375 __func__, ifp->if_xname);
7376 #endif /* (DEVELOPMENT || DEBUG) */
7377 }
7378 }
7379
7380 ifnet_lock_done(ifp);
7381 ifnet_head_done();
7382
7383
7384 lck_mtx_lock(&ifp->if_cached_route_lock);
7385 /* Enable forwarding cached route */
7386 ifp->if_fwd_cacheok = 1;
7387 /* Clean up any existing cached routes */
7388 ROUTE_RELEASE(&ifp->if_fwd_route);
7389 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
7390 ROUTE_RELEASE(&ifp->if_src_route);
7391 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
7392 ROUTE_RELEASE(&ifp->if_src_route6);
7393 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
7394 lck_mtx_unlock(&ifp->if_cached_route_lock);
7395
7396 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
7397
7398 /*
7399 * Allocate and attach IGMPv3/MLDv2 interface specific variables
7400 * and trees; do this before the ifnet is marked as attached.
7401 * The ifnet keeps the reference to the info structures even after
7402 * the ifnet is detached, since the network-layer records still
7403 * refer to the info structures even after that. This also
7404 * makes it possible for them to still function after the ifnet
7405 * is recycled or reattached.
7406 */
7407 #if INET
7408 if (IGMP_IFINFO(ifp) == NULL) {
7409 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
7410 VERIFY(IGMP_IFINFO(ifp) != NULL);
7411 } else {
7412 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
7413 igmp_domifreattach(IGMP_IFINFO(ifp));
7414 }
7415 #endif /* INET */
7416 if (MLD_IFINFO(ifp) == NULL) {
7417 MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
7418 VERIFY(MLD_IFINFO(ifp) != NULL);
7419 } else {
7420 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
7421 mld_domifreattach(MLD_IFINFO(ifp));
7422 }
7423
7424 VERIFY(ifp->if_data_threshold == 0);
7425 VERIFY(ifp->if_dt_tcall != NULL);
7426
7427 /*
7428 * Wait for the created kernel threads for I/O to get
7429 * scheduled and run at least once before we proceed
7430 * to mark interface as attached.
7431 */
7432 lck_mtx_lock(&ifp->if_ref_lock);
7433 while (ifp->if_threads_pending != 0) {
7434 DLIL_PRINTF("%s: Waiting for all kernel threads created for "
7435 "interface %s to get scheduled at least once.\n",
7436 __func__, ifp->if_xname);
7437 (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
7438 __func__, NULL);
7439 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
7440 }
7441 lck_mtx_unlock(&ifp->if_ref_lock);
7442 DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
7443 "at least once. Proceeding.\n", __func__, ifp->if_xname);
7444
7445 /* Finally, mark this ifnet as attached. */
7446 lck_mtx_lock(rnh_lock);
7447 ifnet_lock_exclusive(ifp);
7448 lck_mtx_lock_spin(&ifp->if_ref_lock);
7449 ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
7450 lck_mtx_unlock(&ifp->if_ref_lock);
7451 if (net_rtref) {
7452 /* boot-args override; enable idle notification */
7453 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
7454 IFRF_IDLE_NOTIFY);
7455 } else {
7456 /* apply previous request(s) to set the idle flags, if any */
7457 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
7458 ifp->if_idle_new_flags_mask);
7459 }
7460 ifnet_lock_done(ifp);
7461 lck_mtx_unlock(rnh_lock);
7462 dlil_if_unlock();
7463
7464 #if PF
7465 /*
7466 * Attach packet filter to this interface, if enabled.
7467 */
7468 pf_ifnet_hook(ifp, 1);
7469 #endif /* PF */
7470
7471 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
7472
7473 if (dlil_verbose) {
7474 DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
7475 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
7476 }
7477
7478 return 0;
7479 }
7480
7481 /*
7482 * Prepare the storage for the first/permanent link address, which
7483 * must have the same lifetime as the ifnet itself. Although the link
7484 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
7485 * its location in memory must never change as it may still be referred
7486 * to by some parts of the system afterwards (unfortunate implementation
7487 * artifacts inherited from BSD.)
7488 *
7489 * Caller must hold ifnet lock as writer.
7490 */
7491 static struct ifaddr *
7492 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
7493 {
7494 struct ifaddr *ifa, *oifa;
7495 struct sockaddr_dl *asdl, *msdl;
7496 char workbuf[IFNAMSIZ * 2];
7497 int namelen, masklen, socksize;
7498 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7499
7500 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
7501 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
7502
7503 namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
7504 if_name(ifp));
7505 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
7506 + ((namelen > 0) ? namelen : 0);
7507 socksize = masklen + ifp->if_addrlen;
7508 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
7509 if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
7510 socksize = sizeof(struct sockaddr_dl);
7511 }
7512 socksize = ROUNDUP(socksize);
7513 #undef ROUNDUP
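	/*
	 * ROUNDUP() above bumps the size up to the next multiple of
	 * sizeof(u_int32_t): (a - 1) | 3 sets the two low bits and the
	 * final + 1 carries into the next multiple of 4.  A rough worked
	 * example (illustrative only; actual offsets vary by platform):
	 * ROUNDUP(17) == 20, ROUNDUP(20) == 20, ROUNDUP(21) == 24.
	 */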
7514
7515 ifa = ifp->if_lladdr;
7516 if (socksize > DLIL_SDLMAXLEN ||
7517 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
7518 /*
7519 * Rare, but in the event that the link address requires
7520 * more storage space than DLIL_SDLMAXLEN, allocate the
7521 * largest possible storage for address and mask, such
7522 * that we can reuse the same space when if_addrlen grows.
7523 * This same space will be used when if_addrlen shrinks.
7524 */
7525 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
7526 int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
7527 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
7528 if (ifa == NULL) {
7529 return NULL;
7530 }
7531 ifa_lock_init(ifa);
7532 /* Don't set IFD_ALLOC, as this is permanent */
7533 ifa->ifa_debug = IFD_LINK;
7534 }
7535 IFA_LOCK(ifa);
7536 /* address and mask sockaddr_dl locations */
7537 asdl = (struct sockaddr_dl *)(ifa + 1);
7538 bzero(asdl, SOCK_MAXADDRLEN);
7539 msdl = (struct sockaddr_dl *)(void *)
7540 ((char *)asdl + SOCK_MAXADDRLEN);
7541 bzero(msdl, SOCK_MAXADDRLEN);
7542 } else {
7543 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
7544 /*
7545 * Use the storage areas for address and mask within the
7546 * dlil_ifnet structure. This is the most common case.
7547 */
7548 if (ifa == NULL) {
7549 ifa = &dl_if->dl_if_lladdr.ifa;
7550 ifa_lock_init(ifa);
7551 /* Don't set IFD_ALLOC, as this is permanent */
7552 ifa->ifa_debug = IFD_LINK;
7553 }
7554 IFA_LOCK(ifa);
7555 /* address and mask sockaddr_dl locations */
7556 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
7557 bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
7558 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
7559 bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
7560 }
7561
7562 /* hold a permanent reference for the ifnet itself */
7563 IFA_ADDREF_LOCKED(ifa);
7564 oifa = ifp->if_lladdr;
7565 ifp->if_lladdr = ifa;
7566
7567 VERIFY(ifa->ifa_debug == IFD_LINK);
7568 ifa->ifa_ifp = ifp;
7569 ifa->ifa_rtrequest = link_rtrequest;
7570 ifa->ifa_addr = (struct sockaddr *)asdl;
7571 asdl->sdl_len = (u_char)socksize;
7572 asdl->sdl_family = AF_LINK;
7573 if (namelen > 0) {
7574 bcopy(workbuf, asdl->sdl_data, min(namelen,
7575 sizeof(asdl->sdl_data)));
7576 asdl->sdl_nlen = (u_char)namelen;
7577 } else {
7578 asdl->sdl_nlen = 0;
7579 }
7580 asdl->sdl_index = ifp->if_index;
7581 asdl->sdl_type = ifp->if_type;
7582 if (ll_addr != NULL) {
7583 asdl->sdl_alen = ll_addr->sdl_alen;
7584 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
7585 } else {
7586 asdl->sdl_alen = 0;
7587 }
7588 ifa->ifa_netmask = (struct sockaddr *)msdl;
7589 msdl->sdl_len = (u_char)masklen;
7590 while (namelen > 0) {
7591 msdl->sdl_data[--namelen] = 0xff;
7592 }
7593 IFA_UNLOCK(ifa);
7594
7595 if (oifa != NULL) {
7596 IFA_REMREF(oifa);
7597 }
7598
7599 return ifa;
7600 }
7601
7602 static void
7603 if_purgeaddrs(struct ifnet *ifp)
7604 {
7605 #if INET
7606 in_purgeaddrs(ifp);
7607 #endif /* INET */
7608 in6_purgeaddrs(ifp);
7609 }
7610
7611 errno_t
7612 ifnet_detach(ifnet_t ifp)
7613 {
7614 struct ifnet *delegated_ifp;
7615 struct nd_ifinfo *ndi = NULL;
7616
7617 if (ifp == NULL) {
7618 return EINVAL;
7619 }
7620
7621 ndi = ND_IFINFO(ifp);
7622 if (NULL != ndi) {
7623 ndi->cga_initialized = FALSE;
7624 }
7625
7626 lck_mtx_lock(rnh_lock);
7627 ifnet_head_lock_exclusive();
7628 ifnet_lock_exclusive(ifp);
7629
7630 if (ifp->if_output_netem != NULL) {
7631 netem_destroy(ifp->if_output_netem);
7632 ifp->if_output_netem = NULL;
7633 }
7634
7635 /*
7636 * Check to see if this interface has previously triggered
7637 * aggressive protocol draining; if so, decrement the global
7638 * refcnt and clear PR_AGGDRAIN on the route domain if
7639 * there are no more such interfaces around.
7640 */
7641 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
7642
7643 lck_mtx_lock_spin(&ifp->if_ref_lock);
7644 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
7645 lck_mtx_unlock(&ifp->if_ref_lock);
7646 ifnet_lock_done(ifp);
7647 ifnet_head_done();
7648 lck_mtx_unlock(rnh_lock);
7649 return EINVAL;
7650 } else if (ifp->if_refflags & IFRF_DETACHING) {
7651 /* Interface has already been detached */
7652 lck_mtx_unlock(&ifp->if_ref_lock);
7653 ifnet_lock_done(ifp);
7654 ifnet_head_done();
7655 lck_mtx_unlock(rnh_lock);
7656 return ENXIO;
7657 }
7658 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
7659 /* Indicate this interface is being detached */
7660 ifp->if_refflags &= ~IFRF_ATTACHED;
7661 ifp->if_refflags |= IFRF_DETACHING;
7662 lck_mtx_unlock(&ifp->if_ref_lock);
7663
7664 if (dlil_verbose) {
7665 DLIL_PRINTF("%s: detaching\n", if_name(ifp));
7666 }
7667
7668 /* clean up flow control entry object if there's any */
7669 if (ifp->if_eflags & IFEF_TXSTART) {
7670 ifnet_flowadv(ifp->if_flowhash);
7671 }
7672
7673 /* Reset ECN enable/disable flags */
7674 /* Reset CLAT46 flag */
7675 if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);
7676
7677 /*
7678 * We do not reset the TCP keep alive counters in case
7679 * a TCP connection stays connected after the interface
7680 * went down.
7681 */
7682 if (ifp->if_tcp_kao_cnt > 0) {
7683 os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
7684 __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
7685 }
7686 ifp->if_tcp_kao_max = 0;
7687
7688 /*
7689 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
7690 * no longer be visible during lookups from this point.
7691 */
7692 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
7693 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
7694 ifp->if_link.tqe_next = NULL;
7695 ifp->if_link.tqe_prev = NULL;
7696 if (ifp->if_ordered_link.tqe_next != NULL ||
7697 ifp->if_ordered_link.tqe_prev != NULL) {
7698 ifnet_remove_from_ordered_list(ifp);
7699 }
7700 ifindex2ifnet[ifp->if_index] = NULL;
7701
7702 /* 18717626 - reset router mode */
7703 if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
7704 ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;
7705
7706 /* Record detach PC stacktrace */
7707 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
7708
7709 /* Clear logging parameters */
7710 bzero(&ifp->if_log, sizeof(ifp->if_log));
7711
7712 /* Clear delegated interface info (reference released below) */
7713 delegated_ifp = ifp->if_delegated.ifp;
7714 bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
7715
7716 /* Reset interface state */
7717 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7718
7719 ifnet_lock_done(ifp);
7720 ifnet_head_done();
7721 lck_mtx_unlock(rnh_lock);
7722
7723
7724 /* Release reference held on the delegated interface */
7725 if (delegated_ifp != NULL) {
7726 ifnet_release(delegated_ifp);
7727 }
7728
7729 /* Reset Link Quality Metric (unless loopback [lo0]) */
7730 if (ifp != lo_ifp) {
7731 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
7732 }
7733
7734 /* Reset TCP local statistics */
7735 if (ifp->if_tcp_stat != NULL) {
7736 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
7737 }
7738
7739 /* Reset UDP local statistics */
7740 if (ifp->if_udp_stat != NULL) {
7741 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
7742 }
7743
7744 /* Reset ifnet IPv4 stats */
7745 if (ifp->if_ipv4_stat != NULL) {
7746 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
7747 }
7748
7749 /* Reset ifnet IPv6 stats */
7750 if (ifp->if_ipv6_stat != NULL) {
7751 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
7752 }
7753
7754 /* Release memory held for interface link status report */
7755 if (ifp->if_link_status != NULL) {
7756 FREE(ifp->if_link_status, M_TEMP);
7757 ifp->if_link_status = NULL;
7758 }
7759
7760 /* Clear agent IDs */
7761 if (ifp->if_agentids != NULL) {
7762 FREE(ifp->if_agentids, M_NETAGENT);
7763 ifp->if_agentids = NULL;
7764 }
7765 ifp->if_agentcount = 0;
7766
7767
7768 /* Let BPF know we're detaching */
7769 bpfdetach(ifp);
7770
7771 /* Mark the interface as DOWN */
7772 if_down(ifp);
7773
7774 /* Disable forwarding cached route */
7775 lck_mtx_lock(&ifp->if_cached_route_lock);
7776 ifp->if_fwd_cacheok = 0;
7777 lck_mtx_unlock(&ifp->if_cached_route_lock);
7778
7779 /* Disable data threshold and wait for any pending event posting */
7780 ifp->if_data_threshold = 0;
7781 VERIFY(ifp->if_dt_tcall != NULL);
7782 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
7783
7784 /*
7785 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
7786 * references to the info structures and leave them attached to
7787 * this ifnet.
7788 */
7789 #if INET
7790 igmp_domifdetach(ifp);
7791 #endif /* INET */
7792 mld_domifdetach(ifp);
7793
7794 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
7795
7796 /* Let worker thread take care of the rest, to avoid reentrancy */
7797 dlil_if_lock();
7798 ifnet_detaching_enqueue(ifp);
7799 dlil_if_unlock();
7800
7801 return 0;
7802 }
7803
7804 static void
7805 ifnet_detaching_enqueue(struct ifnet *ifp)
7806 {
7807 dlil_if_lock_assert();
7808
7809 ++ifnet_detaching_cnt;
7810 VERIFY(ifnet_detaching_cnt != 0);
7811 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
7812 wakeup((caddr_t)&ifnet_delayed_run);
7813 }
7814
7815 static struct ifnet *
7816 ifnet_detaching_dequeue(void)
7817 {
7818 struct ifnet *ifp;
7819
7820 dlil_if_lock_assert();
7821
7822 ifp = TAILQ_FIRST(&ifnet_detaching_head);
7823 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
7824 if (ifp != NULL) {
7825 VERIFY(ifnet_detaching_cnt != 0);
7826 --ifnet_detaching_cnt;
7827 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
7828 ifp->if_detaching_link.tqe_next = NULL;
7829 ifp->if_detaching_link.tqe_prev = NULL;
7830 }
7831 return ifp;
7832 }
7833
7834 __attribute__((noreturn))
7835 static void
7836 ifnet_detacher_thread_cont(void *v, wait_result_t wres)
7837 {
7838 #pragma unused(v, wres)
7839 struct ifnet *ifp;
7840
7841 dlil_if_lock();
7842 if (__improbable(ifnet_detaching_embryonic)) {
7843 ifnet_detaching_embryonic = FALSE;
7844 /* there's no lock ordering constraint so OK to do this here */
7845 dlil_decr_pending_thread_count();
7846 }
7847
7848 for (;;) {
7849 dlil_if_lock_assert();
7850
7851 if (ifnet_detaching_cnt == 0) {
7852 break;
7853 }
7854
7855 net_update_uptime();
7856
7857 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
7858
7859 /* Take care of detaching ifnet */
7860 ifp = ifnet_detaching_dequeue();
7861 if (ifp != NULL) {
7862 dlil_if_unlock();
7863 ifnet_detach_final(ifp);
7864 dlil_if_lock();
7865 }
7866 }
7867
7868 (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
7869 dlil_if_unlock();
7870 (void) thread_block(ifnet_detacher_thread_cont);
7871
7872 VERIFY(0); /* we should never get here */
7873 /* NOTREACHED */
7874 __builtin_unreachable();
7875 }
7876
7877 __dead2
7878 static void
7879 ifnet_detacher_thread_func(void *v, wait_result_t w)
7880 {
7881 #pragma unused(v, w)
7882 dlil_if_lock();
7883 (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
7884 ifnet_detaching_embryonic = TRUE;
7885 /* wake up once to get out of embryonic state */
7886 wakeup((caddr_t)&ifnet_delayed_run);
7887 dlil_if_unlock();
7888 (void) thread_block(ifnet_detacher_thread_cont);
7889 VERIFY(0);
7890 /* NOTREACHED */
7891 __builtin_unreachable();
7892 }
7893
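/*
 * The detacher thread never returns: ifnet_detacher_thread_func() parks on
 * &ifnet_delayed_run via assert_wait()/thread_block() and re-enters
 * ifnet_detacher_thread_cont() each time ifnet_detaching_enqueue() issues a
 * wakeup.  The one-shot "embryonic" self-wakeup only exists so that
 * dlil_decr_pending_thread_count() runs once the thread has been scheduled
 * for the first time.
 */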
7894 static void
7895 ifnet_detach_final(struct ifnet *ifp)
7896 {
7897 struct ifnet_filter *filter, *filter_next;
7898 struct ifnet_filter_head fhead;
7899 struct dlil_threading_info *inp;
7900 struct ifaddr *ifa;
7901 ifnet_detached_func if_free;
7902 int i;
7903
7904 lck_mtx_lock(&ifp->if_ref_lock);
7905 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7906 panic("%s: flags mismatch (detaching not set) ifp=%p",
7907 __func__, ifp);
7908 /* NOTREACHED */
7909 }
7910
7911 /*
7912 * Wait until the existing IO references get released
7913 * before we proceed with ifnet_detach. This is not a
7914 * common case, so block without using a continuation.
7915 */
7916 while (ifp->if_refio > 0) {
7917 DLIL_PRINTF("%s: Waiting for IO references on %s interface "
7918 "to be released\n", __func__, if_name(ifp));
7919 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
7920 (PZERO - 1), "ifnet_ioref_wait", NULL);
7921 }
7922
7923 VERIFY(ifp->if_datamov == 0);
7924 VERIFY(ifp->if_drainers == 0);
7925 VERIFY(ifp->if_suspend == 0);
7926 ifp->if_refflags &= ~IFRF_READY;
7927 lck_mtx_unlock(&ifp->if_ref_lock);
7928
7929 /* Drain and destroy send queue */
7930 ifclassq_teardown(ifp);
7931
7932 /* Detach interface filters */
7933 lck_mtx_lock(&ifp->if_flt_lock);
7934 if_flt_monitor_enter(ifp);
7935
7936 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
7937 fhead = ifp->if_flt_head;
7938 TAILQ_INIT(&ifp->if_flt_head);
7939
7940 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
7941 filter_next = TAILQ_NEXT(filter, filt_next);
7942 lck_mtx_unlock(&ifp->if_flt_lock);
7943
7944 dlil_detach_filter_internal(filter, 1);
7945 lck_mtx_lock(&ifp->if_flt_lock);
7946 }
7947 if_flt_monitor_leave(ifp);
7948 lck_mtx_unlock(&ifp->if_flt_lock);
7949
7950 /* Tell upper layers to drop their network addresses */
7951 if_purgeaddrs(ifp);
7952
7953 ifnet_lock_exclusive(ifp);
7954
7955 /* Unplumb all protocols */
7956 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
7957 struct if_proto *proto;
7958
7959 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7960 while (proto != NULL) {
7961 protocol_family_t family = proto->protocol_family;
7962 ifnet_lock_done(ifp);
7963 proto_unplumb(family, ifp);
7964 ifnet_lock_exclusive(ifp);
7965 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7966 }
7967 /* There should not be any protocols left */
7968 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
7969 }
7970 zfree(dlif_phash_zone, ifp->if_proto_hash);
7971 ifp->if_proto_hash = NULL;
7972
7973 /* Detach (permanent) link address from if_addrhead */
7974 ifa = TAILQ_FIRST(&ifp->if_addrhead);
7975 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
7976 IFA_LOCK(ifa);
7977 if_detach_link_ifa(ifp, ifa);
7978 IFA_UNLOCK(ifa);
7979
7980 /* Remove (permanent) link address from ifnet_addrs[] */
7981 IFA_REMREF(ifa);
7982 ifnet_addrs[ifp->if_index - 1] = NULL;
7983
7984 /* This interface should not be on {ifnet_head,detaching} */
7985 VERIFY(ifp->if_link.tqe_next == NULL);
7986 VERIFY(ifp->if_link.tqe_prev == NULL);
7987 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7988 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
7989 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
7990 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
7991
7992 /* The slot should have been emptied */
7993 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7994
7995 /* There should not be any addresses left */
7996 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
7997
7998 /*
7999 * Signal the starter thread to terminate itself.
8000 */
8001 if (ifp->if_start_thread != THREAD_NULL) {
8002 lck_mtx_lock_spin(&ifp->if_start_lock);
8003 ifp->if_start_flags = 0;
8004 ifp->if_start_thread = THREAD_NULL;
8005 wakeup_one((caddr_t)&ifp->if_start_thread);
8006 lck_mtx_unlock(&ifp->if_start_lock);
8007 }
8008
8009 /*
8010 * Signal the poller thread to terminate itself.
8011 */
8012 if (ifp->if_poll_thread != THREAD_NULL) {
8013 lck_mtx_lock_spin(&ifp->if_poll_lock);
8014 ifp->if_poll_thread = THREAD_NULL;
8015 wakeup_one((caddr_t)&ifp->if_poll_thread);
8016 lck_mtx_unlock(&ifp->if_poll_lock);
8017 }
8018
8019 /*
8020 * If thread affinity was set for the workloop thread, we will need
8021 * to tear down the affinity and release the extra reference count
8022 * taken at attach time. Does not apply to lo0 or other interfaces
8023 * without dedicated input threads.
8024 */
8025 if ((inp = ifp->if_inp) != NULL) {
8026 VERIFY(inp != dlil_main_input_thread);
8027
8028 if (inp->dlth_affinity) {
8029 struct thread *tp, *wtp, *ptp;
8030
8031 lck_mtx_lock_spin(&inp->dlth_lock);
8032 wtp = inp->dlth_driver_thread;
8033 inp->dlth_driver_thread = THREAD_NULL;
8034 ptp = inp->dlth_poller_thread;
8035 inp->dlth_poller_thread = THREAD_NULL;
8036 ASSERT(inp->dlth_thread != THREAD_NULL);
8037 tp = inp->dlth_thread; /* don't nullify now */
8038 inp->dlth_affinity_tag = 0;
8039 inp->dlth_affinity = FALSE;
8040 lck_mtx_unlock(&inp->dlth_lock);
8041
8042 /* Tear down poll thread affinity */
8043 if (ptp != NULL) {
8044 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
8045 VERIFY(ifp->if_xflags & IFXF_LEGACY);
8046 (void) dlil_affinity_set(ptp,
8047 THREAD_AFFINITY_TAG_NULL);
8048 thread_deallocate(ptp);
8049 }
8050
8051 /* Tear down workloop thread affinity */
8052 if (wtp != NULL) {
8053 (void) dlil_affinity_set(wtp,
8054 THREAD_AFFINITY_TAG_NULL);
8055 thread_deallocate(wtp);
8056 }
8057
8058 /* Tear down DLIL input thread affinity */
8059 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
8060 thread_deallocate(tp);
8061 }
8062
8063 /* disassociate ifp DLIL input thread */
8064 ifp->if_inp = NULL;
8065
8066 /* if the worker thread was created, tell it to terminate */
8067 if (inp->dlth_thread != THREAD_NULL) {
8068 lck_mtx_lock_spin(&inp->dlth_lock);
8069 inp->dlth_flags |= DLIL_INPUT_TERMINATE;
8070 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
8071 wakeup_one((caddr_t)&inp->dlth_flags);
8072 }
8073 lck_mtx_unlock(&inp->dlth_lock);
8074 ifnet_lock_done(ifp);
8075
8076 /* wait for the input thread to terminate */
8077 lck_mtx_lock_spin(&inp->dlth_lock);
8078 while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
8079 == 0) {
8080 (void) msleep(&inp->dlth_flags, &inp->dlth_lock,
8081 (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
8082 }
8083 lck_mtx_unlock(&inp->dlth_lock);
8084 ifnet_lock_exclusive(ifp);
8085 }
8086
8087 /* clean-up input thread state */
8088 dlil_clean_threading_info(inp);
8089 /* clean-up poll parameters */
8090 VERIFY(ifp->if_poll_thread == THREAD_NULL);
8091 dlil_reset_rxpoll_params(ifp);
8092 }
8093
8094 /* The driver might unload, so point these to ourselves */
8095 if_free = ifp->if_free;
8096 ifp->if_output_dlil = ifp_if_output;
8097 ifp->if_output = ifp_if_output;
8098 ifp->if_pre_enqueue = ifp_if_output;
8099 ifp->if_start = ifp_if_start;
8100 ifp->if_output_ctl = ifp_if_ctl;
8101 ifp->if_input_dlil = ifp_if_input;
8102 ifp->if_input_poll = ifp_if_input_poll;
8103 ifp->if_input_ctl = ifp_if_ctl;
8104 ifp->if_ioctl = ifp_if_ioctl;
8105 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
8106 ifp->if_free = ifp_if_free;
8107 ifp->if_demux = ifp_if_demux;
8108 ifp->if_event = ifp_if_event;
8109 ifp->if_framer_legacy = ifp_if_framer;
8110 ifp->if_framer = ifp_if_framer_extended;
8111 ifp->if_add_proto = ifp_if_add_proto;
8112 ifp->if_del_proto = ifp_if_del_proto;
8113 ifp->if_check_multi = ifp_if_check_multi;
8114
8115 /* wipe out interface description */
8116 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
8117 ifp->if_desc.ifd_len = 0;
8118 VERIFY(ifp->if_desc.ifd_desc != NULL);
8119 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
8120
8121 /* there shouldn't be any delegation by now */
8122 VERIFY(ifp->if_delegated.ifp == NULL);
8123 VERIFY(ifp->if_delegated.type == 0);
8124 VERIFY(ifp->if_delegated.family == 0);
8125 VERIFY(ifp->if_delegated.subfamily == 0);
8126 VERIFY(ifp->if_delegated.expensive == 0);
8127 VERIFY(ifp->if_delegated.constrained == 0);
8128
8129 /* QoS marking gets cleared */
8130 if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
8131 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
8132
8133
8134 ifnet_lock_done(ifp);
8135
8136 #if PF
8137 /*
8138 * Detach this interface from packet filter, if enabled.
8139 */
8140 pf_ifnet_hook(ifp, 0);
8141 #endif /* PF */
8142
8143 /* Filter list should be empty */
8144 lck_mtx_lock_spin(&ifp->if_flt_lock);
8145 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
8146 VERIFY(ifp->if_flt_busy == 0);
8147 VERIFY(ifp->if_flt_waiters == 0);
8148 lck_mtx_unlock(&ifp->if_flt_lock);
8149
8150 /* Last chance to drain send queue */
8151 if_qflush(ifp, 0);
8152
8153 /* Last chance to cleanup any cached route */
8154 lck_mtx_lock(&ifp->if_cached_route_lock);
8155 VERIFY(!ifp->if_fwd_cacheok);
8156 ROUTE_RELEASE(&ifp->if_fwd_route);
8157 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
8158 ROUTE_RELEASE(&ifp->if_src_route);
8159 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
8160 ROUTE_RELEASE(&ifp->if_src_route6);
8161 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
8162 lck_mtx_unlock(&ifp->if_cached_route_lock);
8163
8164 VERIFY(ifp->if_data_threshold == 0);
8165 VERIFY(ifp->if_dt_tcall != NULL);
8166 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
8167
8168 ifnet_llreach_ifdetach(ifp);
8169
8170 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
8171
8172 /*
8173 * Finally, mark this ifnet as detached.
8174 */
8175 lck_mtx_lock_spin(&ifp->if_ref_lock);
8176 if (!(ifp->if_refflags & IFRF_DETACHING)) {
8177 panic("%s: flags mismatch (detaching not set) ifp=%p",
8178 __func__, ifp);
8179 /* NOTREACHED */
8180 }
8181 ifp->if_refflags &= ~IFRF_DETACHING;
8182 lck_mtx_unlock(&ifp->if_ref_lock);
8183 if (if_free != NULL) {
8184 if_free(ifp);
8185 }
8186
8187 if (dlil_verbose) {
8188 DLIL_PRINTF("%s: detached\n", if_name(ifp));
8189 }
8190
8191 /* Release reference held during ifnet attach */
8192 ifnet_release(ifp);
8193 }
8194
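/*
 * The ifp_if_* routines below are the detached-state stubs that
 * ifnet_detach_final() installs over the driver's callbacks once the driver
 * may have unloaded: the output/input paths simply drop packets, the control
 * entry points fail with EOPNOTSUPP/EINVAL, and the framer frees the mbuf
 * and returns EJUSTRETURN.
 */
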
8195 errno_t
8196 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
8197 {
8198 #pragma unused(ifp)
8199 m_freem_list(m);
8200 return 0;
8201 }
8202
8203 void
8204 ifp_if_start(struct ifnet *ifp)
8205 {
8206 ifnet_purge(ifp);
8207 }
8208
8209 static errno_t
8210 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
8211 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
8212 boolean_t poll, struct thread *tp)
8213 {
8214 #pragma unused(ifp, m_tail, s, poll, tp)
8215 m_freem_list(m_head);
8216 return ENXIO;
8217 }
8218
8219 static void
8220 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
8221 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
8222 {
8223 #pragma unused(ifp, flags, max_cnt)
8224 if (m_head != NULL) {
8225 *m_head = NULL;
8226 }
8227 if (m_tail != NULL) {
8228 *m_tail = NULL;
8229 }
8230 if (cnt != NULL) {
8231 *cnt = 0;
8232 }
8233 if (len != NULL) {
8234 *len = 0;
8235 }
8236 }
8237
8238 static errno_t
8239 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
8240 {
8241 #pragma unused(ifp, cmd, arglen, arg)
8242 return EOPNOTSUPP;
8243 }
8244
8245 static errno_t
8246 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
8247 {
8248 #pragma unused(ifp, fh, pf)
8249 m_freem(m);
8250 return EJUSTRETURN;
8251 }
8252
8253 static errno_t
8254 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
8255 const struct ifnet_demux_desc *da, u_int32_t dc)
8256 {
8257 #pragma unused(ifp, pf, da, dc)
8258 return EINVAL;
8259 }
8260
8261 static errno_t
8262 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
8263 {
8264 #pragma unused(ifp, pf)
8265 return EINVAL;
8266 }
8267
8268 static errno_t
8269 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
8270 {
8271 #pragma unused(ifp, sa)
8272 return EOPNOTSUPP;
8273 }
8274
8275 #if !XNU_TARGET_OS_OSX
8276 static errno_t
8277 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8278 const struct sockaddr *sa, const char *ll, const char *t,
8279 u_int32_t *pre, u_int32_t *post)
8280 #else /* XNU_TARGET_OS_OSX */
8281 static errno_t
8282 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8283 const struct sockaddr *sa, const char *ll, const char *t)
8284 #endif /* XNU_TARGET_OS_OSX */
8285 {
8286 #pragma unused(ifp, m, sa, ll, t)
8287 #if !XNU_TARGET_OS_OSX
8288 return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
8289 #else /* XNU_TARGET_OS_OSX */
8290 return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
8291 #endif /* XNU_TARGET_OS_OSX */
8292 }
8293
8294 static errno_t
8295 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
8296 const struct sockaddr *sa, const char *ll, const char *t,
8297 u_int32_t *pre, u_int32_t *post)
8298 {
8299 #pragma unused(ifp, sa, ll, t)
8300 m_freem(*m);
8301 *m = NULL;
8302
8303 if (pre != NULL) {
8304 *pre = 0;
8305 }
8306 if (post != NULL) {
8307 *post = 0;
8308 }
8309
8310 return EJUSTRETURN;
8311 }
8312
8313 errno_t
8314 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
8315 {
8316 #pragma unused(ifp, cmd, arg)
8317 return EOPNOTSUPP;
8318 }
8319
8320 static errno_t
8321 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
8322 {
8323 #pragma unused(ifp, tm, f)
8324 /* XXX not sure what to do here */
8325 return 0;
8326 }
8327
8328 static void
8329 ifp_if_free(struct ifnet *ifp)
8330 {
8331 #pragma unused(ifp)
8332 }
8333
8334 static void
8335 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
8336 {
8337 #pragma unused(ifp, e)
8338 }
8339
8340 int
8341 dlil_if_acquire(u_int32_t family, const void *uniqueid,
8342 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
8343 {
8344 struct ifnet *ifp1 = NULL;
8345 struct dlil_ifnet *dlifp1 = NULL;
8346 struct dlil_ifnet *dlifp1_saved = NULL;
8347 void *buf, *base, **pbuf;
8348 int ret = 0;
8349
8350 VERIFY(*ifp == NULL);
8351 dlil_if_lock();
8352 /*
8353 * We absolutely can't have an interface with the same name
8354 * in the in-use state.
8355 * To make sure of that, the list has to be traversed completely.
8356 */
8357 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
8358 ifp1 = (struct ifnet *)dlifp1;
8359
8360 if (ifp1->if_family != family) {
8361 continue;
8362 }
8363
8364 /*
8365 * If the interface is in use, return EBUSY if either the unique id
8366 * or the interface extended name is the same.
8367 */
8368 lck_mtx_lock(&dlifp1->dl_if_lock);
8369 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
8370 if (dlifp1->dl_if_flags & DLIF_INUSE) {
8371 lck_mtx_unlock(&dlifp1->dl_if_lock);
8372 ret = EBUSY;
8373 goto end;
8374 }
8375 }
8376
8377 if (uniqueid_len) {
8378 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
8379 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
8380 if (dlifp1->dl_if_flags & DLIF_INUSE) {
8381 lck_mtx_unlock(&dlifp1->dl_if_lock);
8382 ret = EBUSY;
8383 goto end;
8384 } else {
8385 /* Cache the first interface that can be recycled */
8386 if (*ifp == NULL) {
8387 *ifp = ifp1;
8388 dlifp1_saved = dlifp1;
8389 }
8390 /*
8391 * XXX Do not break or jump to end as we have to traverse
8392 * the whole list to ensure there are no name collisions
8393 */
8394 }
8395 }
8396 }
8397 lck_mtx_unlock(&dlifp1->dl_if_lock);
8398 }
8399
8400 /* If there's an interface that can be recycled, use that */
8401 if (*ifp != NULL) {
8402 if (dlifp1_saved != NULL) {
8403 lck_mtx_lock(&dlifp1_saved->dl_if_lock);
8404 dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
8405 lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
8406 dlifp1_saved = NULL;
8407 }
8408 goto end;
8409 }
8410
8411 /* no interface found, allocate a new one */
8412 buf = zalloc_flags(dlif_zone, Z_WAITOK | Z_ZERO);
8413 if (buf == NULL) {
8414 ret = ENOMEM;
8415 goto end;
8416 }
8417
8418 /* Get the 64-bit aligned base address for this object */
8419 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
8420 sizeof(u_int64_t));
8421 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
8422
8423 /*
8424 * Wind back a pointer size from the aligned base and
8425 * save the original address so we can free it later.
8426 */
8427 pbuf = (void **)((intptr_t)base - sizeof(void *));
8428 *pbuf = buf;
8429 dlifp1 = base;
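	/*
	 * Rough picture of the resulting layout (the padding depends on how
	 * far P2ROUNDUP had to advance; this is only a sketch):
	 *
	 *   buf ... [padding] [saved buf pointer] [dlil_ifnet at base]
	 *                      ^ pbuf              ^ 64-bit aligned
	 *
	 * The saved pointer records the original zalloc'ed address so the
	 * buffer can be freed later even though callers only ever see the
	 * aligned dlil_ifnet.
	 */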
8430
8431 if (uniqueid_len) {
8432 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
8433 M_NKE, M_WAITOK);
8434 if (dlifp1->dl_if_uniqueid == NULL) {
8435 zfree(dlif_zone, buf);
8436 ret = ENOMEM;
8437 goto end;
8438 }
8439 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
8440 dlifp1->dl_if_uniqueid_len = uniqueid_len;
8441 }
8442
8443 ifp1 = (struct ifnet *)dlifp1;
8444 dlifp1->dl_if_flags = DLIF_INUSE;
8445 if (ifnet_debug) {
8446 dlifp1->dl_if_flags |= DLIF_DEBUG;
8447 dlifp1->dl_if_trace = dlil_if_trace;
8448 }
8449 ifp1->if_name = dlifp1->dl_if_namestorage;
8450 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
8451
8452 /* initialize interface description */
8453 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
8454 ifp1->if_desc.ifd_len = 0;
8455 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
8456
8457
8458 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
8459 DLIL_PRINTF("%s: failed to allocate if local stats, "
8460 "error: %d\n", __func__, ret);
8461 /* This probably shouldn't be fatal */
8462 ret = 0;
8463 }
8464
8465 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
8466 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
8467 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
8468 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
8469 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
8470 ifnet_lock_attr);
8471 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
8472 #if INET
8473 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
8474 ifnet_lock_attr);
8475 ifp1->if_inetdata = NULL;
8476 #endif
8477 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
8478 ifnet_lock_attr);
8479 ifp1->if_inet6data = NULL;
8480 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
8481 ifnet_lock_attr);
8482 ifp1->if_link_status = NULL;
8483
8484 /* for send data paths */
8485 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
8486 ifnet_lock_attr);
8487 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
8488 ifnet_lock_attr);
8489 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
8490 ifnet_lock_attr);
8491
8492 /* for receive data paths */
8493 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
8494 ifnet_lock_attr);
8495
8496 /* thread call allocation is done with sleeping zalloc */
8497 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
8498 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
8499 if (ifp1->if_dt_tcall == NULL) {
8500 panic_plain("%s: couldn't create if_dt_tcall", __func__);
8501 /* NOTREACHED */
8502 }
8503
8504 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
8505
8506 *ifp = ifp1;
8507
8508 end:
8509 dlil_if_unlock();
8510
8511 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
8512 IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
8513
8514 return ret;
8515 }
8516
8517 __private_extern__ void
8518 dlil_if_release(ifnet_t ifp)
8519 {
8520 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
8521
8522 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
8523 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
8524 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
8525 }
8526
8527 ifnet_lock_exclusive(ifp);
8528 lck_mtx_lock(&dlifp->dl_if_lock);
8529 dlifp->dl_if_flags &= ~DLIF_INUSE;
8530 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
8531 ifp->if_name = dlifp->dl_if_namestorage;
8532 /* Reset external name (name + unit) */
8533 ifp->if_xname = dlifp->dl_if_xnamestorage;
8534 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
8535 "%s?", ifp->if_name);
8536 lck_mtx_unlock(&dlifp->dl_if_lock);
8537 ifnet_lock_done(ifp);
8538 }
8539
8540 __private_extern__ void
8541 dlil_if_lock(void)
8542 {
8543 lck_mtx_lock(&dlil_ifnet_lock);
8544 }
8545
8546 __private_extern__ void
8547 dlil_if_unlock(void)
8548 {
8549 lck_mtx_unlock(&dlil_ifnet_lock);
8550 }
8551
8552 __private_extern__ void
8553 dlil_if_lock_assert(void)
8554 {
8555 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
8556 }
8557
8558 __private_extern__ void
8559 dlil_proto_unplumb_all(struct ifnet *ifp)
8560 {
8561 /*
8562 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
8563 * each bucket contains exactly one entry; PF_VLAN does not need an
8564 * explicit unplumb.
8565 *
8566 * if_proto_hash[3] is for other protocols; we expect anything
8567 * in this bucket to respond to the DETACHING event (which would
8568 * have happened by now) and do the unplumb then.
8569 */
8570 (void) proto_unplumb(PF_INET, ifp);
8571 (void) proto_unplumb(PF_INET6, ifp);
8572 }
8573
8574 static void
8575 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
8576 {
8577 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8578 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8579
8580 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
8581
8582 lck_mtx_unlock(&ifp->if_cached_route_lock);
8583 }
8584
8585 static void
8586 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
8587 {
8588 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8589 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8590
8591 if (ifp->if_fwd_cacheok) {
8592 route_copyin(src, &ifp->if_src_route, sizeof(*src));
8593 } else {
8594 ROUTE_RELEASE(src);
8595 }
8596 lck_mtx_unlock(&ifp->if_cached_route_lock);
8597 }
8598
8599 static void
8600 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
8601 {
8602 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8603 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8604
8605 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
8606 sizeof(*dst));
8607
8608 lck_mtx_unlock(&ifp->if_cached_route_lock);
8609 }
8610
8611 static void
8612 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
8613 {
8614 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8615 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8616
8617 if (ifp->if_fwd_cacheok) {
8618 route_copyin((struct route *)src,
8619 (struct route *)&ifp->if_src_route6, sizeof(*src));
8620 } else {
8621 ROUTE_RELEASE(src);
8622 }
8623 lck_mtx_unlock(&ifp->if_cached_route_lock);
8624 }
8625
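/*
 * The copyout/copyin helpers above implement a small per-ifnet route cache:
 * copyout hands the caller a reference-counted snapshot of the cached source
 * route, and copyin stores a route back only while if_fwd_cacheok is set
 * (it is cleared at detach time); otherwise the reference is simply
 * released.  ifnet_cached_rtlookup_inet()/_inet6() below show the intended
 * usage of this pattern.
 */
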
8626 struct rtentry *
8627 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
8628 {
8629 struct route src_rt;
8630 struct sockaddr_in *dst;
8631
8632 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
8633
8634 ifp_src_route_copyout(ifp, &src_rt);
8635
8636 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
8637 ROUTE_RELEASE(&src_rt);
8638 if (dst->sin_family != AF_INET) {
8639 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8640 dst->sin_len = sizeof(src_rt.ro_dst);
8641 dst->sin_family = AF_INET;
8642 }
8643 dst->sin_addr = src_ip;
8644
8645 VERIFY(src_rt.ro_rt == NULL);
8646 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
8647 0, 0, ifp->if_index);
8648
8649 if (src_rt.ro_rt != NULL) {
8650 /* retain a ref, copyin consumes one */
8651 struct rtentry *rte = src_rt.ro_rt;
8652 RT_ADDREF(rte);
8653 ifp_src_route_copyin(ifp, &src_rt);
8654 src_rt.ro_rt = rte;
8655 }
8656 }
8657
8658 return src_rt.ro_rt;
8659 }
8660
8661 struct rtentry *
8662 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
8663 {
8664 struct route_in6 src_rt;
8665
8666 ifp_src_route6_copyout(ifp, &src_rt);
8667
8668 if (ROUTE_UNUSABLE(&src_rt) ||
8669 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
8670 ROUTE_RELEASE(&src_rt);
8671 if (src_rt.ro_dst.sin6_family != AF_INET6) {
8672 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8673 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
8674 src_rt.ro_dst.sin6_family = AF_INET6;
8675 }
8676 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
8677 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
8678 sizeof(src_rt.ro_dst.sin6_addr));
8679
8680 if (src_rt.ro_rt == NULL) {
8681 src_rt.ro_rt = rtalloc1_scoped(
8682 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
8683 ifp->if_index);
8684
8685 if (src_rt.ro_rt != NULL) {
8686 /* retain a ref, copyin consumes one */
8687 struct rtentry *rte = src_rt.ro_rt;
8688 RT_ADDREF(rte);
8689 ifp_src_route6_copyin(ifp, &src_rt);
8690 src_rt.ro_rt = rte;
8691 }
8692 }
8693 }
8694
8695 return src_rt.ro_rt;
8696 }
8697
8698 void
8699 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
8700 {
8701 struct kev_dl_link_quality_metric_data ev_lqm_data;
8702
8703 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
8704
8705 /* Normalize to edge */
8706 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
8707 lqm = IFNET_LQM_THRESH_ABORT;
8708 atomic_bitset_32(&tcbinfo.ipi_flags,
8709 INPCBINFO_HANDLE_LQM_ABORT);
8710 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
8711 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
8712 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
8713 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
8714 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
8715 lqm <= IFNET_LQM_THRESH_POOR) {
8716 lqm = IFNET_LQM_THRESH_POOR;
8717 } else if (lqm > IFNET_LQM_THRESH_POOR &&
8718 lqm <= IFNET_LQM_THRESH_GOOD) {
8719 lqm = IFNET_LQM_THRESH_GOOD;
8720 }
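	/*
	 * In other words, a raw metric is snapped up to the nearest named
	 * threshold: anything in [0, ABORT] reports as ABORT (and kicks the
	 * TCP fast timer so flows can be aborted), (ABORT, MINIMALLY_VIABLE]
	 * as MINIMALLY_VIABLE, (MINIMALLY_VIABLE, POOR] as POOR, and
	 * (POOR, GOOD] as GOOD; values that none of these ranges match pass
	 * through unchanged.
	 */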
8721
8722 /*
8723 * Take the lock if needed
8724 */
8725 if (!locked) {
8726 ifnet_lock_exclusive(ifp);
8727 }
8728
8729 if (lqm == ifp->if_interface_state.lqm_state &&
8730 (ifp->if_interface_state.valid_bitmask &
8731 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
8732 /*
8733 * Release the lock if it was not held by the caller
8734 */
8735 if (!locked) {
8736 ifnet_lock_done(ifp);
8737 }
8738 return; /* nothing to update */
8739 }
8740 ifp->if_interface_state.valid_bitmask |=
8741 IF_INTERFACE_STATE_LQM_STATE_VALID;
8742 ifp->if_interface_state.lqm_state = (int8_t)lqm;
8743
8744 /*
8745 * Don't want to hold the lock when issuing kernel events
8746 */
8747 ifnet_lock_done(ifp);
8748
8749 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
8750 ev_lqm_data.link_quality_metric = lqm;
8751
8752 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
8753 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
8754
8755 /*
8756 * Reacquire the lock for the caller
8757 */
8758 if (locked) {
8759 ifnet_lock_exclusive(ifp);
8760 }
8761 }
8762
8763 static void
8764 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
8765 {
8766 struct kev_dl_rrc_state kev;
8767
8768 if (rrc_state == ifp->if_interface_state.rrc_state &&
8769 (ifp->if_interface_state.valid_bitmask &
8770 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8771 return;
8772 }
8773
8774 ifp->if_interface_state.valid_bitmask |=
8775 IF_INTERFACE_STATE_RRC_STATE_VALID;
8776
8777 ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;
8778
8779 /*
8780 * Don't want to hold the lock when issuing kernel events
8781 */
8782 ifnet_lock_done(ifp);
8783
8784 bzero(&kev, sizeof(struct kev_dl_rrc_state));
8785 kev.rrc_state = rrc_state;
8786
8787 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
8788 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
8789
8790 ifnet_lock_exclusive(ifp);
8791 }
8792
8793 errno_t
8794 if_state_update(struct ifnet *ifp,
8795 struct if_interface_state *if_interface_state)
8796 {
8797 u_short if_index_available = 0;
8798
8799 ifnet_lock_exclusive(ifp);
8800
8801 if ((ifp->if_type != IFT_CELLULAR) &&
8802 (if_interface_state->valid_bitmask &
8803 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8804 ifnet_lock_done(ifp);
8805 return ENOTSUP;
8806 }
8807 if ((if_interface_state->valid_bitmask &
8808 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
8809 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
8810 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
8811 ifnet_lock_done(ifp);
8812 return EINVAL;
8813 }
8814 if ((if_interface_state->valid_bitmask &
8815 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
8816 if_interface_state->rrc_state !=
8817 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
8818 if_interface_state->rrc_state !=
8819 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
8820 ifnet_lock_done(ifp);
8821 return EINVAL;
8822 }
8823
8824 if (if_interface_state->valid_bitmask &
8825 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8826 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
8827 }
8828 if (if_interface_state->valid_bitmask &
8829 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8830 if_rrc_state_update(ifp, if_interface_state->rrc_state);
8831 }
8832 if (if_interface_state->valid_bitmask &
8833 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8834 ifp->if_interface_state.valid_bitmask |=
8835 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8836 ifp->if_interface_state.interface_availability =
8837 if_interface_state->interface_availability;
8838
8839 if (ifp->if_interface_state.interface_availability ==
8840 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
8841 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
8842 __func__, if_name(ifp), ifp->if_index);
8843 if_index_available = ifp->if_index;
8844 } else {
8845 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
8846 __func__, if_name(ifp), ifp->if_index);
8847 }
8848 }
8849 ifnet_lock_done(ifp);
8850
8851 /*
8852 * Check if the TCP connections going on this interface should be
8853 * forced to send probe packets instead of waiting for TCP timers
8854 * to fire. This is done on an explicit notification such as
8855 * SIOCSIFINTERFACESTATE which marks the interface as available.
8856 */
8857 if (if_index_available > 0) {
8858 tcp_interface_send_probe(if_index_available);
8859 }
8860
8861 return 0;
8862 }
8863
8864 void
8865 if_get_state(struct ifnet *ifp,
8866 struct if_interface_state *if_interface_state)
8867 {
8868 ifnet_lock_shared(ifp);
8869
8870 if_interface_state->valid_bitmask = 0;
8871
8872 if (ifp->if_interface_state.valid_bitmask &
8873 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8874 if_interface_state->valid_bitmask |=
8875 IF_INTERFACE_STATE_RRC_STATE_VALID;
8876 if_interface_state->rrc_state =
8877 ifp->if_interface_state.rrc_state;
8878 }
8879 if (ifp->if_interface_state.valid_bitmask &
8880 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8881 if_interface_state->valid_bitmask |=
8882 IF_INTERFACE_STATE_LQM_STATE_VALID;
8883 if_interface_state->lqm_state =
8884 ifp->if_interface_state.lqm_state;
8885 }
8886 if (ifp->if_interface_state.valid_bitmask &
8887 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8888 if_interface_state->valid_bitmask |=
8889 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8890 if_interface_state->interface_availability =
8891 ifp->if_interface_state.interface_availability;
8892 }
8893
8894 ifnet_lock_done(ifp);
8895 }
8896
8897 errno_t
8898 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8899 {
8900 if (conn_probe > 1) {
8901 return EINVAL;
8902 }
8903 if (conn_probe == 0) {
8904 if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
8905 } else {
8906 if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
8907 }
8908
8909 #if NECP
8910 necp_update_all_clients();
8911 #endif /* NECP */
8912
8913 tcp_probe_connectivity(ifp, conn_probe);
8914 return 0;
8915 }
8916
8917 /* for uuid.c */
8918 static int
8919 get_ether_index(int * ret_other_index)
8920 {
8921 struct ifnet *ifp;
8922 int en0_index = 0;
8923 int other_en_index = 0;
8924 int any_ether_index = 0;
8925 short best_unit = 0;
8926
8927 *ret_other_index = 0;
8928 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
8929 /*
8930 * find en0, or if not en0, the lowest unit en*, and if not
8931 * that, any ethernet
8932 */
8933 ifnet_lock_shared(ifp);
8934 if (strcmp(ifp->if_name, "en") == 0) {
8935 if (ifp->if_unit == 0) {
8936 /* found en0, we're done */
8937 en0_index = ifp->if_index;
8938 ifnet_lock_done(ifp);
8939 break;
8940 }
8941 if (other_en_index == 0 || ifp->if_unit < best_unit) {
8942 other_en_index = ifp->if_index;
8943 best_unit = ifp->if_unit;
8944 }
8945 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
8946 any_ether_index = ifp->if_index;
8947 }
8948 ifnet_lock_done(ifp);
8949 }
8950 if (en0_index == 0) {
8951 if (other_en_index != 0) {
8952 *ret_other_index = other_en_index;
8953 } else if (any_ether_index != 0) {
8954 *ret_other_index = any_ether_index;
8955 }
8956 }
8957 return en0_index;
8958 }
8959
8960 int
8961 uuid_get_ethernet(u_int8_t *node)
8962 {
8963 static int en0_index;
8964 struct ifnet *ifp;
8965 int other_index = 0;
8966 int the_index = 0;
8967 int ret;
8968
8969 ifnet_head_lock_shared();
8970 if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
8971 en0_index = get_ether_index(&other_index);
8972 }
8973 if (en0_index != 0) {
8974 the_index = en0_index;
8975 } else if (other_index != 0) {
8976 the_index = other_index;
8977 }
8978 if (the_index != 0) {
8979 struct dlil_ifnet *dl_if;
8980
8981 ifp = ifindex2ifnet[the_index];
8982 VERIFY(ifp != NULL);
8983 dl_if = (struct dlil_ifnet *)ifp;
8984 if (dl_if->dl_if_permanent_ether_is_set != 0) {
8985 /*
8986 * Use the permanent ethernet address if it is
8987 * available because it will never change.
8988 */
8989 memcpy(node, dl_if->dl_if_permanent_ether,
8990 ETHER_ADDR_LEN);
8991 } else {
8992 memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
8993 }
8994 ret = 0;
8995 } else {
8996 ret = -1;
8997 }
8998 ifnet_head_done();
8999 return ret;
9000 }
9001
9002 static int
9003 sysctl_rxpoll SYSCTL_HANDLER_ARGS
9004 {
9005 #pragma unused(arg1, arg2)
9006 uint32_t i;
9007 int err;
9008
9009 i = if_rxpoll;
9010
9011 err = sysctl_handle_int(oidp, &i, 0, req);
9012 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9013 return err;
9014 }
9015
9016 if (net_rxpoll == 0) {
9017 return ENXIO;
9018 }
9019
9020 if_rxpoll = i;
9021 return err;
9022 }
9023
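/*
 * The sysctl handlers that follow all share the same shape: copy the current
 * value into a local, let sysctl_handle_int()/sysctl_handle_quad() perform
 * the userland read/write, return early when the request was read-only
 * (req->newptr == USER_ADDR_NULL) or failed, clamp the new value against its
 * floor/ceiling, and only then publish it to the global.
 */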
9024 static int
9025 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
9026 {
9027 #pragma unused(arg1, arg2)
9028 uint64_t q;
9029 int err;
9030
9031 q = if_rxpoll_mode_holdtime;
9032
9033 err = sysctl_handle_quad(oidp, &q, 0, req);
9034 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9035 return err;
9036 }
9037
9038 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
9039 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
9040 }
9041
9042 if_rxpoll_mode_holdtime = q;
9043
9044 return err;
9045 }
9046
9047 static int
9048 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
9049 {
9050 #pragma unused(arg1, arg2)
9051 uint64_t q;
9052 int err;
9053
9054 q = if_rxpoll_sample_holdtime;
9055
9056 err = sysctl_handle_quad(oidp, &q, 0, req);
9057 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9058 return err;
9059 }
9060
9061 if (q < IF_RXPOLL_SAMPLETIME_MIN) {
9062 q = IF_RXPOLL_SAMPLETIME_MIN;
9063 }
9064
9065 if_rxpoll_sample_holdtime = q;
9066
9067 return err;
9068 }
9069
9070 static int
9071 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
9072 {
9073 #pragma unused(arg1, arg2)
9074 uint64_t q;
9075 int err;
9076
9077 q = if_rxpoll_interval_time;
9078
9079 err = sysctl_handle_quad(oidp, &q, 0, req);
9080 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9081 return err;
9082 }
9083
9084 if (q < IF_RXPOLL_INTERVALTIME_MIN) {
9085 q = IF_RXPOLL_INTERVALTIME_MIN;
9086 }
9087
9088 if_rxpoll_interval_time = q;
9089
9090 return err;
9091 }
9092
9093 static int
9094 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
9095 {
9096 #pragma unused(arg1, arg2)
9097 uint32_t i;
9098 int err;
9099
9100 i = if_sysctl_rxpoll_wlowat;
9101
9102 err = sysctl_handle_int(oidp, &i, 0, req);
9103 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9104 return err;
9105 }
9106
9107 if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
9108 return EINVAL;
9109 }
9110
9111 if_sysctl_rxpoll_wlowat = i;
9112 return err;
9113 }
9114
9115 static int
9116 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
9117 {
9118 #pragma unused(arg1, arg2)
9119 uint32_t i;
9120 int err;
9121
9122 i = if_sysctl_rxpoll_whiwat;
9123
9124 err = sysctl_handle_int(oidp, &i, 0, req);
9125 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9126 return err;
9127 }
9128
9129 if (i <= if_sysctl_rxpoll_wlowat) {
9130 return EINVAL;
9131 }
9132
9133 if_sysctl_rxpoll_whiwat = i;
9134 return err;
9135 }
9136
9137 static int
9138 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
9139 {
9140 #pragma unused(arg1, arg2)
9141 int i, err;
9142
9143 i = if_sndq_maxlen;
9144
9145 err = sysctl_handle_int(oidp, &i, 0, req);
9146 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9147 return err;
9148 }
9149
9150 if (i < IF_SNDQ_MINLEN) {
9151 i = IF_SNDQ_MINLEN;
9152 }
9153
9154 if_sndq_maxlen = i;
9155 return err;
9156 }
9157
9158 static int
9159 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
9160 {
9161 #pragma unused(arg1, arg2)
9162 int i, err;
9163
9164 i = if_rcvq_maxlen;
9165
9166 err = sysctl_handle_int(oidp, &i, 0, req);
9167 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9168 return err;
9169 }
9170
9171 if (i < IF_RCVQ_MINLEN) {
9172 i = IF_RCVQ_MINLEN;
9173 }
9174
9175 if_rcvq_maxlen = i;
9176 return err;
9177 }
9178
9179 int
9180 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
9181 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9182 {
9183 struct kev_dl_node_presence kev;
9184 struct sockaddr_dl *sdl;
9185 struct sockaddr_in6 *sin6;
9186 int ret = 0;
9187
9188 VERIFY(ifp);
9189 VERIFY(sa);
9190 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9191
9192 bzero(&kev, sizeof(kev));
9193 sin6 = &kev.sin6_node_address;
9194 sdl = &kev.sdl_node_address;
9195 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
9196 kev.rssi = rssi;
9197 kev.link_quality_metric = lqm;
9198 kev.node_proximity_metric = npm;
9199 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9200
9201 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
9202 if (ret == 0) {
9203 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9204 &kev.link_data, sizeof(kev));
9205 if (err != 0) {
9206 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
9207 "error %d\n", __func__, err);
9208 }
9209 }
9210 return ret;
9211 }
9212
9213 void
9214 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
9215 {
9216 struct kev_dl_node_absence kev = {};
9217 struct sockaddr_in6 *kev_sin6 = NULL;
9218 struct sockaddr_dl *kev_sdl = NULL;
9219
9220 VERIFY(ifp != NULL);
9221 VERIFY(sa != NULL);
9222 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9223
9224 kev_sin6 = &kev.sin6_node_address;
9225 kev_sdl = &kev.sdl_node_address;
9226
9227 if (sa->sa_family == AF_INET6) {
9228 /*
9229 * If an IPv6 address is given, get the link-layer
9230 * address from what was cached in the neighbor cache.
9231 */
9232 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9233 bcopy(sa, kev_sin6, sa->sa_len);
9234 nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
9235 } else {
9236 /*
9237 * If the passed address is of AF_LINK type, derive the
9238 * IPv6 address from the link-layer address.
9239 */
9240 nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
9241 nd6_alt_node_absent(ifp, kev_sin6, NULL);
9242 }
9243
9244 kev_sdl->sdl_type = ifp->if_type;
9245 kev_sdl->sdl_index = ifp->if_index;
9246
9247 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
9248 &kev.link_data, sizeof(kev));
9249 }
9250
9251 int
9252 dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
9253 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9254 {
9255 struct kev_dl_node_presence kev = {};
9256 struct sockaddr_dl *kev_sdl = NULL;
9257 struct sockaddr_in6 *kev_sin6 = NULL;
9258 int ret = 0;
9259
9260 VERIFY(ifp != NULL);
9261 VERIFY(sa != NULL && sdl != NULL);
9262 VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
9263
9264 kev_sin6 = &kev.sin6_node_address;
9265 kev_sdl = &kev.sdl_node_address;
9266
9267 VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
9268 bcopy(sdl, kev_sdl, sdl->sdl_len);
9269 kev_sdl->sdl_type = ifp->if_type;
9270 kev_sdl->sdl_index = ifp->if_index;
9271
9272 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9273 bcopy(sa, kev_sin6, sa->sa_len);
9274
9275 kev.rssi = rssi;
9276 kev.link_quality_metric = lqm;
9277 kev.node_proximity_metric = npm;
9278 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9279
9280 ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
9281 if (ret == 0) {
9282 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9283 &kev.link_data, sizeof(kev));
9284 if (err != 0) {
9285 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
9286 }
9287 }
9288 return ret;
9289 }
9290
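/*
 * dlil_node_present(), dlil_node_present_v2() and dlil_node_absent() are the
 * driver-facing entry points for reporting peer reachability: they update
 * the ND6 alternative-node state and post KEV_DL_NODE_PRESENCE or
 * KEV_DL_NODE_ABSENCE events carrying RSSI, link-quality and proximity
 * metrics plus 48 bytes of service info.  A minimal caller sketch
 * (illustrative only; the peer address, metric values and the sc_ifp field
 * are hypothetical driver state):
 *
 *     struct sockaddr_in6 sin6 = {
 *             .sin6_len = sizeof(sin6), .sin6_family = AF_INET6,
 *             .sin6_addr = peer_ipv6_addr,
 *     };
 *     struct sockaddr_dl sdl = {
 *             .sdl_len = sizeof(sdl), .sdl_family = AF_LINK,
 *     };
 *     uint8_t srvinfo[48] = { 0 };
 *
 *     int error = dlil_node_present_v2(sc->sc_ifp,
 *         (struct sockaddr *)&sin6, &sdl, -40, lqm, npm, srvinfo);
 *     ...
 *     dlil_node_absent(sc->sc_ifp, (struct sockaddr *)&sin6);
 */
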
9291 const void *
9292 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
9293 kauth_cred_t *credp)
9294 {
9295 const u_int8_t *bytes;
9296 size_t size;
9297
9298 bytes = CONST_LLADDR(sdl);
9299 size = sdl->sdl_alen;
9300
9301 #if CONFIG_MACF
9302 if (dlil_lladdr_ckreq) {
9303 switch (sdl->sdl_type) {
9304 case IFT_ETHER:
9305 case IFT_IEEE1394:
9306 break;
9307 default:
9308 credp = NULL;
9309 break;
9310 }
9312
9313 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
9314 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
9315 [0] = 2
9316 };
9317
9318 bytes = unspec;
9319 }
9320 }
9321 #else
9322 #pragma unused(credp)
9323 #endif
9324
9325 if (sizep != NULL) {
9326 *sizep = size;
9327 }
9328 return bytes;
9329 }
9330
9331 void
9332 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
9333 u_int8_t info[DLIL_MODARGLEN])
9334 {
9335 struct kev_dl_issues kev;
9336 struct timeval tv;
9337
9338 VERIFY(ifp != NULL);
9339 VERIFY(modid != NULL);
9340 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
9341 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
9342
9343 bzero(&kev, sizeof(kev));
9344
9345 microtime(&tv);
9346 kev.timestamp = tv.tv_sec;
9347 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
9348 if (info != NULL) {
9349 bcopy(info, &kev.info, DLIL_MODARGLEN);
9350 }
9351
9352 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
9353 &kev.link_data, sizeof(kev));
9354 }
9355
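/*
 * dlil_report_issues() above lets a module attached to the interface surface
 * a fault to user space as a KEV_DL_ISSUES event, tagged with the current
 * time, a DLIL_MODIDLEN-byte module identifier and optional DLIL_MODARGLEN
 * bytes of module-specific data.  A hedged caller sketch (the identifier
 * bytes are purely illustrative):
 *
 *     uint8_t modid[DLIL_MODIDLEN] = { 0x01, 0x02 };   -- identifies the module
 *     uint8_t info[DLIL_MODARGLEN] = { 0 };            -- optional detail
 *
 *     dlil_report_issues(ifp, modid, info);            -- info may be NULL
 */
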
9356 errno_t
9357 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9358 struct proc *p)
9359 {
9360 u_int32_t level = IFNET_THROTTLE_OFF;
9361 errno_t result = 0;
9362
9363 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
9364
9365 if (cmd == SIOCSIFOPPORTUNISTIC) {
9366 /*
9367 * XXX: Use priv_check_cred() instead of root check?
9368 */
9369 if ((result = proc_suser(p)) != 0) {
9370 return result;
9371 }
9372
9373 if (ifr->ifr_opportunistic.ifo_flags ==
9374 IFRIFOF_BLOCK_OPPORTUNISTIC) {
9375 level = IFNET_THROTTLE_OPPORTUNISTIC;
9376 } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
9377 level = IFNET_THROTTLE_OFF;
9378 } else {
9379 result = EINVAL;
9380 }
9381
9382 if (result == 0) {
9383 result = ifnet_set_throttle(ifp, level);
9384 }
9385 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
9386 ifr->ifr_opportunistic.ifo_flags = 0;
9387 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
9388 ifr->ifr_opportunistic.ifo_flags |=
9389 IFRIFOF_BLOCK_OPPORTUNISTIC;
9390 }
9391 }
9392
9393 /*
9394 * Return the count of current opportunistic connections
9395 * over the interface.
9396 */
9397 if (result == 0) {
9398 uint32_t flags = 0;
9399 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
9400 INPCB_OPPORTUNISTIC_SETCMD : 0;
9401 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
9402 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
9403 ifr->ifr_opportunistic.ifo_inuse =
9404 udp_count_opportunistic(ifp->if_index, flags) +
9405 tcp_count_opportunistic(ifp->if_index, flags);
9406 }
9407
9408 if (result == EALREADY) {
9409 result = 0;
9410 }
9411
9412 return result;
9413 }
9414
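/*
 * SIOCSIFOPPORTUNISTIC / SIOCGIFOPPORTUNISTIC are serviced by
 * ifnet_getset_opportunistic() above: the set path requires root and maps
 * IFRIFOF_BLOCK_OPPORTUNISTIC onto IFNET_THROTTLE_OPPORTUNISTIC, and both
 * paths report the number of opportunistic TCP/UDP connections in
 * ifo_inuse.  A user-space sketch, assuming the private ifreq layout from
 * <net/if.h> is available to the caller:
 *
 *     struct ifreq ifr = { 0 };
 *     int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *     strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *     ifr.ifr_opportunistic.ifo_flags = IFRIFOF_BLOCK_OPPORTUNISTIC;
 *     if (ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr) == -1)
 *             perror("SIOCSIFOPPORTUNISTIC");
 */
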
9415 int
9416 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
9417 {
9418 struct ifclassq *ifq;
9419 int err = 0;
9420
9421 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9422 return ENXIO;
9423 }
9424
9425 *level = IFNET_THROTTLE_OFF;
9426
9427 ifq = &ifp->if_snd;
9428 IFCQ_LOCK(ifq);
9429 /* Throttling works only for IFCQ, not ALTQ instances */
9430 if (IFCQ_IS_ENABLED(ifq)) {
9431 cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
9432
9433 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
9434 *level = req.level;
9435 }
9436 IFCQ_UNLOCK(ifq);
9437
9438 return err;
9439 }
9440
9441 int
9442 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
9443 {
9444 struct ifclassq *ifq;
9445 int err = 0;
9446
9447 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9448 return ENXIO;
9449 }
9450
9451 ifq = &ifp->if_snd;
9452
9453 switch (level) {
9454 case IFNET_THROTTLE_OFF:
9455 case IFNET_THROTTLE_OPPORTUNISTIC:
9456 break;
9457 default:
9458 return EINVAL;
9459 }
9460
9461 IFCQ_LOCK(ifq);
9462 if (IFCQ_IS_ENABLED(ifq)) {
9463 cqrq_throttle_t req = { 1, level };
9464
9465 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
9466 }
9467 IFCQ_UNLOCK(ifq);
9468
9469 if (err == 0) {
9470 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
9471 level);
9472 #if NECP
9473 necp_update_all_clients();
9474 #endif /* NECP */
9475 if (level == IFNET_THROTTLE_OFF) {
9476 ifnet_start(ifp);
9477 }
9478 }
9479
9480 return err;
9481 }
9482
9483 errno_t
9484 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9485 struct proc *p)
9486 {
9487 #pragma unused(p)
9488 errno_t result = 0;
9489 uint32_t flags;
9490 int level, category, subcategory;
9491
9492 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
9493
9494 if (cmd == SIOCSIFLOG) {
9495 if ((result = priv_check_cred(kauth_cred_get(),
9496 PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
9497 return result;
9498 }
9499
9500 level = ifr->ifr_log.ifl_level;
9501 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
9502 result = EINVAL;
9503 }
9504
9505 flags = ifr->ifr_log.ifl_flags;
9506 if ((flags &= IFNET_LOGF_MASK) == 0) {
9507 result = EINVAL;
9508 }
9509
9510 category = ifr->ifr_log.ifl_category;
9511 subcategory = ifr->ifr_log.ifl_subcategory;
9512
9513 if (result == 0) {
9514 result = ifnet_set_log(ifp, level, flags,
9515 category, subcategory);
9516 }
9517 } else {
9518 result = ifnet_get_log(ifp, &level, &flags, &category,
9519 &subcategory);
9520 if (result == 0) {
9521 ifr->ifr_log.ifl_level = level;
9522 ifr->ifr_log.ifl_flags = flags;
9523 ifr->ifr_log.ifl_category = category;
9524 ifr->ifr_log.ifl_subcategory = subcategory;
9525 }
9526 }
9527
9528 return result;
9529 }
9530
9531 int
9532 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
9533 int32_t category, int32_t subcategory)
9534 {
9535 int err = 0;
9536
9537 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
9538 VERIFY(flags & IFNET_LOGF_MASK);
9539
9540 /*
9541 * The logging level applies to all facilities; make sure to
9542 * update them all with the most current level.
9543 */
9544 flags |= ifp->if_log.flags;
9545
9546 if (ifp->if_output_ctl != NULL) {
9547 struct ifnet_log_params l;
9548
9549 bzero(&l, sizeof(l));
9550 l.level = level;
9551 l.flags = flags;
9552 l.flags &= ~IFNET_LOGF_DLIL;
9553 l.category = category;
9554 l.subcategory = subcategory;
9555
9556 /* Send this request to lower layers */
9557 if (l.flags != 0) {
9558 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
9559 sizeof(l), &l);
9560 }
9561 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
9562 /*
9563 * If targeted to the lower layers without an output
9564 * control callback registered on the interface, just
9565 * silently ignore facilities other than ours.
9566 */
9567 flags &= IFNET_LOGF_DLIL;
9568 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
9569 level = 0;
9570 }
9571 }
9572
9573 if (err == 0) {
9574 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
9575 ifp->if_log.flags = 0;
9576 } else {
9577 ifp->if_log.flags |= flags;
9578 }
9579
9580 log(LOG_INFO, "%s: logging level set to %d flags=%b "
9581 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
9582 ifp->if_log.level, ifp->if_log.flags,
9583 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
9584 category, subcategory);
9585 }
9586
9587 return err;
9588 }
9589
9590 int
9591 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
9592 int32_t *category, int32_t *subcategory)
9593 {
9594 if (level != NULL) {
9595 *level = ifp->if_log.level;
9596 }
9597 if (flags != NULL) {
9598 *flags = ifp->if_log.flags;
9599 }
9600 if (category != NULL) {
9601 *category = ifp->if_log.category;
9602 }
9603 if (subcategory != NULL) {
9604 *subcategory = ifp->if_log.subcategory;
9605 }
9606
9607 return 0;
9608 }
9609
9610 int
9611 ifnet_notify_address(struct ifnet *ifp, int af)
9612 {
9613 struct ifnet_notify_address_params na;
9614
9615 #if PF
9616 (void) pf_ifaddr_hook(ifp);
9617 #endif /* PF */
9618
9619 if (ifp->if_output_ctl == NULL) {
9620 return EOPNOTSUPP;
9621 }
9622
9623 bzero(&na, sizeof(na));
9624 na.address_family = (sa_family_t)af;
9625
9626 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
9627 sizeof(na), &na);
9628 }
9629
9630 errno_t
9631 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
9632 {
9633 if (ifp == NULL || flowid == NULL) {
9634 return EINVAL;
9635 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9636 !IF_FULLY_ATTACHED(ifp)) {
9637 return ENXIO;
9638 }
9639
9640 *flowid = ifp->if_flowhash;
9641
9642 return 0;
9643 }
9644
9645 errno_t
9646 ifnet_disable_output(struct ifnet *ifp)
9647 {
9648 int err;
9649
9650 if (ifp == NULL) {
9651 return EINVAL;
9652 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9653 !IF_FULLY_ATTACHED(ifp)) {
9654 return ENXIO;
9655 }
9656
9657 if ((err = ifnet_fc_add(ifp)) == 0) {
9658 lck_mtx_lock_spin(&ifp->if_start_lock);
9659 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
9660 lck_mtx_unlock(&ifp->if_start_lock);
9661 }
9662 return err;
9663 }
9664
9665 errno_t
9666 ifnet_enable_output(struct ifnet *ifp)
9667 {
9668 if (ifp == NULL) {
9669 return EINVAL;
9670 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9671 !IF_FULLY_ATTACHED(ifp)) {
9672 return ENXIO;
9673 }
9674
9675 ifnet_start_common(ifp, TRUE);
9676 return 0;
9677 }
9678
9679 void
9680 ifnet_flowadv(uint32_t flowhash)
9681 {
9682 struct ifnet_fc_entry *ifce;
9683 struct ifnet *ifp;
9684
9685 ifce = ifnet_fc_get(flowhash);
9686 if (ifce == NULL) {
9687 return;
9688 }
9689
9690 VERIFY(ifce->ifce_ifp != NULL);
9691 ifp = ifce->ifce_ifp;
9692
9693 /* flow hash gets recalculated per attach, so check */
9694 if (ifnet_is_attached(ifp, 1)) {
9695 if (ifp->if_flowhash == flowhash) {
9696 (void) ifnet_enable_output(ifp);
9697 }
9698 ifnet_decr_iorefcnt(ifp);
9699 }
9700 ifnet_fc_entry_free(ifce);
9701 }
9702
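/*
 * Flow-control round trip, as implemented above: ifnet_disable_output()
 * records the interface's flow hash in ifnet_fc_tree and marks the start
 * thread IFSF_FLOW_CONTROLLED; ifnet_flowadv() later looks the hash up,
 * verifies that it still belongs to an attached interface and re-enables
 * output via ifnet_enable_output().  A hedged driver-side sketch (sc,
 * hw_ring_full() and hw_ring_space() are hypothetical driver helpers):
 *
 *     if (hw_ring_full(sc))
 *             (void) ifnet_disable_output(sc->sc_ifp);
 *     ...
 *     if (hw_ring_space(sc))
 *             (void) ifnet_enable_output(sc->sc_ifp);
 */
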
9703 /*
9704 * Function to compare ifnet_fc_entries in ifnet flow control tree
9705 */
9706 static inline int
9707 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
9708 {
9709 return fc1->ifce_flowhash - fc2->ifce_flowhash;
9710 }
9711
9712 static int
9713 ifnet_fc_add(struct ifnet *ifp)
9714 {
9715 struct ifnet_fc_entry keyfc, *ifce;
9716 uint32_t flowhash;
9717
9718 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
9719 VERIFY(ifp->if_flowhash != 0);
9720 flowhash = ifp->if_flowhash;
9721
9722 bzero(&keyfc, sizeof(keyfc));
9723 keyfc.ifce_flowhash = flowhash;
9724
9725 lck_mtx_lock_spin(&ifnet_fc_lock);
9726 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9727 if (ifce != NULL && ifce->ifce_ifp == ifp) {
9728 /* Entry is already in ifnet_fc_tree, return */
9729 lck_mtx_unlock(&ifnet_fc_lock);
9730 return 0;
9731 }
9732
9733 if (ifce != NULL) {
9734 /*
9735 * There is a different fc entry with the same flow hash
9736 * but different ifp pointer. There can be a collision
9737 * on flow hash but the probability is low. Let's just
9738 * avoid adding a second one when there is a collision.
9739 */
9740 lck_mtx_unlock(&ifnet_fc_lock);
9741 return EAGAIN;
9742 }
9743
9744 /* become regular mutex */
9745 lck_mtx_convert_spin(&ifnet_fc_lock);
9746
9747 ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
9748 ifce->ifce_flowhash = flowhash;
9749 ifce->ifce_ifp = ifp;
9750
9751 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9752 lck_mtx_unlock(&ifnet_fc_lock);
9753 return 0;
9754 }
9755
9756 static struct ifnet_fc_entry *
9757 ifnet_fc_get(uint32_t flowhash)
9758 {
9759 struct ifnet_fc_entry keyfc, *ifce;
9760 struct ifnet *ifp;
9761
9762 bzero(&keyfc, sizeof(keyfc));
9763 keyfc.ifce_flowhash = flowhash;
9764
9765 lck_mtx_lock_spin(&ifnet_fc_lock);
9766 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9767 if (ifce == NULL) {
9768 /* Entry is not present in ifnet_fc_tree, return */
9769 lck_mtx_unlock(&ifnet_fc_lock);
9770 return NULL;
9771 }
9772
9773 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9774
9775 VERIFY(ifce->ifce_ifp != NULL);
9776 ifp = ifce->ifce_ifp;
9777
9778 /* become regular mutex */
9779 lck_mtx_convert_spin(&ifnet_fc_lock);
9780
9781 if (!ifnet_is_attached(ifp, 0)) {
9782 /*
9783 * This ifp is not attached or in the process of being
9784 * detached; just don't process it.
9785 */
9786 ifnet_fc_entry_free(ifce);
9787 ifce = NULL;
9788 }
9789 lck_mtx_unlock(&ifnet_fc_lock);
9790
9791 return ifce;
9792 }
9793
9794 static void
9795 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
9796 {
9797 zfree(ifnet_fc_zone, ifce);
9798 }
9799
9800 static uint32_t
9801 ifnet_calc_flowhash(struct ifnet *ifp)
9802 {
9803 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
9804 uint32_t flowhash = 0;
9805
9806 if (ifnet_flowhash_seed == 0) {
9807 ifnet_flowhash_seed = RandomULong();
9808 }
9809
9810 bzero(&fh, sizeof(fh));
9811
9812 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
9813 fh.ifk_unit = ifp->if_unit;
9814 fh.ifk_flags = ifp->if_flags;
9815 fh.ifk_eflags = ifp->if_eflags;
9816 fh.ifk_capabilities = ifp->if_capabilities;
9817 fh.ifk_capenable = ifp->if_capenable;
9818 fh.ifk_output_sched_model = ifp->if_output_sched_model;
9819 fh.ifk_rand1 = RandomULong();
9820 fh.ifk_rand2 = RandomULong();
9821
9822 try_again:
9823 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
9824 if (flowhash == 0) {
9825 /* try to get a non-zero flowhash */
9826 ifnet_flowhash_seed = RandomULong();
9827 goto try_again;
9828 }
9829
9830 return flowhash;
9831 }
9832
9833 int
9834 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
9835 uint16_t flags, uint8_t *data)
9836 {
9837 #pragma unused(flags)
9838 int error = 0;
9839
9840 switch (family) {
9841 case AF_INET:
9842 if_inetdata_lock_exclusive(ifp);
9843 if (IN_IFEXTRA(ifp) != NULL) {
9844 if (len == 0) {
9845 /* Allow clearing the signature */
9846 IN_IFEXTRA(ifp)->netsig_len = 0;
9847 bzero(IN_IFEXTRA(ifp)->netsig,
9848 sizeof(IN_IFEXTRA(ifp)->netsig));
9849 if_inetdata_lock_done(ifp);
9850 break;
9851 } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
9852 error = EINVAL;
9853 if_inetdata_lock_done(ifp);
9854 break;
9855 }
9856 IN_IFEXTRA(ifp)->netsig_len = len;
9857 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
9858 } else {
9859 error = ENOMEM;
9860 }
9861 if_inetdata_lock_done(ifp);
9862 break;
9863
9864 case AF_INET6:
9865 if_inet6data_lock_exclusive(ifp);
9866 if (IN6_IFEXTRA(ifp) != NULL) {
9867 if (len == 0) {
9868 /* Allow clearing the signature */
9869 IN6_IFEXTRA(ifp)->netsig_len = 0;
9870 bzero(IN6_IFEXTRA(ifp)->netsig,
9871 sizeof(IN6_IFEXTRA(ifp)->netsig));
9872 if_inet6data_lock_done(ifp);
9873 break;
9874 } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
9875 error = EINVAL;
9876 if_inet6data_lock_done(ifp);
9877 break;
9878 }
9879 IN6_IFEXTRA(ifp)->netsig_len = len;
9880 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
9881 } else {
9882 error = ENOMEM;
9883 }
9884 if_inet6data_lock_done(ifp);
9885 break;
9886
9887 default:
9888 error = EINVAL;
9889 break;
9890 }
9891
9892 return error;
9893 }
9894
9895 int
9896 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
9897 uint16_t *flags, uint8_t *data)
9898 {
9899 int error = 0;
9900
9901 if (ifp == NULL || len == NULL || data == NULL) {
9902 return EINVAL;
9903 }
9904
9905 switch (family) {
9906 case AF_INET:
9907 if_inetdata_lock_shared(ifp);
9908 if (IN_IFEXTRA(ifp) != NULL) {
9909 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
9910 error = EINVAL;
9911 if_inetdata_lock_done(ifp);
9912 break;
9913 }
9914 if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
9915 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
9916 } else {
9917 error = ENOENT;
9918 }
9919 } else {
9920 error = ENOMEM;
9921 }
9922 if_inetdata_lock_done(ifp);
9923 break;
9924
9925 case AF_INET6:
9926 if_inet6data_lock_shared(ifp);
9927 if (IN6_IFEXTRA(ifp) != NULL) {
9928 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
9929 error = EINVAL;
9930 if_inet6data_lock_done(ifp);
9931 break;
9932 }
9933 if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
9934 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
9935 } else {
9936 error = ENOENT;
9937 }
9938 } else {
9939 error = ENOMEM;
9940 }
9941 if_inet6data_lock_done(ifp);
9942 break;
9943
9944 default:
9945 error = EINVAL;
9946 break;
9947 }
9948
9949 if (error == 0 && flags != NULL) {
9950 *flags = 0;
9951 }
9952
9953 return error;
9954 }
9955
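/*
 * Network-signature accessors above: ifnet_set_netsignature() stores up to
 * sizeof(netsig) opaque bytes per address family (a zero length clears
 * them), and ifnet_get_netsignature() copies them back as long as the
 * caller's buffer is at least as large as what was stored.  A minimal
 * sketch -- the signature bytes and the IFNET_SIGNATURELEN buffer size are
 * assumptions for illustration:
 *
 *     uint8_t sig[4] = { 0xde, 0xad, 0xbe, 0xef };
 *     int err = ifnet_set_netsignature(ifp, AF_INET6, sizeof(sig), 0, sig);
 *
 *     uint8_t buf[IFNET_SIGNATURELEN];
 *     uint8_t buflen = sizeof(buf);
 *     uint16_t sflags = 0;
 *     err = ifnet_get_netsignature(ifp, AF_INET6, &buflen, &sflags, buf);
 */
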
9956 int
9957 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9958 {
9959 int i, error = 0, one_set = 0;
9960
9961 if_inet6data_lock_exclusive(ifp);
9962
9963 if (IN6_IFEXTRA(ifp) == NULL) {
9964 error = ENOMEM;
9965 goto out;
9966 }
9967
9968 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9969 uint32_t prefix_len =
9970 prefixes[i].prefix_len;
9971 struct in6_addr *prefix =
9972 &prefixes[i].ipv6_prefix;
9973
9974 if (prefix_len == 0) {
9975 clat_log0((LOG_DEBUG,
9976 "NAT64 prefixes purged from Interface %s\n",
9977 if_name(ifp)));
9978 /* Allow clearing the prefix */
9979 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
9980 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9981 sizeof(struct in6_addr));
9982
9983 continue;
9984 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
9985 prefix_len != NAT64_PREFIX_LEN_40 &&
9986 prefix_len != NAT64_PREFIX_LEN_48 &&
9987 prefix_len != NAT64_PREFIX_LEN_56 &&
9988 prefix_len != NAT64_PREFIX_LEN_64 &&
9989 prefix_len != NAT64_PREFIX_LEN_96) {
9990 clat_log0((LOG_DEBUG,
9991 "NAT64 prefixlen is incorrect %d\n", prefix_len));
9992 error = EINVAL;
9993 goto out;
9994 }
9995
9996 if (IN6_IS_SCOPE_EMBED(prefix)) {
9997 clat_log0((LOG_DEBUG,
9998 "NAT64 prefix has interface/link local scope.\n"));
9999 error = EINVAL;
10000 goto out;
10001 }
10002
10003 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
10004 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
10005 sizeof(struct in6_addr));
10006 clat_log0((LOG_DEBUG,
10007 "NAT64 prefix set to %s with prefixlen: %d\n",
10008 ip6_sprintf(prefix), prefix_len));
10009 one_set = 1;
10010 }
10011
10012 out:
10013 if_inet6data_lock_done(ifp);
10014
10015 if (error == 0 && one_set != 0) {
10016 necp_update_all_clients();
10017 }
10018
10019 return error;
10020 }
10021
10022 int
10023 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
10024 {
10025 int i, found_one = 0, error = 0;
10026
10027 if (ifp == NULL) {
10028 return EINVAL;
10029 }
10030
10031 if_inet6data_lock_shared(ifp);
10032
10033 if (IN6_IFEXTRA(ifp) == NULL) {
10034 error = ENOMEM;
10035 goto out;
10036 }
10037
10038 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
10039 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
10040 found_one = 1;
10041 }
10042 }
10043
10044 if (found_one == 0) {
10045 error = ENOENT;
10046 goto out;
10047 }
10048
10049 if (prefixes) {
10050 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
10051 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
10052 }
10053
10054 out:
10055 if_inet6data_lock_done(ifp);
10056
10057 return error;
10058 }
10059
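/*
 * NAT64 prefix accessors above: ifnet_set_nat64prefix() accepts an array of
 * NAT64_MAX_NUM_PREFIXES entries, treats a zero prefix_len as "clear this
 * slot", rejects scoped prefixes and non-standard lengths, and notifies NECP
 * clients once at least one prefix is installed; ifnet_get_nat64prefix()
 * copies the whole array back if any slot is set.  A hedged sketch using the
 * RFC 6052 well-known prefix (the byte fill is illustrative):
 *
 *     struct ipv6_prefix prefixes[NAT64_MAX_NUM_PREFIXES] = { 0 };
 *
 *     prefixes[0].prefix_len = NAT64_PREFIX_LEN_96;
 *     prefixes[0].ipv6_prefix.s6_addr[0] = 0x00;   -- 64:ff9b::/96
 *     prefixes[0].ipv6_prefix.s6_addr[1] = 0x64;
 *     prefixes[0].ipv6_prefix.s6_addr[2] = 0xff;
 *     prefixes[0].ipv6_prefix.s6_addr[3] = 0x9b;
 *
 *     int err = ifnet_set_nat64prefix(ifp, prefixes);
 */
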
10060 static void
10061 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
10062 protocol_family_t pf)
10063 {
10064 #pragma unused(ifp)
10065 uint32_t did_sw;
10066
10067 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
10068 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
10069 return;
10070 }
10071
10072 switch (pf) {
10073 case PF_INET:
10074 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
10075 if (did_sw & CSUM_DELAY_IP) {
10076 hwcksum_dbg_finalized_hdr++;
10077 }
10078 if (did_sw & CSUM_DELAY_DATA) {
10079 hwcksum_dbg_finalized_data++;
10080 }
10081 break;
10082 case PF_INET6:
10083 /*
10084 * Checksum offload should not have been enabled when
10085 * extension headers exist; that also means that we
10086 * cannot force-finalize packets with extension headers.
10087 * Indicate to the callee that it should skip such cases
10088 * by setting optlen to -1.
10089 */
10090 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
10091 m->m_pkthdr.csum_flags);
10092 if (did_sw & CSUM_DELAY_IPV6_DATA) {
10093 hwcksum_dbg_finalized_data++;
10094 }
10095 break;
10096 default:
10097 return;
10098 }
10099 }
10100
10101 static void
10102 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
10103 protocol_family_t pf)
10104 {
10105 uint16_t sum = 0;
10106 uint32_t hlen;
10107
10108 if (frame_header == NULL ||
10109 frame_header < (char *)mbuf_datastart(m) ||
10110 frame_header > (char *)m->m_data) {
10111 DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
10112 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
10113 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
10114 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
10115 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
10116 (uint64_t)VM_KERNEL_ADDRPERM(m));
10117 return;
10118 }
10119 hlen = (uint32_t)(m->m_data - frame_header);
10120
10121 switch (pf) {
10122 case PF_INET:
10123 case PF_INET6:
10124 break;
10125 default:
10126 return;
10127 }
10128
10129 /*
10130 * Force partial checksum offload; useful to simulate cases
10131 * where the hardware does not support partial checksum offload,
10132 * in order to validate correctness throughout the layers above.
10133 */
10134 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
10135 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
10136
10137 if (foff > (uint32_t)m->m_pkthdr.len) {
10138 return;
10139 }
10140
10141 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
10142
10143 /* Compute 16-bit 1's complement sum from forced offset */
10144 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
10145
10146 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
10147 m->m_pkthdr.csum_rx_val = sum;
10148 m->m_pkthdr.csum_rx_start = (uint16_t)(foff + hlen);
10149
10150 hwcksum_dbg_partial_forced++;
10151 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
10152 }
10153
10154 /*
10155 * Partial checksum offload verification (and adjustment);
10156 * useful to validate and test cases where the hardware
10157 * supports partial checksum offload.
10158 */
10159 if ((m->m_pkthdr.csum_flags &
10160 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
10161 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
10162 uint32_t rxoff;
10163
10164 /* Start offset must begin after frame header */
10165 rxoff = m->m_pkthdr.csum_rx_start;
10166 if (hlen > rxoff) {
10167 hwcksum_dbg_bad_rxoff++;
10168 if (dlil_verbose) {
10169 DLIL_PRINTF("%s: partial cksum start offset %d "
10170 "is less than frame header length %d for "
10171 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
10172 (uint64_t)VM_KERNEL_ADDRPERM(m));
10173 }
10174 return;
10175 }
10176 rxoff -= hlen;
10177
10178 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10179 /*
10180 * Compute the expected 16-bit 1's complement sum;
10181 * skip this if we've already computed it above
10182 * when partial checksum offload is forced.
10183 */
10184 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
10185
10186 /* Hardware or driver is buggy */
10187 if (sum != m->m_pkthdr.csum_rx_val) {
10188 hwcksum_dbg_bad_cksum++;
10189 if (dlil_verbose) {
10190 DLIL_PRINTF("%s: bad partial cksum value "
10191 "0x%x (expected 0x%x) for mbuf "
10192 "0x%llx [rx_start %d]\n",
10193 if_name(ifp),
10194 m->m_pkthdr.csum_rx_val, sum,
10195 (uint64_t)VM_KERNEL_ADDRPERM(m),
10196 m->m_pkthdr.csum_rx_start);
10197 }
10198 return;
10199 }
10200 }
10201 hwcksum_dbg_verified++;
10202
10203 /*
10204 * This code allows us to emulate various hardware that
10205 * performs the 16-bit 1's complement sum beginning at
10206 * various start offset values.
10207 */
10208 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
10209 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
10210
10211 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
10212 return;
10213 }
10214
10215 sum = m_adj_sum16(m, rxoff, aoff,
10216 m_pktlen(m) - aoff, sum);
10217
10218 m->m_pkthdr.csum_rx_val = sum;
10219 m->m_pkthdr.csum_rx_start = (uint16_t)(aoff + hlen);
10220
10221 hwcksum_dbg_adjusted++;
10222 }
10223 }
10224 }
10225
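/*
 * Note on the debug path above: when HWCKSUM_DBG_PARTIAL_RXOFF_ADJ is set,
 * m_adj_sum16() re-bases the verified 16-bit 1's complement sum so that it
 * looks as if the hardware had started summing at the adjusted offset
 * instead of the real rx start.  Conceptually, because partial checksums
 * fold additively, the 1's complement sum of the bytes between the old and
 * new start offsets is folded out of (or back into) the running sum, e.g.
 * for aoff > rxoff:
 *
 *     sum(aoff, len) == sum(rxoff, len) minus sum(rxoff, aoff)
 *                       (1's complement arithmetic, with end-around carry)
 *
 * which is why csum_rx_start is rewritten to aoff + hlen alongside the new
 * csum_rx_val, without rescanning the whole packet.
 */
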
10226 static int
10227 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
10228 {
10229 #pragma unused(arg1, arg2)
10230 u_int32_t i;
10231 int err;
10232
10233 i = hwcksum_dbg_mode;
10234
10235 err = sysctl_handle_int(oidp, &i, 0, req);
10236 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10237 return err;
10238 }
10239
10240 if (hwcksum_dbg == 0) {
10241 return ENODEV;
10242 }
10243
10244 if ((i & ~HWCKSUM_DBG_MASK) != 0) {
10245 return EINVAL;
10246 }
10247
10248 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
10249
10250 return err;
10251 }
10252
10253 static int
10254 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
10255 {
10256 #pragma unused(arg1, arg2)
10257 u_int32_t i;
10258 int err;
10259
10260 i = hwcksum_dbg_partial_rxoff_forced;
10261
10262 err = sysctl_handle_int(oidp, &i, 0, req);
10263 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10264 return err;
10265 }
10266
10267 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10268 return ENODEV;
10269 }
10270
10271 hwcksum_dbg_partial_rxoff_forced = i;
10272
10273 return err;
10274 }
10275
10276 static int
10277 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
10278 {
10279 #pragma unused(arg1, arg2)
10280 u_int32_t i;
10281 int err;
10282
10283 i = hwcksum_dbg_partial_rxoff_adj;
10284
10285 err = sysctl_handle_int(oidp, &i, 0, req);
10286 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10287 return err;
10288 }
10289
10290 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
10291 return ENODEV;
10292 }
10293
10294 hwcksum_dbg_partial_rxoff_adj = i;
10295
10296 return err;
10297 }
10298
10299 static int
10300 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
10301 {
10302 #pragma unused(oidp, arg1, arg2)
10303 int err;
10304
10305 if (req->oldptr == USER_ADDR_NULL) {
	/* nothing to do; SYSCTL_OUT below reports the required length */
10306 }
10307 if (req->newptr != USER_ADDR_NULL) {
10308 return EPERM;
10309 }
10310 err = SYSCTL_OUT(req, &tx_chain_len_stats,
10311 sizeof(struct chain_len_stats));
10312
10313 return err;
10314 }
10315
10316
10317 #if DEBUG || DEVELOPMENT
10318 /* Blob for sum16 verification */
10319 static uint8_t sumdata[] = {
10320 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
10321 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
10322 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
10323 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
10324 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
10325 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
10326 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
10327 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
10328 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
10329 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
10330 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
10331 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
10332 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
10333 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
10334 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
10335 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
10336 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
10337 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
10338 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
10339 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
10340 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
10341 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
10342 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
10343 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
10344 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
10345 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
10346 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
10347 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
10348 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
10349 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
10350 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
10351 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
10352 0xc8, 0x28, 0x02, 0x00, 0x00
10353 };
10354
10355 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
10356 static struct {
10357 boolean_t init;
10358 uint16_t len;
10359 uint16_t sumr; /* reference */
10360 uint16_t sumrp; /* reference, precomputed */
10361 } sumtbl[] = {
10362 { FALSE, 0, 0, 0x0000 },
10363 { FALSE, 1, 0, 0x001f },
10364 { FALSE, 2, 0, 0x8b1f },
10365 { FALSE, 3, 0, 0x8b27 },
10366 { FALSE, 7, 0, 0x790e },
10367 { FALSE, 11, 0, 0xcb6d },
10368 { FALSE, 20, 0, 0x20dd },
10369 { FALSE, 27, 0, 0xbabd },
10370 { FALSE, 32, 0, 0xf3e8 },
10371 { FALSE, 37, 0, 0x197d },
10372 { FALSE, 43, 0, 0x9eae },
10373 { FALSE, 64, 0, 0x4678 },
10374 { FALSE, 127, 0, 0x9399 },
10375 { FALSE, 256, 0, 0xd147 },
10376 { FALSE, 325, 0, 0x0358 },
10377 };
10378 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
10379
10380 static void
10381 dlil_verify_sum16(void)
10382 {
10383 struct mbuf *m;
10384 uint8_t *buf;
10385 int n;
10386
10387 /* Make sure test data plus extra room for alignment fits in cluster */
10388 _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
10389
10390 kprintf("DLIL: running SUM16 self-tests ... ");
10391
10392 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
10393 m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
10394
10395 buf = mtod(m, uint8_t *); /* base address */
10396
10397 for (n = 0; n < SUMTBL_MAX; n++) {
10398 uint16_t len = sumtbl[n].len;
10399 int i;
10400
10401 /* Verify for all possible alignments */
10402 for (i = 0; i < (int)sizeof(uint64_t); i++) {
10403 uint16_t sum, sumr;
10404 uint8_t *c;
10405
10406 /* Copy over test data to mbuf */
10407 VERIFY(len <= sizeof(sumdata));
10408 c = buf + i;
10409 bcopy(sumdata, c, len);
10410
10411 /* Zero-offset test (align by data pointer) */
10412 m->m_data = (caddr_t)c;
10413 m->m_len = len;
10414 sum = m_sum16(m, 0, len);
10415
10416 if (!sumtbl[n].init) {
10417 sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
10418 sumtbl[n].sumr = sumr;
10419 sumtbl[n].init = TRUE;
10420 } else {
10421 sumr = sumtbl[n].sumr;
10422 }
10423
10424 /* Something is horribly broken; stop now */
10425 if (sumr != sumtbl[n].sumrp) {
10426 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
10427 "for len=%d align=%d sum=0x%04x "
10428 "[expected=0x%04x]\n", __func__,
10429 len, i, sum, sumr);
10430 /* NOTREACHED */
10431 } else if (sum != sumr) {
10432 panic_plain("\n%s: broken m_sum16() for len=%d "
10433 "align=%d sum=0x%04x [expected=0x%04x]\n",
10434 __func__, len, i, sum, sumr);
10435 /* NOTREACHED */
10436 }
10437
10438 /* Alignment test by offset (fixed data pointer) */
10439 m->m_data = (caddr_t)buf;
10440 m->m_len = i + len;
10441 sum = m_sum16(m, i, len);
10442
10443 /* Something is horribly broken; stop now */
10444 if (sum != sumr) {
10445 panic_plain("\n%s: broken m_sum16() for len=%d "
10446 "offset=%d sum=0x%04x [expected=0x%04x]\n",
10447 __func__, len, i, sum, sumr);
10448 /* NOTREACHED */
10449 }
10450 #if INET
10451 /* Simple sum16 contiguous buffer test by alignment */
10452 sum = b_sum16(c, len);
10453
10454 /* Something is horribly broken; stop now */
10455 if (sum != sumr) {
10456 panic_plain("\n%s: broken b_sum16() for len=%d "
10457 "align=%d sum=0x%04x [expected=0x%04x]\n",
10458 __func__, len, i, sum, sumr);
10459 /* NOTREACHED */
10460 }
10461 #endif /* INET */
10462 }
10463 }
10464 m_freem(m);
10465
10466 kprintf("PASSED\n");
10467 }
10468 #endif /* DEBUG || DEVELOPMENT */
10469
10470 #define CASE_STRINGIFY(x) case x: return #x
10471
10472 __private_extern__ const char *
10473 dlil_kev_dl_code_str(u_int32_t event_code)
10474 {
10475 switch (event_code) {
10476 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
10477 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
10478 CASE_STRINGIFY(KEV_DL_SIFMTU);
10479 CASE_STRINGIFY(KEV_DL_SIFPHYS);
10480 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
10481 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
10482 CASE_STRINGIFY(KEV_DL_ADDMULTI);
10483 CASE_STRINGIFY(KEV_DL_DELMULTI);
10484 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
10485 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
10486 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
10487 CASE_STRINGIFY(KEV_DL_LINK_OFF);
10488 CASE_STRINGIFY(KEV_DL_LINK_ON);
10489 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
10490 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
10491 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
10492 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
10493 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
10494 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
10495 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
10496 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
10497 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
10498 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
10499 CASE_STRINGIFY(KEV_DL_ISSUES);
10500 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
10501 default:
10502 break;
10503 }
10504 return "";
10505 }
10506
10507 static void
10508 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
10509 {
10510 #pragma unused(arg1)
10511 struct ifnet *ifp = arg0;
10512
10513 if (ifnet_is_attached(ifp, 1)) {
10514 nstat_ifnet_threshold_reached(ifp->if_index);
10515 ifnet_decr_iorefcnt(ifp);
10516 }
10517 }
10518
10519 void
10520 ifnet_notify_data_threshold(struct ifnet *ifp)
10521 {
10522 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
10523 uint64_t oldbytes = ifp->if_dt_bytes;
10524
10525 ASSERT(ifp->if_dt_tcall != NULL);
10526
10527 /*
10528 * If we went over the threshold, notify NetworkStatistics.
10529 * We rate-limit it based on the threshold interval value.
10530 */
10531 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
10532 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
10533 !thread_call_isactive(ifp->if_dt_tcall)) {
10534 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
10535 uint64_t now = mach_absolute_time(), deadline = now;
10536 uint64_t ival;
10537
10538 if (tival != 0) {
10539 nanoseconds_to_absolutetime(tival, &ival);
10540 clock_deadline_for_periodic_event(ival, now, &deadline);
10541 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
10542 deadline);
10543 } else {
10544 (void) thread_call_enter(ifp->if_dt_tcall);
10545 }
10546 }
10547 }
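/*
 * Note on ifnet_notify_data_threshold() above: the OSCompareAndSwap64() on
 * if_dt_bytes ensures that only the thread which observes the threshold
 * crossing schedules the notification, and thread_call_isactive() together
 * with the clock_deadline_for_periodic_event() deadline keep the nstat
 * callout to at most one firing per threshold_interval seconds (or an
 * immediate firing when the interval is zero).
 */
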
10548
10549 #if (DEVELOPMENT || DEBUG)
10550 /*
10551 * The sysctl variable name contains the input parameters of
10552 * ifnet_get_keepalive_offload_frames()
10553 * ifp (interface index): name[0]
10554 * frames_array_count: name[1]
10555 * frame_data_offset: name[2]
10556 * The return length gives used_frames_count
10557 */
10558 static int
10559 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
10560 {
10561 #pragma unused(oidp)
10562 int *name = (int *)arg1;
10563 u_int namelen = arg2;
10564 int idx;
10565 ifnet_t ifp = NULL;
10566 u_int32_t frames_array_count;
10567 size_t frame_data_offset;
10568 u_int32_t used_frames_count;
10569 struct ifnet_keepalive_offload_frame *frames_array = NULL;
10570 int error = 0;
10571 u_int32_t i;
10572
10573 /*
10574 * Only root can look at other people's TCP frames
10575 */
10576 error = proc_suser(current_proc());
10577 if (error != 0) {
10578 goto done;
10579 }
10580 /*
10581 * Validate the input parameters
10582 */
10583 if (req->newptr != USER_ADDR_NULL) {
10584 error = EPERM;
10585 goto done;
10586 }
10587 if (namelen != 3) {
10588 error = EINVAL;
10589 goto done;
10590 }
10591 if (req->oldptr == USER_ADDR_NULL) {
10592 error = EINVAL;
10593 goto done;
10594 }
10595 if (req->oldlen == 0) {
10596 error = EINVAL;
10597 goto done;
10598 }
10599 idx = name[0];
10600 frames_array_count = name[1];
10601 frame_data_offset = name[2];
10602
10603 /* Make sure the passed buffer is large enough */
10604 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
10605 req->oldlen) {
10606 error = ENOMEM;
10607 goto done;
10608 }
10609
10610 ifnet_head_lock_shared();
10611 if (!IF_INDEX_IN_RANGE(idx)) {
10612 ifnet_head_done();
10613 error = ENOENT;
10614 goto done;
10615 }
10616 ifp = ifindex2ifnet[idx];
10617 ifnet_head_done();
10618
10619 frames_array = _MALLOC(frames_array_count *
10620 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
10621 if (frames_array == NULL) {
10622 error = ENOMEM;
10623 goto done;
10624 }
10625
10626 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
10627 frames_array_count, frame_data_offset, &used_frames_count);
10628 if (error != 0) {
10629 DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
10630 __func__, error);
10631 goto done;
10632 }
10633
10634 for (i = 0; i < used_frames_count; i++) {
10635 error = SYSCTL_OUT(req, frames_array + i,
10636 sizeof(struct ifnet_keepalive_offload_frame));
10637 if (error != 0) {
10638 goto done;
10639 }
10640 }
10641 done:
10642 if (frames_array != NULL) {
10643 _FREE(frames_array, M_TEMP);
10644 }
10645 return error;
10646 }
10647 #endif /* DEVELOPMENT || DEBUG */
10648
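/*
 * User-space sketch for the DEVELOPMENT/DEBUG handler above.  The three
 * trailing name[] integers are documented in the block comment before
 * sysctl_get_kao_frames(); the "net.link.generic.system.get_kao_frames"
 * node name and the private ifnet_keepalive_offload_frame layout are
 * assumptions here -- the registration and structure live elsewhere:
 *
 *     int mib[CTL_MAXNAME] = { 0 };
 *     size_t miblen = CTL_MAXNAME;
 *     struct ifnet_keepalive_offload_frame frames[8];
 *     size_t len = sizeof(frames);
 *
 *     if (sysctlnametomib("net.link.generic.system.get_kao_frames",
 *         mib, &miblen) == -1)
 *             err(1, "sysctlnametomib");
 *     mib[miblen + 0] = (int)if_nametoindex("en0");   -- interface index
 *     mib[miblen + 1] = 8;                            -- frames_array_count
 *     mib[miblen + 2] = 0;                            -- frame_data_offset
 *     if (sysctl(mib, (u_int)miblen + 3, frames, &len, NULL, 0) == -1)
 *             err(1, "sysctl");
 *     -- used_frames_count == len / sizeof(frames[0])
 */
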
10649 void
10650 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
10651 struct ifnet *ifp)
10652 {
10653 tcp_update_stats_per_flow(ifs, ifp);
10654 }
10655
10656 static inline u_int32_t
10657 _set_flags(u_int32_t *flags_p, u_int32_t set_flags)
10658 {
10659 return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
10660 }
10661
10662 static inline void
10663 _clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
10664 {
10665 OSBitAndAtomic(~clear_flags, flags_p);
10666 }
10667
10668 __private_extern__ u_int32_t
10669 if_set_eflags(ifnet_t interface, u_int32_t set_flags)
10670 {
10671 return _set_flags(&interface->if_eflags, set_flags);
10672 }
10673
10674 __private_extern__ void
10675 if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
10676 {
10677 _clear_flags(&interface->if_eflags, clear_flags);
10678 }
10679
10680 __private_extern__ u_int32_t
10681 if_set_xflags(ifnet_t interface, u_int32_t set_flags)
10682 {
10683 return _set_flags(&interface->if_xflags, set_flags);
10684 }
10685
10686 __private_extern__ void
10687 if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
10688 {
10689 _clear_flags(&interface->if_xflags, clear_flags);
10690 }