bsd/net/dlil.c (apple/xnu, release xnu-4570.41.2)
1 /*
2 * Copyright (c) 1999-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_llreach.h>
71 #include <net/kpi_interfacefilter.h>
72 #include <net/classq/classq.h>
73 #include <net/classq/classq_sfb.h>
74 #include <net/flowhash.h>
75 #include <net/ntstat.h>
76 #include <net/if_llatbl.h>
77 #include <net/net_api_stats.h>
78
79 #if INET
80 #include <netinet/in_var.h>
81 #include <netinet/igmp_var.h>
82 #include <netinet/ip_var.h>
83 #include <netinet/tcp.h>
84 #include <netinet/tcp_var.h>
85 #include <netinet/udp.h>
86 #include <netinet/udp_var.h>
87 #include <netinet/if_ether.h>
88 #include <netinet/in_pcb.h>
89 #include <netinet/in_tclass.h>
90 #endif /* INET */
91
92 #if INET6
93 #include <netinet6/in6_var.h>
94 #include <netinet6/nd6.h>
95 #include <netinet6/mld6_var.h>
96 #include <netinet6/scope6_var.h>
97 #endif /* INET6 */
98
99 #include <libkern/OSAtomic.h>
100 #include <libkern/tree.h>
101
102 #include <dev/random/randomdev.h>
103 #include <machine/machine_routines.h>
104
105 #include <mach/thread_act.h>
106 #include <mach/sdt.h>
107
108 #if CONFIG_MACF
109 #include <sys/kauth.h>
110 #include <security/mac_framework.h>
111 #include <net/ethernet.h>
112 #include <net/firewire.h>
113 #endif
114
115 #if PF
116 #include <net/pfvar.h>
117 #endif /* PF */
118 #include <net/pktsched/pktsched.h>
119
120 #if NECP
121 #include <net/necp.h>
122 #endif /* NECP */
123
124
125 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
126 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
127 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
128 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
129 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
130
131 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
132 #define MAX_LINKADDR 4 /* LONGWORDS */
133 #define M_NKE M_IFADDR
134
135 #if 1
136 #define DLIL_PRINTF printf
137 #else
138 #define DLIL_PRINTF kprintf
139 #endif
140
141 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
142 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
143
144 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
145 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
146
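/*
 * Illustrative expansion (not part of the original source): each of the
 * macros above is a compile-time check that the named counter starts on
 * an 8-byte boundary, e.g.
 *
 *	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
 *	// expands to:
 *	// _CASSERT(!(offsetof(struct if_data_internal, ifi_ipackets) %
 *	//     sizeof (u_int64_t)));
 *
 * dlil_init() invokes these for every 64-bit statistic so that atomic
 * 64-bit updates never operate on a misaligned field.
 */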
147 enum {
148 kProtoKPI_v1 = 1,
149 kProtoKPI_v2 = 2
150 };
151
152 /*
153 * List of if_proto structures in if_proto_hash[] is protected by
154 * the ifnet lock. The rest of the fields are initialized at protocol
155 * attach time and never change, thus no lock required as long as
156 * a reference to it is valid, via if_proto_ref().
157 */
158 struct if_proto {
159 SLIST_ENTRY(if_proto) next_hash;
160 u_int32_t refcount;
161 u_int32_t detached;
162 struct ifnet *ifp;
163 protocol_family_t protocol_family;
164 int proto_kpi;
165 union {
166 struct {
167 proto_media_input input;
168 proto_media_preout pre_output;
169 proto_media_event event;
170 proto_media_ioctl ioctl;
171 proto_media_detached detached;
172 proto_media_resolve_multi resolve_multi;
173 proto_media_send_arp send_arp;
174 } v1;
175 struct {
176 proto_media_input_v2 input;
177 proto_media_preout pre_output;
178 proto_media_event event;
179 proto_media_ioctl ioctl;
180 proto_media_detached detached;
181 proto_media_resolve_multi resolve_multi;
182 proto_media_send_arp send_arp;
183 } v2;
184 } kpi;
185 };
186
187 SLIST_HEAD(proto_hash_entry, if_proto);
188
189 #define DLIL_SDLDATALEN \
190 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
191
192 struct dlil_ifnet {
193 struct ifnet dl_if; /* public ifnet */
194 /*
195 * DLIL private fields, protected by dl_if_lock
196 */
197 decl_lck_mtx_data(, dl_if_lock);
198 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
199 u_int32_t dl_if_flags; /* flags (below) */
200 u_int32_t dl_if_refcnt; /* refcnt */
201 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
202 void *dl_if_uniqueid; /* unique interface id */
203 size_t dl_if_uniqueid_len; /* length of the unique id */
204 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
205 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
206 struct {
207 struct ifaddr ifa; /* lladdr ifa */
208 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
209 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
210 } dl_if_lladdr;
211 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
212 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
213 ctrace_t dl_if_attach; /* attach PC stacktrace */
214 ctrace_t dl_if_detach; /* detach PC stacktrace */
215 };
216
217 /* Values for dl_if_flags (private to DLIL) */
218 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
219 #define DLIF_REUSE 0x2 /* DLIL ifnet recycled, ifnet is not new */
220 #define DLIF_DEBUG 0x4 /* has debugging info */
221
222 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
223
224 /* For gdb */
225 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
226
227 struct dlil_ifnet_dbg {
228 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
229 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
230 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
231 /*
232 * Circular lists of ifnet_{reference,release} callers.
233 */
234 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
235 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
236 };
237
238 #define DLIL_TO_IFP(s) (&s->dl_if)
239 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
240
241 struct ifnet_filter {
242 TAILQ_ENTRY(ifnet_filter) filt_next;
243 u_int32_t filt_skip;
244 u_int32_t filt_flags;
245 ifnet_t filt_ifp;
246 const char *filt_name;
247 void *filt_cookie;
248 protocol_family_t filt_protocol;
249 iff_input_func filt_input;
250 iff_output_func filt_output;
251 iff_event_func filt_event;
252 iff_ioctl_func filt_ioctl;
253 iff_detached_func filt_detached;
254 };
255
256 struct proto_input_entry;
257
258 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
259 static lck_grp_t *dlil_lock_group;
260 lck_grp_t *ifnet_lock_group;
261 static lck_grp_t *ifnet_head_lock_group;
262 static lck_grp_t *ifnet_snd_lock_group;
263 static lck_grp_t *ifnet_rcv_lock_group;
264 lck_attr_t *ifnet_lock_attr;
265 decl_lck_rw_data(static, ifnet_head_lock);
266 decl_lck_mtx_data(static, dlil_ifnet_lock);
267 u_int32_t dlil_filter_disable_tso_count = 0;
268
269 #if DEBUG
270 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
271 #else
272 static unsigned int ifnet_debug; /* debugging (disabled) */
273 #endif /* !DEBUG */
274 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
275 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
276 static struct zone *dlif_zone; /* zone for dlil_ifnet */
277
278 #define DLIF_ZONE_MAX 64 /* maximum elements in zone */
279 #define DLIF_ZONE_NAME "ifnet" /* zone name */
280
281 static unsigned int dlif_filt_size; /* size of ifnet_filter */
282 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
283
284 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
285 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
286
287 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
288 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
289
290 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
291 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
292
293 static unsigned int dlif_proto_size; /* size of if_proto */
294 static struct zone *dlif_proto_zone; /* zone for if_proto */
295
296 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
297 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
298
299 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
300 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
301 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
302
303 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
304 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
305
306 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
307 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
308 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
309
310 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
311 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
312
313 static u_int32_t net_rtref;
314
315 static struct dlil_main_threading_info dlil_main_input_thread_info;
316 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
317 (struct dlil_threading_info *)&dlil_main_input_thread_info;
318
319 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
320 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
321 static void dlil_if_trace(struct dlil_ifnet *, int);
322 static void if_proto_ref(struct if_proto *);
323 static void if_proto_free(struct if_proto *);
324 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
325 static int dlil_ifp_proto_count(struct ifnet *);
326 static void if_flt_monitor_busy(struct ifnet *);
327 static void if_flt_monitor_unbusy(struct ifnet *);
328 static void if_flt_monitor_enter(struct ifnet *);
329 static void if_flt_monitor_leave(struct ifnet *);
330 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
331 char **, protocol_family_t);
332 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
333 protocol_family_t);
334 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
335 const struct sockaddr_dl *);
336 static int ifnet_lookup(struct ifnet *);
337 static void if_purgeaddrs(struct ifnet *);
338
339 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
340 struct mbuf *, char *);
341 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
342 struct mbuf *);
343 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
344 mbuf_t *, const struct sockaddr *, void *, char *, char *);
345 static void ifproto_media_event(struct ifnet *, protocol_family_t,
346 const struct kev_msg *);
347 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
348 unsigned long, void *);
349 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
350 struct sockaddr_dl *, size_t);
351 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
352 const struct sockaddr_dl *, const struct sockaddr *,
353 const struct sockaddr_dl *, const struct sockaddr *);
354
355 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
356 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
357 boolean_t poll, struct thread *tp);
358 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
359 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
360 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
361 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
362 protocol_family_t *);
363 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
364 const struct ifnet_demux_desc *, u_int32_t);
365 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
366 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
367 #if CONFIG_EMBEDDED
368 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
369 const struct sockaddr *, const char *, const char *,
370 u_int32_t *, u_int32_t *);
371 #else
372 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
373 const struct sockaddr *, const char *, const char *);
374 #endif /* CONFIG_EMBEDDED */
375 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
376 const struct sockaddr *, const char *, const char *,
377 u_int32_t *, u_int32_t *);
378 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
379 static void ifp_if_free(struct ifnet *);
380 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
381 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
382 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
383
384 static void dlil_main_input_thread_func(void *, wait_result_t);
385 static void dlil_input_thread_func(void *, wait_result_t);
386 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
387 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
388 static void dlil_terminate_input_thread(struct dlil_threading_info *);
389 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
390 struct dlil_threading_info *, boolean_t);
391 static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
392 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
393 u_int32_t, ifnet_model_t, boolean_t);
394 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
395 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
396
397 #if DEBUG || DEVELOPMENT
398 static void dlil_verify_sum16(void);
399 #endif /* DEBUG || DEVELOPMENT */
400 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
401 protocol_family_t);
402 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
403 protocol_family_t);
404
405 static void ifnet_detacher_thread_func(void *, wait_result_t);
406 static int ifnet_detacher_thread_cont(int);
407 static void ifnet_detach_final(struct ifnet *);
408 static void ifnet_detaching_enqueue(struct ifnet *);
409 static struct ifnet *ifnet_detaching_dequeue(void);
410
411 static void ifnet_start_thread_fn(void *, wait_result_t);
412 static void ifnet_poll_thread_fn(void *, wait_result_t);
413 static void ifnet_poll(struct ifnet *);
414 static errno_t ifnet_enqueue_common(struct ifnet *, void *,
415 classq_pkt_type_t, boolean_t, boolean_t *);
416
417 static void ifp_src_route_copyout(struct ifnet *, struct route *);
418 static void ifp_src_route_copyin(struct ifnet *, struct route *);
419 #if INET6
420 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
421 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
422 #endif /* INET6 */
423
424 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
425 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
426 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
427 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
428 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
429 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
430 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
431 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
432 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
433 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
434 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
435 static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;
436
437 struct chain_len_stats tx_chain_len_stats;
438 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
439
440 #if TEST_INPUT_THREAD_TERMINATION
441 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
442 #endif /* TEST_INPUT_THREAD_TERMINATION */
443
444 /* The following are protected by dlil_ifnet_lock */
445 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
446 static u_int32_t ifnet_detaching_cnt;
447 static void *ifnet_delayed_run; /* wait channel for detaching thread */
448
449 decl_lck_mtx_data(static, ifnet_fc_lock);
450
451 static uint32_t ifnet_flowhash_seed;
452
453 struct ifnet_flowhash_key {
454 char ifk_name[IFNAMSIZ];
455 uint32_t ifk_unit;
456 uint32_t ifk_flags;
457 uint32_t ifk_eflags;
458 uint32_t ifk_capabilities;
459 uint32_t ifk_capenable;
460 uint32_t ifk_output_sched_model;
461 uint32_t ifk_rand1;
462 uint32_t ifk_rand2;
463 };
464
465 /* Flow control entry per interface */
466 struct ifnet_fc_entry {
467 RB_ENTRY(ifnet_fc_entry) ifce_entry;
468 u_int32_t ifce_flowhash;
469 struct ifnet *ifce_ifp;
470 };
471
472 static uint32_t ifnet_calc_flowhash(struct ifnet *);
473 static int ifce_cmp(const struct ifnet_fc_entry *,
474 const struct ifnet_fc_entry *);
475 static int ifnet_fc_add(struct ifnet *);
476 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
477 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
478
479 /* protected by ifnet_fc_lock */
480 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
481 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
482 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
483
484 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
485 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
486
487 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
488 #define IFNET_FC_ZONE_MAX 32
489
490 extern void bpfdetach(struct ifnet *);
491 extern void proto_input_run(void);
492
493 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
494 u_int32_t flags);
495 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
496 u_int32_t flags);
497
498 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
499
500 #if CONFIG_MACF
501 #ifdef CONFIG_EMBEDDED
502 int dlil_lladdr_ckreq = 1;
503 #else
504 int dlil_lladdr_ckreq = 0;
505 #endif
506 #endif
507
508 #if DEBUG
509 int dlil_verbose = 1;
510 #else
511 int dlil_verbose = 0;
512 #endif /* DEBUG */
513 #if IFNET_INPUT_SANITY_CHK
514 /* sanity checking of input packet lists received */
515 static u_int32_t dlil_input_sanity_check = 0;
516 #endif /* IFNET_INPUT_SANITY_CHK */
517 /* rate limit debug messages */
518 struct timespec dlil_dbgrate = { 1, 0 };
519
520 SYSCTL_DECL(_net_link_generic_system);
521
522 #if CONFIG_MACF
523 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
524 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
525 "Require MACF system info check to expose link-layer address");
526 #endif
527
528 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
529 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
530
531 #define IF_SNDQ_MINLEN 32
532 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
533 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
534 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
535 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
536
537 #define IF_RCVQ_MINLEN 32
538 #define IF_RCVQ_MAXLEN 256
539 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
540 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
541 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
542 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
543
544 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
545 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
546 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
547 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
548 "ilog2 of EWMA decay rate of avg inbound packets");
549
550 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
551 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
552 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
553 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
554 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
555 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
556 "Q", "input poll mode freeze time");
557
558 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
559 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
560 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
561 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
562 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
563 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
564 "Q", "input poll sampling time");
565
566 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
567 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
568 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
569 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
570 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
571 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
572 "Q", "input poll interval (time)");
573
574 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
575 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
576 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
577 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
578 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
579
580 #define IF_RXPOLL_WLOWAT 10
581 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
582 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
583 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
584 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
585 "I", "input poll wakeup low watermark");
586
587 #define IF_RXPOLL_WHIWAT 100
588 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
589 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
590 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
591 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
592 "I", "input poll wakeup high watermark");
593
594 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
595 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
596 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
597 "max packets per poll call");
598
599 static u_int32_t if_rxpoll = 1;
600 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
601 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
602 sysctl_rxpoll, "I", "enable opportunistic input polling");
603
604 #if TEST_INPUT_THREAD_TERMINATION
605 static u_int32_t if_input_thread_termination_spin = 0;
606 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
607 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
608 &if_input_thread_termination_spin, 0,
609 sysctl_input_thread_termination_spin,
610 "I", "input thread termination spin limit");
611 #endif /* TEST_INPUT_THREAD_TERMINATION */
612
613 static u_int32_t cur_dlil_input_threads = 0;
614 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
615 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
616 "Current number of DLIL input threads");
617
618 #if IFNET_INPUT_SANITY_CHK
619 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
620 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
621 "Turn on sanity checking in DLIL input");
622 #endif /* IFNET_INPUT_SANITY_CHK */
623
624 static u_int32_t if_flowadv = 1;
625 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
626 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
627 "enable flow-advisory mechanism");
628
629 static u_int32_t if_delaybased_queue = 1;
630 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
631 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
632 "enable delay based dynamic queue sizing");
633
634 static uint64_t hwcksum_in_invalidated = 0;
635 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
636 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
637 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
638
639 uint32_t hwcksum_dbg = 0;
640 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
641 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
642 "enable hardware cksum debugging");
643
644 u_int32_t ifnet_start_delayed = 0;
645 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
646 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
647 "number of times start was delayed");
648
649 u_int32_t ifnet_delay_start_disabled = 0;
650 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
651 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
652 "number of times start was delayed");
653
654 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
655 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
656 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
657 #define HWCKSUM_DBG_MASK \
658 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
659 HWCKSUM_DBG_FINALIZE_FORCED)
660
661 static uint32_t hwcksum_dbg_mode = 0;
662 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
663 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
664 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
665
666 static uint64_t hwcksum_dbg_partial_forced = 0;
667 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
668 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
669 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
670
671 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
672 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
673 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
674 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
675
676 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
677 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
678 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
679 &hwcksum_dbg_partial_rxoff_forced, 0,
680 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
681 "forced partial cksum rx offset");
682
683 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
684 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
685 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
686 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
687 "adjusted partial cksum rx offset");
688
689 static uint64_t hwcksum_dbg_verified = 0;
690 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
691 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
692 &hwcksum_dbg_verified, "packets verified for having good checksum");
693
694 static uint64_t hwcksum_dbg_bad_cksum = 0;
695 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
696 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
697 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
698
699 static uint64_t hwcksum_dbg_bad_rxoff = 0;
700 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
701 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
702 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
703
704 static uint64_t hwcksum_dbg_adjusted = 0;
705 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
706 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
707 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
708
709 static uint64_t hwcksum_dbg_finalized_hdr = 0;
710 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
711 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
712 &hwcksum_dbg_finalized_hdr, "finalized headers");
713
714 static uint64_t hwcksum_dbg_finalized_data = 0;
715 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
716 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
717 &hwcksum_dbg_finalized_data, "finalized payloads");
718
719 uint32_t hwcksum_tx = 1;
720 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
721 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
722 "enable transmit hardware checksum offload");
723
724 uint32_t hwcksum_rx = 1;
725 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
726 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
727 "enable receive hardware checksum offload");
728
729 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
730 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
731 sysctl_tx_chain_len_stats, "S", "");
732
733 uint32_t tx_chain_len_count = 0;
734 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
735 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
736
737 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
738 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
739
740 static uint32_t threshold_notify = 1; /* enable/disable */
741 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
742 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
743
744 static uint32_t threshold_interval = 2; /* in seconds */
745 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
746 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
747
748 #if (DEVELOPMENT || DEBUG)
749 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
750 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
751 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
752 #endif /* DEVELOPMENT || DEBUG */
753
754 struct net_api_stats net_api_stats;
755 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
756 &net_api_stats, net_api_stats, "");
757
758
759 unsigned int net_rxpoll = 1;
760 unsigned int net_affinity = 1;
761 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
762
763 extern u_int32_t inject_buckets;
764
765 static lck_grp_attr_t *dlil_grp_attributes = NULL;
766 static lck_attr_t *dlil_lck_attributes = NULL;
767
768 /* DLIL data threshold thread call */
769 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
770
771 static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);
772
773 uint32_t dlil_rcv_mit_pkts_min = 5;
774 uint32_t dlil_rcv_mit_pkts_max = 64;
775 uint32_t dlil_rcv_mit_interval = (500 * 1000);
776
777 #if (DEVELOPMENT || DEBUG)
778 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
779 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
780 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
781 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
782 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
783 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
784 #endif /* DEVELOPMENT || DEBUG */
785
786
787 #define DLIL_INPUT_CHECK(m, ifp) { \
788 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
789 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
790 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
791 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
792 /* NOTREACHED */ \
793 } \
794 }
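/*
 * Hedged usage sketch (not part of the original source): DLIL_INPUT_CHECK()
 * assumes the driver stamped each packet with its receiving interface
 * before handing it to DLIL, roughly:
 *
 *	(void) mbuf_pkthdr_setrcvif(m, ifp);
 *	(void) ifnet_input(ifp, m, &stats);
 *
 * A packet with no pkthdr, a missing rcvif, or an rcvif that does not
 * match the input thread's interface (loopback excepted) is treated as
 * a driver bug and panics.
 */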
795
796 #define DLIL_EWMA(old, new, decay) do { \
797 u_int32_t _avg; \
798 if ((_avg = (old)) > 0) \
799 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
800 else \
801 _avg = (new); \
802 (old) = _avg; \
803 } while (0)
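/*
 * Worked example (illustrative, not from the original source): with
 * decay = 2 the macro keeps a 4-sample exponentially weighted average,
 *
 *	avg = ((avg << 2) - avg + new) >> 2  ==  (3 * avg + new) / 4
 *
 * so avg = 100 and new = 20 yields (300 + 20) / 4 = 80, while a zero
 * average is simply seeded with the new sample.
 */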
804
805 #define MBPS (1ULL * 1000 * 1000)
806 #define GBPS (MBPS * 1000)
807
808 struct rxpoll_time_tbl {
809 u_int64_t speed; /* downlink speed */
810 u_int32_t plowat; /* packets low watermark */
811 u_int32_t phiwat; /* packets high watermark */
812 u_int32_t blowat; /* bytes low watermark */
813 u_int32_t bhiwat; /* bytes high watermark */
814 };
815
816 static struct rxpoll_time_tbl rxpoll_tbl[] = {
817 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
818 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
819 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
820 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
821 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
822 { 0, 0, 0, 0, 0 }
823 };
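/*
 * Reading the table (illustrative, not from the original source): a
 * 100 Mbps downlink gets 10/40 packets and 4 KB/64 KB bytes as its
 * low/high polling watermarks, and anything at or above 1 Gbps reuses
 * those same values. One plausible selection loop (an assumption; the
 * real policy lives in dlil_rxpoll_set_params(), not shown here) is:
 *
 *	int i;
 *	for (i = 0; rxpoll_tbl[i + 1].speed != 0 &&
 *	    rxpoll_tbl[i + 1].speed <= if_bw; i++)
 *		;
 *	// rxpoll_tbl[i] now holds the watermarks for link speed if_bw
 */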
824
825 int
826 proto_hash_value(u_int32_t protocol_family)
827 {
828 /*
829 * dlil_proto_unplumb_all() depends on the mapping between
830 * the hash bucket index and the protocol family defined
831 * here; future changes must be applied there as well.
832 */
833 switch (protocol_family) {
834 case PF_INET:
835 return (0);
836 case PF_INET6:
837 return (1);
838 case PF_VLAN:
839 return (2);
840 case PF_UNSPEC:
841 default:
842 return (3);
843 }
844 }
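/*
 * Illustrative mapping (not part of the original source): the fixed
 * bucket assignment means a lookup always starts at a known slot, e.g.
 *
 *	SLIST_FIRST(&ifp->if_proto_hash[proto_hash_value(PF_INET)]);
 *	// slot 0: PF_INET, slot 1: PF_INET6, slot 2: PF_VLAN,
 *	// slot 3: PF_UNSPEC and all other families
 *
 * dlil_proto_unplumb_all() relies on this same family-to-bucket mapping,
 * which is why the comment above requires the two to stay in sync.
 */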
845
846 /*
847 * Caller must already be holding ifnet lock.
848 */
849 static struct if_proto *
850 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
851 {
852 struct if_proto *proto = NULL;
853 u_int32_t i = proto_hash_value(protocol_family);
854
855 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
856
857 if (ifp->if_proto_hash != NULL)
858 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
859
860 while (proto != NULL && proto->protocol_family != protocol_family)
861 proto = SLIST_NEXT(proto, next_hash);
862
863 if (proto != NULL)
864 if_proto_ref(proto);
865
866 return (proto);
867 }
868
869 static void
870 if_proto_ref(struct if_proto *proto)
871 {
872 atomic_add_32(&proto->refcount, 1);
873 }
874
875 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
876
877 static void
878 if_proto_free(struct if_proto *proto)
879 {
880 u_int32_t oldval;
881 struct ifnet *ifp = proto->ifp;
882 u_int32_t proto_family = proto->protocol_family;
883 struct kev_dl_proto_data ev_pr_data;
884
885 oldval = atomic_add_32_ov(&proto->refcount, -1);
886 if (oldval > 1)
887 return;
888
889 /* No more reference on this, protocol must have been detached */
890 VERIFY(proto->detached);
891
892 if (proto->proto_kpi == kProtoKPI_v1) {
893 if (proto->kpi.v1.detached)
894 proto->kpi.v1.detached(ifp, proto->protocol_family);
895 }
896 if (proto->proto_kpi == kProtoKPI_v2) {
897 if (proto->kpi.v2.detached)
898 proto->kpi.v2.detached(ifp, proto->protocol_family);
899 }
900
901 /*
902 * Cleanup routes that may still be in the routing table for that
903 * interface/protocol pair.
904 */
905 if_rtproto_del(ifp, proto_family);
906
907 /*
908 * The reserved field carries the number of protocols still attached
909 * (subject to change)
910 */
911 ifnet_lock_shared(ifp);
912 ev_pr_data.proto_family = proto_family;
913 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
914 ifnet_lock_done(ifp);
915
916 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
917 (struct net_event_data *)&ev_pr_data,
918 sizeof (struct kev_dl_proto_data));
919
920 zfree(dlif_proto_zone, proto);
921 }
922
923 __private_extern__ void
924 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
925 {
926 #if !MACH_ASSERT
927 #pragma unused(ifp)
928 #endif
929 unsigned int type = 0;
930 int ass = 1;
931
932 switch (what) {
933 case IFNET_LCK_ASSERT_EXCLUSIVE:
934 type = LCK_RW_ASSERT_EXCLUSIVE;
935 break;
936
937 case IFNET_LCK_ASSERT_SHARED:
938 type = LCK_RW_ASSERT_SHARED;
939 break;
940
941 case IFNET_LCK_ASSERT_OWNED:
942 type = LCK_RW_ASSERT_HELD;
943 break;
944
945 case IFNET_LCK_ASSERT_NOTOWNED:
946 /* nothing to do here for RW lock; bypass assert */
947 ass = 0;
948 break;
949
950 default:
951 panic("bad ifnet assert type: %d", what);
952 /* NOTREACHED */
953 }
954 if (ass)
955 LCK_RW_ASSERT(&ifp->if_lock, type);
956 }
957
958 __private_extern__ void
959 ifnet_lock_shared(struct ifnet *ifp)
960 {
961 lck_rw_lock_shared(&ifp->if_lock);
962 }
963
964 __private_extern__ void
965 ifnet_lock_exclusive(struct ifnet *ifp)
966 {
967 lck_rw_lock_exclusive(&ifp->if_lock);
968 }
969
970 __private_extern__ void
971 ifnet_lock_done(struct ifnet *ifp)
972 {
973 lck_rw_done(&ifp->if_lock);
974 }
975
976 #if INET
977 __private_extern__ void
978 if_inetdata_lock_shared(struct ifnet *ifp)
979 {
980 lck_rw_lock_shared(&ifp->if_inetdata_lock);
981 }
982
983 __private_extern__ void
984 if_inetdata_lock_exclusive(struct ifnet *ifp)
985 {
986 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
987 }
988
989 __private_extern__ void
990 if_inetdata_lock_done(struct ifnet *ifp)
991 {
992 lck_rw_done(&ifp->if_inetdata_lock);
993 }
994 #endif
995
996 #if INET6
997 __private_extern__ void
998 if_inet6data_lock_shared(struct ifnet *ifp)
999 {
1000 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1001 }
1002
1003 __private_extern__ void
1004 if_inet6data_lock_exclusive(struct ifnet *ifp)
1005 {
1006 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1007 }
1008
1009 __private_extern__ void
1010 if_inet6data_lock_done(struct ifnet *ifp)
1011 {
1012 lck_rw_done(&ifp->if_inet6data_lock);
1013 }
1014 #endif
1015
1016 __private_extern__ void
1017 ifnet_head_lock_shared(void)
1018 {
1019 lck_rw_lock_shared(&ifnet_head_lock);
1020 }
1021
1022 __private_extern__ void
1023 ifnet_head_lock_exclusive(void)
1024 {
1025 lck_rw_lock_exclusive(&ifnet_head_lock);
1026 }
1027
1028 __private_extern__ void
1029 ifnet_head_done(void)
1030 {
1031 lck_rw_done(&ifnet_head_lock);
1032 }
1033
1034 __private_extern__ void
1035 ifnet_head_assert_exclusive(void)
1036 {
1037 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1038 }
1039
1040 /*
1041 * Caller must already be holding ifnet lock.
1042 */
1043 static int
1044 dlil_ifp_proto_count(struct ifnet *ifp)
1045 {
1046 int i, count = 0;
1047
1048 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1049
1050 if (ifp->if_proto_hash == NULL)
1051 goto done;
1052
1053 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1054 struct if_proto *proto;
1055 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1056 count++;
1057 }
1058 }
1059 done:
1060 return (count);
1061 }
1062
1063 __private_extern__ void
1064 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1065 u_int32_t event_code, struct net_event_data *event_data,
1066 u_int32_t event_data_len)
1067 {
1068 struct net_event_data ev_data;
1069 struct kev_msg ev_msg;
1070
1071 bzero(&ev_msg, sizeof (ev_msg));
1072 bzero(&ev_data, sizeof (ev_data));
1073 /*
1074 * A net event always starts with a net_event_data structure;
1075 * the caller can either post a simple net event or provide a
1076 * longer event structure.
1077 */
1078 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1079 ev_msg.kev_class = KEV_NETWORK_CLASS;
1080 ev_msg.kev_subclass = event_subclass;
1081 ev_msg.event_code = event_code;
1082
1083 if (event_data == NULL) {
1084 event_data = &ev_data;
1085 event_data_len = sizeof (struct net_event_data);
1086 }
1087
1088 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1089 event_data->if_family = ifp->if_family;
1090 event_data->if_unit = (u_int32_t)ifp->if_unit;
1091
1092 ev_msg.dv[0].data_length = event_data_len;
1093 ev_msg.dv[0].data_ptr = event_data;
1094 ev_msg.dv[1].data_length = 0;
1095
1096 /* Don't update interface generation for quality and RRC state changes */
1097 bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
1098 (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
1099 event_code != KEV_DL_RRC_STATE_CHANGED));
1100
1101 dlil_event_internal(ifp, &ev_msg, update_generation);
1102 }
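/*
 * Hedged usage sketch (not part of the original source): a caller with
 * no extra payload can post a bare event and let dlil_post_msg() supply
 * the interface identity; KEV_DL_LINK_ON is used below purely for
 * illustration:
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * Passing NULL/0 makes the function fall back to its local
 * net_event_data, so every event carries at least if_name, if_family
 * and if_unit.
 */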
1103
1104 __private_extern__ int
1105 dlil_alloc_local_stats(struct ifnet *ifp)
1106 {
1107 int ret = EINVAL;
1108 void *buf, *base, **pbuf;
1109
1110 if (ifp == NULL)
1111 goto end;
1112
1113 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1114 /* allocate tcpstat_local structure */
1115 buf = zalloc(dlif_tcpstat_zone);
1116 if (buf == NULL) {
1117 ret = ENOMEM;
1118 goto end;
1119 }
1120 bzero(buf, dlif_tcpstat_bufsize);
1121
1122 /* Get the 64-bit aligned base address for this object */
1123 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1124 sizeof (u_int64_t));
1125 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1126 ((intptr_t)buf + dlif_tcpstat_bufsize));
1127
1128 /*
1129 * Wind back a pointer size from the aligned base and
1130 * save the original address so we can free it later.
1131 */
1132 pbuf = (void **)((intptr_t)base - sizeof (void *));
1133 *pbuf = buf;
1134 ifp->if_tcp_stat = base;
1135
1136 /* allocate udpstat_local structure */
1137 buf = zalloc(dlif_udpstat_zone);
1138 if (buf == NULL) {
1139 ret = ENOMEM;
1140 goto end;
1141 }
1142 bzero(buf, dlif_udpstat_bufsize);
1143
1144 /* Get the 64-bit aligned base address for this object */
1145 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1146 sizeof (u_int64_t));
1147 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1148 ((intptr_t)buf + dlif_udpstat_bufsize));
1149
1150 /*
1151 * Wind back a pointer size from the aligned base and
1152 * save the original address so we can free it later.
1153 */
1154 pbuf = (void **)((intptr_t)base - sizeof (void *));
1155 *pbuf = buf;
1156 ifp->if_udp_stat = base;
1157
1158 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
1159 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
1160
1161 ret = 0;
1162 }
1163
1164 if (ifp->if_ipv4_stat == NULL) {
1165 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1166 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1167 if (ifp->if_ipv4_stat == NULL) {
1168 ret = ENOMEM;
1169 goto end;
1170 }
1171 }
1172
1173 if (ifp->if_ipv6_stat == NULL) {
1174 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1175 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1176 if (ifp->if_ipv6_stat == NULL) {
1177 ret = ENOMEM;
1178 goto end;
1179 }
1180 }
1181 end:
1182 if (ret != 0) {
1183 if (ifp->if_tcp_stat != NULL) {
1184 pbuf = (void **)
1185 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
1186 zfree(dlif_tcpstat_zone, *pbuf);
1187 ifp->if_tcp_stat = NULL;
1188 }
1189 if (ifp->if_udp_stat != NULL) {
1190 pbuf = (void **)
1191 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
1192 zfree(dlif_udpstat_zone, *pbuf);
1193 ifp->if_udp_stat = NULL;
1194 }
1195 if (ifp->if_ipv4_stat != NULL) {
1196 FREE(ifp->if_ipv4_stat, M_TEMP);
1197 ifp->if_ipv4_stat = NULL;
1198 }
1199 if (ifp->if_ipv6_stat != NULL) {
1200 FREE(ifp->if_ipv6_stat, M_TEMP);
1201 ifp->if_ipv6_stat = NULL;
1202 }
1203 }
1204
1205 return (ret);
1206 }
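/*
 * Illustrative layout (not part of the original source): each stats
 * buffer is over-allocated by sizeof (void *) + sizeof (u_int64_t) so
 * that an 8-byte-aligned base always fits, with the raw zalloc() pointer
 * stashed immediately before it:
 *
 *	buf                    base = P2ROUNDUP(buf + 8, 8)
 *	 |                      |
 *	 v                      v
 *	 +---------+-----------+------------------------------+
 *	 | padding | saved buf | tcpstat_local/udpstat_local  |
 *	 +---------+-----------+------------------------------+
 *
 * The error path above frees by reversing the trick:
 *	pbuf = (void **)((intptr_t)base - sizeof (void *));
 *	zfree(zone, *pbuf);
 */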
1207
1208 static int
1209 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1210 {
1211 thread_continue_t func;
1212 u_int32_t limit;
1213 int error;
1214
1215 /* NULL ifp indicates the main input thread, called at dlil_init time */
1216 if (ifp == NULL) {
1217 func = dlil_main_input_thread_func;
1218 VERIFY(inp == dlil_main_input_thread);
1219 (void) strlcat(inp->input_name,
1220 "main_input", DLIL_THREADNAME_LEN);
1221 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1222 func = dlil_rxpoll_input_thread_func;
1223 VERIFY(inp != dlil_main_input_thread);
1224 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1225 "%s_input_poll", if_name(ifp));
1226 } else {
1227 func = dlil_input_thread_func;
1228 VERIFY(inp != dlil_main_input_thread);
1229 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1230 "%s_input", if_name(ifp));
1231 }
1232 VERIFY(inp->input_thr == THREAD_NULL);
1233
1234 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1235 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1236
1237 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1238 inp->ifp = ifp; /* NULL for main input thread */
1239
1240 net_timerclear(&inp->mode_holdtime);
1241 net_timerclear(&inp->mode_lasttime);
1242 net_timerclear(&inp->sample_holdtime);
1243 net_timerclear(&inp->sample_lasttime);
1244 net_timerclear(&inp->dbg_lasttime);
1245
1246 /*
1247 * For interfaces that support opportunistic polling, set the
1248 * low and high watermarks for outstanding inbound packets/bytes.
1249 * Also define freeze times for transitioning between modes
1250 * and updating the average.
1251 */
1252 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1253 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1254 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1255 } else {
1256 limit = (u_int32_t)-1;
1257 }
1258
1259 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1260 if (inp == dlil_main_input_thread) {
1261 struct dlil_main_threading_info *inpm =
1262 (struct dlil_main_threading_info *)inp;
1263 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1264 }
1265
1266 error = kernel_thread_start(func, inp, &inp->input_thr);
1267 if (error == KERN_SUCCESS) {
1268 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1269 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
1270 /*
1271 * We create an affinity set so that the matching workloop
1272 * thread or the starter thread (for loopback) can be
1273 * scheduled on the same processor set as the input thread.
1274 */
1275 if (net_affinity) {
1276 struct thread *tp = inp->input_thr;
1277 u_int32_t tag;
1278 /*
1279 * Randomize to reduce the probability
1280 * of affinity tag namespace collision.
1281 */
1282 read_frandom(&tag, sizeof (tag));
1283 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1284 thread_reference(tp);
1285 inp->tag = tag;
1286 inp->net_affinity = TRUE;
1287 }
1288 }
1289 } else if (inp == dlil_main_input_thread) {
1290 panic_plain("%s: couldn't create main input thread", __func__);
1291 /* NOTREACHED */
1292 } else {
1293 panic_plain("%s: couldn't create %s input thread", __func__,
1294 if_name(ifp));
1295 /* NOTREACHED */
1296 }
1297 OSAddAtomic(1, &cur_dlil_input_threads);
1298
1299 return (error);
1300 }
1301
1302 #if TEST_INPUT_THREAD_TERMINATION
1303 static int
1304 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1305 {
1306 #pragma unused(arg1, arg2)
1307 uint32_t i;
1308 int err;
1309
1310 i = if_input_thread_termination_spin;
1311
1312 err = sysctl_handle_int(oidp, &i, 0, req);
1313 if (err != 0 || req->newptr == USER_ADDR_NULL)
1314 return (err);
1315
1316 if (net_rxpoll == 0)
1317 return (ENXIO);
1318
1319 if_input_thread_termination_spin = i;
1320 return (err);
1321 }
1322 #endif /* TEST_INPUT_THREAD_TERMINATION */
1323
1324 static void
1325 dlil_clean_threading_info(struct dlil_threading_info *inp)
1326 {
1327 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1328 lck_grp_free(inp->lck_grp);
1329
1330 inp->input_waiting = 0;
1331 inp->wtot = 0;
1332 bzero(inp->input_name, sizeof (inp->input_name));
1333 inp->ifp = NULL;
1334 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1335 qlimit(&inp->rcvq_pkts) = 0;
1336 bzero(&inp->stats, sizeof (inp->stats));
1337
1338 VERIFY(!inp->net_affinity);
1339 inp->input_thr = THREAD_NULL;
1340 VERIFY(inp->wloop_thr == THREAD_NULL);
1341 VERIFY(inp->poll_thr == THREAD_NULL);
1342 VERIFY(inp->tag == 0);
1343
1344 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1345 bzero(&inp->tstats, sizeof (inp->tstats));
1346 bzero(&inp->pstats, sizeof (inp->pstats));
1347 bzero(&inp->sstats, sizeof (inp->sstats));
1348
1349 net_timerclear(&inp->mode_holdtime);
1350 net_timerclear(&inp->mode_lasttime);
1351 net_timerclear(&inp->sample_holdtime);
1352 net_timerclear(&inp->sample_lasttime);
1353 net_timerclear(&inp->dbg_lasttime);
1354
1355 #if IFNET_INPUT_SANITY_CHK
1356 inp->input_mbuf_cnt = 0;
1357 #endif /* IFNET_INPUT_SANITY_CHK */
1358 }
1359
1360 static void
1361 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1362 {
1363 struct ifnet *ifp = inp->ifp;
1364
1365 VERIFY(current_thread() == inp->input_thr);
1366 VERIFY(inp != dlil_main_input_thread);
1367
1368 OSAddAtomic(-1, &cur_dlil_input_threads);
1369
1370 #if TEST_INPUT_THREAD_TERMINATION
1371 { /* do something useless that won't get optimized away */
1372 uint32_t v = 1;
1373 for (uint32_t i = 0;
1374 i < if_input_thread_termination_spin;
1375 i++) {
1376 v = (i + 1) * v;
1377 }
1378 printf("the value is %d\n", v);
1379 }
1380 #endif /* TEST_INPUT_THREAD_TERMINATION */
1381
1382 lck_mtx_lock_spin(&inp->input_lck);
1383 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1384 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1385 wakeup_one((caddr_t)&inp->input_waiting);
1386 lck_mtx_unlock(&inp->input_lck);
1387
1388 /* for the extra refcnt from kernel_thread_start() */
1389 thread_deallocate(current_thread());
1390
1391 if (dlil_verbose) {
1392 printf("%s: input thread terminated\n",
1393 if_name(ifp));
1394 }
1395
1396 /* this is the end */
1397 thread_terminate(current_thread());
1398 /* NOTREACHED */
1399 }
1400
1401 static kern_return_t
1402 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1403 {
1404 thread_affinity_policy_data_t policy;
1405
1406 bzero(&policy, sizeof (policy));
1407 policy.affinity_tag = tag;
1408 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1409 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1410 }
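/*
 * Hedged sketch (assumption, not part of the original source): threads
 * that should share a processor set are simply given the same random
 * affinity tag, e.g.
 *
 *	u_int32_t tag;
 *	read_frandom(&tag, sizeof (tag));
 *	(void) dlil_affinity_set(input_thread, tag);
 *	(void) dlil_affinity_set(workloop_thread, tag);
 *
 * dlil_create_input_thread() performs the first half of this pairing;
 * the matching workloop/starter thread is tagged later using the saved
 * inp->tag.
 */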
1411
1412 void
1413 dlil_init(void)
1414 {
1415 thread_t thread = THREAD_NULL;
1416
1417 /*
1418 * The following fields must be 64-bit aligned for atomic operations.
1419 */
1420 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1421 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1422 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1423 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1424 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1425 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1426 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1427 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1428 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1429 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1430 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1431 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1432 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1433 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1434 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1435
1436 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1437 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1438 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1439 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1440 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1441 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1442 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1443 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1444 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1445 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1446 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1447 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1448 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1449 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1450 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1451
1452 /*
1453 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1454 */
1455 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1456 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1457 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1458 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1459 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1460 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1461 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1462 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1463 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1464 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1465 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1466 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1467 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1468 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1469
1470 /*
1471 * ... as well as the mbuf checksum flags counterparts.
1472 */
1473 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1474 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1475 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1476 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1477 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1478 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1479 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1480 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1481 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1482 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1483 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1484
1485 /*
1486 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1487 */
1488 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1489 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1490
1491 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1492 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1493 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1494 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1495
1496 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1497 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1498 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1499
1500 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1501 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1502 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1503 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1504 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1505 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1506 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1507 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1508 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1509 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1510 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1511 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1512 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1513 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1514 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1515 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1516
1517 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1518 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1519 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1520 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1521 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1522 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1523 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1524
1525 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1526 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1527
1528 PE_parse_boot_argn("net_affinity", &net_affinity,
1529 sizeof (net_affinity));
1530
1531 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1532
1533 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
1534
1535 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1536
1537 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1538 sizeof (struct dlil_ifnet_dbg);
1539 /* Enforce 64-bit alignment for dlil_ifnet structure */
1540 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1541 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1542 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1543 0, DLIF_ZONE_NAME);
1544 if (dlif_zone == NULL) {
1545 panic_plain("%s: failed allocating %s", __func__,
1546 DLIF_ZONE_NAME);
1547 /* NOTREACHED */
1548 }
1549 zone_change(dlif_zone, Z_EXPAND, TRUE);
1550 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1551
1552 dlif_filt_size = sizeof (struct ifnet_filter);
1553 dlif_filt_zone = zinit(dlif_filt_size,
1554 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1555 if (dlif_filt_zone == NULL) {
1556 panic_plain("%s: failed allocating %s", __func__,
1557 DLIF_FILT_ZONE_NAME);
1558 /* NOTREACHED */
1559 }
1560 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1561 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1562
1563 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1564 dlif_phash_zone = zinit(dlif_phash_size,
1565 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1566 if (dlif_phash_zone == NULL) {
1567 panic_plain("%s: failed allocating %s", __func__,
1568 DLIF_PHASH_ZONE_NAME);
1569 /* NOTREACHED */
1570 }
1571 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1572 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1573
1574 dlif_proto_size = sizeof (struct if_proto);
1575 dlif_proto_zone = zinit(dlif_proto_size,
1576 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1577 if (dlif_proto_zone == NULL) {
1578 panic_plain("%s: failed allocating %s", __func__,
1579 DLIF_PROTO_ZONE_NAME);
1580 /* NOTREACHED */
1581 }
1582 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1583 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1584
1585 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1586 /* Enforce 64-bit alignment for tcpstat_local structure */
1587 dlif_tcpstat_bufsize =
1588 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1589 dlif_tcpstat_bufsize =
1590 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1591 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1592 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1593 DLIF_TCPSTAT_ZONE_NAME);
1594 if (dlif_tcpstat_zone == NULL) {
1595 panic_plain("%s: failed allocating %s", __func__,
1596 DLIF_TCPSTAT_ZONE_NAME);
1597 /* NOTREACHED */
1598 }
1599 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1600 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1601
1602 dlif_udpstat_size = sizeof (struct udpstat_local);
1603 /* Enforce 64-bit alignment for udpstat_local structure */
1604 dlif_udpstat_bufsize =
1605 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1606 dlif_udpstat_bufsize =
1607 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1608 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1609 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1610 DLIF_UDPSTAT_ZONE_NAME);
1611 if (dlif_udpstat_zone == NULL) {
1612 panic_plain("%s: failed allocating %s", __func__,
1613 DLIF_UDPSTAT_ZONE_NAME);
1614 /* NOTREACHED */
1615 }
1616 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1617 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1618
1619 ifnet_llreach_init();
1620 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1621
1622 TAILQ_INIT(&dlil_ifnet_head);
1623 TAILQ_INIT(&ifnet_head);
1624 TAILQ_INIT(&ifnet_detaching_head);
1625 TAILQ_INIT(&ifnet_ordered_head);
1626
1627 /* Setup the lock groups we will use */
1628 dlil_grp_attributes = lck_grp_attr_alloc_init();
1629
1630 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1631 dlil_grp_attributes);
1632 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1633 dlil_grp_attributes);
1634 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1635 dlil_grp_attributes);
1636 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1637 dlil_grp_attributes);
1638 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1639 dlil_grp_attributes);
1640
1641 /* Setup the lock attributes we will use */
1642 dlil_lck_attributes = lck_attr_alloc_init();
1643
1644 ifnet_lock_attr = lck_attr_alloc_init();
1645
1646 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1647 dlil_lck_attributes);
1648 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1649
1650 /* Setup interface flow control related items */
1651 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1652
1653 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1654 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1655 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1656 if (ifnet_fc_zone == NULL) {
1657 panic_plain("%s: failed allocating %s", __func__,
1658 IFNET_FC_ZONE_NAME);
1659 /* NOTREACHED */
1660 }
1661 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1662 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1663
1664 /* Initialize interface address subsystem */
1665 ifa_init();
1666
1667 #if PF
1668 /* Initialize the packet filter */
1669 pfinit();
1670 #endif /* PF */
1671
1672 /* Initialize queue algorithms */
1673 classq_init();
1674
1675 /* Initialize packet schedulers */
1676 pktsched_init();
1677
1678 /* Initialize flow advisory subsystem */
1679 flowadv_init();
1680
1681 /* Initialize the pktap virtual interface */
1682 pktap_init();
1683
1684 /* Initialize the service class to dscp map */
1685 net_qos_map_init();
1686
1687 #if DEBUG || DEVELOPMENT
1688 /* Run self-tests */
1689 dlil_verify_sum16();
1690 #endif /* DEBUG || DEVELOPMENT */
1691
1692 /* Initialize link layer table */
1693 lltable_glbl_init();
1694
1695 /*
1696 * Create and start up the main DLIL input thread and the interface
1697 * detacher threads once everything is initialized.
1698 */
1699 dlil_create_input_thread(NULL, dlil_main_input_thread);
1700
1701 if (kernel_thread_start(ifnet_detacher_thread_func,
1702 NULL, &thread) != KERN_SUCCESS) {
1703 panic_plain("%s: couldn't create detacher thread", __func__);
1704 /* NOTREACHED */
1705 }
1706 thread_deallocate(thread);
1707
1708 }
1709
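/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  The zone sizing above
 * reserves sizeof (void *) + sizeof (u_int64_t) beyond the structure size
 * and then rounds the element up with P2ROUNDUP, which rounds its first
 * argument up to the next multiple of a power of two (for example,
 * P2ROUNDUP(113, 8) == 120).  Below is a minimal sketch of the usual
 * reason for that slack: align the object to 8 bytes and stash the raw
 * zone pointer just below it so it can be handed back to zfree() later.
 * Whether the allocation path in this file does exactly this is an
 * assumption of the sketch.
 */
#if 0	/* illustration only; never compiled */
	void *buf = zalloc(dlif_zone);		/* raw, possibly unaligned */
	void *base, **pbuf;

	/* 64-bit align, leaving room for a back-pointer just below */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;				/* remembered for a later zfree() */
#endif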
1710 static void
1711 if_flt_monitor_busy(struct ifnet *ifp)
1712 {
1713 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1714
1715 ++ifp->if_flt_busy;
1716 VERIFY(ifp->if_flt_busy != 0);
1717 }
1718
1719 static void
1720 if_flt_monitor_unbusy(struct ifnet *ifp)
1721 {
1722 if_flt_monitor_leave(ifp);
1723 }
1724
1725 static void
1726 if_flt_monitor_enter(struct ifnet *ifp)
1727 {
1728 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1729
1730 while (ifp->if_flt_busy) {
1731 ++ifp->if_flt_waiters;
1732 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1733 (PZERO - 1), "if_flt_monitor", NULL);
1734 }
1735 if_flt_monitor_busy(ifp);
1736 }
1737
1738 static void
1739 if_flt_monitor_leave(struct ifnet *ifp)
1740 {
1741 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1742
1743 VERIFY(ifp->if_flt_busy != 0);
1744 --ifp->if_flt_busy;
1745
1746 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1747 ifp->if_flt_waiters = 0;
1748 wakeup(&ifp->if_flt_head);
1749 }
1750 }
1751
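/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  The helpers above
 * implement a small monitor around the interface filter list: a busy
 * count keeps the list stable while if_flt_lock may be dropped, and the
 * last leaver wakes up anyone blocked in if_flt_monitor_enter().  The
 * fragment below shows the caller pattern used later in this file
 * (e.g. by dlil_attach_filter()); "ifp" refers to the interface being
 * operated on.
 */
#if 0	/* illustration only; never compiled */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);	/* may sleep until the list is idle */

	/* ... walk or modify ifp->if_flt_head; if_flt_lock may be
	 *     dropped and re-taken while the busy count is held ... */

	if_flt_monitor_leave(ifp);	/* wakes waiters when the count drops to 0 */
	lck_mtx_unlock(&ifp->if_flt_lock);
#endif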
1752 __private_extern__ int
1753 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1754 interface_filter_t *filter_ref, u_int32_t flags)
1755 {
1756 int retval = 0;
1757 struct ifnet_filter *filter = NULL;
1758
1759 ifnet_head_lock_shared();
1760 /* Check that the interface is in the global list */
1761 if (!ifnet_lookup(ifp)) {
1762 retval = ENXIO;
1763 goto done;
1764 }
1765
1766 filter = zalloc(dlif_filt_zone);
1767 if (filter == NULL) {
1768 retval = ENOMEM;
1769 goto done;
1770 }
1771 bzero(filter, dlif_filt_size);
1772
1773 /* refcnt held above during lookup */
1774 filter->filt_flags = flags;
1775 filter->filt_ifp = ifp;
1776 filter->filt_cookie = if_filter->iff_cookie;
1777 filter->filt_name = if_filter->iff_name;
1778 filter->filt_protocol = if_filter->iff_protocol;
1779 /*
1780 * Do not install filter callbacks for internal coproc interface
1781 */
1782 if (!IFNET_IS_INTCOPROC(ifp)) {
1783 filter->filt_input = if_filter->iff_input;
1784 filter->filt_output = if_filter->iff_output;
1785 filter->filt_event = if_filter->iff_event;
1786 filter->filt_ioctl = if_filter->iff_ioctl;
1787 }
1788 filter->filt_detached = if_filter->iff_detached;
1789
1790 lck_mtx_lock(&ifp->if_flt_lock);
1791 if_flt_monitor_enter(ifp);
1792
1793 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1794 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1795
1796 if_flt_monitor_leave(ifp);
1797 lck_mtx_unlock(&ifp->if_flt_lock);
1798
1799 *filter_ref = filter;
1800
1801 /*
1802 * Bump filter count and route_generation ID to let TCP
1803 * know it shouldn't do TSO on this connection
1804 */
1805 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1806 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1807 routegenid_update();
1808 }
1809 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1810 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1811 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1812 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1813 }
1814 if (dlil_verbose) {
1815 printf("%s: %s filter attached\n", if_name(ifp),
1816 if_filter->iff_name);
1817 }
1818 done:
1819 ifnet_head_done();
1820 if (retval != 0 && ifp != NULL) {
1821 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1822 if_name(ifp), if_filter->iff_name, retval);
1823 }
1824 if (retval != 0 && filter != NULL)
1825 zfree(dlif_filt_zone, filter);
1826
1827 return (retval);
1828 }
1829
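/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  A minimal example of a
 * client filling in a struct iff_filter and attaching it with the routine
 * above.  The callback prototypes follow the interface-filter KPI as best
 * recalled and should be treated as assumptions; "example_input",
 * "example_detached" and "example_attach" are hypothetical names.
 */
#if 0	/* illustration only; never compiled */
static errno_t
example_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, data, frame_ptr)
	return (0);		/* 0 lets the packet continue up the stack */
}

static void
example_detached(void *cookie, ifnet_t ifp)
{
#pragma unused(cookie, ifp)
}

static const struct iff_filter example_filter = {
	.iff_cookie   = NULL,
	.iff_name     = "com.example.filter",
	.iff_protocol = 0,		/* 0 means all protocols (assumed) */
	.iff_input    = example_input,
	.iff_detached = example_detached,
};

static int
example_attach(struct ifnet *ifp)
{
	interface_filter_t ref;

	return (dlil_attach_filter(ifp, &example_filter, &ref, 0));
}
#endif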
1830 static int
1831 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1832 {
1833 int retval = 0;
1834
1835 if (detached == 0) {
1836 ifnet_t ifp = NULL;
1837
1838 ifnet_head_lock_shared();
1839 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1840 interface_filter_t entry = NULL;
1841
1842 lck_mtx_lock(&ifp->if_flt_lock);
1843 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1844 if (entry != filter || entry->filt_skip)
1845 continue;
1846 /*
1847 * We've found a match; since it's possible
1848 * that the thread gets blocked in the monitor,
1849 * we do the lock dance. Interface should
1850 * not be detached since we still have a use
1851 * count held during filter attach.
1852 */
1853 entry->filt_skip = 1; /* skip input/output */
1854 lck_mtx_unlock(&ifp->if_flt_lock);
1855 ifnet_head_done();
1856
1857 lck_mtx_lock(&ifp->if_flt_lock);
1858 if_flt_monitor_enter(ifp);
1859 LCK_MTX_ASSERT(&ifp->if_flt_lock,
1860 LCK_MTX_ASSERT_OWNED);
1861
1862 /* Remove the filter from the list */
1863 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1864 filt_next);
1865
1866 if_flt_monitor_leave(ifp);
1867 lck_mtx_unlock(&ifp->if_flt_lock);
1868 if (dlil_verbose) {
1869 printf("%s: %s filter detached\n",
1870 if_name(ifp), filter->filt_name);
1871 }
1872 goto destroy;
1873 }
1874 lck_mtx_unlock(&ifp->if_flt_lock);
1875 }
1876 ifnet_head_done();
1877
1878 /* filter parameter is not a valid filter ref */
1879 retval = EINVAL;
1880 goto done;
1881 }
1882
1883 if (dlil_verbose)
1884 printf("%s filter detached\n", filter->filt_name);
1885
1886 destroy:
1887
1888 /* Call the detached function if there is one */
1889 if (filter->filt_detached)
1890 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1891
1892 /*
1893 * Decrease filter count and bump route_generation ID to let TCP
1894 * know it should reevaluate doing TSO or not
1895 */
1896 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1897 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1898 routegenid_update();
1899 }
1900
1901 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1902
1903 /* Free the filter */
1904 zfree(dlif_filt_zone, filter);
1905 filter = NULL;
1906 done:
1907 if (retval != 0 && filter != NULL) {
1908 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1909 filter->filt_name, retval);
1910 }
1911
1912 return (retval);
1913 }
1914
1915 __private_extern__ void
1916 dlil_detach_filter(interface_filter_t filter)
1917 {
1918 if (filter == NULL)
1919 return;
1920 dlil_detach_filter_internal(filter, 0);
1921 }
1922
1923 /*
1924 * Main input thread:
1925 *
1926 * a) handles all inbound packets for lo0
1927 * b) handles all inbound packets for interfaces with no dedicated
1928 * input thread (e.g. anything but Ethernet/PDP or those that support
1929 * opportunistic polling.)
1930 * c) protocol registrations
1931 * d) packet injections
1932 */
1933 __attribute__((noreturn))
1934 static void
1935 dlil_main_input_thread_func(void *v, wait_result_t w)
1936 {
1937 #pragma unused(w)
1938 struct dlil_main_threading_info *inpm = v;
1939 struct dlil_threading_info *inp = v;
1940
1941 VERIFY(inp == dlil_main_input_thread);
1942 VERIFY(inp->ifp == NULL);
1943 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1944
1945 while (1) {
1946 struct mbuf *m = NULL, *m_loop = NULL;
1947 u_int32_t m_cnt, m_cnt_loop;
1948 boolean_t proto_req;
1949
1950 lck_mtx_lock_spin(&inp->input_lck);
1951
1952 /* Wait until there is work to be done */
1953 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1954 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1955 (void) msleep(&inp->input_waiting, &inp->input_lck,
1956 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1957 }
1958
1959 inp->input_waiting |= DLIL_INPUT_RUNNING;
1960 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1961
1962 /* Main input thread cannot be terminated */
1963 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
1964
1965 proto_req = (inp->input_waiting &
1966 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
1967
1968 /* Packets for non-dedicated interfaces other than lo0 */
1969 m_cnt = qlen(&inp->rcvq_pkts);
1970 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
1971
1972 /* Packets exclusive to lo0 */
1973 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
1974 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
1975
1976 inp->wtot = 0;
1977
1978 lck_mtx_unlock(&inp->input_lck);
1979
1980 /*
1981 * NOTE warning %%% attention !!!!
1982 * We should think about putting in some thread-starvation
1983 * safeguards if we deal with long chains of packets.
1984 */
1985 if (m_loop != NULL)
1986 dlil_input_packet_list_extended(lo_ifp, m_loop,
1987 m_cnt_loop, inp->mode);
1988
1989 if (m != NULL)
1990 dlil_input_packet_list_extended(NULL, m,
1991 m_cnt, inp->mode);
1992
1993 if (proto_req)
1994 proto_input_run();
1995 }
1996
1997 /* NOTREACHED */
1998 VERIFY(0); /* we should never get here */
1999 }
2000
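/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  The wait loop above and
 * the producer side in dlil_input_handler() further down cooperate through
 * the input_waiting bits.  Reduced to its essentials (fragment; "inp" is
 * the per-thread dlil_threading_info and input_lck is held on both sides):
 */
#if 0	/* illustration only; never compiled */
	/* producer: queue packets, then request a wakeup if needed */
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;			/* counted wakeup request */
		wakeup_one((caddr_t)&inp->input_waiting);
	}

	/* consumer: sleep until some bit other than RUNNING is set */
	while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
		inp->input_waiting &= ~DLIL_INPUT_RUNNING;
		(void) msleep(&inp->input_waiting, &inp->input_lck,
		    (PZERO - 1) | PSPIN, inp->input_name, NULL);
	}
	inp->input_waiting |= DLIL_INPUT_RUNNING;
	inp->input_waiting &= ~DLIL_INPUT_WAITING;
#endif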
2001 /*
2002 * Input thread for interfaces with legacy input model.
2003 */
2004 static void
2005 dlil_input_thread_func(void *v, wait_result_t w)
2006 {
2007 #pragma unused(w)
2008 char thread_name[MAXTHREADNAMESIZE];
2009 struct dlil_threading_info *inp = v;
2010 struct ifnet *ifp = inp->ifp;
2011
2012 /* Construct the name for this thread, and then apply it. */
2013 bzero(thread_name, sizeof(thread_name));
2014 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2015 thread_set_thread_name(inp->input_thr, thread_name);
2016
2017 VERIFY(inp != dlil_main_input_thread);
2018 VERIFY(ifp != NULL);
2019 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2020 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2021
2022 while (1) {
2023 struct mbuf *m = NULL;
2024 u_int32_t m_cnt;
2025
2026 lck_mtx_lock_spin(&inp->input_lck);
2027
2028 /* Wait until there is work to be done */
2029 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2030 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2031 (void) msleep(&inp->input_waiting, &inp->input_lck,
2032 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2033 }
2034
2035 inp->input_waiting |= DLIL_INPUT_RUNNING;
2036 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2037
2038 /*
2039 * Protocol registration and injection must always use
2040 * the main input thread; in theory the latter can utilize
2041 * the input thread that the packet arrived on, but that
2042 * requires our knowing the interface in advance (and the
2043 * benefits might not be worth the trouble.)
2044 */
2045 VERIFY(!(inp->input_waiting &
2046 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2047
2048 /* Packets for this interface */
2049 m_cnt = qlen(&inp->rcvq_pkts);
2050 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2051
2052 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2053 lck_mtx_unlock(&inp->input_lck);
2054
2055 /* Free up pending packets */
2056 if (m != NULL)
2057 mbuf_freem_list(m);
2058
2059 dlil_terminate_input_thread(inp);
2060 /* NOTREACHED */
2061 return;
2062 }
2063
2064 inp->wtot = 0;
2065
2066 dlil_input_stats_sync(ifp, inp);
2067
2068 lck_mtx_unlock(&inp->input_lck);
2069
2070 /*
2071 * NOTE warning %%% attention !!!!
2072 * We should think about putting in some thread-starvation
2073 * safeguards if we deal with long chains of packets.
2074 */
2075 if (m != NULL)
2076 dlil_input_packet_list_extended(NULL, m,
2077 m_cnt, inp->mode);
2078 }
2079
2080 /* NOTREACHED */
2081 VERIFY(0); /* we should never get here */
2082 }
2083
2084 /*
2085 * Input thread for interfaces with opportunistic polling input model.
2086 */
2087 static void
2088 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2089 {
2090 #pragma unused(w)
2091 struct dlil_threading_info *inp = v;
2092 struct ifnet *ifp = inp->ifp;
2093 struct timespec ts;
2094
2095 VERIFY(inp != dlil_main_input_thread);
2096 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2097
2098 while (1) {
2099 struct mbuf *m = NULL;
2100 u_int32_t m_cnt, m_size, poll_req = 0;
2101 ifnet_model_t mode;
2102 struct timespec now, delta;
2103 u_int64_t ival;
2104
2105 lck_mtx_lock_spin(&inp->input_lck);
2106
2107 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
2108 ival = IF_RXPOLL_INTERVALTIME_MIN;
2109
2110 /* Link parameters changed? */
2111 if (ifp->if_poll_update != 0) {
2112 ifp->if_poll_update = 0;
2113 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2114 }
2115
2116 /* Current operating mode */
2117 mode = inp->mode;
2118
2119 /* Wait until there is work to be done */
2120 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2121 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2122 (void) msleep(&inp->input_waiting, &inp->input_lck,
2123 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2124 }
2125
2126 inp->input_waiting |= DLIL_INPUT_RUNNING;
2127 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2128
2129 /*
2130 * Protocol registration and injection must always use
2131 * the main input thread; in theory the latter can utilize
2132 * the input thread that the packet arrived on, but that
2133 * requires our knowing the interface in advance (and the
2134 * benefits might not be worth the trouble.)
2135 */
2136 VERIFY(!(inp->input_waiting &
2137 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2138
2139 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2140 /* Free up pending packets */
2141 lck_mtx_convert_spin(&inp->input_lck);
2142 _flushq(&inp->rcvq_pkts);
2143 if (inp->input_mit_tcall != NULL) {
2144 if (thread_call_isactive(inp->input_mit_tcall))
2145 thread_call_cancel(inp->input_mit_tcall);
2146 }
2147 lck_mtx_unlock(&inp->input_lck);
2148
2149 dlil_terminate_input_thread(inp);
2150 /* NOTREACHED */
2151 return;
2152 }
2153
2154 /* Total count of all packets */
2155 m_cnt = qlen(&inp->rcvq_pkts);
2156
2157 /* Total bytes of all packets */
2158 m_size = qsize(&inp->rcvq_pkts);
2159
2160 /* Packets for this interface */
2161 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2162 VERIFY(m != NULL || m_cnt == 0);
2163
2164 nanouptime(&now);
2165 if (!net_timerisset(&inp->sample_lasttime))
2166 *(&inp->sample_lasttime) = *(&now);
2167
2168 net_timersub(&now, &inp->sample_lasttime, &delta);
2169 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2170 u_int32_t ptot, btot;
2171
2172 /* Accumulate statistics for current sampling */
2173 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2174
2175 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2176 goto skip;
2177
2178 *(&inp->sample_lasttime) = *(&now);
2179
2180 /* Calculate min/max of inbound bytes */
2181 btot = (u_int32_t)inp->sstats.bytes;
2182 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2183 inp->rxpoll_bmin = btot;
2184 if (btot > inp->rxpoll_bmax)
2185 inp->rxpoll_bmax = btot;
2186
2187 /* Calculate EWMA of inbound bytes */
2188 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2189
2190 /* Calculate min/max of inbound packets */
2191 ptot = (u_int32_t)inp->sstats.packets;
2192 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2193 inp->rxpoll_pmin = ptot;
2194 if (ptot > inp->rxpoll_pmax)
2195 inp->rxpoll_pmax = ptot;
2196
2197 /* Calculate EWMA of inbound packets */
2198 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2199
2200 /* Reset sampling statistics */
2201 PKTCNTR_CLEAR(&inp->sstats);
2202
2203 /* Calculate EWMA of wakeup requests */
2204 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2205 inp->wtot = 0;
2206
2207 if (dlil_verbose) {
2208 if (!net_timerisset(&inp->dbg_lasttime))
2209 *(&inp->dbg_lasttime) = *(&now);
2210 net_timersub(&now, &inp->dbg_lasttime, &delta);
2211 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2212 *(&inp->dbg_lasttime) = *(&now);
2213 printf("%s: [%s] pkts avg %d max %d "
2214 "limits [%d/%d], wreq avg %d "
2215 "limits [%d/%d], bytes avg %d "
2216 "limits [%d/%d]\n", if_name(ifp),
2217 (inp->mode ==
2218 IFNET_MODEL_INPUT_POLL_ON) ?
2219 "ON" : "OFF", inp->rxpoll_pavg,
2220 inp->rxpoll_pmax,
2221 inp->rxpoll_plowat,
2222 inp->rxpoll_phiwat,
2223 inp->rxpoll_wavg,
2224 inp->rxpoll_wlowat,
2225 inp->rxpoll_whiwat,
2226 inp->rxpoll_bavg,
2227 inp->rxpoll_blowat,
2228 inp->rxpoll_bhiwat);
2229 }
2230 }
2231
2232 /* Perform mode transition, if necessary */
2233 if (!net_timerisset(&inp->mode_lasttime))
2234 *(&inp->mode_lasttime) = *(&now);
2235
2236 net_timersub(&now, &inp->mode_lasttime, &delta);
2237 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2238 goto skip;
2239
2240 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2241 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2242 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2243 mode = IFNET_MODEL_INPUT_POLL_OFF;
2244 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2245 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2246 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2247 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2248 mode = IFNET_MODEL_INPUT_POLL_ON;
2249 }
2250
2251 if (mode != inp->mode) {
2252 inp->mode = mode;
2253 *(&inp->mode_lasttime) = *(&now);
2254 poll_req++;
2255 }
2256 }
2257 skip:
2258 dlil_input_stats_sync(ifp, inp);
2259
2260 lck_mtx_unlock(&inp->input_lck);
2261
2262 /*
2263 * If there's a mode change and interface is still attached,
2264 * perform a downcall to the driver for the new mode. Also
2265 * hold an IO refcnt on the interface to prevent it from
2266 * being detached (will be released below.)
2267 */
2268 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2269 struct ifnet_model_params p = { mode, { 0 } };
2270 errno_t err;
2271
2272 if (dlil_verbose) {
2273 printf("%s: polling is now %s, "
2274 "pkts avg %d max %d limits [%d/%d], "
2275 "wreq avg %d limits [%d/%d], "
2276 "bytes avg %d limits [%d/%d]\n",
2277 if_name(ifp),
2278 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2279 "ON" : "OFF", inp->rxpoll_pavg,
2280 inp->rxpoll_pmax, inp->rxpoll_plowat,
2281 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2282 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2283 inp->rxpoll_bavg, inp->rxpoll_blowat,
2284 inp->rxpoll_bhiwat);
2285 }
2286
2287 if ((err = ((*ifp->if_input_ctl)(ifp,
2288 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
2289 printf("%s: error setting polling mode "
2290 "to %s (%d)\n", if_name(ifp),
2291 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2292 "ON" : "OFF", err);
2293 }
2294
2295 switch (mode) {
2296 case IFNET_MODEL_INPUT_POLL_OFF:
2297 ifnet_set_poll_cycle(ifp, NULL);
2298 inp->rxpoll_offreq++;
2299 if (err != 0)
2300 inp->rxpoll_offerr++;
2301 break;
2302
2303 case IFNET_MODEL_INPUT_POLL_ON:
2304 net_nsectimer(&ival, &ts);
2305 ifnet_set_poll_cycle(ifp, &ts);
2306 ifnet_poll(ifp);
2307 inp->rxpoll_onreq++;
2308 if (err != 0)
2309 inp->rxpoll_onerr++;
2310 break;
2311
2312 default:
2313 VERIFY(0);
2314 /* NOTREACHED */
2315 }
2316
2317 /* Release the IO refcnt */
2318 ifnet_decr_iorefcnt(ifp);
2319 }
2320
2321 /*
2322 * NOTE warning %%% attention !!!!
2323 * We should think about putting in some thread-starvation
2324 * safeguards if we deal with long chains of packets.
2325 */
2326 if (m != NULL)
2327 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2328 }
2329
2330 /* NOTREACHED */
2331 VERIFY(0); /* we should never get here */
2332 }
2333
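/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  The poller thread above
 * smooths its packet, byte and wakeup counters with DLIL_EWMA (an
 * exponentially weighted moving average) and then applies a hysteresis to
 * pick the input model.  The decision, with the rxpoll_ prefixes dropped
 * for brevity:
 */
#if 0	/* illustration only; never compiled */
	if (pavg <= plowat && bavg <= blowat &&
	    mode != IFNET_MODEL_INPUT_POLL_OFF) {
		/* light load on both axes: fall back to interrupts */
		mode = IFNET_MODEL_INPUT_POLL_OFF;
	} else if (pavg >= phiwat && (bavg >= bhiwat || wavg >= whiwat) &&
	    mode != IFNET_MODEL_INPUT_POLL_ON) {
		/* heavy packet load plus heavy bytes or wakeups: poll */
		mode = IFNET_MODEL_INPUT_POLL_ON;
	}
#endif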
2334 /*
2335 * Must be called on an attached ifnet (caller is expected to check.)
2336 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2337 */
2338 errno_t
2339 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2340 boolean_t locked)
2341 {
2342 struct dlil_threading_info *inp;
2343 u_int64_t sample_holdtime, inbw;
2344
2345 VERIFY(ifp != NULL);
2346 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2347 return (ENXIO);
2348
2349 if (p != NULL) {
2350 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2351 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2352 return (EINVAL);
2353 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2354 p->packets_lowat >= p->packets_hiwat)
2355 return (EINVAL);
2356 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2357 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2358 return (EINVAL);
2359 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2360 p->bytes_lowat >= p->bytes_hiwat)
2361 return (EINVAL);
2362 if (p->interval_time != 0 &&
2363 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2364 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2365 }
2366
2367 if (!locked)
2368 lck_mtx_lock(&inp->input_lck);
2369
2370 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2371
2372 /*
2373 * Normally, we'd reset the parameters to the auto-tuned values
2374 * if the input thread detects a change in link rate. If the
2375 * driver provides its own parameters right after a link rate
2376 * change, but before the input thread gets to run, we want to
2377 * make sure to keep the driver's values. Clearing if_poll_update
2378 * will achieve that.
2379 */
2380 if (p != NULL && !locked && ifp->if_poll_update != 0)
2381 ifp->if_poll_update = 0;
2382
2383 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2384 sample_holdtime = 0; /* polling is disabled */
2385 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2386 inp->rxpoll_blowat = 0;
2387 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2388 inp->rxpoll_bhiwat = (u_int32_t)-1;
2389 inp->rxpoll_plim = 0;
2390 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2391 } else {
2392 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2393 u_int64_t ival;
2394 unsigned int n, i;
2395
2396 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2397 if (inbw < rxpoll_tbl[i].speed)
2398 break;
2399 n = i;
2400 }
2401 /* auto-tune if caller didn't specify a value */
2402 plowat = ((p == NULL || p->packets_lowat == 0) ?
2403 rxpoll_tbl[n].plowat : p->packets_lowat);
2404 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2405 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2406 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2407 rxpoll_tbl[n].blowat : p->bytes_lowat);
2408 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2409 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2410 plim = ((p == NULL || p->packets_limit == 0) ?
2411 if_rxpoll_max : p->packets_limit);
2412 ival = ((p == NULL || p->interval_time == 0) ?
2413 if_rxpoll_interval_time : p->interval_time);
2414
2415 VERIFY(plowat != 0 && phiwat != 0);
2416 VERIFY(blowat != 0 && bhiwat != 0);
2417 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2418
2419 sample_holdtime = if_rxpoll_sample_holdtime;
2420 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2421 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2422 inp->rxpoll_plowat = plowat;
2423 inp->rxpoll_phiwat = phiwat;
2424 inp->rxpoll_blowat = blowat;
2425 inp->rxpoll_bhiwat = bhiwat;
2426 inp->rxpoll_plim = plim;
2427 inp->rxpoll_ival = ival;
2428 }
2429
2430 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2431 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2432
2433 if (dlil_verbose) {
2434 printf("%s: speed %llu bps, sample per %llu nsec, "
2435 "poll interval %llu nsec, pkts per poll %u, "
2436 "pkt limits [%u/%u], wreq limits [%u/%u], "
2437 "bytes limits [%u/%u]\n", if_name(ifp),
2438 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2439 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2440 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2441 }
2442
2443 if (!locked)
2444 lck_mtx_unlock(&inp->input_lck);
2445
2446 return (0);
2447 }
2448
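/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  The auto-tuning loop
 * above keeps the index of the last rxpoll_tbl row whose speed does not
 * exceed the measured link rate; with a hypothetical three-row table of
 * 10 Mbps, 100 Mbps and 1 Gbps entries, a 300 Mbps link resolves to the
 * 100 Mbps row, and anything slower than the first row still resolves to
 * row 0 because n starts at 0.  The selection loop, verbatim:
 */
#if 0	/* illustration only; never compiled */
	for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
		if (inbw < rxpoll_tbl[i].speed)
			break;		/* previous row is the best match */
		n = i;
	}
#endif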
2449 /*
2450 * Must be called on an attached ifnet (caller is expected to check.)
2451 */
2452 errno_t
2453 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2454 {
2455 struct dlil_threading_info *inp;
2456
2457 VERIFY(ifp != NULL && p != NULL);
2458 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2459 return (ENXIO);
2460
2461 bzero(p, sizeof (*p));
2462
2463 lck_mtx_lock(&inp->input_lck);
2464 p->packets_limit = inp->rxpoll_plim;
2465 p->packets_lowat = inp->rxpoll_plowat;
2466 p->packets_hiwat = inp->rxpoll_phiwat;
2467 p->bytes_lowat = inp->rxpoll_blowat;
2468 p->bytes_hiwat = inp->rxpoll_bhiwat;
2469 p->interval_time = inp->rxpoll_ival;
2470 lck_mtx_unlock(&inp->input_lck);
2471
2472 return (0);
2473 }
2474
2475 errno_t
2476 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2477 const struct ifnet_stat_increment_param *s)
2478 {
2479 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2480 }
2481
2482 errno_t
2483 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2484 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2485 {
2486 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2487 }
2488
2489 static errno_t
2490 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2491 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2492 {
2493 dlil_input_func input_func;
2494 struct ifnet_stat_increment_param _s;
2495 u_int32_t m_cnt = 0, m_size = 0;
2496 struct mbuf *last;
2497 errno_t err = 0;
2498
2499 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2500 if (m_head != NULL)
2501 mbuf_freem_list(m_head);
2502 return (EINVAL);
2503 }
2504
2505 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2506 VERIFY(m_tail == NULL || ext);
2507 VERIFY(s != NULL || !ext);
2508
2509 /*
2510 * Drop the packet(s) if the parameters are invalid, or if the
2511 * interface is no longer attached; else hold an IO refcnt to
2512 * prevent it from being detached (will be released below.)
2513 */
2514 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2515 if (m_head != NULL)
2516 mbuf_freem_list(m_head);
2517 return (EINVAL);
2518 }
2519
2520 input_func = ifp->if_input_dlil;
2521 VERIFY(input_func != NULL);
2522
2523 if (m_tail == NULL) {
2524 last = m_head;
2525 while (m_head != NULL) {
2526 #if IFNET_INPUT_SANITY_CHK
2527 if (dlil_input_sanity_check != 0)
2528 DLIL_INPUT_CHECK(last, ifp);
2529 #endif /* IFNET_INPUT_SANITY_CHK */
2530 m_cnt++;
2531 m_size += m_length(last);
2532 if (mbuf_nextpkt(last) == NULL)
2533 break;
2534 last = mbuf_nextpkt(last);
2535 }
2536 m_tail = last;
2537 } else {
2538 #if IFNET_INPUT_SANITY_CHK
2539 if (dlil_input_sanity_check != 0) {
2540 last = m_head;
2541 while (1) {
2542 DLIL_INPUT_CHECK(last, ifp);
2543 m_cnt++;
2544 m_size += m_length(last);
2545 if (mbuf_nextpkt(last) == NULL)
2546 break;
2547 last = mbuf_nextpkt(last);
2548 }
2549 } else {
2550 m_cnt = s->packets_in;
2551 m_size = s->bytes_in;
2552 last = m_tail;
2553 }
2554 #else
2555 m_cnt = s->packets_in;
2556 m_size = s->bytes_in;
2557 last = m_tail;
2558 #endif /* IFNET_INPUT_SANITY_CHK */
2559 }
2560
2561 if (last != m_tail) {
2562 panic_plain("%s: invalid input packet chain for %s, "
2563 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2564 m_tail, last);
2565 }
2566
2567 /*
2568 * Assert packet count only for the extended variant, for backwards
2569 * compatibility, since this came directly from the device driver.
2570 * Relax this assertion for input bytes, as the driver may have
2571 * included the link-layer headers in the computation; hence
2572 * m_size is just an approximation.
2573 */
2574 if (ext && s->packets_in != m_cnt) {
2575 panic_plain("%s: input packet count mismatch for %s, "
2576 "%d instead of %d\n", __func__, if_name(ifp),
2577 s->packets_in, m_cnt);
2578 }
2579
2580 if (s == NULL) {
2581 bzero(&_s, sizeof (_s));
2582 s = &_s;
2583 } else {
2584 _s = *s;
2585 }
2586 _s.packets_in = m_cnt;
2587 _s.bytes_in = m_size;
2588
2589 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2590
2591 if (ifp != lo_ifp) {
2592 /* Release the IO refcnt */
2593 ifnet_decr_iorefcnt(ifp);
2594 }
2595
2596 return (err);
2597 }
2598
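/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  How a driver would hand
 * a received chain to the functions above.  With ifnet_input_extended()
 * the stats are mandatory and packets_in must match the chain length
 * exactly (see the assertion in ifnet_input_common()); bytes_in may
 * include link-layer headers and is treated as an approximation.
 * "rx_head", "rx_tail", "rx_count" and "rx_bytes" are hypothetical
 * driver-side variables.
 */
#if 0	/* illustration only; never compiled */
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = rx_count;
	s.bytes_in = rx_bytes;
	(void) ifnet_input_extended(ifp, rx_head, rx_tail, &s);

	/* or, when no tail pointer or stats are at hand, let DLIL count */
	(void) ifnet_input(ifp, rx_head, NULL);
#endif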
2599
2600 errno_t
2601 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2602 {
2603 return (ifp->if_output(ifp, m));
2604 }
2605
2606 errno_t
2607 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2608 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2609 boolean_t poll, struct thread *tp)
2610 {
2611 struct dlil_threading_info *inp;
2612 u_int32_t m_cnt = s->packets_in;
2613 u_int32_t m_size = s->bytes_in;
2614
2615 if ((inp = ifp->if_inp) == NULL)
2616 inp = dlil_main_input_thread;
2617
2618 /*
2619 * If there is a matching DLIL input thread associated with an
2620 * affinity set, associate this thread with the same set. We
2621 * will only do this once.
2622 */
2623 lck_mtx_lock_spin(&inp->input_lck);
2624 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2625 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2626 (poll && inp->poll_thr == THREAD_NULL))) {
2627 u_int32_t tag = inp->tag;
2628
2629 if (poll) {
2630 VERIFY(inp->poll_thr == THREAD_NULL);
2631 inp->poll_thr = tp;
2632 } else {
2633 VERIFY(inp->wloop_thr == THREAD_NULL);
2634 inp->wloop_thr = tp;
2635 }
2636 lck_mtx_unlock(&inp->input_lck);
2637
2638 /* Associate the current thread with the new affinity tag */
2639 (void) dlil_affinity_set(tp, tag);
2640
2641 /*
2642 * Take a reference on the current thread; during detach,
2643 * we will need to refer to it in order to tear down its
2644 * affinity.
2645 */
2646 thread_reference(tp);
2647 lck_mtx_lock_spin(&inp->input_lck);
2648 }
2649
2650 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2651
2652 /*
2653 * Because of loopbacked multicast we cannot stuff the ifp in
2654 * the rcvif of the packet header: loopback (lo0) packets use a
2655 * dedicated list so that we can later associate them with lo_ifp
2656 * on their way up the stack. Packets for other interfaces without
2657 * dedicated input threads go to the regular list.
2658 */
2659 if (m_head != NULL) {
2660 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2661 struct dlil_main_threading_info *inpm =
2662 (struct dlil_main_threading_info *)inp;
2663 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2664 m_cnt, m_size);
2665 } else {
2666 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2667 m_cnt, m_size);
2668 }
2669 }
2670
2671 #if IFNET_INPUT_SANITY_CHK
2672 if (dlil_input_sanity_check != 0) {
2673 u_int32_t count;
2674 struct mbuf *m0;
2675
2676 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2677 count++;
2678
2679 if (count != m_cnt) {
2680 panic_plain("%s: invalid packet count %d "
2681 "(expected %d)\n", if_name(ifp),
2682 count, m_cnt);
2683 /* NOTREACHED */
2684 }
2685
2686 inp->input_mbuf_cnt += m_cnt;
2687 }
2688 #endif /* IFNET_INPUT_SANITY_CHK */
2689
2690 dlil_input_stats_add(s, inp, poll);
2691 /*
2692 * If we're using the main input thread, synchronize the
2693 * stats now since we have the interface context. All
2694 * other cases involving dedicated input threads will
2695 * have their stats synchronized there.
2696 */
2697 if (inp == dlil_main_input_thread)
2698 dlil_input_stats_sync(ifp, inp);
2699
2700 if (qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
2701 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2702 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2703 ifp->if_type == IFT_CELLULAR)
2704 ) {
2705 if (!thread_call_isactive(inp->input_mit_tcall)) {
2706 uint64_t deadline;
2707 clock_interval_to_deadline(dlil_rcv_mit_interval,
2708 1, &deadline);
2709 (void) thread_call_enter_delayed(
2710 inp->input_mit_tcall, deadline);
2711 }
2712 } else {
2713 inp->input_waiting |= DLIL_INPUT_WAITING;
2714 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2715 inp->wtot++;
2716 wakeup_one((caddr_t)&inp->input_waiting);
2717 }
2718 }
2719 lck_mtx_unlock(&inp->input_lck);
2720
2721 return (0);
2722 }
2723
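/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  Receive mitigation in
 * dlil_input_handler() above: when the receive queue depth sits between
 * dlil_rcv_mit_pkts_min and dlil_rcv_mit_pkts_max on an Ethernet or
 * cellular interface, the input-thread wakeup is deferred through a
 * thread call so several batches can be coalesced; otherwise the thread
 * is woken immediately.  The deferred path, as a fragment:
 */
#if 0	/* illustration only; never compiled */
	uint64_t deadline;

	/* schedule the wakeup one mitigation interval from now */
	clock_interval_to_deadline(dlil_rcv_mit_interval, 1, &deadline);
	(void) thread_call_enter_delayed(inp->input_mit_tcall, deadline);
#endif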
2724
2725 static void
2726 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
2727 {
2728 if (!(ifp->if_eflags & IFEF_TXSTART))
2729 return;
2730 /*
2731 * If the starter thread is inactive, signal it to do work,
2732 * unless the interface is being flow controlled from below,
2733 * e.g. a virtual interface being flow controlled by a real
2734 * network interface beneath it, or it's been disabled via
2735 * a call to ifnet_disable_output().
2736 */
2737 lck_mtx_lock_spin(&ifp->if_start_lock);
2738 if (resetfc) {
2739 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2740 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2741 lck_mtx_unlock(&ifp->if_start_lock);
2742 return;
2743 }
2744 ifp->if_start_req++;
2745 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2746 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2747 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2748 ifp->if_start_delayed == 0)) {
2749 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2750 ifp->if_start_thread);
2751 }
2752 lck_mtx_unlock(&ifp->if_start_lock);
2753 }
2754
2755 void
2756 ifnet_start(struct ifnet *ifp)
2757 {
2758 ifnet_start_common(ifp, FALSE);
2759 }
2760
2761 static void
2762 ifnet_start_thread_fn(void *v, wait_result_t w)
2763 {
2764 #pragma unused(w)
2765 struct ifnet *ifp = v;
2766 char ifname[IFNAMSIZ + 1];
2767 char thread_name[MAXTHREADNAMESIZE];
2768 struct timespec *ts = NULL;
2769 struct ifclassq *ifq = &ifp->if_snd;
2770 struct timespec delay_start_ts;
2771
2772 /* Construct the name for this thread, and then apply it. */
2773 bzero(thread_name, sizeof(thread_name));
2774 (void) snprintf(thread_name, sizeof (thread_name),
2775 "ifnet_start_%s", ifp->if_xname);
2776 thread_set_thread_name(ifp->if_start_thread, thread_name);
2777
2778 /*
2779 * Treat the dedicated starter thread for lo0 as equivalent to
2780 * the driver workloop thread; if net_affinity is enabled for
2781 * the main input thread, associate this starter thread with it
2782 * by binding them with the same affinity tag. This is done
2783 * only once (as we only have one lo_ifp which never goes away.)
2784 */
2785 if (ifp == lo_ifp) {
2786 struct dlil_threading_info *inp = dlil_main_input_thread;
2787 struct thread *tp = current_thread();
2788
2789 lck_mtx_lock(&inp->input_lck);
2790 if (inp->net_affinity) {
2791 u_int32_t tag = inp->tag;
2792
2793 VERIFY(inp->wloop_thr == THREAD_NULL);
2794 VERIFY(inp->poll_thr == THREAD_NULL);
2795 inp->wloop_thr = tp;
2796 lck_mtx_unlock(&inp->input_lck);
2797
2798 /* Associate this thread with the affinity tag */
2799 (void) dlil_affinity_set(tp, tag);
2800 } else {
2801 lck_mtx_unlock(&inp->input_lck);
2802 }
2803 }
2804
2805 (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));
2806
2807 lck_mtx_lock_spin(&ifp->if_start_lock);
2808
2809 for (;;) {
2810 if (ifp->if_start_thread != NULL) {
2811 (void) msleep(&ifp->if_start_thread,
2812 &ifp->if_start_lock,
2813 (PZERO - 1) | PSPIN, ifname, ts);
2814 }
2815 /* interface is detached? */
2816 if (ifp->if_start_thread == THREAD_NULL) {
2817 ifnet_set_start_cycle(ifp, NULL);
2818 lck_mtx_unlock(&ifp->if_start_lock);
2819 ifnet_purge(ifp);
2820
2821 if (dlil_verbose) {
2822 printf("%s: starter thread terminated\n",
2823 if_name(ifp));
2824 }
2825
2826 /* for the extra refcnt from kernel_thread_start() */
2827 thread_deallocate(current_thread());
2828 /* this is the end */
2829 thread_terminate(current_thread());
2830 /* NOTREACHED */
2831 return;
2832 }
2833
2834 ifp->if_start_active = 1;
2835
2836 for (;;) {
2837 u_int32_t req = ifp->if_start_req;
2838 if (!IFCQ_IS_EMPTY(ifq) &&
2839 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2840 ifp->if_start_delayed == 0 &&
2841 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2842 (ifp->if_eflags & IFEF_DELAY_START)) {
2843 ifp->if_start_delayed = 1;
2844 ifnet_start_delayed++;
2845 break;
2846 } else {
2847 ifp->if_start_delayed = 0;
2848 }
2849 lck_mtx_unlock(&ifp->if_start_lock);
2850
2851 /*
2852 * If no longer attached, don't call start because ifp
2853 * is being destroyed; else hold an IO refcnt to
2854 * prevent the interface from being detached (will be
2855 * released below.)
2856 */
2857 if (!ifnet_is_attached(ifp, 1)) {
2858 lck_mtx_lock_spin(&ifp->if_start_lock);
2859 break;
2860 }
2861
2862 /* invoke the driver's start routine */
2863 ((*ifp->if_start)(ifp));
2864
2865 /*
2866 * Release the io ref count taken by ifnet_is_attached.
2867 */
2868 ifnet_decr_iorefcnt(ifp);
2869
2870 lck_mtx_lock_spin(&ifp->if_start_lock);
2871
2872 /*
2873 * If there's no pending request or if the
2874 * interface has been disabled, we're done.
2875 */
2876 if (req == ifp->if_start_req ||
2877 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
2878 break;
2879 }
2880 }
2881
2882 ifp->if_start_req = 0;
2883 ifp->if_start_active = 0;
2884
2885 /*
2886 * Wakeup N ns from now if rate-controlled by TBR, and if
2887 * there are still packets in the send queue which haven't
2888 * been dequeued so far; else sleep indefinitely (ts = NULL)
2889 * until ifnet_start() is called again.
2890 */
2891 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2892 &ifp->if_start_cycle : NULL);
2893
2894 if (ts == NULL && ifp->if_start_delayed == 1) {
2895 delay_start_ts.tv_sec = 0;
2896 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2897 ts = &delay_start_ts;
2898 }
2899
2900 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2901 ts = NULL;
2902 }
2903
2904 /* NOTREACHED */
2905 }
2906
2907 void
2908 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2909 {
2910 if (ts == NULL)
2911 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2912 else
2913 *(&ifp->if_start_cycle) = *ts;
2914
2915 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2916 printf("%s: restart interval set to %lu nsec\n",
2917 if_name(ifp), ts->tv_nsec);
2918 }
2919
2920 static void
2921 ifnet_poll(struct ifnet *ifp)
2922 {
2923 /*
2924 * If the poller thread is inactive, signal it to do work.
2925 */
2926 lck_mtx_lock_spin(&ifp->if_poll_lock);
2927 ifp->if_poll_req++;
2928 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2929 wakeup_one((caddr_t)&ifp->if_poll_thread);
2930 }
2931 lck_mtx_unlock(&ifp->if_poll_lock);
2932 }
2933
2934 static void
2935 ifnet_poll_thread_fn(void *v, wait_result_t w)
2936 {
2937 #pragma unused(w)
2938 struct dlil_threading_info *inp;
2939 struct ifnet *ifp = v;
2940 char ifname[IFNAMSIZ + 1];
2941 struct timespec *ts = NULL;
2942 struct ifnet_stat_increment_param s;
2943
2944 snprintf(ifname, sizeof (ifname), "%s_poller",
2945 if_name(ifp));
2946 bzero(&s, sizeof (s));
2947
2948 lck_mtx_lock_spin(&ifp->if_poll_lock);
2949
2950 inp = ifp->if_inp;
2951 VERIFY(inp != NULL);
2952
2953 for (;;) {
2954 if (ifp->if_poll_thread != THREAD_NULL) {
2955 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2956 (PZERO - 1) | PSPIN, ifname, ts);
2957 }
2958
2959 /* interface is detached (maybe while asleep)? */
2960 if (ifp->if_poll_thread == THREAD_NULL) {
2961 ifnet_set_poll_cycle(ifp, NULL);
2962 lck_mtx_unlock(&ifp->if_poll_lock);
2963
2964 if (dlil_verbose) {
2965 printf("%s: poller thread terminated\n",
2966 if_name(ifp));
2967 }
2968
2969 /* for the extra refcnt from kernel_thread_start() */
2970 thread_deallocate(current_thread());
2971 /* this is the end */
2972 thread_terminate(current_thread());
2973 /* NOTREACHED */
2974 return;
2975 }
2976
2977 ifp->if_poll_active = 1;
2978 for (;;) {
2979 struct mbuf *m_head, *m_tail;
2980 u_int32_t m_lim, m_cnt, m_totlen;
2981 u_int16_t req = ifp->if_poll_req;
2982
2983 lck_mtx_unlock(&ifp->if_poll_lock);
2984
2985 /*
2986 * If no longer attached, there's nothing to do;
2987 * else hold an IO refcnt to prevent the interface
2988 * from being detached (will be released below.)
2989 */
2990 if (!ifnet_is_attached(ifp, 1)) {
2991 lck_mtx_lock_spin(&ifp->if_poll_lock);
2992 break;
2993 }
2994
2995 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
2996 MAX((qlimit(&inp->rcvq_pkts)),
2997 (inp->rxpoll_phiwat << 2));
2998
2999 if (dlil_verbose > 1) {
3000 printf("%s: polling up to %d pkts, "
3001 "pkts avg %d max %d, wreq avg %d, "
3002 "bytes avg %d\n",
3003 if_name(ifp), m_lim,
3004 inp->rxpoll_pavg, inp->rxpoll_pmax,
3005 inp->rxpoll_wavg, inp->rxpoll_bavg);
3006 }
3007
3008 /* invoke the driver's input poll routine */
3009 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3010 &m_cnt, &m_totlen));
3011
3012 if (m_head != NULL) {
3013 VERIFY(m_tail != NULL && m_cnt > 0);
3014
3015 if (dlil_verbose > 1) {
3016 printf("%s: polled %d pkts, "
3017 "pkts avg %d max %d, wreq avg %d, "
3018 "bytes avg %d\n",
3019 if_name(ifp), m_cnt,
3020 inp->rxpoll_pavg, inp->rxpoll_pmax,
3021 inp->rxpoll_wavg, inp->rxpoll_bavg);
3022 }
3023
3024 /* stats are required for extended variant */
3025 s.packets_in = m_cnt;
3026 s.bytes_in = m_totlen;
3027
3028 (void) ifnet_input_common(ifp, m_head, m_tail,
3029 &s, TRUE, TRUE);
3030 } else {
3031 if (dlil_verbose > 1) {
3032 printf("%s: no packets, "
3033 "pkts avg %d max %d, wreq avg %d, "
3034 "bytes avg %d\n",
3035 if_name(ifp), inp->rxpoll_pavg,
3036 inp->rxpoll_pmax, inp->rxpoll_wavg,
3037 inp->rxpoll_bavg);
3038 }
3039
3040 (void) ifnet_input_common(ifp, NULL, NULL,
3041 NULL, FALSE, TRUE);
3042 }
3043
3044 /* Release the io ref count */
3045 ifnet_decr_iorefcnt(ifp);
3046
3047 lck_mtx_lock_spin(&ifp->if_poll_lock);
3048
3049 /* if there's no pending request, we're done */
3050 if (req == ifp->if_poll_req) {
3051 break;
3052 }
3053 }
3054 ifp->if_poll_req = 0;
3055 ifp->if_poll_active = 0;
3056
3057 /*
3058 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3059 * until ifnet_poll() is called again.
3060 */
3061 ts = &ifp->if_poll_cycle;
3062 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
3063 ts = NULL;
3064 }
3065
3066 /* NOTREACHED */
3067 }
3068
3069 void
3070 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3071 {
3072 if (ts == NULL)
3073 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
3074 else
3075 *(&ifp->if_poll_cycle) = *ts;
3076
3077 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
3078 printf("%s: poll interval set to %lu nsec\n",
3079 if_name(ifp), ts->tv_nsec);
3080 }
3081
3082 void
3083 ifnet_purge(struct ifnet *ifp)
3084 {
3085 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
3086 if_qflush(ifp, 0);
3087 }
3088
3089 void
3090 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3091 {
3092 IFCQ_LOCK_ASSERT_HELD(ifq);
3093
3094 if (!(IFCQ_IS_READY(ifq)))
3095 return;
3096
3097 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3098 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
3099 ifq->ifcq_tbr.tbr_percent, 0 };
3100 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3101 }
3102
3103 ifclassq_update(ifq, ev);
3104 }
3105
3106 void
3107 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3108 {
3109 switch (ev) {
3110 case CLASSQ_EV_LINK_BANDWIDTH:
3111 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
3112 ifp->if_poll_update++;
3113 break;
3114
3115 default:
3116 break;
3117 }
3118 }
3119
3120 errno_t
3121 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3122 {
3123 struct ifclassq *ifq;
3124 u_int32_t omodel;
3125 errno_t err;
3126
3127 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
3128 return (EINVAL);
3129 else if (!(ifp->if_eflags & IFEF_TXSTART))
3130 return (ENXIO);
3131
3132 ifq = &ifp->if_snd;
3133 IFCQ_LOCK(ifq);
3134 omodel = ifp->if_output_sched_model;
3135 ifp->if_output_sched_model = model;
3136 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
3137 ifp->if_output_sched_model = omodel;
3138 IFCQ_UNLOCK(ifq);
3139
3140 return (err);
3141 }
3142
3143 errno_t
3144 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3145 {
3146 if (ifp == NULL)
3147 return (EINVAL);
3148 else if (!(ifp->if_eflags & IFEF_TXSTART))
3149 return (ENXIO);
3150
3151 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3152
3153 return (0);
3154 }
3155
3156 errno_t
3157 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3158 {
3159 if (ifp == NULL || maxqlen == NULL)
3160 return (EINVAL);
3161 else if (!(ifp->if_eflags & IFEF_TXSTART))
3162 return (ENXIO);
3163
3164 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3165
3166 return (0);
3167 }
3168
3169 errno_t
3170 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3171 {
3172 errno_t err;
3173
3174 if (ifp == NULL || pkts == NULL)
3175 err = EINVAL;
3176 else if (!(ifp->if_eflags & IFEF_TXSTART))
3177 err = ENXIO;
3178 else
3179 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3180 pkts, NULL);
3181
3182 return (err);
3183 }
3184
3185 errno_t
3186 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3187 u_int32_t *pkts, u_int32_t *bytes)
3188 {
3189 errno_t err;
3190
3191 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3192 (pkts == NULL && bytes == NULL))
3193 err = EINVAL;
3194 else if (!(ifp->if_eflags & IFEF_TXSTART))
3195 err = ENXIO;
3196 else
3197 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3198
3199 return (err);
3200 }
3201
3202 errno_t
3203 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3204 {
3205 struct dlil_threading_info *inp;
3206
3207 if (ifp == NULL)
3208 return (EINVAL);
3209 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3210 return (ENXIO);
3211
3212 if (maxqlen == 0)
3213 maxqlen = if_rcvq_maxlen;
3214 else if (maxqlen < IF_RCVQ_MINLEN)
3215 maxqlen = IF_RCVQ_MINLEN;
3216
3217 inp = ifp->if_inp;
3218 lck_mtx_lock(&inp->input_lck);
3219 qlimit(&inp->rcvq_pkts) = maxqlen;
3220 lck_mtx_unlock(&inp->input_lck);
3221
3222 return (0);
3223 }
3224
3225 errno_t
3226 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3227 {
3228 struct dlil_threading_info *inp;
3229
3230 if (ifp == NULL || maxqlen == NULL)
3231 return (EINVAL);
3232 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3233 return (ENXIO);
3234
3235 inp = ifp->if_inp;
3236 lck_mtx_lock(&inp->input_lck);
3237 *maxqlen = qlimit(&inp->rcvq_pkts);
3238 lck_mtx_unlock(&inp->input_lck);
3239 return (0);
3240 }
3241
3242 void
3243 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3244 uint16_t delay_timeout)
3245 {
3246 if (delay_qlen > 0 && delay_timeout > 0) {
3247 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3248 ifp->if_start_delay_qlen = min(100, delay_qlen);
3249 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3250 /* convert timeout to nanoseconds */
3251 ifp->if_start_delay_timeout *= 1000;
3252 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3253 ifp->if_xname, (uint32_t)delay_qlen,
3254 (uint32_t)delay_timeout);
3255 } else {
3256 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3257 }
3258 }
3259
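/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  A worked example for
 * the setup routine above, assuming the timeout argument is expressed in
 * microseconds (it is multiplied by 1000 to get nanoseconds): asking for
 * a delay qlen of 250 and a timeout of 30000 ends up clamped to 100
 * packets and 20000, i.e. 20,000,000 ns.
 */
#if 0	/* illustration only; never compiled */
	ifnet_enqueue_multi_setup(ifp, 250, 30000);
	/* now: ifp->if_start_delay_qlen    == 100               */
	/*      ifp->if_start_delay_timeout == 20000000 (ns)     */
#endif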
3260 static inline errno_t
3261 ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
3262 boolean_t flush, boolean_t *pdrop)
3263 {
3264 volatile uint64_t *fg_ts = NULL;
3265 volatile uint64_t *rt_ts = NULL;
3266 struct mbuf *m = p;
3267 struct timespec now;
3268 u_int64_t now_nsec = 0;
3269 int error = 0;
3270
3271 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3272
3273 /*
3274 * If packet already carries a timestamp, either from dlil_output()
3275 * or from flowswitch, use it here. Otherwise, record timestamp.
3276 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3277 * the timestamp value is used internally there.
3278 */
3279 switch (ptype) {
3280 case QP_MBUF:
3281 ASSERT(m->m_flags & M_PKTHDR);
3282 ASSERT(m->m_nextpkt == NULL);
3283
3284 if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3285 m->m_pkthdr.pkt_timestamp == 0) {
3286 nanouptime(&now);
3287 net_timernsec(&now, &now_nsec);
3288 m->m_pkthdr.pkt_timestamp = now_nsec;
3289 }
3290 m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3291 /*
3292 * If the packet service class is not background,
3293 * update the timestamp to indicate recent activity
3294 * on a foreground socket.
3295 */
3296 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3297 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3298 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
3299 ifp->if_fg_sendts = _net_uptime;
3300 if (fg_ts != NULL)
3301 *fg_ts = _net_uptime;
3302 }
3303 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3304 ifp->if_rt_sendts = _net_uptime;
3305 if (rt_ts != NULL)
3306 *rt_ts = _net_uptime;
3307 }
3308 }
3309 break;
3310
3311
3312 default:
3313 VERIFY(0);
3314 /* NOTREACHED */
3315 }
3316
3317 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3318 if (now_nsec == 0) {
3319 nanouptime(&now);
3320 net_timernsec(&now, &now_nsec);
3321 }
3322 /*
3323 * If the driver chose to delay start callback for
3324 * coalescing multiple packets, then use the following
3325 * heuristics to make sure that start callback will
3326 * be delayed only when bulk data transfer is detected.
3327 * 1. number of packets enqueued in (delay_win * 2) is
3328 * greater than or equal to the delay qlen.
3329 * 2. If delay_start is enabled it will stay enabled for
3330 * another 10 idle windows. This is to take into account
3331 * variable RTT and burst traffic.
3332 * 3. If the time elapsed since last enqueue is more
3333 * than 200ms we disable delaying start callback. This is
3334 * to take idle time into account.
3335 */
3336 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3337 if (ifp->if_start_delay_swin > 0) {
3338 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3339 ifp->if_start_delay_cnt++;
3340 } else if ((now_nsec - ifp->if_start_delay_swin)
3341 >= (200 * 1000 * 1000)) {
3342 ifp->if_start_delay_swin = now_nsec;
3343 ifp->if_start_delay_cnt = 1;
3344 ifp->if_start_delay_idle = 0;
3345 if (ifp->if_eflags & IFEF_DELAY_START) {
3346 ifp->if_eflags &=
3347 ~(IFEF_DELAY_START);
3348 ifnet_delay_start_disabled++;
3349 }
3350 } else {
3351 if (ifp->if_start_delay_cnt >=
3352 ifp->if_start_delay_qlen) {
3353 ifp->if_eflags |= IFEF_DELAY_START;
3354 ifp->if_start_delay_idle = 0;
3355 } else {
3356 if (ifp->if_start_delay_idle >= 10) {
3357 ifp->if_eflags &= ~(IFEF_DELAY_START);
3358 ifnet_delay_start_disabled++;
3359 } else {
3360 ifp->if_start_delay_idle++;
3361 }
3362 }
3363 ifp->if_start_delay_swin = now_nsec;
3364 ifp->if_start_delay_cnt = 1;
3365 }
3366 } else {
3367 ifp->if_start_delay_swin = now_nsec;
3368 ifp->if_start_delay_cnt = 1;
3369 ifp->if_start_delay_idle = 0;
3370 ifp->if_eflags &= ~(IFEF_DELAY_START);
3371 }
3372 } else {
3373 ifp->if_eflags &= ~(IFEF_DELAY_START);
3374 }
3375
3376 switch (ptype) {
3377 case QP_MBUF:
3378 /* enqueue the packet (caller consumes object) */
3379 error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
3380 m = NULL;
3381 break;
3382
3383
3384 default:
3385 break;
3386 }
3387
3388 /*
3389 * Tell the driver to start dequeueing; do this even when the queue
3390 * for the packet is suspended (EQSUSPENDED), as the driver could still
3391 * be dequeueing from other unsuspended queues.
3392 */
3393 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3394 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
3395 ifnet_start(ifp);
3396
3397 return (error);
3398 }
3399
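/*
 * [Editorial sketch -- not part of xnu's dlil.c.]  The IFEF_ENQUEUE_MULTI
 * heuristic above, reduced to pseudocode (the very first enqueue merely
 * seeds the window).  "swin" is the start of the current delay window,
 * "cnt" the packets enqueued within it, "idle" the number of consecutive
 * windows below the threshold, and "dwin" twice the configured delay
 * timeout; set()/clear() stand in for the if_eflags updates.
 */
#if 0	/* illustration only; never compiled */
	if (now_nsec < swin + dwin) {
		cnt++;					/* still inside the window */
	} else if (now_nsec - swin >= 200 * 1000 * 1000) {
		/* idle for 200ms or more: start over and stop delaying */
		swin = now_nsec; cnt = 1; idle = 0;
		clear(IFEF_DELAY_START);
	} else {
		/* window elapsed: decide based on how much was enqueued */
		if (cnt >= delay_qlen) {
			set(IFEF_DELAY_START);
			idle = 0;
		} else if (idle >= 10) {
			clear(IFEF_DELAY_START);	/* too many quiet windows */
		} else {
			idle++;
		}
		swin = now_nsec;
		cnt = 1;
	}
#endif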
3400 errno_t
3401 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3402 {
3403 boolean_t pdrop;
3404 return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
3405 }
3406
3407 errno_t
3408 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3409 boolean_t *pdrop)
3410 {
3411 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3412 m->m_nextpkt != NULL) {
3413 if (m != NULL) {
3414 m_freem_list(m);
3415 *pdrop = TRUE;
3416 }
3417 return (EINVAL);
3418 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3419 !IF_FULLY_ATTACHED(ifp)) {
3420 /* flag tested without lock for performance */
3421 m_freem(m);
3422 *pdrop = TRUE;
3423 return (ENXIO);
3424 } else if (!(ifp->if_flags & IFF_UP)) {
3425 m_freem(m);
3426 *pdrop = TRUE;
3427 return (ENETDOWN);
3428 }
3429
3430 return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
3431 }
3432
3433
3434 errno_t
3435 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3436 {
3437 errno_t rc;
3438 classq_pkt_type_t ptype;
3439 if (ifp == NULL || mp == NULL)
3440 return (EINVAL);
3441 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3442 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3443 return (ENXIO);
3444 if (!ifnet_is_attached(ifp, 1))
3445 return (ENXIO);
3446
3447 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3448 (void **)mp, NULL, NULL, NULL, &ptype);
3449 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3450 ifnet_decr_iorefcnt(ifp);
3451
3452 return (rc);
3453 }
3454
3455 errno_t
3456 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3457 struct mbuf **mp)
3458 {
3459 errno_t rc;
3460 classq_pkt_type_t ptype;
3461 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3462 return (EINVAL);
3463 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3464 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3465 return (ENXIO);
3466 if (!ifnet_is_attached(ifp, 1))
3467 return (ENXIO);
3468
3469 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
3470 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
3471 NULL, &ptype);
3472 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3473 ifnet_decr_iorefcnt(ifp);
3474 return (rc);
3475 }
3476
3477 errno_t
3478 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3479 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3480 {
3481 errno_t rc;
3482 classq_pkt_type_t ptype;
3483 if (ifp == NULL || head == NULL || pkt_limit < 1)
3484 return (EINVAL);
3485 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3486 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3487 return (ENXIO);
3488 if (!ifnet_is_attached(ifp, 1))
3489 return (ENXIO);
3490
3491 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3492 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
3493 len, &ptype);
3494 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3495 ifnet_decr_iorefcnt(ifp);
3496 return (rc);
3497 }
3498
3499 errno_t
3500 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3501 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3502 {
3503 errno_t rc;
3504 classq_pkt_type_t ptype;
3505 if (ifp == NULL || head == NULL || byte_limit < 1)
3506 return (EINVAL);
3507 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3508 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3509 return (ENXIO);
3510 if (!ifnet_is_attached(ifp, 1))
3511 return (ENXIO);
3512
3513 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3514 byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
3515 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3516 ifnet_decr_iorefcnt(ifp);
3517 return (rc);
3518 }
3519
3520 errno_t
3521 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3522 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3523 u_int32_t *len)
3524 {
3525 errno_t rc;
3526 classq_pkt_type_t ptype;
3527 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3528 !MBUF_VALID_SC(sc))
3529 return (EINVAL);
3530 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3531 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3532 return (ENXIO);
3533 if (!ifnet_is_attached(ifp, 1))
3534 return (ENXIO);
3535
3536 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
3537 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
3538 (void **)tail, cnt, len, &ptype);
3539 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3540 ifnet_decr_iorefcnt(ifp);
3541 return (rc);
3542 }
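
/*
 * Illustrative sketch: batched dequeue under a byte budget, as a driver
 * filling a fixed-size DMA ring might do with the routines above.  The
 * helper name and the 64KB budget are hypothetical.
 *
 *	mbuf_t head = NULL, tail = NULL;
 *	u_int32_t cnt = 0, len = 0;
 *
 *	if (ifnet_dequeue_multi_bytes(ifp, 64 * 1024, &head, &tail,
 *	    &cnt, &len) == 0 && head != NULL) {
 *		// head..tail is a packet chain totalling at most ~64KB
 *		hypo_hw_transmit_chain(ifp, head, cnt, len);
 *	}
 */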
3543
3544 #if !CONFIG_EMBEDDED
3545 errno_t
3546 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3547 const struct sockaddr *dest, const char *dest_linkaddr,
3548 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3549 {
3550 if (pre != NULL)
3551 *pre = 0;
3552 if (post != NULL)
3553 *post = 0;
3554
3555 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3556 }
3557 #endif /* !CONFIG_EMBEDDED */
3558
3559 static int
3560 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3561 char **frame_header_p, protocol_family_t protocol_family)
3562 {
3563 struct ifnet_filter *filter;
3564
3565 /*
3566 * Pass the inbound packet to the interface filters
3567 */
3568 lck_mtx_lock_spin(&ifp->if_flt_lock);
3569 /* prevent filter list from changing in case we drop the lock */
3570 if_flt_monitor_busy(ifp);
3571 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3572 int result;
3573
3574 if (!filter->filt_skip && filter->filt_input != NULL &&
3575 (filter->filt_protocol == 0 ||
3576 filter->filt_protocol == protocol_family)) {
3577 lck_mtx_unlock(&ifp->if_flt_lock);
3578
3579 result = (*filter->filt_input)(filter->filt_cookie,
3580 ifp, protocol_family, m_p, frame_header_p);
3581
3582 lck_mtx_lock_spin(&ifp->if_flt_lock);
3583 if (result != 0) {
3584 /* we're done with the filter list */
3585 if_flt_monitor_unbusy(ifp);
3586 lck_mtx_unlock(&ifp->if_flt_lock);
3587 return (result);
3588 }
3589 }
3590 }
3591 /* we're done with the filter list */
3592 if_flt_monitor_unbusy(ifp);
3593 lck_mtx_unlock(&ifp->if_flt_lock);
3594
3595 /*
3596 * Strip away the M_PROTO1 bit prior to sending the packet up the stack,
3597 * as it is meant to be local to a subsystem (if_bridge uses M_PROTO1)
3598 */
3599 if (*m_p != NULL)
3600 (*m_p)->m_flags &= ~M_PROTO1;
3601
3602 return (0);
3603 }
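
/*
 * Illustrative sketch (assuming the kpi_interfacefilter.h KPI): how a kext
 * would register one of the interface filters walked above.  A filter's
 * input callback returns 0 to pass the packet along, EJUSTRETURN if it
 * consumed it, or another error to have the caller drop it.
 *
 *	static errno_t
 *	hypo_filt_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
 *	    mbuf_t *data, char **frame_ptr)
 *	{
 *		return (0);	// pass the packet through unchanged
 *	}
 *
 *	static struct iff_filter hypo_filter = {
 *		.iff_name	= "com.example.hypofilter",
 *		.iff_protocol	= 0,		// 0 == all protocols
 *		.iff_input	= hypo_filt_input,
 *	};
 *	static interface_filter_t hypo_filt_ref;
 *
 *	// errno_t err = iflt_attach(ifp, &hypo_filter, &hypo_filt_ref);
 */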
3604
3605 static int
3606 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3607 protocol_family_t protocol_family)
3608 {
3609 struct ifnet_filter *filter;
3610
3611 /*
3612 * Pass the outbound packet to the interface filters
3613 */
3614 lck_mtx_lock_spin(&ifp->if_flt_lock);
3615 /* prevent filter list from changing in case we drop the lock */
3616 if_flt_monitor_busy(ifp);
3617 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3618 int result;
3619
3620 if (!filter->filt_skip && filter->filt_output != NULL &&
3621 (filter->filt_protocol == 0 ||
3622 filter->filt_protocol == protocol_family)) {
3623 lck_mtx_unlock(&ifp->if_flt_lock);
3624
3625 result = filter->filt_output(filter->filt_cookie, ifp,
3626 protocol_family, m_p);
3627
3628 lck_mtx_lock_spin(&ifp->if_flt_lock);
3629 if (result != 0) {
3630 /* we're done with the filter list */
3631 if_flt_monitor_unbusy(ifp);
3632 lck_mtx_unlock(&ifp->if_flt_lock);
3633 return (result);
3634 }
3635 }
3636 }
3637 /* we're done with the filter list */
3638 if_flt_monitor_unbusy(ifp);
3639 lck_mtx_unlock(&ifp->if_flt_lock);
3640
3641 return (0);
3642 }
3643
3644 static void
3645 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3646 {
3647 int error;
3648
3649 if (ifproto->proto_kpi == kProtoKPI_v1) {
3650 /* Version 1 protocols get one packet at a time */
3651 while (m != NULL) {
3652 char * frame_header;
3653 mbuf_t next_packet;
3654
3655 next_packet = m->m_nextpkt;
3656 m->m_nextpkt = NULL;
3657 frame_header = m->m_pkthdr.pkt_hdr;
3658 m->m_pkthdr.pkt_hdr = NULL;
3659 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3660 ifproto->protocol_family, m, frame_header);
3661 if (error != 0 && error != EJUSTRETURN)
3662 m_freem(m);
3663 m = next_packet;
3664 }
3665 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3666 /* Version 2 protocols support packet lists */
3667 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3668 ifproto->protocol_family, m);
3669 if (error != 0 && error != EJUSTRETURN)
3670 m_freem_list(m);
3671 }
3672 }
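
/*
 * Illustrative sketch: the shape of a v2 protocol input handler, which
 * (unlike v1) receives an entire packet list and walks m_nextpkt itself.
 * Names are hypothetical; a real handler demuxes and hands off each packet.
 *
 *	static errno_t
 *	hypo_proto_input_v2(ifnet_t ifp, protocol_family_t proto, mbuf_t m_list)
 *	{
 *		mbuf_t m, next;
 *
 *		for (m = m_list; m != NULL; m = next) {
 *			next = m->m_nextpkt;
 *			m->m_nextpkt = NULL;
 *			hypo_handle_one_packet(ifp, m);
 *		}
 *		return (0);	// 0 or EJUSTRETURN: the list was consumed
 *	}
 */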
3673
3674 static void
3675 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3676 struct dlil_threading_info *inp, boolean_t poll)
3677 {
3678 struct ifnet_stat_increment_param *d = &inp->stats;
3679
3680 if (s->packets_in != 0)
3681 d->packets_in += s->packets_in;
3682 if (s->bytes_in != 0)
3683 d->bytes_in += s->bytes_in;
3684 if (s->errors_in != 0)
3685 d->errors_in += s->errors_in;
3686
3687 if (s->packets_out != 0)
3688 d->packets_out += s->packets_out;
3689 if (s->bytes_out != 0)
3690 d->bytes_out += s->bytes_out;
3691 if (s->errors_out != 0)
3692 d->errors_out += s->errors_out;
3693
3694 if (s->collisions != 0)
3695 d->collisions += s->collisions;
3696 if (s->dropped != 0)
3697 d->dropped += s->dropped;
3698
3699 if (poll)
3700 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3701 }
3702
3703 static void
3704 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3705 {
3706 struct ifnet_stat_increment_param *s = &inp->stats;
3707
3708 /*
3709 * Use of atomic operations is unavoidable here because
3710 * these stats may also be incremented elsewhere via KPIs.
3711 */
3712 if (s->packets_in != 0) {
3713 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3714 s->packets_in = 0;
3715 }
3716 if (s->bytes_in != 0) {
3717 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3718 s->bytes_in = 0;
3719 }
3720 if (s->errors_in != 0) {
3721 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3722 s->errors_in = 0;
3723 }
3724
3725 if (s->packets_out != 0) {
3726 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3727 s->packets_out = 0;
3728 }
3729 if (s->bytes_out != 0) {
3730 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3731 s->bytes_out = 0;
3732 }
3733 if (s->errors_out != 0) {
3734 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3735 s->errors_out = 0;
3736 }
3737
3738 if (s->collisions != 0) {
3739 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3740 s->collisions = 0;
3741 }
3742 if (s->dropped != 0) {
3743 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3744 s->dropped = 0;
3745 }
3746
3747 if (ifp->if_data_threshold != 0) {
3748 lck_mtx_convert_spin(&inp->input_lck);
3749 ifnet_notify_data_threshold(ifp);
3750 }
3751
3752 /*
3753 * No need for atomic operations as they are modified here
3754 * only from within the DLIL input thread context.
3755 */
3756 if (inp->tstats.packets != 0) {
3757 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3758 inp->tstats.packets = 0;
3759 }
3760 if (inp->tstats.bytes != 0) {
3761 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3762 inp->tstats.bytes = 0;
3763 }
3764 }
3765
3766 __private_extern__ void
3767 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3768 {
3769 return (dlil_input_packet_list_common(ifp, m, 0,
3770 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3771 }
3772
3773 __private_extern__ void
3774 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3775 u_int32_t cnt, ifnet_model_t mode)
3776 {
3777 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3778 }
3779
3780 static void
3781 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3782 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3783 {
3784 int error = 0;
3785 protocol_family_t protocol_family;
3786 mbuf_t next_packet;
3787 ifnet_t ifp = ifp_param;
3788 char * frame_header;
3789 struct if_proto * last_ifproto = NULL;
3790 mbuf_t pkt_first = NULL;
3791 mbuf_t * pkt_next = NULL;
3792 u_int32_t poll_thresh = 0, poll_ival = 0;
3793
3794 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3795
3796 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3797 (poll_ival = if_rxpoll_interval_pkts) > 0)
3798 poll_thresh = cnt;
3799
3800 while (m != NULL) {
3801 struct if_proto *ifproto = NULL;
3802 int iorefcnt = 0;
3803 uint32_t pktf_mask; /* pkt flags to preserve */
3804
3805 if (ifp_param == NULL)
3806 ifp = m->m_pkthdr.rcvif;
3807
3808 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3809 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3810 ifnet_poll(ifp);
3811
3812 /* Check if this mbuf looks valid */
3813 MBUF_INPUT_CHECK(m, ifp);
3814
3815 next_packet = m->m_nextpkt;
3816 m->m_nextpkt = NULL;
3817 frame_header = m->m_pkthdr.pkt_hdr;
3818 m->m_pkthdr.pkt_hdr = NULL;
3819
3820 /*
3821 * Get an IO reference count if the interface is not
3822 * loopback (lo0) and it is attached; lo0 never goes
3823 * away, so optimize for that.
3824 */
3825 if (ifp != lo_ifp) {
3826 if (!ifnet_is_attached(ifp, 1)) {
3827 m_freem(m);
3828 goto next;
3829 }
3830 iorefcnt = 1;
3831 /*
3832 * Preserve the time stamp if it was set.
3833 */
3834 pktf_mask = PKTF_TS_VALID;
3835 } else {
3836 /*
3837 * If this arrived on lo0, preserve interface addr
3838 * info to allow for connectivity between loopback
3839 * and local interface addresses.
3840 */
3841 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3842 }
3843
3844 /* make sure packet comes in clean */
3845 m_classifier_init(m, pktf_mask);
3846
3847 ifp_inc_traffic_class_in(ifp, m);
3848
3849 /* find which protocol family this packet is for */
3850 ifnet_lock_shared(ifp);
3851 error = (*ifp->if_demux)(ifp, m, frame_header,
3852 &protocol_family);
3853 ifnet_lock_done(ifp);
3854 if (error != 0) {
3855 if (error == EJUSTRETURN)
3856 goto next;
3857 protocol_family = 0;
3858 }
3859
3860 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3861 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3862 dlil_input_cksum_dbg(ifp, m, frame_header,
3863 protocol_family);
3864
3865 /*
3866 * For partial checksum offload, we expect the driver to
3867 * set the start offset indicating the start of the span
3868 * that is covered by the hardware-computed checksum;
3869 * adjust this start offset accordingly because the data
3870 * pointer has been advanced beyond the link-layer header.
3871 *
3872 * Don't adjust if the interface is a bridge member, as
3873 * the adjustment will occur from the context of the
3874 * bridge interface during input.
3875 */
3876 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3877 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3878 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3879 int adj;
3880
3881 if (frame_header == NULL ||
3882 frame_header < (char *)mbuf_datastart(m) ||
3883 frame_header > (char *)m->m_data ||
3884 (adj = (m->m_data - frame_header)) >
3885 m->m_pkthdr.csum_rx_start) {
3886 m->m_pkthdr.csum_data = 0;
3887 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3888 hwcksum_in_invalidated++;
3889 } else {
3890 m->m_pkthdr.csum_rx_start -= adj;
3891 }
3892 }
3893
3894 pktap_input(ifp, protocol_family, m, frame_header);
3895
3896 if (m->m_flags & (M_BCAST|M_MCAST))
3897 atomic_add_64(&ifp->if_imcasts, 1);
3898
3899 /* run interface filters, exclude VLAN packets PR-3586856 */
3900 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
3901 error = dlil_interface_filters_input(ifp, &m,
3902 &frame_header, protocol_family);
3903 if (error != 0) {
3904 if (error != EJUSTRETURN)
3905 m_freem(m);
3906 goto next;
3907 }
3908 }
3909 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
3910 m_freem(m);
3911 goto next;
3912 }
3913
3914 /* Lookup the protocol attachment to this interface */
3915 if (protocol_family == 0) {
3916 ifproto = NULL;
3917 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3918 (last_ifproto->protocol_family == protocol_family)) {
3919 VERIFY(ifproto == NULL);
3920 ifproto = last_ifproto;
3921 if_proto_ref(last_ifproto);
3922 } else {
3923 VERIFY(ifproto == NULL);
3924 ifnet_lock_shared(ifp);
3925 /* callee holds a proto refcnt upon success */
3926 ifproto = find_attached_proto(ifp, protocol_family);
3927 ifnet_lock_done(ifp);
3928 }
3929 if (ifproto == NULL) {
3930 /* no protocol for this packet, discard */
3931 m_freem(m);
3932 goto next;
3933 }
3934 if (ifproto != last_ifproto) {
3935 if (last_ifproto != NULL) {
3936 /* pass up the list for the previous protocol */
3937 dlil_ifproto_input(last_ifproto, pkt_first);
3938 pkt_first = NULL;
3939 if_proto_free(last_ifproto);
3940 }
3941 last_ifproto = ifproto;
3942 if_proto_ref(ifproto);
3943 }
3944 /* extend the list */
3945 m->m_pkthdr.pkt_hdr = frame_header;
3946 if (pkt_first == NULL) {
3947 pkt_first = m;
3948 } else {
3949 *pkt_next = m;
3950 }
3951 pkt_next = &m->m_nextpkt;
3952
3953 next:
3954 if (next_packet == NULL && last_ifproto != NULL) {
3955 /* pass up the last list of packets */
3956 dlil_ifproto_input(last_ifproto, pkt_first);
3957 if_proto_free(last_ifproto);
3958 last_ifproto = NULL;
3959 }
3960 if (ifproto != NULL) {
3961 if_proto_free(ifproto);
3962 ifproto = NULL;
3963 }
3964
3965 m = next_packet;
3966
3967 /* update the driver's multicast filter, if needed */
3968 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3969 ifp->if_updatemcasts = 0;
3970 if (iorefcnt == 1)
3971 ifnet_decr_iorefcnt(ifp);
3972 }
3973
3974 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
3975 }
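
/*
 * Illustrative sketch: how a driver typically feeds received frames into
 * the path above, via the ifnet_input() KPI.  pkt_hdr must point at the
 * frame header and rcvif at the receiving interface; the stats parameter
 * may be NULL to let the stack do the accounting.  A minimal outline:
 *
 *	struct ifnet_stat_increment_param s;
 *
 *	bzero(&s, sizeof (s));
 *	s.packets_in = 1;
 *	s.bytes_in = mbuf_pkthdr_len(m);
 *	mbuf_pkthdr_setrcvif(m, ifp);
 *	mbuf_pkthdr_setheader(m, frame_header);
 *	// errno_t err = ifnet_input(ifp, m, &s);
 */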
3976
3977 errno_t
3978 if_mcasts_update(struct ifnet *ifp)
3979 {
3980 errno_t err;
3981
3982 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
3983 if (err == EAFNOSUPPORT)
3984 err = 0;
3985 printf("%s: %s %d suspended link-layer multicast membership(s) "
3986 "(err=%d)\n", if_name(ifp),
3987 (err == 0 ? "successfully restored" : "failed to restore"),
3988 ifp->if_updatemcasts, err);
3989
3990 /* just return success */
3991 return (0);
3992 }
3993
3994 /* If ifp is set, we will increment the generation for the interface */
3995 int
3996 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
3997 {
3998 if (ifp != NULL) {
3999 ifnet_increment_generation(ifp);
4000 }
4001
4002 #if NECP
4003 necp_update_all_clients();
4004 #endif /* NECP */
4005
4006 return (kev_post_msg(event));
4007 }
4008
4009 #define TMP_IF_PROTO_ARR_SIZE 10
4010 static int
4011 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4012 {
4013 struct ifnet_filter *filter = NULL;
4014 struct if_proto *proto = NULL;
4015 int if_proto_count = 0;
4016 struct if_proto **tmp_ifproto_arr = NULL;
4017 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4018 int tmp_ifproto_arr_idx = 0;
4019 bool tmp_malloc = false;
4020
4021 /*
4022 * Pass the event to the interface filters
4023 */
4024 lck_mtx_lock_spin(&ifp->if_flt_lock);
4025 /* prevent filter list from changing in case we drop the lock */
4026 if_flt_monitor_busy(ifp);
4027 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4028 if (filter->filt_event != NULL) {
4029 lck_mtx_unlock(&ifp->if_flt_lock);
4030
4031 filter->filt_event(filter->filt_cookie, ifp,
4032 filter->filt_protocol, event);
4033
4034 lck_mtx_lock_spin(&ifp->if_flt_lock);
4035 }
4036 }
4037 /* we're done with the filter list */
4038 if_flt_monitor_unbusy(ifp);
4039 lck_mtx_unlock(&ifp->if_flt_lock);
4040
4041 /* Get an io ref count if the interface is attached */
4042 if (!ifnet_is_attached(ifp, 1))
4043 goto done;
4044
4045 /*
4046 * An embedded tmp_list_entry in if_proto may still get
4047 * overwritten by another thread once the ifnet lock is dropped;
4048 * therefore we avoid embedded pointers here.
4049 */
4050 ifnet_lock_shared(ifp);
4051 if_proto_count = dlil_ifp_proto_count(ifp);
4052 if (if_proto_count) {
4053 int i;
4054 VERIFY(ifp->if_proto_hash != NULL);
4055 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4056 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4057 } else {
4058 MALLOC(tmp_ifproto_arr, struct if_proto **,
4059 sizeof (*tmp_ifproto_arr) * if_proto_count,
4060 M_TEMP, M_ZERO);
4061 if (tmp_ifproto_arr == NULL) {
4062 ifnet_lock_done(ifp);
4063 goto cleanup;
4064 }
4065 tmp_malloc = true;
4066 }
4067
4068 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4069 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4070 next_hash) {
4071 if_proto_ref(proto);
4072 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4073 tmp_ifproto_arr_idx++;
4074 }
4075 }
4076 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4077 }
4078 ifnet_lock_done(ifp);
4079
4080 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4081 tmp_ifproto_arr_idx++) {
4082 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4083 VERIFY(proto != NULL);
4084 proto_media_event eventp =
4085 (proto->proto_kpi == kProtoKPI_v1 ?
4086 proto->kpi.v1.event :
4087 proto->kpi.v2.event);
4088
4089 if (eventp != NULL) {
4090 eventp(ifp, proto->protocol_family,
4091 event);
4092 }
4093 if_proto_free(proto);
4094 }
4095
4096 cleanup:
4097 if (tmp_malloc) {
4098 FREE(tmp_ifproto_arr, M_TEMP);
4099 }
4100
4101 /* Pass the event to the interface */
4102 if (ifp->if_event != NULL)
4103 ifp->if_event(ifp, event);
4104
4105 /* Release the io ref count */
4106 ifnet_decr_iorefcnt(ifp);
4107 done:
4108 return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
4109 }
4110
4111 errno_t
4112 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4113 {
4114 struct kev_msg kev_msg;
4115 int result = 0;
4116
4117 if (ifp == NULL || event == NULL)
4118 return (EINVAL);
4119
4120 bzero(&kev_msg, sizeof (kev_msg));
4121 kev_msg.vendor_code = event->vendor_code;
4122 kev_msg.kev_class = event->kev_class;
4123 kev_msg.kev_subclass = event->kev_subclass;
4124 kev_msg.event_code = event->event_code;
4125 kev_msg.dv[0].data_ptr = &event->event_data[0];
4126 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4127 kev_msg.dv[1].data_length = 0;
4128
4129 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4130
4131 return (result);
4132 }
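
/*
 * Illustrative sketch (hedged; field usage assumed from kern_event.h): a
 * driver posting a link-status change through ifnet_event() above.  The
 * routine copies the header fields into a kev_msg and fans the event out
 * to filters, attached protocols and kernel-event listeners.
 *
 *	struct {
 *		struct kern_event_msg	hdr;
 *		struct net_event_data	data;
 *	} ev;
 *
 *	bzero(&ev, sizeof (ev));
 *	ev.hdr.total_size   = sizeof (ev);
 *	ev.hdr.vendor_code  = KEV_VENDOR_APPLE;
 *	ev.hdr.kev_class    = KEV_NETWORK_CLASS;
 *	ev.hdr.kev_subclass = KEV_DL_SUBCLASS;
 *	ev.hdr.event_code   = KEV_DL_LINK_ON;
 *	// errno_t err = ifnet_event(ifp, &ev.hdr);
 */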
4133
4134 #if CONFIG_MACF_NET
4135 #include <netinet/ip6.h>
4136 #include <netinet/ip.h>
4137 static int
4138 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4139 {
4140 struct mbuf *m;
4141 struct ip *ip;
4142 struct ip6_hdr *ip6;
4143 int type = SOCK_RAW;
4144
4145 if (!raw) {
4146 switch (family) {
4147 case PF_INET:
4148 m = m_pullup(*mp, sizeof(struct ip));
4149 if (m == NULL)
4150 break;
4151 *mp = m;
4152 ip = mtod(m, struct ip *);
4153 if (ip->ip_p == IPPROTO_TCP)
4154 type = SOCK_STREAM;
4155 else if (ip->ip_p == IPPROTO_UDP)
4156 type = SOCK_DGRAM;
4157 break;
4158 case PF_INET6:
4159 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4160 if (m == NULL)
4161 break;
4162 *mp = m;
4163 ip6 = mtod(m, struct ip6_hdr *);
4164 if (ip6->ip6_nxt == IPPROTO_TCP)
4165 type = SOCK_STREAM;
4166 else if (ip6->ip6_nxt == IPPROTO_UDP)
4167 type = SOCK_DGRAM;
4168 break;
4169 }
4170 }
4171
4172 return (type);
4173 }
4174 #endif
4175
4176 static void
4177 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4178 {
4179 mbuf_t n = m;
4180 int chainlen = 0;
4181
4182 while (n != NULL) {
4183 chainlen++;
4184 n = n->m_next;
4185 }
4186 switch (chainlen) {
4187 case 0:
4188 break;
4189 case 1:
4190 atomic_add_64(&cls->cls_one, 1);
4191 break;
4192 case 2:
4193 atomic_add_64(&cls->cls_two, 1);
4194 break;
4195 case 3:
4196 atomic_add_64(&cls->cls_three, 1);
4197 break;
4198 case 4:
4199 atomic_add_64(&cls->cls_four, 1);
4200 break;
4201 case 5:
4202 default:
4203 atomic_add_64(&cls->cls_five_or_more, 1);
4204 break;
4205 }
4206 }
4207
4208 /*
4209 * dlil_output
4210 *
4211 * Caller should have a lock on the protocol domain if the protocol
4212 * doesn't support finer grained locking. In most cases, the lock
4213 * will be held from the socket layer and won't be released until
4214 * we return back to the socket layer.
4215 *
4216 * This does mean that we must take a protocol lock before we take
4217 * an interface lock if we're going to take both. This makes sense
4218 * because a protocol is likely to interact with an ifp while it
4219 * is under the protocol lock.
4220 *
4221 * An advisory code will be returned if adv is not null. This
4222 * can be used to provide feedback about interface queues to the
4223 * application.
4224 */
4225 errno_t
4226 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4227 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4228 {
4229 char *frame_type = NULL;
4230 char *dst_linkaddr = NULL;
4231 int retval = 0;
4232 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4233 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4234 struct if_proto *proto = NULL;
4235 mbuf_t m;
4236 mbuf_t send_head = NULL;
4237 mbuf_t *send_tail = &send_head;
4238 int iorefcnt = 0;
4239 u_int32_t pre = 0, post = 0;
4240 u_int32_t fpkts = 0, fbytes = 0;
4241 int32_t flen = 0;
4242 struct timespec now;
4243 u_int64_t now_nsec;
4244
4245 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4246
4247 /*
4248 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4249 * from happening while this operation is in progress
4250 */
4251 if (!ifnet_is_attached(ifp, 1)) {
4252 retval = ENXIO;
4253 goto cleanup;
4254 }
4255 iorefcnt = 1;
4256
4257 VERIFY(ifp->if_output_dlil != NULL);
4258
4259 /* update the driver's multicast filter, if needed */
4260 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4261 ifp->if_updatemcasts = 0;
4262
4263 frame_type = frame_type_buffer;
4264 dst_linkaddr = dst_linkaddr_buffer;
4265
4266 if (raw == 0) {
4267 ifnet_lock_shared(ifp);
4268 /* callee holds a proto refcnt upon success */
4269 proto = find_attached_proto(ifp, proto_family);
4270 if (proto == NULL) {
4271 ifnet_lock_done(ifp);
4272 retval = ENXIO;
4273 goto cleanup;
4274 }
4275 ifnet_lock_done(ifp);
4276 }
4277
4278 preout_again:
4279 if (packetlist == NULL)
4280 goto cleanup;
4281
4282 m = packetlist;
4283 packetlist = packetlist->m_nextpkt;
4284 m->m_nextpkt = NULL;
4285
4286 if (raw == 0) {
4287 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4288 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4289 retval = 0;
4290 if (preoutp != NULL) {
4291 retval = preoutp(ifp, proto_family, &m, dest, route,
4292 frame_type, dst_linkaddr);
4293
4294 if (retval != 0) {
4295 if (retval == EJUSTRETURN)
4296 goto preout_again;
4297 m_freem(m);
4298 goto cleanup;
4299 }
4300 }
4301 }
4302
4303 #if CONFIG_MACF_NET
4304 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4305 dlil_get_socket_type(&m, proto_family, raw));
4306 if (retval != 0) {
4307 m_freem(m);
4308 goto cleanup;
4309 }
4310 #endif
4311
4312 do {
4313 #if CONFIG_DTRACE
4314 if (!raw && proto_family == PF_INET) {
4315 struct ip *ip = mtod(m, struct ip *);
4316 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4317 struct ip *, ip, struct ifnet *, ifp,
4318 struct ip *, ip, struct ip6_hdr *, NULL);
4319
4320 } else if (!raw && proto_family == PF_INET6) {
4321 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4322 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4323 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4324 struct ip *, NULL, struct ip6_hdr *, ip6);
4325 }
4326 #endif /* CONFIG_DTRACE */
4327
4328 if (raw == 0 && ifp->if_framer != NULL) {
4329 int rcvif_set = 0;
4330
4331 /*
4332 * If this is a broadcast packet that needs to be
4333 * looped back into the system, set the inbound ifp
4334 * to that of the outbound ifp. This will allow
4335 * us to determine that it is a legitimate packet
4336 * for the system. Only set the ifp if it's not
4337 * already set, just to be safe.
4338 */
4339 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4340 m->m_pkthdr.rcvif == NULL) {
4341 m->m_pkthdr.rcvif = ifp;
4342 rcvif_set = 1;
4343 }
4344
4345 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4346 frame_type, &pre, &post);
4347 if (retval != 0) {
4348 if (retval != EJUSTRETURN)
4349 m_freem(m);
4350 goto next;
4351 }
4352
4353 /*
4354 * For partial checksum offload, adjust the start
4355 * and stuff offsets based on the prepended header.
4356 */
4357 if ((m->m_pkthdr.csum_flags &
4358 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4359 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4360 m->m_pkthdr.csum_tx_stuff += pre;
4361 m->m_pkthdr.csum_tx_start += pre;
4362 }
4363
4364 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4365 dlil_output_cksum_dbg(ifp, m, pre,
4366 proto_family);
4367
4368 /*
4369 * Clear the ifp if it was set above, and to be
4370 * safe, only if it is still the same as the
4371 * outbound ifp we have in context. If it was
4372 * looped back, then a copy of it was sent to the
4373 * loopback interface with the rcvif set, and we
4374 * are clearing the one that will go down to the
4375 * layer below.
4376 */
4377 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4378 m->m_pkthdr.rcvif = NULL;
4379 }
4380
4381 /*
4382 * Let interface filters (if any) do their thing ...
4383 */
4384 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4385 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4386 retval = dlil_interface_filters_output(ifp,
4387 &m, proto_family);
4388 if (retval != 0) {
4389 if (retval != EJUSTRETURN)
4390 m_freem(m);
4391 goto next;
4392 }
4393 }
4394 /*
4395 * Strip away the M_PROTO1 bit prior to sending the packet
4396 * to the driver, as this flag may be used by the driver itself
4397 */
4398 m->m_flags &= ~M_PROTO1;
4399
4400 /*
4401 * If the underlying interface is not capable of handling a
4402 * packet whose data portion spans across physically disjoint
4403 * pages, we need to "normalize" the packet so that we pass
4404 * down a chain of mbufs where each mbuf points to a span that
4405 * resides within a single system page. If the packet does
4406 * not cross a page boundary, the following is a no-op.
4407 */
4408 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4409 if ((m = m_normalize(m)) == NULL)
4410 goto next;
4411 }
4412
4413 /*
4414 * If this is a TSO packet, make sure the interface still
4415 * advertises TSO capability.
4416 */
4417 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4418 retval = EMSGSIZE;
4419 m_freem(m);
4420 goto cleanup;
4421 }
4422
4423 ifp_inc_traffic_class_out(ifp, m);
4424 pktap_output(ifp, proto_family, m, pre, post);
4425
4426 /*
4427 * Count the number of elements in the mbuf chain
4428 */
4429 if (tx_chain_len_count) {
4430 dlil_count_chain_len(m, &tx_chain_len_stats);
4431 }
4432
4433 /*
4434 * Record timestamp; ifnet_enqueue() will use this info
4435 * rather than redoing the work. An optimization could
4436 * involve doing this just once at the top, if there are
4437 * no interface filters attached, but that's probably
4438 * not a big deal.
4439 */
4440 nanouptime(&now);
4441 net_timernsec(&now, &now_nsec);
4442 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
4443
4444 /*
4445 * Discard partial sum information if this packet originated
4446 * from another interface; the packet would already have the
4447 * final checksum and we shouldn't recompute it.
4448 */
4449 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
4450 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
4451 (CSUM_DATA_VALID|CSUM_PARTIAL)) {
4452 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4453 m->m_pkthdr.csum_data = 0;
4454 }
4455
4456 /*
4457 * Finally, call the driver.
4458 */
4459 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4460 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4461 flen += (m_pktlen(m) - (pre + post));
4462 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4463 }
4464 *send_tail = m;
4465 send_tail = &m->m_nextpkt;
4466 } else {
4467 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4468 flen = (m_pktlen(m) - (pre + post));
4469 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4470 } else {
4471 flen = 0;
4472 }
4473 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4474 0, 0, 0, 0, 0);
4475 retval = (*ifp->if_output_dlil)(ifp, m);
4476 if (retval == EQFULL || retval == EQSUSPENDED) {
4477 if (adv != NULL && adv->code == FADV_SUCCESS) {
4478 adv->code = (retval == EQFULL ?
4479 FADV_FLOW_CONTROLLED :
4480 FADV_SUSPENDED);
4481 }
4482 retval = 0;
4483 }
4484 if (retval == 0 && flen > 0) {
4485 fbytes += flen;
4486 fpkts++;
4487 }
4488 if (retval != 0 && dlil_verbose) {
4489 printf("%s: output error on %s retval = %d\n",
4490 __func__, if_name(ifp),
4491 retval);
4492 }
4493 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4494 0, 0, 0, 0, 0);
4495 }
4496 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4497
4498 next:
4499 m = packetlist;
4500 if (m != NULL) {
4501 packetlist = packetlist->m_nextpkt;
4502 m->m_nextpkt = NULL;
4503 }
4504 } while (m != NULL);
4505
4506 if (send_head != NULL) {
4507 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4508 0, 0, 0, 0, 0);
4509 if (ifp->if_eflags & IFEF_SENDLIST) {
4510 retval = (*ifp->if_output_dlil)(ifp, send_head);
4511 if (retval == EQFULL || retval == EQSUSPENDED) {
4512 if (adv != NULL) {
4513 adv->code = (retval == EQFULL ?
4514 FADV_FLOW_CONTROLLED :
4515 FADV_SUSPENDED);
4516 }
4517 retval = 0;
4518 }
4519 if (retval == 0 && flen > 0) {
4520 fbytes += flen;
4521 fpkts++;
4522 }
4523 if (retval != 0 && dlil_verbose) {
4524 printf("%s: output error on %s retval = %d\n",
4525 __func__, if_name(ifp), retval);
4526 }
4527 } else {
4528 struct mbuf *send_m;
4529 int enq_cnt = 0;
4530 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4531 while (send_head != NULL) {
4532 send_m = send_head;
4533 send_head = send_m->m_nextpkt;
4534 send_m->m_nextpkt = NULL;
4535 retval = (*ifp->if_output_dlil)(ifp, send_m);
4536 if (retval == EQFULL || retval == EQSUSPENDED) {
4537 if (adv != NULL) {
4538 adv->code = (retval == EQFULL ?
4539 FADV_FLOW_CONTROLLED :
4540 FADV_SUSPENDED);
4541 }
4542 retval = 0;
4543 }
4544 if (retval == 0) {
4545 enq_cnt++;
4546 if (flen > 0)
4547 fpkts++;
4548 }
4549 if (retval != 0 && dlil_verbose) {
4550 printf("%s: output error on %s "
4551 "retval = %d\n",
4552 __func__, if_name(ifp), retval);
4553 }
4554 }
4555 if (enq_cnt > 0) {
4556 fbytes += flen;
4557 ifnet_start(ifp);
4558 }
4559 }
4560 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4561 }
4562
4563 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4564
4565 cleanup:
4566 if (fbytes > 0)
4567 ifp->if_fbytes += fbytes;
4568 if (fpkts > 0)
4569 ifp->if_fpackets += fpkts;
4570 if (proto != NULL)
4571 if_proto_free(proto);
4572 if (packetlist) /* if any packets are left, clean up */
4573 mbuf_freem_list(packetlist);
4574 if (retval == EJUSTRETURN)
4575 retval = 0;
4576 if (iorefcnt == 1)
4577 ifnet_decr_iorefcnt(ifp);
4578
4579 return (retval);
4580 }
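
/*
 * Illustrative sketch: how a protocol-layer caller of dlil_output() might
 * consume the advisory code filled in above; the exact reaction to a
 * flow-controlled advisory is protocol-specific, so this is only an outline
 * and the route/destination variables are assumed from the caller's context.
 *
 *	struct flowadv adv = { .code = FADV_SUCCESS };
 *
 *	error = dlil_output(ifp, PF_INET, packetlist, ro->ro_rt,
 *	    (const struct sockaddr *)dst, 0, &adv);
 *	if (error == 0 && adv.code != FADV_SUCCESS) {
 *		// FADV_FLOW_CONTROLLED / FADV_SUSPENDED: stop feeding this
 *		// flow until a flow-advisory resume arrives
 *	}
 */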
4581
4582 errno_t
4583 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4584 void *ioctl_arg)
4585 {
4586 struct ifnet_filter *filter;
4587 int retval = EOPNOTSUPP;
4588 int result = 0;
4589
4590 if (ifp == NULL || ioctl_code == 0)
4591 return (EINVAL);
4592
4593 /* Get an io ref count if the interface is attached */
4594 if (!ifnet_is_attached(ifp, 1))
4595 return (EOPNOTSUPP);
4596
4597 /*
4598 * Run the interface filters first.
4599 * We want to run all filters before calling the protocol,
4600 * interface family, or interface.
4601 */
4602 lck_mtx_lock_spin(&ifp->if_flt_lock);
4603 /* prevent filter list from changing in case we drop the lock */
4604 if_flt_monitor_busy(ifp);
4605 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4606 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4607 filter->filt_protocol == proto_fam)) {
4608 lck_mtx_unlock(&ifp->if_flt_lock);
4609
4610 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4611 proto_fam, ioctl_code, ioctl_arg);
4612
4613 lck_mtx_lock_spin(&ifp->if_flt_lock);
4614
4615 /* Only update retval if no one has handled the ioctl */
4616 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4617 if (result == ENOTSUP)
4618 result = EOPNOTSUPP;
4619 retval = result;
4620 if (retval != 0 && retval != EOPNOTSUPP) {
4621 /* we're done with the filter list */
4622 if_flt_monitor_unbusy(ifp);
4623 lck_mtx_unlock(&ifp->if_flt_lock);
4624 goto cleanup;
4625 }
4626 }
4627 }
4628 }
4629 /* we're done with the filter list */
4630 if_flt_monitor_unbusy(ifp);
4631 lck_mtx_unlock(&ifp->if_flt_lock);
4632
4633 /* Allow the protocol to handle the ioctl */
4634 if (proto_fam != 0) {
4635 struct if_proto *proto;
4636
4637 /* callee holds a proto refcnt upon success */
4638 ifnet_lock_shared(ifp);
4639 proto = find_attached_proto(ifp, proto_fam);
4640 ifnet_lock_done(ifp);
4641 if (proto != NULL) {
4642 proto_media_ioctl ioctlp =
4643 (proto->proto_kpi == kProtoKPI_v1 ?
4644 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
4645 result = EOPNOTSUPP;
4646 if (ioctlp != NULL)
4647 result = ioctlp(ifp, proto_fam, ioctl_code,
4648 ioctl_arg);
4649 if_proto_free(proto);
4650
4651 /* Only update retval if no one has handled the ioctl */
4652 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4653 if (result == ENOTSUP)
4654 result = EOPNOTSUPP;
4655 retval = result;
4656 if (retval && retval != EOPNOTSUPP)
4657 goto cleanup;
4658 }
4659 }
4660 }
4661
4662 /* retval is either 0 or EOPNOTSUPP */
4663
4664 /*
4665 * Let the interface handle this ioctl.
4666 * If it returns EOPNOTSUPP, ignore that, we may have
4667 * already handled this in the protocol or family.
4668 */
4669 if (ifp->if_ioctl)
4670 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
4671
4672 /* Only update retval if no one has handled the ioctl */
4673 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4674 if (result == ENOTSUP)
4675 result = EOPNOTSUPP;
4676 retval = result;
4677 if (retval && retval != EOPNOTSUPP) {
4678 goto cleanup;
4679 }
4680 }
4681
4682 cleanup:
4683 if (retval == EJUSTRETURN)
4684 retval = 0;
4685
4686 ifnet_decr_iorefcnt(ifp);
4687
4688 return (retval);
4689 }
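
/*
 * The precedence above (filters first, then the protocol, then the
 * interface itself) means a caller issues the ioctl once and the first
 * handler that does not return EOPNOTSUPP supplies the result, while
 * EJUSTRETURN short-circuits further processing.  if_mcasts_update()
 * earlier in this file is one in-kernel caller:
 *
 *	errno_t err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
 *	if (err == EAFNOSUPPORT)
 *		err = 0;	// "no multicast support" is not a failure
 */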
4690
4691 __private_extern__ errno_t
4692 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4693 {
4694 errno_t error = 0;
4695
4696
4697 if (ifp->if_set_bpf_tap) {
4698 /* Get an io reference on the interface if it is attached */
4699 if (!ifnet_is_attached(ifp, 1))
4700 return (ENXIO);
4701 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4702 ifnet_decr_iorefcnt(ifp);
4703 }
4704 return (error);
4705 }
4706
4707 errno_t
4708 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4709 struct sockaddr *ll_addr, size_t ll_len)
4710 {
4711 errno_t result = EOPNOTSUPP;
4712 struct if_proto *proto;
4713 const struct sockaddr *verify;
4714 proto_media_resolve_multi resolvep;
4715
4716 if (!ifnet_is_attached(ifp, 1))
4717 return (result);
4718
4719 bzero(ll_addr, ll_len);
4720
4721 /* Call the protocol first; callee holds a proto refcnt upon success */
4722 ifnet_lock_shared(ifp);
4723 proto = find_attached_proto(ifp, proto_addr->sa_family);
4724 ifnet_lock_done(ifp);
4725 if (proto != NULL) {
4726 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4727 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
4728 if (resolvep != NULL)
4729 result = resolvep(ifp, proto_addr,
4730 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
4731 if_proto_free(proto);
4732 }
4733
4734 /* Let the interface verify the multicast address */
4735 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4736 if (result == 0)
4737 verify = ll_addr;
4738 else
4739 verify = proto_addr;
4740 result = ifp->if_check_multi(ifp, verify);
4741 }
4742
4743 ifnet_decr_iorefcnt(ifp);
4744 return (result);
4745 }
4746
4747 __private_extern__ errno_t
4748 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4749 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4750 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4751 {
4752 struct if_proto *proto;
4753 errno_t result = 0;
4754
4755 /* callee holds a proto refcnt upon success */
4756 ifnet_lock_shared(ifp);
4757 proto = find_attached_proto(ifp, target_proto->sa_family);
4758 ifnet_lock_done(ifp);
4759 if (proto == NULL) {
4760 result = ENOTSUP;
4761 } else {
4762 proto_media_send_arp arpp;
4763 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4764 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4765 if (arpp == NULL) {
4766 result = ENOTSUP;
4767 } else {
4768 switch (arpop) {
4769 case ARPOP_REQUEST:
4770 arpstat.txrequests++;
4771 if (target_hw != NULL)
4772 arpstat.txurequests++;
4773 break;
4774 case ARPOP_REPLY:
4775 arpstat.txreplies++;
4776 break;
4777 }
4778 result = arpp(ifp, arpop, sender_hw, sender_proto,
4779 target_hw, target_proto);
4780 }
4781 if_proto_free(proto);
4782 }
4783
4784 return (result);
4785 }
4786
4787 struct net_thread_marks { };
4788 static const struct net_thread_marks net_thread_marks_base = { };
4789
4790 __private_extern__ const net_thread_marks_t net_thread_marks_none =
4791 &net_thread_marks_base;
4792
4793 __private_extern__ net_thread_marks_t
4794 net_thread_marks_push(u_int32_t push)
4795 {
4796 static const char *const base = (const void*)&net_thread_marks_base;
4797 u_int32_t pop = 0;
4798
4799 if (push != 0) {
4800 struct uthread *uth = get_bsdthread_info(current_thread());
4801
4802 pop = push & ~uth->uu_network_marks;
4803 if (pop != 0)
4804 uth->uu_network_marks |= pop;
4805 }
4806
4807 return ((net_thread_marks_t)&base[pop]);
4808 }
4809
4810 __private_extern__ net_thread_marks_t
4811 net_thread_unmarks_push(u_int32_t unpush)
4812 {
4813 static const char *const base = (const void*)&net_thread_marks_base;
4814 u_int32_t unpop = 0;
4815
4816 if (unpush != 0) {
4817 struct uthread *uth = get_bsdthread_info(current_thread());
4818
4819 unpop = unpush & uth->uu_network_marks;
4820 if (unpop != 0)
4821 uth->uu_network_marks &= ~unpop;
4822 }
4823
4824 return ((net_thread_marks_t)&base[unpop]);
4825 }
4826
4827 __private_extern__ void
4828 net_thread_marks_pop(net_thread_marks_t popx)
4829 {
4830 static const char *const base = (const void*)&net_thread_marks_base;
4831 const ptrdiff_t pop = (const char *)popx - (const char *)base;
4832
4833 if (pop != 0) {
4834 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4835 struct uthread *uth = get_bsdthread_info(current_thread());
4836
4837 VERIFY((pop & ones) == pop);
4838 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4839 uth->uu_network_marks &= ~pop;
4840 }
4841 }
4842
4843 __private_extern__ void
4844 net_thread_unmarks_pop(net_thread_marks_t unpopx)
4845 {
4846 static const char *const base = (const void*)&net_thread_marks_base;
4847 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
4848
4849 if (unpop != 0) {
4850 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4851 struct uthread *uth = get_bsdthread_info(current_thread());
4852
4853 VERIFY((unpop & ones) == unpop);
4854 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4855 uth->uu_network_marks |= unpop;
4856 }
4857 }
4858
4859 __private_extern__ u_int32_t
4860 net_thread_is_marked(u_int32_t check)
4861 {
4862 if (check != 0) {
4863 struct uthread *uth = get_bsdthread_info(current_thread());
4864 return (uth->uu_network_marks & check);
4865 }
4866 else
4867 return (0);
4868 }
4869
4870 __private_extern__ u_int32_t
4871 net_thread_is_unmarked(u_int32_t check)
4872 {
4873 if (check != 0) {
4874 struct uthread *uth = get_bsdthread_info(current_thread());
4875 return (~uth->uu_network_marks & check);
4876 }
4877 else
4878 return (0);
4879 }
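
/*
 * Illustrative sketch of the mark/unmark pattern implemented above: a
 * caller marks the current thread around a section that must not be
 * re-entered, and restores the previous state on the way out.  The mark
 * bit name below is hypothetical.
 *
 *	net_thread_marks_t marks;
 *
 *	marks = net_thread_marks_push(HYPO_NET_THREAD_MARK);
 *	// ... nested code can test net_thread_is_marked(HYPO_NET_THREAD_MARK)
 *	//     to detect that it is running inside the marked section ...
 *	net_thread_marks_pop(marks);
 */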
4880
4881 static __inline__ int
4882 _is_announcement(const struct sockaddr_in * sender_sin,
4883 const struct sockaddr_in * target_sin)
4884 {
4885 if (sender_sin == NULL) {
4886 return (FALSE);
4887 }
4888 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4889 }
4890
4891 __private_extern__ errno_t
4892 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
4893 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
4894 const struct sockaddr *target_proto0, u_int32_t rtflags)
4895 {
4896 errno_t result = 0;
4897 const struct sockaddr_in * sender_sin;
4898 const struct sockaddr_in * target_sin;
4899 struct sockaddr_inarp target_proto_sinarp;
4900 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
4901
4902 if (target_proto == NULL || (sender_proto != NULL &&
4903 sender_proto->sa_family != target_proto->sa_family))
4904 return (EINVAL);
4905
4906 /*
4907 * If the target is a (default) router, provide that
4908 * information to the send_arp callback routine.
4909 */
4910 if (rtflags & RTF_ROUTER) {
4911 bcopy(target_proto, &target_proto_sinarp,
4912 sizeof (struct sockaddr_in));
4913 target_proto_sinarp.sin_other |= SIN_ROUTER;
4914 target_proto = (struct sockaddr *)&target_proto_sinarp;
4915 }
4916
4917 /*
4918 * If this is an ARP request and the target IP is IPv4LL,
4919 * send the request on all interfaces. The exception is
4920 * an announcement, which must only appear on the specific
4921 * interface.
4922 */
4923 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4924 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
4925 if (target_proto->sa_family == AF_INET &&
4926 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4927 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4928 !_is_announcement(target_sin, sender_sin)) {
4929 ifnet_t *ifp_list;
4930 u_int32_t count;
4931 u_int32_t ifp_on;
4932
4933 result = ENOTSUP;
4934
4935 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4936 for (ifp_on = 0; ifp_on < count; ifp_on++) {
4937 errno_t new_result;
4938 ifaddr_t source_hw = NULL;
4939 ifaddr_t source_ip = NULL;
4940 struct sockaddr_in source_ip_copy;
4941 struct ifnet *cur_ifp = ifp_list[ifp_on];
4942
4943 /*
4944 * Only arp on interfaces marked for IPv4LL
4945 * ARPing. This may mean that we don't ARP on
4946 * the interface the subnet route points to.
4947 */
4948 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
4949 continue;
4950
4951 /* Find the source IP address */
4952 ifnet_lock_shared(cur_ifp);
4953 source_hw = cur_ifp->if_lladdr;
4954 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
4955 ifa_link) {
4956 IFA_LOCK(source_ip);
4957 if (source_ip->ifa_addr != NULL &&
4958 source_ip->ifa_addr->sa_family ==
4959 AF_INET) {
4960 /* Copy the source IP address */
4961 source_ip_copy =
4962 *(struct sockaddr_in *)
4963 (void *)source_ip->ifa_addr;
4964 IFA_UNLOCK(source_ip);
4965 break;
4966 }
4967 IFA_UNLOCK(source_ip);
4968 }
4969
4970 /* No IP Source, don't arp */
4971 if (source_ip == NULL) {
4972 ifnet_lock_done(cur_ifp);
4973 continue;
4974 }
4975
4976 IFA_ADDREF(source_hw);
4977 ifnet_lock_done(cur_ifp);
4978
4979 /* Send the ARP */
4980 new_result = dlil_send_arp_internal(cur_ifp,
4981 arpop, (struct sockaddr_dl *)(void *)
4982 source_hw->ifa_addr,
4983 (struct sockaddr *)&source_ip_copy, NULL,
4984 target_proto);
4985
4986 IFA_REMREF(source_hw);
4987 if (result == ENOTSUP) {
4988 result = new_result;
4989 }
4990 }
4991 ifnet_list_free(ifp_list);
4992 }
4993 } else {
4994 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
4995 sender_proto, target_hw, target_proto);
4996 }
4997
4998 return (result);
4999 }
5000
5001 /*
5002 * Caller must hold ifnet head lock.
5003 */
5004 static int
5005 ifnet_lookup(struct ifnet *ifp)
5006 {
5007 struct ifnet *_ifp;
5008
5009 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
5010 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
5011 if (_ifp == ifp)
5012 break;
5013 }
5014 return (_ifp != NULL);
5015 }
5016
5017 /*
5018 * Caller has to pass a non-zero refio argument to get an
5019 * IO reference count. This will prevent ifnet_detach from
5020 * completing while there are outstanding IO reference counts.
5021 */
5022 int
5023 ifnet_is_attached(struct ifnet *ifp, int refio)
5024 {
5025 int ret;
5026
5027 lck_mtx_lock_spin(&ifp->if_ref_lock);
5028 if ((ret = IF_FULLY_ATTACHED(ifp))) {
5029 if (refio > 0)
5030 ifp->if_refio++;
5031 }
5032 lck_mtx_unlock(&ifp->if_ref_lock);
5033
5034 return (ret);
5035 }
5036
5037 /*
5038 * Caller must ensure the interface is attached; the assumption is that
5039 * there is at least an outstanding IO reference count held already.
5040 * Most callers would call ifnet_is_attached() instead.
5041 */
5042 void
5043 ifnet_incr_iorefcnt(struct ifnet *ifp)
5044 {
5045 lck_mtx_lock_spin(&ifp->if_ref_lock);
5046 VERIFY(IF_FULLY_ATTACHED(ifp));
5047 VERIFY(ifp->if_refio > 0);
5048 ifp->if_refio++;
5049 lck_mtx_unlock(&ifp->if_ref_lock);
5050 }
5051
5052 void
5053 ifnet_decr_iorefcnt(struct ifnet *ifp)
5054 {
5055 lck_mtx_lock_spin(&ifp->if_ref_lock);
5056 VERIFY(ifp->if_refio > 0);
5057 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
5058 ifp->if_refio--;
5059
5060 /*
5061 * If there are no more outstanding IO references, wake up the
5062 * ifnet_detach thread if the detaching flag is set.
5063 */
5064 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
5065 wakeup(&(ifp->if_refio));
5066
5067 lck_mtx_unlock(&ifp->if_ref_lock);
5068 }
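
/*
 * The canonical IO-reference pattern used throughout this file: taking a
 * reference with ifnet_is_attached(ifp, 1) keeps the final stage of
 * ifnet_detach from completing until the matching decrement below.
 *
 *	if (!ifnet_is_attached(ifp, 1))
 *		return (ENXIO);		// interface is gone or detaching
 *	// ... safe to use ifp here ...
 *	ifnet_decr_iorefcnt(ifp);
 */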
5069
5070 static void
5071 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
5072 {
5073 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
5074 ctrace_t *tr;
5075 u_int32_t idx;
5076 u_int16_t *cnt;
5077
5078 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
5079 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
5080 /* NOTREACHED */
5081 }
5082
5083 if (refhold) {
5084 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
5085 tr = dl_if_dbg->dldbg_if_refhold;
5086 } else {
5087 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
5088 tr = dl_if_dbg->dldbg_if_refrele;
5089 }
5090
5091 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
5092 ctrace_record(&tr[idx]);
5093 }
5094
5095 errno_t
5096 dlil_if_ref(struct ifnet *ifp)
5097 {
5098 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5099
5100 if (dl_if == NULL)
5101 return (EINVAL);
5102
5103 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5104 ++dl_if->dl_if_refcnt;
5105 if (dl_if->dl_if_refcnt == 0) {
5106 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
5107 /* NOTREACHED */
5108 }
5109 if (dl_if->dl_if_trace != NULL)
5110 (*dl_if->dl_if_trace)(dl_if, TRUE);
5111 lck_mtx_unlock(&dl_if->dl_if_lock);
5112
5113 return (0);
5114 }
5115
5116 errno_t
5117 dlil_if_free(struct ifnet *ifp)
5118 {
5119 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5120 bool need_release = FALSE;
5121
5122 if (dl_if == NULL)
5123 return (EINVAL);
5124
5125 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5126 switch (dl_if->dl_if_refcnt) {
5127 case 0:
5128 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
5129 /* NOTREACHED */
5130 break;
5131 case 1:
5132 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
5133 need_release = TRUE;
5134 }
5135 break;
5136 default:
5137 break;
5138 }
5139 --dl_if->dl_if_refcnt;
5140 if (dl_if->dl_if_trace != NULL)
5141 (*dl_if->dl_if_trace)(dl_if, FALSE);
5142 lck_mtx_unlock(&dl_if->dl_if_lock);
5143 if (need_release) {
5144 dlil_if_release(ifp);
5145 }
5146 return (0);
5147 }
5148
5149 static errno_t
5150 dlil_attach_protocol_internal(struct if_proto *proto,
5151 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
5152 uint32_t * proto_count)
5153 {
5154 struct kev_dl_proto_data ev_pr_data;
5155 struct ifnet *ifp = proto->ifp;
5156 int retval = 0;
5157 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
5158 struct if_proto *prev_proto;
5159 struct if_proto *_proto;
5160
5161 /* callee holds a proto refcnt upon success */
5162 ifnet_lock_exclusive(ifp);
5163 _proto = find_attached_proto(ifp, proto->protocol_family);
5164 if (_proto != NULL) {
5165 ifnet_lock_done(ifp);
5166 if_proto_free(_proto);
5167 return (EEXIST);
5168 }
5169
5170 /*
5171 * Call family module add_proto routine so it can refine the
5172 * demux descriptors as it wishes.
5173 */
5174 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5175 demux_count);
5176 if (retval) {
5177 ifnet_lock_done(ifp);
5178 return (retval);
5179 }
5180
5181 /*
5182 * Insert the protocol in the hash
5183 */
5184 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
5185 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
5186 prev_proto = SLIST_NEXT(prev_proto, next_hash);
5187 if (prev_proto)
5188 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
5189 else
5190 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5191 proto, next_hash);
5192
5193 /* hold a proto refcnt for attach */
5194 if_proto_ref(proto);
5195
5196 /*
5197 * The reserved field carries the number of protocols still attached
5198 * (subject to change)
5199 */
5200 ev_pr_data.proto_family = proto->protocol_family;
5201 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
5202 ifnet_lock_done(ifp);
5203
5204 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5205 (struct net_event_data *)&ev_pr_data,
5206 sizeof (struct kev_dl_proto_data));
5207 if (proto_count != NULL) {
5208 *proto_count = ev_pr_data.proto_remaining_count;
5209 }
5210 return (retval);
5211 }
5212
5213 errno_t
5214 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
5215 const struct ifnet_attach_proto_param *proto_details)
5216 {
5217 int retval = 0;
5218 struct if_proto *ifproto = NULL;
5219 uint32_t proto_count = 0;
5220
5221 ifnet_head_lock_shared();
5222 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5223 retval = EINVAL;
5224 goto end;
5225 }
5226 /* Check that the interface is in the global list */
5227 if (!ifnet_lookup(ifp)) {
5228 retval = ENXIO;
5229 goto end;
5230 }
5231
5232 ifproto = zalloc(dlif_proto_zone);
5233 if (ifproto == NULL) {
5234 retval = ENOMEM;
5235 goto end;
5236 }
5237 bzero(ifproto, dlif_proto_size);
5238
5239 /* refcnt held above during lookup */
5240 ifproto->ifp = ifp;
5241 ifproto->protocol_family = protocol;
5242 ifproto->proto_kpi = kProtoKPI_v1;
5243 ifproto->kpi.v1.input = proto_details->input;
5244 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5245 ifproto->kpi.v1.event = proto_details->event;
5246 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5247 ifproto->kpi.v1.detached = proto_details->detached;
5248 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5249 ifproto->kpi.v1.send_arp = proto_details->send_arp;
5250
5251 retval = dlil_attach_protocol_internal(ifproto,
5252 proto_details->demux_list, proto_details->demux_count,
5253 &proto_count);
5254
5255 end:
5256 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5257 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5258 if_name(ifp), protocol, retval);
5259 } else {
5260 if (dlil_verbose) {
5261 printf("%s: attached v1 protocol %d (count = %d)\n",
5262 if_name(ifp),
5263 protocol, proto_count);
5264 }
5265 }
5266 ifnet_head_done();
5267 if (retval == 0) {
5268 } else if (ifproto != NULL) {
5269 zfree(dlif_proto_zone, ifproto);
5270 }
5271 return (retval);
5272 }
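
/*
 * Illustrative sketch: attaching a protocol with the v1 KPI wrapped above.
 * The ethertype, callbacks and PF_HYPO family below are hypothetical; a
 * real protocol fills in only the handlers it needs and leaves the rest
 * NULL.
 *
 *	static const u_int16_t hypo_ethertype = 0x88b5;	// example value
 *	struct ifnet_demux_desc demux = {
 *		.type	 = DLIL_DESC_ETYPE2,
 *		.data	 = (void *)&hypo_ethertype,
 *		.datalen = sizeof (hypo_ethertype)
 *	};
 *	struct ifnet_attach_proto_param param;
 *
 *	bzero(&param, sizeof (param));
 *	param.demux_list  = &demux;
 *	param.demux_count = 1;
 *	param.input	  = hypo_proto_input;
 *	// errno_t err = ifnet_attach_protocol(ifp, PF_HYPO, &param);
 */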
5273
5274 errno_t
5275 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
5276 const struct ifnet_attach_proto_param_v2 *proto_details)
5277 {
5278 int retval = 0;
5279 struct if_proto *ifproto = NULL;
5280 uint32_t proto_count = 0;
5281
5282 ifnet_head_lock_shared();
5283 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5284 retval = EINVAL;
5285 goto end;
5286 }
5287 /* Check that the interface is in the global list */
5288 if (!ifnet_lookup(ifp)) {
5289 retval = ENXIO;
5290 goto end;
5291 }
5292
5293 ifproto = zalloc(dlif_proto_zone);
5294 if (ifproto == NULL) {
5295 retval = ENOMEM;
5296 goto end;
5297 }
5298 bzero(ifproto, sizeof(*ifproto));
5299
5300 /* refcnt held above during lookup */
5301 ifproto->ifp = ifp;
5302 ifproto->protocol_family = protocol;
5303 ifproto->proto_kpi = kProtoKPI_v2;
5304 ifproto->kpi.v2.input = proto_details->input;
5305 ifproto->kpi.v2.pre_output = proto_details->pre_output;
5306 ifproto->kpi.v2.event = proto_details->event;
5307 ifproto->kpi.v2.ioctl = proto_details->ioctl;
5308 ifproto->kpi.v2.detached = proto_details->detached;
5309 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
5310 ifproto->kpi.v2.send_arp = proto_details->send_arp;
5311
5312 retval = dlil_attach_protocol_internal(ifproto,
5313 proto_details->demux_list, proto_details->demux_count,
5314 &proto_count);
5315
5316 end:
5317 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5318 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
5319 if_name(ifp), protocol, retval);
5320 } else {
5321 if (dlil_verbose) {
5322 printf("%s: attached v2 protocol %d (count = %d)\n",
5323 if_name(ifp),
5324 protocol, proto_count);
5325 }
5326 }
5327 ifnet_head_done();
5328 if (retval == 0) {
5329 } else if (ifproto != NULL) {
5330 zfree(dlif_proto_zone, ifproto);
5331 }
5332 return (retval);
5333 }
5334
5335 errno_t
5336 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
5337 {
5338 struct if_proto *proto = NULL;
5339 int retval = 0;
5340
5341 if (ifp == NULL || proto_family == 0) {
5342 retval = EINVAL;
5343 goto end;
5344 }
5345
5346 ifnet_lock_exclusive(ifp);
5347 /* callee holds a proto refcnt upon success */
5348 proto = find_attached_proto(ifp, proto_family);
5349 if (proto == NULL) {
5350 retval = ENXIO;
5351 ifnet_lock_done(ifp);
5352 goto end;
5353 }
5354
5355 /* call family module del_proto */
5356 if (ifp->if_del_proto)
5357 ifp->if_del_proto(ifp, proto->protocol_family);
5358
5359 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
5360 proto, if_proto, next_hash);
5361
5362 if (proto->proto_kpi == kProtoKPI_v1) {
5363 proto->kpi.v1.input = ifproto_media_input_v1;
5364 proto->kpi.v1.pre_output = ifproto_media_preout;
5365 proto->kpi.v1.event = ifproto_media_event;
5366 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5367 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5368 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5369 } else {
5370 proto->kpi.v2.input = ifproto_media_input_v2;
5371 proto->kpi.v2.pre_output = ifproto_media_preout;
5372 proto->kpi.v2.event = ifproto_media_event;
5373 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5374 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5375 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5376 }
5377 proto->detached = 1;
5378 ifnet_lock_done(ifp);
5379
5380 if (dlil_verbose) {
5381 printf("%s: detached %s protocol %d\n", if_name(ifp),
5382 (proto->proto_kpi == kProtoKPI_v1) ?
5383 "v1" : "v2", proto_family);
5384 }
5385
5386 /* release proto refcnt held during protocol attach */
5387 if_proto_free(proto);
5388
5389 /*
5390 * Release proto refcnt held during lookup; the rest of
5391 * protocol detach steps will happen when the last proto
5392 * reference is released.
5393 */
5394 if_proto_free(proto);
5395
5396 end:
5397 return (retval);
5398 }
5399
5400
5401 static errno_t
5402 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
5403 struct mbuf *packet, char *header)
5404 {
5405 #pragma unused(ifp, protocol, packet, header)
5406 return (ENXIO);
5407 }
5408
5409 static errno_t
5410 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
5411 struct mbuf *packet)
5412 {
5413 #pragma unused(ifp, protocol, packet)
5414 return (ENXIO);
5415
5416 }
5417
5418 static errno_t
5419 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
5420 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
5421 char *link_layer_dest)
5422 {
5423 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
5424 return (ENXIO);
5425
5426 }
5427
5428 static void
5429 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
5430 const struct kev_msg *event)
5431 {
5432 #pragma unused(ifp, protocol, event)
5433 }
5434
5435 static errno_t
5436 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
5437 unsigned long command, void *argument)
5438 {
5439 #pragma unused(ifp, protocol, command, argument)
5440 return (ENXIO);
5441 }
5442
5443 static errno_t
5444 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
5445 struct sockaddr_dl *out_ll, size_t ll_len)
5446 {
5447 #pragma unused(ifp, proto_addr, out_ll, ll_len)
5448 return (ENXIO);
5449 }
5450
5451 static errno_t
5452 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
5453 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5454 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5455 {
5456 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
5457 return (ENXIO);
5458 }
5459
5460 extern int if_next_index(void);
5461 extern int tcp_ecn_outbound;
5462
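/*
 * Attach an ifnet to the system.  This assigns an interface index if the
 * caller did not supply one, installs the (permanent) link-layer address,
 * inserts the ifnet into ifnet_head and ifindex2ifnet[], sets up the
 * transmit classq and any dedicated input/starter/poller threads, and
 * finally marks the ifnet IFRF_ATTACHED.  Typically invoked by a driver
 * or family module once the embryonic ifnet has been fully initialized.
 */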
5463 errno_t
5464 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
5465 {
5466 struct ifnet *tmp_if;
5467 struct ifaddr *ifa;
5468 struct if_data_internal if_data_saved;
5469 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5470 struct dlil_threading_info *dl_inp;
5471 u_int32_t sflags = 0;
5472 int err;
5473
5474 if (ifp == NULL)
5475 return (EINVAL);
5476
5477 /*
5478 * Serialize ifnet attach using dlil_ifnet_lock, in order to
5479 * prevent the interface from being configured while it is
5480 * embryonic, as ifnet_head_lock is dropped and reacquired
5481 * below prior to marking the ifnet with IFRF_ATTACHED.
5482 */
5483 dlil_if_lock();
5484 ifnet_head_lock_exclusive();
5485 /* Verify we aren't already on the list */
5486 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
5487 if (tmp_if == ifp) {
5488 ifnet_head_done();
5489 dlil_if_unlock();
5490 return (EEXIST);
5491 }
5492 }
5493
5494 lck_mtx_lock_spin(&ifp->if_ref_lock);
5495 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
5496 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
5497 __func__, ifp);
5498 /* NOTREACHED */
5499 }
5500 lck_mtx_unlock(&ifp->if_ref_lock);
5501
5502 ifnet_lock_exclusive(ifp);
5503
5504 /* Sanity check */
5505 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5506 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5507
5508 if (ll_addr != NULL) {
5509 if (ifp->if_addrlen == 0) {
5510 ifp->if_addrlen = ll_addr->sdl_alen;
5511 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
5512 ifnet_lock_done(ifp);
5513 ifnet_head_done();
5514 dlil_if_unlock();
5515 return (EINVAL);
5516 }
5517 }
5518
5519 /*
5520 * Allow interfaces without protocol families to attach
5521 * only if they have the necessary fields filled out.
5522 */
5523 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
5524 DLIL_PRINTF("%s: Attempt to attach interface without "
5525 "family module - %d\n", __func__, ifp->if_family);
5526 ifnet_lock_done(ifp);
5527 ifnet_head_done();
5528 dlil_if_unlock();
5529 return (ENODEV);
5530 }
5531
5532 /* Allocate protocol hash table */
5533 VERIFY(ifp->if_proto_hash == NULL);
5534 ifp->if_proto_hash = zalloc(dlif_phash_zone);
5535 if (ifp->if_proto_hash == NULL) {
5536 ifnet_lock_done(ifp);
5537 ifnet_head_done();
5538 dlil_if_unlock();
5539 return (ENOBUFS);
5540 }
5541 bzero(ifp->if_proto_hash, dlif_phash_size);
5542
5543 lck_mtx_lock_spin(&ifp->if_flt_lock);
5544 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5545 TAILQ_INIT(&ifp->if_flt_head);
5546 VERIFY(ifp->if_flt_busy == 0);
5547 VERIFY(ifp->if_flt_waiters == 0);
5548 lck_mtx_unlock(&ifp->if_flt_lock);
5549
5550 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
5551 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
5552 LIST_INIT(&ifp->if_multiaddrs);
5553 }
5554
5555 VERIFY(ifp->if_allhostsinm == NULL);
5556 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5557 TAILQ_INIT(&ifp->if_addrhead);
5558
5559 if (ifp->if_index == 0) {
5560 int idx = if_next_index();
5561
5562 if (idx == -1) {
5563 ifp->if_index = 0;
5564 ifnet_lock_done(ifp);
5565 ifnet_head_done();
5566 dlil_if_unlock();
5567 return (ENOBUFS);
5568 }
5569 ifp->if_index = idx;
5570 }
5571 /* There should not be anything occupying this slot */
5572 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5573
5574 /* allocate (if needed) and initialize a link address */
5575 ifa = dlil_alloc_lladdr(ifp, ll_addr);
5576 if (ifa == NULL) {
5577 ifnet_lock_done(ifp);
5578 ifnet_head_done();
5579 dlil_if_unlock();
5580 return (ENOBUFS);
5581 }
5582
5583 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
5584 ifnet_addrs[ifp->if_index - 1] = ifa;
5585
5586 /* make this address the first on the list */
5587 IFA_LOCK(ifa);
5588 /* hold a reference for ifnet_addrs[] */
5589 IFA_ADDREF_LOCKED(ifa);
5590 /* if_attach_link_ifa() holds a reference for ifa_link */
5591 if_attach_link_ifa(ifp, ifa);
5592 IFA_UNLOCK(ifa);
5593
5594 #if CONFIG_MACF_NET
5595 mac_ifnet_label_associate(ifp);
5596 #endif
5597
5598 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
5599 ifindex2ifnet[ifp->if_index] = ifp;
5600
5601 /* Hold a reference to the underlying dlil_ifnet */
5602 ifnet_reference(ifp);
5603
5604 /* Clear stats (save and restore other fields that we care about) */
5605 if_data_saved = ifp->if_data;
5606 bzero(&ifp->if_data, sizeof (ifp->if_data));
5607 ifp->if_data.ifi_type = if_data_saved.ifi_type;
5608 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
5609 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
5610 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
5611 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
5612 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
5613 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
5614 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
5615 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
5616 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
5617 ifnet_touch_lastchange(ifp);
5618
5619 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
5620 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
5621 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
5622
5623 /* By default, use SFB and enable flow advisory */
5624 sflags = PKTSCHEDF_QALG_SFB;
5625 if (if_flowadv)
5626 sflags |= PKTSCHEDF_QALG_FLOWCTL;
5627
5628 if (if_delaybased_queue)
5629 sflags |= PKTSCHEDF_QALG_DELAYBASED;
5630
5631 if (ifp->if_output_sched_model ==
5632 IFNET_SCHED_MODEL_DRIVER_MANAGED)
5633 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
5634
5635 /* Initialize transmit queue(s) */
5636 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
5637 if (err != 0) {
5638 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
5639 "err=%d", __func__, ifp, err);
5640 /* NOTREACHED */
5641 }
5642
5643 /* Sanity checks on the input thread storage */
5644 dl_inp = &dl_if->dl_if_inpstorage;
5645 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
5646 VERIFY(dl_inp->input_waiting == 0);
5647 VERIFY(dl_inp->wtot == 0);
5648 VERIFY(dl_inp->ifp == NULL);
5649 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
5650 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
5651 VERIFY(!dl_inp->net_affinity);
5652 VERIFY(ifp->if_inp == NULL);
5653 VERIFY(dl_inp->input_thr == THREAD_NULL);
5654 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
5655 VERIFY(dl_inp->poll_thr == THREAD_NULL);
5656 VERIFY(dl_inp->tag == 0);
5657 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
5658 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
5659 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
5660 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
5661 #if IFNET_INPUT_SANITY_CHK
5662 VERIFY(dl_inp->input_mbuf_cnt == 0);
5663 #endif /* IFNET_INPUT_SANITY_CHK */
5664
5665 /*
5666 * A specific DLIL input thread is created per Ethernet/cellular
5667 * interface or for an interface which supports opportunistic
5668 * input polling. Pseudo interfaces or other types of interfaces
5669 * use the main input thread instead.
5670 */
5671 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
5672 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
5673 ifp->if_inp = dl_inp;
5674 err = dlil_create_input_thread(ifp, ifp->if_inp);
5675 if (err != 0) {
5676 panic_plain("%s: ifp=%p couldn't get an input thread; "
5677 "err=%d", __func__, ifp, err);
5678 /* NOTREACHED */
5679 }
5680 }
5681
5682 if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
5683 ifp->if_inp->input_mit_tcall =
5684 thread_call_allocate_with_priority(dlil_mit_tcall_fn,
5685 ifp, THREAD_CALL_PRIORITY_KERNEL);
5686 }
5687
5688 /*
5689 * If the driver supports the new transmit model, calculate flow hash
5690 * and create a workloop starter thread to invoke the if_start callback
5691 * where the packets may be dequeued and transmitted.
5692 */
5693 if (ifp->if_eflags & IFEF_TXSTART) {
5694 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
5695 VERIFY(ifp->if_flowhash != 0);
5696 VERIFY(ifp->if_start_thread == THREAD_NULL);
5697
5698 ifnet_set_start_cycle(ifp, NULL);
5699 ifp->if_start_active = 0;
5700 ifp->if_start_req = 0;
5701 ifp->if_start_flags = 0;
5702 VERIFY(ifp->if_start != NULL);
5703 if ((err = kernel_thread_start(ifnet_start_thread_fn,
5704 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
5705 panic_plain("%s: "
5706 "ifp=%p couldn't get a start thread; "
5707 "err=%d", __func__, ifp, err);
5708 /* NOTREACHED */
5709 }
5710 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
5711 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5712 } else {
5713 ifp->if_flowhash = 0;
5714 }
5715
5716 /*
5717 * If the driver supports the new receive model, create a poller
5718 * thread to invoke the if_input_poll callback where the packets may
5719 * be dequeued from the driver and processed for reception.
5720 */
5721 if (ifp->if_eflags & IFEF_RXPOLL) {
5722 VERIFY(ifp->if_input_poll != NULL);
5723 VERIFY(ifp->if_input_ctl != NULL);
5724 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5725
5726 ifnet_set_poll_cycle(ifp, NULL);
5727 ifp->if_poll_update = 0;
5728 ifp->if_poll_active = 0;
5729 ifp->if_poll_req = 0;
5730 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
5731 &ifp->if_poll_thread)) != KERN_SUCCESS) {
5732 panic_plain("%s: ifp=%p couldn't get a poll thread; "
5733 "err=%d", __func__, ifp, err);
5734 /* NOTREACHED */
5735 }
5736 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
5737 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5738 }
5739
5740 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5741 VERIFY(ifp->if_desc.ifd_len == 0);
5742 VERIFY(ifp->if_desc.ifd_desc != NULL);
5743
5744 /* Record attach PC stacktrace */
5745 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
5746
5747 ifp->if_updatemcasts = 0;
5748 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
5749 struct ifmultiaddr *ifma;
5750 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
5751 IFMA_LOCK(ifma);
5752 if (ifma->ifma_addr->sa_family == AF_LINK ||
5753 ifma->ifma_addr->sa_family == AF_UNSPEC)
5754 ifp->if_updatemcasts++;
5755 IFMA_UNLOCK(ifma);
5756 }
5757
5758 printf("%s: attached with %d suspended link-layer multicast "
5759 "membership(s)\n", if_name(ifp),
5760 ifp->if_updatemcasts);
5761 }
5762
5763 /* Clear logging parameters */
5764 bzero(&ifp->if_log, sizeof (ifp->if_log));
5765
5766 /* Clear foreground/realtime activity timestamps */
5767 ifp->if_fg_sendts = 0;
5768 ifp->if_rt_sendts = 0;
5769
5770 VERIFY(ifp->if_delegated.ifp == NULL);
5771 VERIFY(ifp->if_delegated.type == 0);
5772 VERIFY(ifp->if_delegated.family == 0);
5773 VERIFY(ifp->if_delegated.subfamily == 0);
5774 VERIFY(ifp->if_delegated.expensive == 0);
5775
5776 VERIFY(ifp->if_agentids == NULL);
5777 VERIFY(ifp->if_agentcount == 0);
5778
5779 /* Reset interface state */
5780 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5781 ifp->if_interface_state.valid_bitmask |=
5782 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
5783 ifp->if_interface_state.interface_availability =
5784 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
5785
5786 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
5787 if (ifp == lo_ifp) {
5788 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
5789 ifp->if_interface_state.valid_bitmask |=
5790 IF_INTERFACE_STATE_LQM_STATE_VALID;
5791 } else {
5792 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
5793 }
5794
5795 /*
5796 * Enable ECN capability on this interface depending on the
5797 * value of the ECN global setting.
5798 */
5799 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
5800 ifp->if_eflags |= IFEF_ECN_ENABLE;
5801 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5802 }
5803
5804 /*
5805 * Built-in Cyclops always on policy for WiFi infra
5806 */
5807 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
5808 errno_t error;
5809
5810 error = if_set_qosmarking_mode(ifp,
5811 IFRTYPE_QOSMARKING_FASTLANE);
5812 if (error != 0) {
5813 printf("%s if_set_qosmarking_mode(%s) error %d\n",
5814 __func__, ifp->if_xname, error);
5815 } else {
5816 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
5817 #if (DEVELOPMENT || DEBUG)
5818 printf("%s fastlane enabled on %s\n",
5819 __func__, ifp->if_xname);
5820 #endif /* (DEVELOPMENT || DEBUG) */
5821 }
5822 }
5823
5824 ifnet_lock_done(ifp);
5825 ifnet_head_done();
5826
5827
5828 lck_mtx_lock(&ifp->if_cached_route_lock);
5829 /* Enable forwarding cached route */
5830 ifp->if_fwd_cacheok = 1;
5831 /* Clean up any existing cached routes */
5832 ROUTE_RELEASE(&ifp->if_fwd_route);
5833 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
5834 ROUTE_RELEASE(&ifp->if_src_route);
5835 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
5836 ROUTE_RELEASE(&ifp->if_src_route6);
5837 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5838 lck_mtx_unlock(&ifp->if_cached_route_lock);
5839
5840 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
5841
5842 /*
5843 * Allocate and attach IGMPv3/MLDv2 interface specific variables
5844 * and trees; do this before the ifnet is marked as attached.
5845 * The ifnet keeps the reference to the info structures even after
5846 * the ifnet is detached, since the network-layer records still
5847 * refer to the info structures even after that. This also
5848 * makes it possible for them to still function after the ifnet
5849 * is recycled or reattached.
5850 */
5851 #if INET
5852 if (IGMP_IFINFO(ifp) == NULL) {
5853 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
5854 VERIFY(IGMP_IFINFO(ifp) != NULL);
5855 } else {
5856 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
5857 igmp_domifreattach(IGMP_IFINFO(ifp));
5858 }
5859 #endif /* INET */
5860 #if INET6
5861 if (MLD_IFINFO(ifp) == NULL) {
5862 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
5863 VERIFY(MLD_IFINFO(ifp) != NULL);
5864 } else {
5865 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
5866 mld_domifreattach(MLD_IFINFO(ifp));
5867 }
5868 #endif /* INET6 */
5869
5870 VERIFY(ifp->if_data_threshold == 0);
5871 VERIFY(ifp->if_dt_tcall != NULL);
5872
5873 /*
5874 * Finally, mark this ifnet as attached.
5875 */
5876 lck_mtx_lock(rnh_lock);
5877 ifnet_lock_exclusive(ifp);
5878 lck_mtx_lock_spin(&ifp->if_ref_lock);
5879 ifp->if_refflags = IFRF_ATTACHED; /* clears embryonic */
5880 lck_mtx_unlock(&ifp->if_ref_lock);
5881 if (net_rtref) {
5882 /* boot-args override; enable idle notification */
5883 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
5884 IFRF_IDLE_NOTIFY);
5885 } else {
5886 /* apply previous request(s) to set the idle flags, if any */
5887 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
5888 ifp->if_idle_new_flags_mask);
5889
5890 }
5891 ifnet_lock_done(ifp);
5892 lck_mtx_unlock(rnh_lock);
5893 dlil_if_unlock();
5894
5895 #if PF
5896 /*
5897 * Attach packet filter to this interface, if enabled.
5898 */
5899 pf_ifnet_hook(ifp, 1);
5900 #endif /* PF */
5901
5902 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
5903
5904 if (dlil_verbose) {
5905 printf("%s: attached%s\n", if_name(ifp),
5906 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
5907 }
5908
5909 return (0);
5910 }
5911
5912 /*
5913 * Prepare the storage for the first/permanent link address, which
5914 * must have the same lifetime as the ifnet itself. Although the link
5915 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
5916 * its location in memory must never change as it may still be referred
5917 * to by some parts of the system afterwards (unfortunate implementation
5918 * artifacts inherited from BSD.)
5919 *
5920 * Caller must hold ifnet lock as writer.
5921 */
5922 static struct ifaddr *
5923 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
5924 {
5925 struct ifaddr *ifa, *oifa;
5926 struct sockaddr_dl *asdl, *msdl;
5927 char workbuf[IFNAMSIZ*2];
5928 int namelen, masklen, socksize;
5929 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5930
5931 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
5932 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
5933
5934 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
5935 if_name(ifp));
5936 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
5937 + ((namelen > 0) ? namelen : 0);
5938 socksize = masklen + ifp->if_addrlen;
5939 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
5940 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
5941 socksize = sizeof(struct sockaddr_dl);
5942 socksize = ROUNDUP(socksize);
5943 #undef ROUNDUP
5944
5945 ifa = ifp->if_lladdr;
5946 if (socksize > DLIL_SDLMAXLEN ||
5947 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
5948 /*
5949 * Rare, but in the event that the link address requires
5950 * more storage space than DLIL_SDLMAXLEN, allocate the
5951 * largest possible storage for address and mask, such
5952 * that we can reuse the same space when if_addrlen grows.
5953 * This same space will be used when if_addrlen shrinks.
5954 */
5955 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
5956 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
5957 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
5958 if (ifa == NULL)
5959 return (NULL);
5960 ifa_lock_init(ifa);
5961 /* Don't set IFD_ALLOC, as this is permanent */
5962 ifa->ifa_debug = IFD_LINK;
5963 }
5964 IFA_LOCK(ifa);
5965 /* address and mask sockaddr_dl locations */
5966 asdl = (struct sockaddr_dl *)(ifa + 1);
5967 bzero(asdl, SOCK_MAXADDRLEN);
5968 msdl = (struct sockaddr_dl *)(void *)
5969 ((char *)asdl + SOCK_MAXADDRLEN);
5970 bzero(msdl, SOCK_MAXADDRLEN);
5971 } else {
5972 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
5973 /*
5974 * Use the storage areas for address and mask within the
5975 * dlil_ifnet structure. This is the most common case.
5976 */
5977 if (ifa == NULL) {
5978 ifa = &dl_if->dl_if_lladdr.ifa;
5979 ifa_lock_init(ifa);
5980 /* Don't set IFD_ALLOC, as this is permanent */
5981 ifa->ifa_debug = IFD_LINK;
5982 }
5983 IFA_LOCK(ifa);
5984 /* address and mask sockaddr_dl locations */
5985 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
5986 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
5987 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
5988 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
5989 }
5990
5991 /* hold a permanent reference for the ifnet itself */
5992 IFA_ADDREF_LOCKED(ifa);
5993 oifa = ifp->if_lladdr;
5994 ifp->if_lladdr = ifa;
5995
5996 VERIFY(ifa->ifa_debug == IFD_LINK);
5997 ifa->ifa_ifp = ifp;
5998 ifa->ifa_rtrequest = link_rtrequest;
5999 ifa->ifa_addr = (struct sockaddr *)asdl;
6000 asdl->sdl_len = socksize;
6001 asdl->sdl_family = AF_LINK;
6002 if (namelen > 0) {
6003 bcopy(workbuf, asdl->sdl_data, min(namelen,
6004 sizeof (asdl->sdl_data)));
6005 asdl->sdl_nlen = namelen;
6006 } else {
6007 asdl->sdl_nlen = 0;
6008 }
6009 asdl->sdl_index = ifp->if_index;
6010 asdl->sdl_type = ifp->if_type;
6011 if (ll_addr != NULL) {
6012 asdl->sdl_alen = ll_addr->sdl_alen;
6013 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
6014 } else {
6015 asdl->sdl_alen = 0;
6016 }
6017 ifa->ifa_netmask = (struct sockaddr *)msdl;
6018 msdl->sdl_len = masklen;
6019 while (namelen > 0)
6020 msdl->sdl_data[--namelen] = 0xff;
6021 IFA_UNLOCK(ifa);
6022
6023 if (oifa != NULL)
6024 IFA_REMREF(oifa);
6025
6026 return (ifa);
6027 }
6028
6029 static void
6030 if_purgeaddrs(struct ifnet *ifp)
6031 {
6032 #if INET
6033 in_purgeaddrs(ifp);
6034 #endif /* INET */
6035 #if INET6
6036 in6_purgeaddrs(ifp);
6037 #endif /* INET6 */
6038 }
6039
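/*
 * Begin detaching an ifnet.  The interface is marked IFRF_DETACHING,
 * removed from ifnet_head and ifindex2ifnet[] so that it is no longer
 * visible to lookups, and a KEV_DL_IF_DETACHING event is posted; the
 * remaining teardown is handed off to the detacher worker thread via
 * ifnet_detaching_enqueue() to avoid reentrancy.
 */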
6040 errno_t
6041 ifnet_detach(ifnet_t ifp)
6042 {
6043 struct ifnet *delegated_ifp;
6044 struct nd_ifinfo *ndi = NULL;
6045
6046 if (ifp == NULL)
6047 return (EINVAL);
6048
6049 ndi = ND_IFINFO(ifp);
6050 if (NULL != ndi)
6051 ndi->cga_initialized = FALSE;
6052
6053 lck_mtx_lock(rnh_lock);
6054 ifnet_head_lock_exclusive();
6055 ifnet_lock_exclusive(ifp);
6056
6057 /*
6058 * Check to see if this interface has previously triggered
6059 * aggressive protocol draining; if so, decrement the global
6060 * refcnt and clear PR_AGGDRAIN on the route domain if no
6061 * such interfaces remain.
6062 */
6063 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
6064
6065 lck_mtx_lock_spin(&ifp->if_ref_lock);
6066 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
6067 lck_mtx_unlock(&ifp->if_ref_lock);
6068 ifnet_lock_done(ifp);
6069 ifnet_head_done();
6070 lck_mtx_unlock(rnh_lock);
6071 return (EINVAL);
6072 } else if (ifp->if_refflags & IFRF_DETACHING) {
6073 /* Interface has already been detached */
6074 lck_mtx_unlock(&ifp->if_ref_lock);
6075 ifnet_lock_done(ifp);
6076 ifnet_head_done();
6077 lck_mtx_unlock(rnh_lock);
6078 return (ENXIO);
6079 }
6080 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
6081 /* Indicate this interface is being detached */
6082 ifp->if_refflags &= ~IFRF_ATTACHED;
6083 ifp->if_refflags |= IFRF_DETACHING;
6084 lck_mtx_unlock(&ifp->if_ref_lock);
6085
6086 if (dlil_verbose) {
6087 printf("%s: detaching\n", if_name(ifp));
6088 }
6089
6090 /* clean up flow control entry object if there's any */
6091 if (ifp->if_eflags & IFEF_TXSTART) {
6092 ifnet_flowadv(ifp->if_flowhash);
6093 }
6094
6095 /* Reset ECN enable/disable flags */
6096 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6097 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
6098
6099 /*
6100 * Remove the ifnet from ifnet_head and ifindex2ifnet[]; it will
6101 * no longer be visible during lookups from this point.
6102 */
6103 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
6104 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
6105 ifp->if_link.tqe_next = NULL;
6106 ifp->if_link.tqe_prev = NULL;
6107 if (ifp->if_ordered_link.tqe_next != NULL ||
6108 ifp->if_ordered_link.tqe_prev != NULL) {
6109 ifnet_remove_from_ordered_list(ifp);
6110 }
6111 ifindex2ifnet[ifp->if_index] = NULL;
6112
6113 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
6114 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
6115
6116 /* Record detach PC stacktrace */
6117 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
6118
6119 /* Clear logging parameters */
6120 bzero(&ifp->if_log, sizeof (ifp->if_log));
6121
6122 /* Clear delegated interface info (reference released below) */
6123 delegated_ifp = ifp->if_delegated.ifp;
6124 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
6125
6126 /* Reset interface state */
6127 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6128
6129 ifnet_lock_done(ifp);
6130 ifnet_head_done();
6131 lck_mtx_unlock(rnh_lock);
6132
6133
6134 /* Release reference held on the delegated interface */
6135 if (delegated_ifp != NULL)
6136 ifnet_release(delegated_ifp);
6137
6138 /* Reset Link Quality Metric (unless loopback [lo0]) */
6139 if (ifp != lo_ifp)
6140 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
6141
6142 /* Reset TCP local statistics */
6143 if (ifp->if_tcp_stat != NULL)
6144 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
6145
6146 /* Reset UDP local statistics */
6147 if (ifp->if_udp_stat != NULL)
6148 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
6149
6150 /* Reset ifnet IPv4 stats */
6151 if (ifp->if_ipv4_stat != NULL)
6152 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
6153
6154 /* Reset ifnet IPv6 stats */
6155 if (ifp->if_ipv6_stat != NULL)
6156 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
6157
6158 /* Release memory held for interface link status report */
6159 if (ifp->if_link_status != NULL) {
6160 FREE(ifp->if_link_status, M_TEMP);
6161 ifp->if_link_status = NULL;
6162 }
6163
6164 /* Clear agent IDs */
6165 if (ifp->if_agentids != NULL) {
6166 FREE(ifp->if_agentids, M_NETAGENT);
6167 ifp->if_agentids = NULL;
6168 }
6169 ifp->if_agentcount = 0;
6170
6171
6172 /* Let BPF know we're detaching */
6173 bpfdetach(ifp);
6174
6175 /* Mark the interface as DOWN */
6176 if_down(ifp);
6177
6178 /* Disable forwarding cached route */
6179 lck_mtx_lock(&ifp->if_cached_route_lock);
6180 ifp->if_fwd_cacheok = 0;
6181 lck_mtx_unlock(&ifp->if_cached_route_lock);
6182
6183 /* Disable data threshold and wait for any pending event posting */
6184 ifp->if_data_threshold = 0;
6185 VERIFY(ifp->if_dt_tcall != NULL);
6186 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
6187
6188 /*
6189 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
6190 * references to the info structures and leave them attached to
6191 * this ifnet.
6192 */
6193 #if INET
6194 igmp_domifdetach(ifp);
6195 #endif /* INET */
6196 #if INET6
6197 mld_domifdetach(ifp);
6198 #endif /* INET6 */
6199
6200 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
6201
6202 /* Let worker thread take care of the rest, to avoid reentrancy */
6203 dlil_if_lock();
6204 ifnet_detaching_enqueue(ifp);
6205 dlil_if_unlock();
6206
6207 return (0);
6208 }
6209
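/*
 * Deferred-detach machinery: ifnet_detach() queues the ifnet onto
 * ifnet_detaching_head and wakes the detacher thread, which dequeues
 * entries one at a time and runs ifnet_detach_final() on each, outside
 * of the original caller's context.
 */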
6210 static void
6211 ifnet_detaching_enqueue(struct ifnet *ifp)
6212 {
6213 dlil_if_lock_assert();
6214
6215 ++ifnet_detaching_cnt;
6216 VERIFY(ifnet_detaching_cnt != 0);
6217 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6218 wakeup((caddr_t)&ifnet_delayed_run);
6219 }
6220
6221 static struct ifnet *
6222 ifnet_detaching_dequeue(void)
6223 {
6224 struct ifnet *ifp;
6225
6226 dlil_if_lock_assert();
6227
6228 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6229 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6230 if (ifp != NULL) {
6231 VERIFY(ifnet_detaching_cnt != 0);
6232 --ifnet_detaching_cnt;
6233 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6234 ifp->if_detaching_link.tqe_next = NULL;
6235 ifp->if_detaching_link.tqe_prev = NULL;
6236 }
6237 return (ifp);
6238 }
6239
6240 static int
6241 ifnet_detacher_thread_cont(int err)
6242 {
6243 #pragma unused(err)
6244 struct ifnet *ifp;
6245
6246 for (;;) {
6247 dlil_if_lock_assert();
6248 while (ifnet_detaching_cnt == 0) {
6249 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6250 (PZERO - 1), "ifnet_detacher_cont", 0,
6251 ifnet_detacher_thread_cont);
6252 /* NOTREACHED */
6253 }
6254
6255 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
6256
6257 /* Take care of detaching ifnet */
6258 ifp = ifnet_detaching_dequeue();
6259 if (ifp != NULL) {
6260 dlil_if_unlock();
6261 ifnet_detach_final(ifp);
6262 dlil_if_lock();
6263 }
6264 }
6265 }
6266
6267 static void
6268 ifnet_detacher_thread_func(void *v, wait_result_t w)
6269 {
6270 #pragma unused(v, w)
6271 dlil_if_lock();
6272 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6273 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
6274 /*
6275 * msleep0() shouldn't have returned as PCATCH was not set;
6276 * therefore assert in this case.
6277 */
6278 dlil_if_unlock();
6279 VERIFY(0);
6280 }
6281
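/*
 * Final stage of interface detach, run from the detacher thread: wait for
 * outstanding I/O references to drain, tear down the send classq and any
 * attached interface filters, purge addresses, unplumb the remaining
 * protocols, terminate the starter/poller/input threads, point the ifnet
 * callbacks at the ifp_if_* placeholders, clear IFRF_DETACHING, invoke
 * the driver's if_free callback and release the reference taken at
 * attach time.
 */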
6282 static void
6283 ifnet_detach_final(struct ifnet *ifp)
6284 {
6285 struct ifnet_filter *filter, *filter_next;
6286 struct ifnet_filter_head fhead;
6287 struct dlil_threading_info *inp;
6288 struct ifaddr *ifa;
6289 ifnet_detached_func if_free;
6290 int i;
6291
6292 lck_mtx_lock(&ifp->if_ref_lock);
6293 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6294 panic("%s: flags mismatch (detaching not set) ifp=%p",
6295 __func__, ifp);
6296 /* NOTREACHED */
6297 }
6298
6299 /*
6300 * Wait until the existing IO references get released
6301 * before we proceed with ifnet_detach. This is not a
6302 * common case, so block without using a continuation.
6303 */
6304 while (ifp->if_refio > 0) {
6305 printf("%s: Waiting for IO references on %s interface "
6306 "to be released\n", __func__, if_name(ifp));
6307 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
6308 (PZERO - 1), "ifnet_ioref_wait", NULL);
6309 }
6310 lck_mtx_unlock(&ifp->if_ref_lock);
6311
6312 /* Drain and destroy send queue */
6313 ifclassq_teardown(ifp);
6314
6315 /* Detach interface filters */
6316 lck_mtx_lock(&ifp->if_flt_lock);
6317 if_flt_monitor_enter(ifp);
6318
6319 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6320 fhead = ifp->if_flt_head;
6321 TAILQ_INIT(&ifp->if_flt_head);
6322
6323 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
6324 filter_next = TAILQ_NEXT(filter, filt_next);
6325 lck_mtx_unlock(&ifp->if_flt_lock);
6326
6327 dlil_detach_filter_internal(filter, 1);
6328 lck_mtx_lock(&ifp->if_flt_lock);
6329 }
6330 if_flt_monitor_leave(ifp);
6331 lck_mtx_unlock(&ifp->if_flt_lock);
6332
6333 /* Tell upper layers to drop their network addresses */
6334 if_purgeaddrs(ifp);
6335
6336 ifnet_lock_exclusive(ifp);
6337
6338 /* Unplumb all protocols */
6339 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6340 struct if_proto *proto;
6341
6342 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6343 while (proto != NULL) {
6344 protocol_family_t family = proto->protocol_family;
6345 ifnet_lock_done(ifp);
6346 proto_unplumb(family, ifp);
6347 ifnet_lock_exclusive(ifp);
6348 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6349 }
6350 /* There should not be any protocols left */
6351 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
6352 }
6353 zfree(dlif_phash_zone, ifp->if_proto_hash);
6354 ifp->if_proto_hash = NULL;
6355
6356 /* Detach (permanent) link address from if_addrhead */
6357 ifa = TAILQ_FIRST(&ifp->if_addrhead);
6358 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
6359 IFA_LOCK(ifa);
6360 if_detach_link_ifa(ifp, ifa);
6361 IFA_UNLOCK(ifa);
6362
6363 /* Remove (permanent) link address from ifnet_addrs[] */
6364 IFA_REMREF(ifa);
6365 ifnet_addrs[ifp->if_index - 1] = NULL;
6366
6367 /* This interface should not be on {ifnet_head,detaching} */
6368 VERIFY(ifp->if_link.tqe_next == NULL);
6369 VERIFY(ifp->if_link.tqe_prev == NULL);
6370 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6371 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6372 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
6373 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6374
6375 /* The slot should have been emptied */
6376 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6377
6378 /* There should not be any addresses left */
6379 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6380
6381 /*
6382 * Signal the starter thread to terminate itself.
6383 */
6384 if (ifp->if_start_thread != THREAD_NULL) {
6385 lck_mtx_lock_spin(&ifp->if_start_lock);
6386 ifp->if_start_flags = 0;
6387 ifp->if_start_thread = THREAD_NULL;
6388 wakeup_one((caddr_t)&ifp->if_start_thread);
6389 lck_mtx_unlock(&ifp->if_start_lock);
6390 }
6391
6392 /*
6393 * Signal the poller thread to terminate itself.
6394 */
6395 if (ifp->if_poll_thread != THREAD_NULL) {
6396 lck_mtx_lock_spin(&ifp->if_poll_lock);
6397 ifp->if_poll_thread = THREAD_NULL;
6398 wakeup_one((caddr_t)&ifp->if_poll_thread);
6399 lck_mtx_unlock(&ifp->if_poll_lock);
6400 }
6401
6402 /*
6403 * If thread affinity was set for the workloop thread, we will need
6404 * to tear down the affinity and release the extra reference count
6405 * taken at attach time. Does not apply to lo0 or other interfaces
6406 * without dedicated input threads.
6407 */
6408 if ((inp = ifp->if_inp) != NULL) {
6409 VERIFY(inp != dlil_main_input_thread);
6410
6411 if (inp->net_affinity) {
6412 struct thread *tp, *wtp, *ptp;
6413
6414 lck_mtx_lock_spin(&inp->input_lck);
6415 wtp = inp->wloop_thr;
6416 inp->wloop_thr = THREAD_NULL;
6417 ptp = inp->poll_thr;
6418 inp->poll_thr = THREAD_NULL;
6419 tp = inp->input_thr; /* don't nullify now */
6420 inp->tag = 0;
6421 inp->net_affinity = FALSE;
6422 lck_mtx_unlock(&inp->input_lck);
6423
6424 /* Tear down poll thread affinity */
6425 if (ptp != NULL) {
6426 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
6427 (void) dlil_affinity_set(ptp,
6428 THREAD_AFFINITY_TAG_NULL);
6429 thread_deallocate(ptp);
6430 }
6431
6432 /* Tear down workloop thread affinity */
6433 if (wtp != NULL) {
6434 (void) dlil_affinity_set(wtp,
6435 THREAD_AFFINITY_TAG_NULL);
6436 thread_deallocate(wtp);
6437 }
6438
6439 /* Tear down DLIL input thread affinity */
6440 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
6441 thread_deallocate(tp);
6442 }
6443
6444 /* disassociate ifp DLIL input thread */
6445 ifp->if_inp = NULL;
6446
6447 /* tell the input thread to terminate */
6448 lck_mtx_lock_spin(&inp->input_lck);
6449 inp->input_waiting |= DLIL_INPUT_TERMINATE;
6450 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
6451 wakeup_one((caddr_t)&inp->input_waiting);
6452 }
6453 lck_mtx_unlock(&inp->input_lck);
6454 ifnet_lock_done(ifp);
6455
6456 /* wait for the input thread to terminate */
6457 lck_mtx_lock_spin(&inp->input_lck);
6458 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
6459 == 0) {
6460 (void) msleep(&inp->input_waiting, &inp->input_lck,
6461 (PZERO - 1) | PSPIN, inp->input_name, NULL);
6462 }
6463 lck_mtx_unlock(&inp->input_lck);
6464 ifnet_lock_exclusive(ifp);
6465
6466 /* clean-up input thread state */
6467 dlil_clean_threading_info(inp);
6468
6469 }
6470
6471 /* The driver might unload, so point these to ourselves */
6472 if_free = ifp->if_free;
6473 ifp->if_output_dlil = ifp_if_output;
6474 ifp->if_output = ifp_if_output;
6475 ifp->if_pre_enqueue = ifp_if_output;
6476 ifp->if_start = ifp_if_start;
6477 ifp->if_output_ctl = ifp_if_ctl;
6478 ifp->if_input_dlil = ifp_if_input;
6479 ifp->if_input_poll = ifp_if_input_poll;
6480 ifp->if_input_ctl = ifp_if_ctl;
6481 ifp->if_ioctl = ifp_if_ioctl;
6482 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
6483 ifp->if_free = ifp_if_free;
6484 ifp->if_demux = ifp_if_demux;
6485 ifp->if_event = ifp_if_event;
6486 ifp->if_framer_legacy = ifp_if_framer;
6487 ifp->if_framer = ifp_if_framer_extended;
6488 ifp->if_add_proto = ifp_if_add_proto;
6489 ifp->if_del_proto = ifp_if_del_proto;
6490 ifp->if_check_multi = ifp_if_check_multi;
6491
6492 /* wipe out interface description */
6493 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6494 ifp->if_desc.ifd_len = 0;
6495 VERIFY(ifp->if_desc.ifd_desc != NULL);
6496 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
6497
6498 /* there shouldn't be any delegation by now */
6499 VERIFY(ifp->if_delegated.ifp == NULL);
6500 VERIFY(ifp->if_delegated.type == 0);
6501 VERIFY(ifp->if_delegated.family == 0);
6502 VERIFY(ifp->if_delegated.subfamily == 0);
6503 VERIFY(ifp->if_delegated.expensive == 0);
6504
6505 /* QoS marking gets cleared */
6506 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
6507 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
6508
6509
6510 ifnet_lock_done(ifp);
6511
6512 #if PF
6513 /*
6514 * Detach this interface from packet filter, if enabled.
6515 */
6516 pf_ifnet_hook(ifp, 0);
6517 #endif /* PF */
6518
6519 /* Filter list should be empty */
6520 lck_mtx_lock_spin(&ifp->if_flt_lock);
6521 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6522 VERIFY(ifp->if_flt_busy == 0);
6523 VERIFY(ifp->if_flt_waiters == 0);
6524 lck_mtx_unlock(&ifp->if_flt_lock);
6525
6526 /* Last chance to drain send queue */
6527 if_qflush(ifp, 0);
6528
6529 /* Last chance to cleanup any cached route */
6530 lck_mtx_lock(&ifp->if_cached_route_lock);
6531 VERIFY(!ifp->if_fwd_cacheok);
6532 ROUTE_RELEASE(&ifp->if_fwd_route);
6533 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
6534 ROUTE_RELEASE(&ifp->if_src_route);
6535 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
6536 ROUTE_RELEASE(&ifp->if_src_route6);
6537 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
6538 lck_mtx_unlock(&ifp->if_cached_route_lock);
6539
6540 VERIFY(ifp->if_data_threshold == 0);
6541 VERIFY(ifp->if_dt_tcall != NULL);
6542 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
6543
6544 ifnet_llreach_ifdetach(ifp);
6545
6546 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
6547
6548 /*
6549 * Finally, mark this ifnet as detached.
6550 */
6551 lck_mtx_lock_spin(&ifp->if_ref_lock);
6552 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6553 panic("%s: flags mismatch (detaching not set) ifp=%p",
6554 __func__, ifp);
6555 /* NOTREACHED */
6556 }
6557 ifp->if_refflags &= ~IFRF_DETACHING;
6558 lck_mtx_unlock(&ifp->if_ref_lock);
6559 if (if_free != NULL)
6560 if_free(ifp);
6561
6562 if (dlil_verbose)
6563 printf("%s: detached\n", if_name(ifp));
6564
6565 /* Release reference held during ifnet attach */
6566 ifnet_release(ifp);
6567 }
6568
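/*
 * Placeholder ifnet callbacks installed by ifnet_detach_final().  Since
 * the driver may unload once its interface is detached, these keep the
 * function pointers valid: outputs are silently dropped, inputs fail with
 * ENXIO, control operations return EOPNOTSUPP or EINVAL, and the rest
 * are no-ops.
 */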
6569 errno_t
6570 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6571 {
6572 #pragma unused(ifp)
6573 m_freem_list(m);
6574 return (0);
6575 }
6576
6577 void
6578 ifp_if_start(struct ifnet *ifp)
6579 {
6580 ifnet_purge(ifp);
6581 }
6582
6583 static errno_t
6584 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6585 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6586 boolean_t poll, struct thread *tp)
6587 {
6588 #pragma unused(ifp, m_tail, s, poll, tp)
6589 m_freem_list(m_head);
6590 return (ENXIO);
6591 }
6592
6593 static void
6594 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6595 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6596 {
6597 #pragma unused(ifp, flags, max_cnt)
6598 if (m_head != NULL)
6599 *m_head = NULL;
6600 if (m_tail != NULL)
6601 *m_tail = NULL;
6602 if (cnt != NULL)
6603 *cnt = 0;
6604 if (len != NULL)
6605 *len = 0;
6606 }
6607
6608 static errno_t
6609 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6610 {
6611 #pragma unused(ifp, cmd, arglen, arg)
6612 return (EOPNOTSUPP);
6613 }
6614
6615 static errno_t
6616 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6617 {
6618 #pragma unused(ifp, fh, pf)
6619 m_freem(m);
6620 return (EJUSTRETURN);
6621 }
6622
6623 static errno_t
6624 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6625 const struct ifnet_demux_desc *da, u_int32_t dc)
6626 {
6627 #pragma unused(ifp, pf, da, dc)
6628 return (EINVAL);
6629 }
6630
6631 static errno_t
6632 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6633 {
6634 #pragma unused(ifp, pf)
6635 return (EINVAL);
6636 }
6637
6638 static errno_t
6639 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6640 {
6641 #pragma unused(ifp, sa)
6642 return (EOPNOTSUPP);
6643 }
6644
6645 #if CONFIG_EMBEDDED
6646 static errno_t
6647 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6648 const struct sockaddr *sa, const char *ll, const char *t,
6649 u_int32_t *pre, u_int32_t *post)
6650 #else
6651 static errno_t
6652 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6653 const struct sockaddr *sa, const char *ll, const char *t)
6654 #endif /* !CONFIG_EMBEDDED */
6655 {
6656 #pragma unused(ifp, m, sa, ll, t)
6657 #if CONFIG_EMBEDDED
6658 return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
6659 #else
6660 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
6661 #endif /* !CONFIG_EMBEDDED */
6662 }
6663
6664 static errno_t
6665 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6666 const struct sockaddr *sa, const char *ll, const char *t,
6667 u_int32_t *pre, u_int32_t *post)
6668 {
6669 #pragma unused(ifp, sa, ll, t)
6670 m_freem(*m);
6671 *m = NULL;
6672
6673 if (pre != NULL)
6674 *pre = 0;
6675 if (post != NULL)
6676 *post = 0;
6677
6678 return (EJUSTRETURN);
6679 }
6680
6681 errno_t
6682 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6683 {
6684 #pragma unused(ifp, cmd, arg)
6685 return (EOPNOTSUPP);
6686 }
6687
6688 static errno_t
6689 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6690 {
6691 #pragma unused(ifp, tm, f)
6692 /* XXX not sure what to do here */
6693 return (0);
6694 }
6695
6696 static void
6697 ifp_if_free(struct ifnet *ifp)
6698 {
6699 #pragma unused(ifp)
6700 }
6701
6702 static void
6703 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
6704 {
6705 #pragma unused(ifp, e)
6706 }
6707
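/*
 * Obtain a dlil_ifnet for the given family.  An existing, not-in-use entry
 * with a matching uniqueid is recycled (DLIF_REUSE); if a matching entry
 * is still in use and a uniqueid was supplied, EBUSY is returned.
 * Otherwise a new dlil_ifnet is carved out of dlif_zone at a 64-bit
 * aligned address, its locks and thread call are initialized, and it is
 * appended to dlil_ifnet_head.
 */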
6708 __private_extern__
6709 int dlil_if_acquire(u_int32_t family, const void *uniqueid,
6710 size_t uniqueid_len, struct ifnet **ifp)
6711 {
6712 struct ifnet *ifp1 = NULL;
6713 struct dlil_ifnet *dlifp1 = NULL;
6714 void *buf, *base, **pbuf;
6715 int ret = 0;
6716
6717 dlil_if_lock();
6718 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
6719 ifp1 = (struct ifnet *)dlifp1;
6720
6721 if (ifp1->if_family != family)
6722 continue;
6723
6724 lck_mtx_lock(&dlifp1->dl_if_lock);
6725 /* matching uniqueid (same length and contents), or no uniqueid specified */
6726 if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
6727 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
6728 /* check for matching interface in use */
6729 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6730 if (uniqueid_len) {
6731 ret = EBUSY;
6732 lck_mtx_unlock(&dlifp1->dl_if_lock);
6733 goto end;
6734 }
6735 } else {
6736 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
6737 lck_mtx_unlock(&dlifp1->dl_if_lock);
6738 *ifp = ifp1;
6739 goto end;
6740 }
6741 }
6742 lck_mtx_unlock(&dlifp1->dl_if_lock);
6743 }
6744
6745 /* no interface found, allocate a new one */
6746 buf = zalloc(dlif_zone);
6747 if (buf == NULL) {
6748 ret = ENOMEM;
6749 goto end;
6750 }
6751 bzero(buf, dlif_bufsize);
6752
6753 /* Get the 64-bit aligned base address for this object */
6754 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
6755 sizeof (u_int64_t));
6756 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
6757
6758 /*
6759 * Wind back a pointer size from the aligned base and
6760 * save the original address so we can free it later.
6761 */
6762 pbuf = (void **)((intptr_t)base - sizeof (void *));
6763 *pbuf = buf;
6764 dlifp1 = base;
6765
6766 if (uniqueid_len) {
6767 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
6768 M_NKE, M_WAITOK);
6769 if (dlifp1->dl_if_uniqueid == NULL) {
6770 zfree(dlif_zone, buf);
6771 ret = ENOMEM;
6772 goto end;
6773 }
6774 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
6775 dlifp1->dl_if_uniqueid_len = uniqueid_len;
6776 }
6777
6778 ifp1 = (struct ifnet *)dlifp1;
6779 dlifp1->dl_if_flags = DLIF_INUSE;
6780 if (ifnet_debug) {
6781 dlifp1->dl_if_flags |= DLIF_DEBUG;
6782 dlifp1->dl_if_trace = dlil_if_trace;
6783 }
6784 ifp1->if_name = dlifp1->dl_if_namestorage;
6785 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
6786
6787 /* initialize interface description */
6788 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
6789 ifp1->if_desc.ifd_len = 0;
6790 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
6791
6792
6793 #if CONFIG_MACF_NET
6794 mac_ifnet_label_init(ifp1);
6795 #endif
6796
6797 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
6798 DLIL_PRINTF("%s: failed to allocate if local stats, "
6799 "error: %d\n", __func__, ret);
6800 /* This probably shouldn't be fatal */
6801 ret = 0;
6802 }
6803
6804 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
6805 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
6806 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
6807 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6808 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
6809 ifnet_lock_attr);
6810 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
6811 #if INET
6812 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
6813 ifnet_lock_attr);
6814 ifp1->if_inetdata = NULL;
6815 #endif
6816 #if INET6
6817 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
6818 ifnet_lock_attr);
6819 ifp1->if_inet6data = NULL;
6820 #endif
6821 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
6822 ifnet_lock_attr);
6823 ifp1->if_link_status = NULL;
6824
6825 /* for send data paths */
6826 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
6827 ifnet_lock_attr);
6828 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
6829 ifnet_lock_attr);
6830 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
6831 ifnet_lock_attr);
6832
6833 /* for receive data paths */
6834 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
6835 ifnet_lock_attr);
6836
6837 /* thread call allocation is done with sleeping zalloc */
6838 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
6839 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
6840 if (ifp1->if_dt_tcall == NULL) {
6841 panic_plain("%s: couldn't create if_dt_tcall", __func__);
6842 /* NOTREACHED */
6843 }
6844
6845 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
6846
6847 *ifp = ifp1;
6848
6849 end:
6850 dlil_if_unlock();
6851
6852 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
6853 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
6854
6855 return (ret);
6856 }
6857
6858 __private_extern__ void
6859 dlil_if_release(ifnet_t ifp)
6860 {
6861 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
6862
6863 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
6864 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
6865 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
6866 }
6867
6868 ifnet_lock_exclusive(ifp);
6869 lck_mtx_lock(&dlifp->dl_if_lock);
6870 dlifp->dl_if_flags &= ~DLIF_INUSE;
6871 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6872 ifp->if_name = dlifp->dl_if_namestorage;
6873 /* Reset external name (name + unit) */
6874 ifp->if_xname = dlifp->dl_if_xnamestorage;
6875 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
6876 "%s?", ifp->if_name);
6877 lck_mtx_unlock(&dlifp->dl_if_lock);
6878 #if CONFIG_MACF_NET
6879 /*
6880 * We can either recycle the MAC label here or in dlil_if_acquire().
6881 * It seems logical to do it here but this means that anything that
6882 * still has a handle on ifp will now see it as unlabeled.
6883 * Since the interface is "dead" that may be OK. Revisit later.
6884 */
6885 mac_ifnet_label_recycle(ifp);
6886 #endif
6887 ifnet_lock_done(ifp);
6888 }
6889
6890 __private_extern__ void
6891 dlil_if_lock(void)
6892 {
6893 lck_mtx_lock(&dlil_ifnet_lock);
6894 }
6895
6896 __private_extern__ void
6897 dlil_if_unlock(void)
6898 {
6899 lck_mtx_unlock(&dlil_ifnet_lock);
6900 }
6901
6902 __private_extern__ void
6903 dlil_if_lock_assert(void)
6904 {
6905 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
6906 }
6907
6908 __private_extern__ void
6909 dlil_proto_unplumb_all(struct ifnet *ifp)
6910 {
6911 /*
6912 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6913 * each bucket contains exactly one entry; PF_VLAN does not need an
6914 * explicit unplumb.
6915 *
6916 * if_proto_hash[3] is for other protocols; we expect anything
6917 * in this bucket to respond to the DETACHING event (which would
6918 * have happened by now) and do the unplumb then.
6919 */
6920 (void) proto_unplumb(PF_INET, ifp);
6921 #if INET6
6922 (void) proto_unplumb(PF_INET6, ifp);
6923 #endif /* INET6 */
6924 }
6925
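/*
 * Helpers for the per-ifnet cached forwarding/source routes, protected by
 * if_cached_route_lock.  copyout hands the caller a copy of the cached
 * route; copyin stores an updated route back, unless caching has been
 * disabled (if_fwd_cacheok cleared at detach), in which case the route is
 * simply released.
 */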
6926 static void
6927 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
6928 {
6929 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6930 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6931
6932 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
6933
6934 lck_mtx_unlock(&ifp->if_cached_route_lock);
6935 }
6936
6937 static void
6938 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
6939 {
6940 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6941 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6942
6943 if (ifp->if_fwd_cacheok) {
6944 route_copyin(src, &ifp->if_src_route, sizeof (*src));
6945 } else {
6946 ROUTE_RELEASE(src);
6947 }
6948 lck_mtx_unlock(&ifp->if_cached_route_lock);
6949 }
6950
6951 #if INET6
6952 static void
6953 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
6954 {
6955 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6956 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6957
6958 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
6959 sizeof (*dst));
6960
6961 lck_mtx_unlock(&ifp->if_cached_route_lock);
6962 }
6963
6964 static void
6965 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
6966 {
6967 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6968 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6969
6970 if (ifp->if_fwd_cacheok) {
6971 route_copyin((struct route *)src,
6972 (struct route *)&ifp->if_src_route6, sizeof (*src));
6973 } else {
6974 ROUTE_RELEASE(src);
6975 }
6976 lck_mtx_unlock(&ifp->if_cached_route_lock);
6977 }
6978 #endif /* INET6 */
6979
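/*
 * Scoped route lookup using the per-ifnet cache: if the cached entry is
 * unusable or was created for a different source address, a fresh route
 * is allocated with rtalloc1_scoped() and stored back into the cache.
 * The returned rtentry, if any, carries a reference that the caller is
 * responsible for releasing.
 */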
6980 struct rtentry *
6981 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6982 {
6983 struct route src_rt;
6984 struct sockaddr_in *dst;
6985
6986 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6987
6988 ifp_src_route_copyout(ifp, &src_rt);
6989
6990 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
6991 ROUTE_RELEASE(&src_rt);
6992 if (dst->sin_family != AF_INET) {
6993 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6994 dst->sin_len = sizeof (src_rt.ro_dst);
6995 dst->sin_family = AF_INET;
6996 }
6997 dst->sin_addr = src_ip;
6998
6999 VERIFY(src_rt.ro_rt == NULL);
7000 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
7001 0, 0, ifp->if_index);
7002
7003 if (src_rt.ro_rt != NULL) {
7004 /* retain a ref, copyin consumes one */
7005 struct rtentry *rte = src_rt.ro_rt;
7006 RT_ADDREF(rte);
7007 ifp_src_route_copyin(ifp, &src_rt);
7008 src_rt.ro_rt = rte;
7009 }
7010 }
7011
7012 return (src_rt.ro_rt);
7013 }
7014
7015 #if INET6
7016 struct rtentry *
7017 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
7018 {
7019 struct route_in6 src_rt;
7020
7021 ifp_src_route6_copyout(ifp, &src_rt);
7022
7023 if (ROUTE_UNUSABLE(&src_rt) ||
7024 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
7025 ROUTE_RELEASE(&src_rt);
7026 if (src_rt.ro_dst.sin6_family != AF_INET6) {
7027 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
7028 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
7029 src_rt.ro_dst.sin6_family = AF_INET6;
7030 }
7031 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
7032 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
7033 sizeof (src_rt.ro_dst.sin6_addr));
7034
7035 if (src_rt.ro_rt == NULL) {
7036 src_rt.ro_rt = rtalloc1_scoped(
7037 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
7038 ifp->if_index);
7039
7040 if (src_rt.ro_rt != NULL) {
7041 /* retain a ref, copyin consumes one */
7042 struct rtentry *rte = src_rt.ro_rt;
7043 RT_ADDREF(rte);
7044 ifp_src_route6_copyin(ifp, &src_rt);
7045 src_rt.ro_rt = rte;
7046 }
7047 }
7048 }
7049
7050 return (src_rt.ro_rt);
7051 }
7052 #endif /* INET6 */
7053
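/*
 * Update the interface link quality metric.  The raw value is normalized
 * to the nearest defined threshold (dropping to the ABORT threshold also
 * flags tcbinfo for LQM-abort handling and schedules the fast inpcb
 * timer), the new state is recorded in if_interface_state, and
 * KEV_DL_LINK_QUALITY_METRIC_CHANGED is posted with the ifnet lock
 * dropped.  'locked' indicates whether the caller already holds the ifnet
 * lock exclusively; the lock is left in that state on return.
 */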
7054 void
7055 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
7056 {
7057 struct kev_dl_link_quality_metric_data ev_lqm_data;
7058
7059 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
7060
7061 /* Normalize to edge */
7062 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
7063 lqm = IFNET_LQM_THRESH_ABORT;
7064 atomic_bitset_32(&tcbinfo.ipi_flags,
7065 INPCBINFO_HANDLE_LQM_ABORT);
7066 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
7067 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
7068 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
7069 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
7070 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
7071 lqm <= IFNET_LQM_THRESH_POOR) {
7072 lqm = IFNET_LQM_THRESH_POOR;
7073 } else if (lqm > IFNET_LQM_THRESH_POOR &&
7074 lqm <= IFNET_LQM_THRESH_GOOD) {
7075 lqm = IFNET_LQM_THRESH_GOOD;
7076 }
7077
7078 /*
7079 * Take the lock if needed
7080 */
7081 if (!locked)
7082 ifnet_lock_exclusive(ifp);
7083
7084 if (lqm == ifp->if_interface_state.lqm_state &&
7085 (ifp->if_interface_state.valid_bitmask &
7086 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
7087 /*
7088 * Release the lock if it was not held by the caller
7089 */
7090 if (!locked)
7091 ifnet_lock_done(ifp);
7092 return; /* nothing to update */
7093 }
7094 ifp->if_interface_state.valid_bitmask |=
7095 IF_INTERFACE_STATE_LQM_STATE_VALID;
7096 ifp->if_interface_state.lqm_state = lqm;
7097
7098 /*
7099 * Don't want to hold the lock when issuing kernel events
7100 */
7101 ifnet_lock_done(ifp);
7102
7103 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
7104 ev_lqm_data.link_quality_metric = lqm;
7105
7106 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
7107 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
7108
7109 /*
7110 * Reacquire the lock for the caller
7111 */
7112 if (locked)
7113 ifnet_lock_exclusive(ifp);
7114 }
7115
7116 static void
7117 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
7118 {
7119 struct kev_dl_rrc_state kev;
7120
7121 if (rrc_state == ifp->if_interface_state.rrc_state &&
7122 (ifp->if_interface_state.valid_bitmask &
7123 IF_INTERFACE_STATE_RRC_STATE_VALID))
7124 return;
7125
7126 ifp->if_interface_state.valid_bitmask |=
7127 IF_INTERFACE_STATE_RRC_STATE_VALID;
7128
7129 ifp->if_interface_state.rrc_state = rrc_state;
7130
7131 /*
7132 * Don't want to hold the lock when issuing kernel events
7133 */
7134 ifnet_lock_done(ifp);
7135
7136 bzero(&kev, sizeof(struct kev_dl_rrc_state));
7137 kev.rrc_state = rrc_state;
7138
7139 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
7140 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
7141
7142 ifnet_lock_exclusive(ifp);
7143 }
7144
7145 errno_t
7146 if_state_update(struct ifnet *ifp,
7147 struct if_interface_state *if_interface_state)
7148 {
7149 u_short if_index_available = 0;
7150
7151 ifnet_lock_exclusive(ifp);
7152
7153 if ((ifp->if_type != IFT_CELLULAR) &&
7154 (if_interface_state->valid_bitmask &
7155 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
7156 ifnet_lock_done(ifp);
7157 return (ENOTSUP);
7158 }
7159 if ((if_interface_state->valid_bitmask &
7160 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
7161 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
7162 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
7163 ifnet_lock_done(ifp);
7164 return (EINVAL);
7165 }
7166 if ((if_interface_state->valid_bitmask &
7167 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
7168 if_interface_state->rrc_state !=
7169 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
7170 if_interface_state->rrc_state !=
7171 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
7172 ifnet_lock_done(ifp);
7173 return (EINVAL);
7174 }
7175
7176 if (if_interface_state->valid_bitmask &
7177 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7178 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
7179 }
7180 if (if_interface_state->valid_bitmask &
7181 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7182 if_rrc_state_update(ifp, if_interface_state->rrc_state);
7183 }
7184 if (if_interface_state->valid_bitmask &
7185 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7186 ifp->if_interface_state.valid_bitmask |=
7187 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7188 ifp->if_interface_state.interface_availability =
7189 if_interface_state->interface_availability;
7190
7191 if (ifp->if_interface_state.interface_availability ==
7192 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
7193 if_index_available = ifp->if_index;
7194 }
7195 }
7196 ifnet_lock_done(ifp);
7197
7198 /*
7199 * Check if the TCP connections going over this interface should be
7200 * forced to send probe packets instead of waiting for TCP timers
7201 * to fire. This will be done when there is an explicit
7202 * notification that the interface became available.
7203 */
7204 if (if_index_available > 0)
7205 tcp_interface_send_probe(if_index_available);
7206
7207 return (0);
7208 }
7209
7210 void
7211 if_get_state(struct ifnet *ifp,
7212 struct if_interface_state *if_interface_state)
7213 {
7214 ifnet_lock_shared(ifp);
7215
7216 if_interface_state->valid_bitmask = 0;
7217
7218 if (ifp->if_interface_state.valid_bitmask &
7219 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7220 if_interface_state->valid_bitmask |=
7221 IF_INTERFACE_STATE_RRC_STATE_VALID;
7222 if_interface_state->rrc_state =
7223 ifp->if_interface_state.rrc_state;
7224 }
7225 if (ifp->if_interface_state.valid_bitmask &
7226 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7227 if_interface_state->valid_bitmask |=
7228 IF_INTERFACE_STATE_LQM_STATE_VALID;
7229 if_interface_state->lqm_state =
7230 ifp->if_interface_state.lqm_state;
7231 }
7232 if (ifp->if_interface_state.valid_bitmask &
7233 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7234 if_interface_state->valid_bitmask |=
7235 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7236 if_interface_state->interface_availability =
7237 ifp->if_interface_state.interface_availability;
7238 }
7239
7240 ifnet_lock_done(ifp);
7241 }
7242
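/*
 * Enable or disable connectivity probing on the interface by toggling
 * IFEF_PROBE_CONNECTIVITY, then notify NECP clients and TCP.
 */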
7243 errno_t
7244 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
7245 {
7246 ifnet_lock_exclusive(ifp);
7247 if (conn_probe > 1) {
7248 ifnet_lock_done(ifp);
7249 return (EINVAL);
7250 }
7251 if (conn_probe == 0)
7252 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
7253 else
7254 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
7255 ifnet_lock_done(ifp);
7256
7257 #if NECP
7258 necp_update_all_clients();
7259 #endif /* NECP */
7260
7261 tcp_probe_connectivity(ifp, conn_probe);
7262 return (0);
7263 }
7264
7265 /* For uuid.c: return the first Ethernet link-layer address found */
7266 int
7267 uuid_get_ethernet(u_int8_t *node)
7268 {
7269 struct ifnet *ifp;
7270 struct sockaddr_dl *sdl;
7271
7272 ifnet_head_lock_shared();
7273 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
7274 ifnet_lock_shared(ifp);
7275 IFA_LOCK_SPIN(ifp->if_lladdr);
7276 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
7277 if (sdl->sdl_type == IFT_ETHER) {
7278 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
7279 IFA_UNLOCK(ifp->if_lladdr);
7280 ifnet_lock_done(ifp);
7281 ifnet_head_done();
7282 return (0);
7283 }
7284 IFA_UNLOCK(ifp->if_lladdr);
7285 ifnet_lock_done(ifp);
7286 }
7287 ifnet_head_done();
7288
7289 return (-1);
7290 }
7291
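/*
 * Sysctl handlers for the receive polling (rxpoll) tunables.  Each
 * handler reads the current value, lets the caller overwrite it, and
 * clamps or rejects out-of-range values before storing the result.
 */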
7292 static int
7293 sysctl_rxpoll SYSCTL_HANDLER_ARGS
7294 {
7295 #pragma unused(arg1, arg2)
7296 uint32_t i;
7297 int err;
7298
7299 i = if_rxpoll;
7300
7301 err = sysctl_handle_int(oidp, &i, 0, req);
7302 if (err != 0 || req->newptr == USER_ADDR_NULL)
7303 return (err);
7304
7305 if (net_rxpoll == 0)
7306 return (ENXIO);
7307
7308 if_rxpoll = i;
7309 return (err);
7310 }
7311
7312 static int
7313 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
7314 {
7315 #pragma unused(arg1, arg2)
7316 uint64_t q;
7317 int err;
7318
7319 q = if_rxpoll_mode_holdtime;
7320
7321 err = sysctl_handle_quad(oidp, &q, 0, req);
7322 if (err != 0 || req->newptr == USER_ADDR_NULL)
7323 return (err);
7324
7325 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
7326 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
7327
7328 if_rxpoll_mode_holdtime = q;
7329
7330 return (err);
7331 }
7332
7333 static int
7334 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
7335 {
7336 #pragma unused(arg1, arg2)
7337 uint64_t q;
7338 int err;
7339
7340 q = if_rxpoll_sample_holdtime;
7341
7342 err = sysctl_handle_quad(oidp, &q, 0, req);
7343 if (err != 0 || req->newptr == USER_ADDR_NULL)
7344 return (err);
7345
7346 if (q < IF_RXPOLL_SAMPLETIME_MIN)
7347 q = IF_RXPOLL_SAMPLETIME_MIN;
7348
7349 if_rxpoll_sample_holdtime = q;
7350
7351 return (err);
7352 }
7353
7354 static int
7355 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
7356 {
7357 #pragma unused(arg1, arg2)
7358 uint64_t q;
7359 int err;
7360
7361 q = if_rxpoll_interval_time;
7362
7363 err = sysctl_handle_quad(oidp, &q, 0, req);
7364 if (err != 0 || req->newptr == USER_ADDR_NULL)
7365 return (err);
7366
7367 if (q < IF_RXPOLL_INTERVALTIME_MIN)
7368 q = IF_RXPOLL_INTERVALTIME_MIN;
7369
7370 if_rxpoll_interval_time = q;
7371
7372 return (err);
7373 }
7374
7375 static int
7376 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
7377 {
7378 #pragma unused(arg1, arg2)
7379 uint32_t i;
7380 int err;
7381
7382 i = if_rxpoll_wlowat;
7383
7384 err = sysctl_handle_int(oidp, &i, 0, req);
7385 if (err != 0 || req->newptr == USER_ADDR_NULL)
7386 return (err);
7387
7388 if (i == 0 || i >= if_rxpoll_whiwat)
7389 return (EINVAL);
7390
7391 if_rxpoll_wlowat = i;
7392 return (err);
7393 }
7394
7395 static int
7396 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
7397 {
7398 #pragma unused(arg1, arg2)
7399 uint32_t i;
7400 int err;
7401
7402 i = if_rxpoll_whiwat;
7403
7404 err = sysctl_handle_int(oidp, &i, 0, req);
7405 if (err != 0 || req->newptr == USER_ADDR_NULL)
7406 return (err);
7407
7408 if (i <= if_rxpoll_wlowat)
7409 return (EINVAL);
7410
7411 if_rxpoll_whiwat = i;
7412 return (err);
7413 }
7414
7415 static int
7416 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
7417 {
7418 #pragma unused(arg1, arg2)
7419 int i, err;
7420
7421 i = if_sndq_maxlen;
7422
7423 err = sysctl_handle_int(oidp, &i, 0, req);
7424 if (err != 0 || req->newptr == USER_ADDR_NULL)
7425 return (err);
7426
7427 if (i < IF_SNDQ_MINLEN)
7428 i = IF_SNDQ_MINLEN;
7429
7430 if_sndq_maxlen = i;
7431 return (err);
7432 }
7433
7434 static int
7435 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
7436 {
7437 #pragma unused(arg1, arg2)
7438 int i, err;
7439
7440 i = if_rcvq_maxlen;
7441
7442 err = sysctl_handle_int(oidp, &i, 0, req);
7443 if (err != 0 || req->newptr == USER_ADDR_NULL)
7444 return (err);
7445
7446 if (i < IF_RCVQ_MINLEN)
7447 i = IF_RCVQ_MINLEN;
7448
7449 if_rcvq_maxlen = i;
7450 return (err);
7451 }
7452
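/*
 * Record the presence of a neighboring node (RSSI, link quality and
 * proximity metrics) with ND6 and post a KEV_DL_NODE_PRESENCE event.
 */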
7453 void
7454 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
7455 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
7456 {
7457 struct kev_dl_node_presence kev;
7458 struct sockaddr_dl *sdl;
7459 struct sockaddr_in6 *sin6;
7460
7461 VERIFY(ifp);
7462 VERIFY(sa);
7463 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7464
7465 bzero(&kev, sizeof (kev));
7466 sin6 = &kev.sin6_node_address;
7467 sdl = &kev.sdl_node_address;
7468 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7469 kev.rssi = rssi;
7470 kev.link_quality_metric = lqm;
7471 kev.node_proximity_metric = npm;
7472 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
7473
7474 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
7475 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
7476 &kev.link_data, sizeof (kev));
7477 }
7478
7479 void
7480 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
7481 {
7482 struct kev_dl_node_absence kev;
7483 struct sockaddr_in6 *sin6;
7484 struct sockaddr_dl *sdl;
7485
7486 VERIFY(ifp);
7487 VERIFY(sa);
7488 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7489
7490 bzero(&kev, sizeof (kev));
7491 sin6 = &kev.sin6_node_address;
7492 sdl = &kev.sdl_node_address;
7493 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7494
7495 nd6_alt_node_absent(ifp, sin6);
7496 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
7497 &kev.link_data, sizeof (kev));
7498 }
7499
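/*
 * Return a pointer to the link-layer address bytes of the given
 * sockaddr_dl.  With the MAC framework enabled, callers lacking the
 * "net.link.addr" privilege get a fixed placeholder address instead.
 */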
7500 const void *
7501 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
7502 kauth_cred_t *credp)
7503 {
7504 const u_int8_t *bytes;
7505 size_t size;
7506
7507 bytes = CONST_LLADDR(sdl);
7508 size = sdl->sdl_alen;
7509
7510 #if CONFIG_MACF
7511 if (dlil_lladdr_ckreq) {
7512 switch (sdl->sdl_type) {
7513 case IFT_ETHER:
7514 case IFT_IEEE1394:
7515 break;
7516 default:
7517 credp = NULL;
7518 break;
7519 };
7520
7521 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
7522 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
7523 [0] = 2
7524 };
7525
7526 bytes = unspec;
7527 }
7528 }
7529 #else
7530 #pragma unused(credp)
7531 #endif
7532
7533 if (sizep != NULL) *sizep = size;
7534 return (bytes);
7535 }
7536
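/*
 * Post a KEV_DL_ISSUES event carrying the reporting module ID, an
 * optional argument blob and a timestamp.
 */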
7537 void
7538 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
7539 u_int8_t info[DLIL_MODARGLEN])
7540 {
7541 struct kev_dl_issues kev;
7542 struct timeval tv;
7543
7544 VERIFY(ifp != NULL);
7545 VERIFY(modid != NULL);
7546 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
7547 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
7548
7549 bzero(&kev, sizeof (kev));
7550
7551 microtime(&tv);
7552 kev.timestamp = tv.tv_sec;
7553 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
7554 if (info != NULL)
7555 bcopy(info, &kev.info, DLIL_MODARGLEN);
7556
7557 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
7558 &kev.link_data, sizeof (kev));
7559 }
7560
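/*
 * Handle SIOCSIFOPPORTUNISTIC/SIOCGIFOPPORTUNISTIC: set or get the
 * interface throttling level, and report the number of opportunistic
 * TCP/UDP connections currently using the interface.
 */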
7561 errno_t
7562 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7563 struct proc *p)
7564 {
7565 u_int32_t level = IFNET_THROTTLE_OFF;
7566 errno_t result = 0;
7567
7568 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
7569
7570 if (cmd == SIOCSIFOPPORTUNISTIC) {
7571 /*
7572 * XXX: Use priv_check_cred() instead of root check?
7573 */
7574 if ((result = proc_suser(p)) != 0)
7575 return (result);
7576
7577 if (ifr->ifr_opportunistic.ifo_flags ==
7578 IFRIFOF_BLOCK_OPPORTUNISTIC)
7579 level = IFNET_THROTTLE_OPPORTUNISTIC;
7580 else if (ifr->ifr_opportunistic.ifo_flags == 0)
7581 level = IFNET_THROTTLE_OFF;
7582 else
7583 result = EINVAL;
7584
7585 if (result == 0)
7586 result = ifnet_set_throttle(ifp, level);
7587 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
7588 ifr->ifr_opportunistic.ifo_flags = 0;
7589 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
7590 ifr->ifr_opportunistic.ifo_flags |=
7591 IFRIFOF_BLOCK_OPPORTUNISTIC;
7592 }
7593 }
7594
7595 /*
7596 * Return the count of current opportunistic connections
7597 * over the interface.
7598 */
7599 if (result == 0) {
7600 uint32_t flags = 0;
7601 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
7602 INPCB_OPPORTUNISTIC_SETCMD : 0;
7603 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
7604 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
7605 ifr->ifr_opportunistic.ifo_inuse =
7606 udp_count_opportunistic(ifp->if_index, flags) +
7607 tcp_count_opportunistic(ifp->if_index, flags);
7608 }
7609
7610 if (result == EALREADY)
7611 result = 0;
7612
7613 return (result);
7614 }
7615
7616 int
7617 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
7618 {
7619 struct ifclassq *ifq;
7620 int err = 0;
7621
7622 if (!(ifp->if_eflags & IFEF_TXSTART))
7623 return (ENXIO);
7624
7625 *level = IFNET_THROTTLE_OFF;
7626
7627 ifq = &ifp->if_snd;
7628 IFCQ_LOCK(ifq);
7629 /* Throttling works only for IFCQ, not ALTQ instances */
7630 if (IFCQ_IS_ENABLED(ifq))
7631 IFCQ_GET_THROTTLE(ifq, *level, err);
7632 IFCQ_UNLOCK(ifq);
7633
7634 return (err);
7635 }
7636
7637 int
7638 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7639 {
7640 struct ifclassq *ifq;
7641 int err = 0;
7642
7643 if (!(ifp->if_eflags & IFEF_TXSTART))
7644 return (ENXIO);
7645
7646 ifq = &ifp->if_snd;
7647
7648 switch (level) {
7649 case IFNET_THROTTLE_OFF:
7650 case IFNET_THROTTLE_OPPORTUNISTIC:
7651 break;
7652 default:
7653 return (EINVAL);
7654 }
7655
7656 IFCQ_LOCK(ifq);
7657 if (IFCQ_IS_ENABLED(ifq))
7658 IFCQ_SET_THROTTLE(ifq, level, err);
7659 IFCQ_UNLOCK(ifq);
7660
7661 if (err == 0) {
7662 printf("%s: throttling level set to %d\n", if_name(ifp),
7663 level);
7664 if (level == IFNET_THROTTLE_OFF)
7665 ifnet_start(ifp);
7666 }
7667
7668 return (err);
7669 }
7670
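/*
 * Handle SIOCSIFLOG/SIOCGIFLOG: validate and set, or return, the
 * per-interface logging level, facility flags, category and
 * subcategory.
 */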
7671 errno_t
7672 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7673 struct proc *p)
7674 {
7675 #pragma unused(p)
7676 errno_t result = 0;
7677 uint32_t flags;
7678 int level, category, subcategory;
7679
7680 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7681
7682 if (cmd == SIOCSIFLOG) {
7683 if ((result = priv_check_cred(kauth_cred_get(),
7684 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
7685 return (result);
7686
7687 level = ifr->ifr_log.ifl_level;
7688 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
7689 result = EINVAL;
7690
7691 flags = ifr->ifr_log.ifl_flags;
7692 if ((flags &= IFNET_LOGF_MASK) == 0)
7693 result = EINVAL;
7694
7695 category = ifr->ifr_log.ifl_category;
7696 subcategory = ifr->ifr_log.ifl_subcategory;
7697
7698 if (result == 0)
7699 result = ifnet_set_log(ifp, level, flags,
7700 category, subcategory);
7701 } else {
7702 result = ifnet_get_log(ifp, &level, &flags, &category,
7703 &subcategory);
7704 if (result == 0) {
7705 ifr->ifr_log.ifl_level = level;
7706 ifr->ifr_log.ifl_flags = flags;
7707 ifr->ifr_log.ifl_category = category;
7708 ifr->ifr_log.ifl_subcategory = subcategory;
7709 }
7710 }
7711
7712 return (result);
7713 }
7714
7715 int
7716 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
7717 int32_t category, int32_t subcategory)
7718 {
7719 int err = 0;
7720
7721 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
7722 VERIFY(flags & IFNET_LOGF_MASK);
7723
7724 /*
7725 * The logging level applies to all facilities; make sure to
7726 * update them all with the most current level.
7727 */
7728 flags |= ifp->if_log.flags;
7729
7730 if (ifp->if_output_ctl != NULL) {
7731 struct ifnet_log_params l;
7732
7733 bzero(&l, sizeof (l));
7734 l.level = level;
7735 l.flags = flags;
7736 l.flags &= ~IFNET_LOGF_DLIL;
7737 l.category = category;
7738 l.subcategory = subcategory;
7739
7740 /* Send this request to lower layers */
7741 if (l.flags != 0) {
7742 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7743 sizeof (l), &l);
7744 }
7745 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7746 /*
7747 * If targeted to the lower layers without an output
7748 * control callback registered on the interface, just
7749 * silently ignore facilities other than ours.
7750 */
7751 flags &= IFNET_LOGF_DLIL;
7752 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
7753 level = 0;
7754 }
7755
7756 if (err == 0) {
7757 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
7758 ifp->if_log.flags = 0;
7759 else
7760 ifp->if_log.flags |= flags;
7761
7762 log(LOG_INFO, "%s: logging level set to %d flags=%b "
7763 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
7764 ifp->if_log.level, ifp->if_log.flags,
7765 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
7766 category, subcategory);
7767 }
7768
7769 return (err);
7770 }
7771
7772 int
7773 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7774 int32_t *category, int32_t *subcategory)
7775 {
7776 if (level != NULL)
7777 *level = ifp->if_log.level;
7778 if (flags != NULL)
7779 *flags = ifp->if_log.flags;
7780 if (category != NULL)
7781 *category = ifp->if_log.category;
7782 if (subcategory != NULL)
7783 *subcategory = ifp->if_log.subcategory;
7784
7785 return (0);
7786 }
7787
7788 int
7789 ifnet_notify_address(struct ifnet *ifp, int af)
7790 {
7791 struct ifnet_notify_address_params na;
7792
7793 #if PF
7794 (void) pf_ifaddr_hook(ifp);
7795 #endif /* PF */
7796
7797 if (ifp->if_output_ctl == NULL)
7798 return (EOPNOTSUPP);
7799
7800 bzero(&na, sizeof (na));
7801 na.address_family = af;
7802
7803 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7804 sizeof (na), &na));
7805 }
7806
7807 errno_t
7808 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7809 {
7810 if (ifp == NULL || flowid == NULL) {
7811 return (EINVAL);
7812 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7813 !IF_FULLY_ATTACHED(ifp)) {
7814 return (ENXIO);
7815 }
7816
7817 *flowid = ifp->if_flowhash;
7818
7819 return (0);
7820 }
7821
7822 errno_t
7823 ifnet_disable_output(struct ifnet *ifp)
7824 {
7825 int err;
7826
7827 if (ifp == NULL) {
7828 return (EINVAL);
7829 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7830 !IF_FULLY_ATTACHED(ifp)) {
7831 return (ENXIO);
7832 }
7833
7834 if ((err = ifnet_fc_add(ifp)) == 0) {
7835 lck_mtx_lock_spin(&ifp->if_start_lock);
7836 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7837 lck_mtx_unlock(&ifp->if_start_lock);
7838 }
7839 return (err);
7840 }
7841
7842 errno_t
7843 ifnet_enable_output(struct ifnet *ifp)
7844 {
7845 if (ifp == NULL) {
7846 return (EINVAL);
7847 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7848 !IF_FULLY_ATTACHED(ifp)) {
7849 return (ENXIO);
7850 }
7851
7852 ifnet_start_common(ifp, TRUE);
7853 return (0);
7854 }
7855
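/*
 * Flow advisory: given a flow hash, look up the flow-controlled
 * interface that registered it and re-enable its output, provided the
 * interface is still attached and the hash still matches.
 */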
7856 void
7857 ifnet_flowadv(uint32_t flowhash)
7858 {
7859 struct ifnet_fc_entry *ifce;
7860 struct ifnet *ifp;
7861
7862 ifce = ifnet_fc_get(flowhash);
7863 if (ifce == NULL)
7864 return;
7865
7866 VERIFY(ifce->ifce_ifp != NULL);
7867 ifp = ifce->ifce_ifp;
7868
7869 /* the flow hash is recalculated on each attach, so verify it still matches */
7870 if (ifnet_is_attached(ifp, 1)) {
7871 if (ifp->if_flowhash == flowhash)
7872 (void) ifnet_enable_output(ifp);
7873 ifnet_decr_iorefcnt(ifp);
7874 }
7875 ifnet_fc_entry_free(ifce);
7876 }
7877
7878 /*
7879 * Function to compare ifnet_fc_entries in ifnet flow control tree
7880 */
7881 static inline int
7882 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7883 {
7884 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
7885 }
7886
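/*
 * Insert an entry for the interface into the flow control tree, keyed
 * by its flow hash.  An existing entry for the same interface is left
 * alone; a hash collision with a different interface returns EAGAIN.
 */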
7887 static int
7888 ifnet_fc_add(struct ifnet *ifp)
7889 {
7890 struct ifnet_fc_entry keyfc, *ifce;
7891 uint32_t flowhash;
7892
7893 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
7894 VERIFY(ifp->if_flowhash != 0);
7895 flowhash = ifp->if_flowhash;
7896
7897 bzero(&keyfc, sizeof (keyfc));
7898 keyfc.ifce_flowhash = flowhash;
7899
7900 lck_mtx_lock_spin(&ifnet_fc_lock);
7901 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7902 if (ifce != NULL && ifce->ifce_ifp == ifp) {
7903 /* Entry is already in ifnet_fc_tree, return */
7904 lck_mtx_unlock(&ifnet_fc_lock);
7905 return (0);
7906 }
7907
7908 if (ifce != NULL) {
7909 /*
7910 * There is a different fc entry with the same flow hash
7911 * but different ifp pointer. There can be a collision
7912 * on flow hash but the probability is low. Let's just
7913 * avoid adding a second one when there is a collision.
7914 */
7915 lck_mtx_unlock(&ifnet_fc_lock);
7916 return (EAGAIN);
7917 }
7918
7919 /* become regular mutex */
7920 lck_mtx_convert_spin(&ifnet_fc_lock);
7921
7922 ifce = zalloc(ifnet_fc_zone);
7923 if (ifce == NULL) {
7924 /* memory allocation failed */
7925 lck_mtx_unlock(&ifnet_fc_lock);
7926 return (ENOMEM);
7927 }
7928 bzero(ifce, ifnet_fc_zone_size);
7929
7930 ifce->ifce_flowhash = flowhash;
7931 ifce->ifce_ifp = ifp;
7932
7933 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7934 lck_mtx_unlock(&ifnet_fc_lock);
7935 return (0);
7936 }
7937
7938 static struct ifnet_fc_entry *
7939 ifnet_fc_get(uint32_t flowhash)
7940 {
7941 struct ifnet_fc_entry keyfc, *ifce;
7942 struct ifnet *ifp;
7943
7944 bzero(&keyfc, sizeof (keyfc));
7945 keyfc.ifce_flowhash = flowhash;
7946
7947 lck_mtx_lock_spin(&ifnet_fc_lock);
7948 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7949 if (ifce == NULL) {
7950 /* Entry is not present in ifnet_fc_tree, return */
7951 lck_mtx_unlock(&ifnet_fc_lock);
7952 return (NULL);
7953 }
7954
7955 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7956
7957 VERIFY(ifce->ifce_ifp != NULL);
7958 ifp = ifce->ifce_ifp;
7959
7960 /* become regular mutex */
7961 lck_mtx_convert_spin(&ifnet_fc_lock);
7962
7963 if (!ifnet_is_attached(ifp, 0)) {
7964 /*
7965 * This ifp is not attached or in the process of being
7966 * detached; just don't process it.
7967 */
7968 ifnet_fc_entry_free(ifce);
7969 ifce = NULL;
7970 }
7971 lck_mtx_unlock(&ifnet_fc_lock);
7972
7973 return (ifce);
7974 }
7975
7976 static void
7977 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
7978 {
7979 zfree(ifnet_fc_zone, ifce);
7980 }
7981
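/*
 * Compute a non-zero flow hash for the interface from its name, unit,
 * flags, capabilities and output scheduling model, salted with random
 * values; reseed and retry if the result happens to be zero.
 */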
7982 static uint32_t
7983 ifnet_calc_flowhash(struct ifnet *ifp)
7984 {
7985 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7986 uint32_t flowhash = 0;
7987
7988 if (ifnet_flowhash_seed == 0)
7989 ifnet_flowhash_seed = RandomULong();
7990
7991 bzero(&fh, sizeof (fh));
7992
7993 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
7994 fh.ifk_unit = ifp->if_unit;
7995 fh.ifk_flags = ifp->if_flags;
7996 fh.ifk_eflags = ifp->if_eflags;
7997 fh.ifk_capabilities = ifp->if_capabilities;
7998 fh.ifk_capenable = ifp->if_capenable;
7999 fh.ifk_output_sched_model = ifp->if_output_sched_model;
8000 fh.ifk_rand1 = RandomULong();
8001 fh.ifk_rand2 = RandomULong();
8002
8003 try_again:
8004 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
8005 if (flowhash == 0) {
8006 /* try to get a non-zero flowhash */
8007 ifnet_flowhash_seed = RandomULong();
8008 goto try_again;
8009 }
8010
8011 return (flowhash);
8012 }
8013
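/*
 * Store (or clear, when len is 0) the network signature for the given
 * address family in the interface's protocol-specific data.
 */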
8014 int
8015 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
8016 uint16_t flags, uint8_t *data)
8017 {
8018 #pragma unused(flags)
8019 int error = 0;
8020
8021 switch (family) {
8022 case AF_INET:
8023 if_inetdata_lock_exclusive(ifp);
8024 if (IN_IFEXTRA(ifp) != NULL) {
8025 if (len == 0) {
8026 /* Allow clearing the signature */
8027 IN_IFEXTRA(ifp)->netsig_len = 0;
8028 bzero(IN_IFEXTRA(ifp)->netsig,
8029 sizeof (IN_IFEXTRA(ifp)->netsig));
8030 if_inetdata_lock_done(ifp);
8031 break;
8032 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
8033 error = EINVAL;
8034 if_inetdata_lock_done(ifp);
8035 break;
8036 }
8037 IN_IFEXTRA(ifp)->netsig_len = len;
8038 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
8039 } else {
8040 error = ENOMEM;
8041 }
8042 if_inetdata_lock_done(ifp);
8043 break;
8044
8045 case AF_INET6:
8046 if_inet6data_lock_exclusive(ifp);
8047 if (IN6_IFEXTRA(ifp) != NULL) {
8048 if (len == 0) {
8049 /* Allow clearing the signature */
8050 IN6_IFEXTRA(ifp)->netsig_len = 0;
8051 bzero(IN6_IFEXTRA(ifp)->netsig,
8052 sizeof (IN6_IFEXTRA(ifp)->netsig));
8053 if_inet6data_lock_done(ifp);
8054 break;
8055 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
8056 error = EINVAL;
8057 if_inet6data_lock_done(ifp);
8058 break;
8059 }
8060 IN6_IFEXTRA(ifp)->netsig_len = len;
8061 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
8062 } else {
8063 error = ENOMEM;
8064 }
8065 if_inet6data_lock_done(ifp);
8066 break;
8067
8068 default:
8069 error = EINVAL;
8070 break;
8071 }
8072
8073 return (error);
8074 }
8075
8076 int
8077 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
8078 uint16_t *flags, uint8_t *data)
8079 {
8080 int error = 0;
8081
8082 if (ifp == NULL || len == NULL || data == NULL)
8083 return (EINVAL);
8084
8085 switch (family) {
8086 case AF_INET:
8087 if_inetdata_lock_shared(ifp);
8088 if (IN_IFEXTRA(ifp) != NULL) {
8089 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
8090 error = EINVAL;
8091 if_inetdata_lock_done(ifp);
8092 break;
8093 }
8094 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
8095 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
8096 else
8097 error = ENOENT;
8098 } else {
8099 error = ENOMEM;
8100 }
8101 if_inetdata_lock_done(ifp);
8102 break;
8103
8104 case AF_INET6:
8105 if_inet6data_lock_shared(ifp);
8106 if (IN6_IFEXTRA(ifp) != NULL) {
8107 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
8108 error = EINVAL;
8109 if_inet6data_lock_done(ifp);
8110 break;
8111 }
8112 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
8113 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
8114 else
8115 error = ENOENT;
8116 } else {
8117 error = ENOMEM;
8118 }
8119 if_inet6data_lock_done(ifp);
8120 break;
8121
8122 default:
8123 error = EINVAL;
8124 break;
8125 }
8126
8127 if (error == 0 && flags != NULL)
8128 *flags = 0;
8129
8130 return (error);
8131 }
8132
8133 #if INET6
8134 int
8135 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8136 {
8137 int i, error = 0, one_set = 0;
8138
8139 if_inet6data_lock_exclusive(ifp);
8140
8141 if (IN6_IFEXTRA(ifp) == NULL) {
8142 error = ENOMEM;
8143 goto out;
8144 }
8145
8146 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8147 uint32_t prefix_len =
8148 prefixes[i].prefix_len;
8149 struct in6_addr *prefix =
8150 &prefixes[i].ipv6_prefix;
8151
8152 if (prefix_len == 0) {
8153 /* A zero prefix length clears the NAT64 prefix */
8154 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
8155 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8156 sizeof(struct in6_addr));
8157
8158 continue;
8159 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
8160 prefix_len != NAT64_PREFIX_LEN_40 &&
8161 prefix_len != NAT64_PREFIX_LEN_48 &&
8162 prefix_len != NAT64_PREFIX_LEN_56 &&
8163 prefix_len != NAT64_PREFIX_LEN_64 &&
8164 prefix_len != NAT64_PREFIX_LEN_96) {
8165 error = EINVAL;
8166 goto out;
8167 }
8168
8169 if (IN6_IS_SCOPE_EMBED(prefix)) {
8170 error = EINVAL;
8171 goto out;
8172 }
8173
8174 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
8175 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8176 sizeof(struct in6_addr));
8177 one_set = 1;
8178 }
8179
8180 out:
8181 if_inet6data_lock_done(ifp);
8182
8183 if (error == 0 && one_set != 0)
8184 necp_update_all_clients();
8185
8186 return (error);
8187 }
8188
8189 int
8190 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8191 {
8192 int i, found_one = 0, error = 0;
8193
8194 if (ifp == NULL)
8195 return (EINVAL);
8196
8197 if_inet6data_lock_shared(ifp);
8198
8199 if (IN6_IFEXTRA(ifp) == NULL) {
8200 error = ENOMEM;
8201 goto out;
8202 }
8203
8204 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8205 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
8206 found_one = 1;
8207 }
8208
8209 if (found_one == 0) {
8210 error = ENOENT;
8211 goto out;
8212 }
8213
8214 if (prefixes)
8215 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
8216 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
8217
8218 out:
8219 if_inet6data_lock_done(ifp);
8220
8221 return (error);
8222 }
8223 #endif
8224
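/*
 * Debug hook on the output path: when HWCKSUM_DBG_FINALIZE_FORCED is
 * set, force software finalization of IPv4/IPv6 checksums (except for
 * TSO packets) and count how many headers and payloads were finalized.
 */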
8225 static void
8226 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
8227 protocol_family_t pf)
8228 {
8229 #pragma unused(ifp)
8230 uint32_t did_sw;
8231
8232 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
8233 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
8234 return;
8235
8236 switch (pf) {
8237 case PF_INET:
8238 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
8239 if (did_sw & CSUM_DELAY_IP)
8240 hwcksum_dbg_finalized_hdr++;
8241 if (did_sw & CSUM_DELAY_DATA)
8242 hwcksum_dbg_finalized_data++;
8243 break;
8244 #if INET6
8245 case PF_INET6:
8246 /*
8247 * Checksum offload should not have been enabled when
8248 * extension headers exist; that also means that we
8249 * cannot force-finalize packets with extension headers.
8250 * Indicate to the callee that it should skip such cases by
8251 * setting optlen to -1.
8252 */
8253 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
8254 m->m_pkthdr.csum_flags);
8255 if (did_sw & CSUM_DELAY_IPV6_DATA)
8256 hwcksum_dbg_finalized_data++;
8257 break;
8258 #endif /* INET6 */
8259 default:
8260 return;
8261 }
8262 }
8263
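/*
 * Debug hook on the input path: optionally force partial checksum
 * offload by computing the 16-bit 1's complement sum in software, and
 * verify (and, if requested, re-adjust to a different start offset)
 * partial checksums reported by the driver.
 */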
8264 static void
8265 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
8266 protocol_family_t pf)
8267 {
8268 uint16_t sum = 0;
8269 uint32_t hlen;
8270
8271 if (frame_header == NULL ||
8272 frame_header < (char *)mbuf_datastart(m) ||
8273 frame_header > (char *)m->m_data) {
8274 printf("%s: frame header pointer 0x%llx out of range "
8275 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
8276 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
8277 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
8278 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
8279 (uint64_t)VM_KERNEL_ADDRPERM(m));
8280 return;
8281 }
8282 hlen = (m->m_data - frame_header);
8283
8284 switch (pf) {
8285 case PF_INET:
8286 #if INET6
8287 case PF_INET6:
8288 #endif /* INET6 */
8289 break;
8290 default:
8291 return;
8292 }
8293
8294 /*
8295 * Force partial checksum offload; useful to simulate cases
8296 * where the hardware does not support partial checksum offload,
8297 * in order to validate correctness throughout the layers above.
8298 */
8299 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
8300 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
8301
8302 if (foff > (uint32_t)m->m_pkthdr.len)
8303 return;
8304
8305 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
8306
8307 /* Compute 16-bit 1's complement sum from forced offset */
8308 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
8309
8310 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
8311 m->m_pkthdr.csum_rx_val = sum;
8312 m->m_pkthdr.csum_rx_start = (foff + hlen);
8313
8314 hwcksum_dbg_partial_forced++;
8315 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
8316 }
8317
8318 /*
8319 * Partial checksum offload verification (and adjustment);
8320 * useful to validate and test cases where the hardware
8321 * supports partial checksum offload.
8322 */
8323 if ((m->m_pkthdr.csum_flags &
8324 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
8325 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
8326 uint32_t rxoff;
8327
8328 /* Start offset must begin after frame header */
8329 rxoff = m->m_pkthdr.csum_rx_start;
8330 if (hlen > rxoff) {
8331 hwcksum_dbg_bad_rxoff++;
8332 if (dlil_verbose) {
8333 printf("%s: partial cksum start offset %d "
8334 "is less than frame header length %d for "
8335 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
8336 (uint64_t)VM_KERNEL_ADDRPERM(m));
8337 }
8338 return;
8339 }
8340 rxoff -= hlen;
8341
8342 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
8343 /*
8344 * Compute the expected 16-bit 1's complement sum;
8345 * skip this if we've already computed it above
8346 * when partial checksum offload is forced.
8347 */
8348 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
8349
8350 /* Hardware or driver is buggy */
8351 if (sum != m->m_pkthdr.csum_rx_val) {
8352 hwcksum_dbg_bad_cksum++;
8353 if (dlil_verbose) {
8354 printf("%s: bad partial cksum value "
8355 "0x%x (expected 0x%x) for mbuf "
8356 "0x%llx [rx_start %d]\n",
8357 if_name(ifp),
8358 m->m_pkthdr.csum_rx_val, sum,
8359 (uint64_t)VM_KERNEL_ADDRPERM(m),
8360 m->m_pkthdr.csum_rx_start);
8361 }
8362 return;
8363 }
8364 }
8365 hwcksum_dbg_verified++;
8366
8367 /*
8368 * This code lets us emulate various hardware implementations that
8369 * compute the 16-bit 1's complement sum starting at different
8370 * start offsets.
8371 */
8372 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
8373 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
8374
8375 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
8376 return;
8377
8378 sum = m_adj_sum16(m, rxoff, aoff,
8379 m_pktlen(m) - aoff, sum);
8380
8381 m->m_pkthdr.csum_rx_val = sum;
8382 m->m_pkthdr.csum_rx_start = (aoff + hlen);
8383
8384 hwcksum_dbg_adjusted++;
8385 }
8386 }
8387 }
8388
8389 static int
8390 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
8391 {
8392 #pragma unused(arg1, arg2)
8393 u_int32_t i;
8394 int err;
8395
8396 i = hwcksum_dbg_mode;
8397
8398 err = sysctl_handle_int(oidp, &i, 0, req);
8399 if (err != 0 || req->newptr == USER_ADDR_NULL)
8400 return (err);
8401
8402 if (hwcksum_dbg == 0)
8403 return (ENODEV);
8404
8405 if ((i & ~HWCKSUM_DBG_MASK) != 0)
8406 return (EINVAL);
8407
8408 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
8409
8410 return (err);
8411 }
8412
8413 static int
8414 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
8415 {
8416 #pragma unused(arg1, arg2)
8417 u_int32_t i;
8418 int err;
8419
8420 i = hwcksum_dbg_partial_rxoff_forced;
8421
8422 err = sysctl_handle_int(oidp, &i, 0, req);
8423 if (err != 0 || req->newptr == USER_ADDR_NULL)
8424 return (err);
8425
8426 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
8427 return (ENODEV);
8428
8429 hwcksum_dbg_partial_rxoff_forced = i;
8430
8431 return (err);
8432 }
8433
8434 static int
8435 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
8436 {
8437 #pragma unused(arg1, arg2)
8438 u_int32_t i;
8439 int err;
8440
8441 i = hwcksum_dbg_partial_rxoff_adj;
8442
8443 err = sysctl_handle_int(oidp, &i, 0, req);
8444 if (err != 0 || req->newptr == USER_ADDR_NULL)
8445 return (err);
8446
8447 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
8448 return (ENODEV);
8449
8450 hwcksum_dbg_partial_rxoff_adj = i;
8451
8452 return (err);
8453 }
8454
8455 static int
8456 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
8457 {
8458 #pragma unused(oidp, arg1, arg2)
8459 int err;
8460
8461 if (req->oldptr == USER_ADDR_NULL) {
8462
8463 }
8464 if (req->newptr != USER_ADDR_NULL) {
8465 return (EPERM);
8466 }
8467 err = SYSCTL_OUT(req, &tx_chain_len_stats,
8468 sizeof(struct chain_len_stats));
8469
8470 return (err);
8471 }
8472
8473
8474 #if DEBUG || DEVELOPMENT
8475 /* Blob for sum16 verification */
8476 static uint8_t sumdata[] = {
8477 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
8478 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
8479 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
8480 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
8481 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
8482 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
8483 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
8484 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
8485 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
8486 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
8487 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
8488 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
8489 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
8490 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
8491 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
8492 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
8493 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
8494 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
8495 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
8496 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
8497 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
8498 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
8499 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
8500 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
8501 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
8502 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
8503 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
8504 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
8505 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
8506 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
8507 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
8508 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
8509 0xc8, 0x28, 0x02, 0x00, 0x00
8510 };
8511
8512 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
8513 static struct {
8514 boolean_t init;
8515 uint16_t len;
8516 uint16_t sumr; /* reference */
8517 uint16_t sumrp; /* reference, precomputed */
8518 } sumtbl[] = {
8519 { FALSE, 0, 0, 0x0000 },
8520 { FALSE, 1, 0, 0x001f },
8521 { FALSE, 2, 0, 0x8b1f },
8522 { FALSE, 3, 0, 0x8b27 },
8523 { FALSE, 7, 0, 0x790e },
8524 { FALSE, 11, 0, 0xcb6d },
8525 { FALSE, 20, 0, 0x20dd },
8526 { FALSE, 27, 0, 0xbabd },
8527 { FALSE, 32, 0, 0xf3e8 },
8528 { FALSE, 37, 0, 0x197d },
8529 { FALSE, 43, 0, 0x9eae },
8530 { FALSE, 64, 0, 0x4678 },
8531 { FALSE, 127, 0, 0x9399 },
8532 { FALSE, 256, 0, 0xd147 },
8533 { FALSE, 325, 0, 0x0358 },
8534 };
8535 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
8536
8537 static void
8538 dlil_verify_sum16(void)
8539 {
8540 struct mbuf *m;
8541 uint8_t *buf;
8542 int n;
8543
8544 /* Make sure test data plus extra room for alignment fits in cluster */
8545 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
8546
8547 kprintf("DLIL: running SUM16 self-tests ... ");
8548
8549 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
8550 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
8551 buf = mtod(m, uint8_t *); /* base address */
8552
8553 for (n = 0; n < SUMTBL_MAX; n++) {
8554 uint16_t len = sumtbl[n].len;
8555 int i;
8556
8557 /* Verify for all possible alignments */
8558 for (i = 0; i < (int)sizeof (uint64_t); i++) {
8559 uint16_t sum, sumr;
8560 uint8_t *c;
8561
8562 /* Copy over test data to mbuf */
8563 VERIFY(len <= sizeof (sumdata));
8564 c = buf + i;
8565 bcopy(sumdata, c, len);
8566
8567 /* Zero-offset test (align by data pointer) */
8568 m->m_data = (caddr_t)c;
8569 m->m_len = len;
8570 sum = m_sum16(m, 0, len);
8571
8572 if (!sumtbl[n].init) {
8573 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
8574 sumtbl[n].sumr = sumr;
8575 sumtbl[n].init = TRUE;
8576 } else {
8577 sumr = sumtbl[n].sumr;
8578 }
8579
8580 /* Something is horribly broken; stop now */
8581 if (sumr != sumtbl[n].sumrp) {
8582 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
8583 "for len=%d align=%d sum=0x%04x "
8584 "[expected=0x%04x]\n", __func__,
8585 len, i, sum, sumr);
8586 /* NOTREACHED */
8587 } else if (sum != sumr) {
8588 panic_plain("\n%s: broken m_sum16() for len=%d "
8589 "align=%d sum=0x%04x [expected=0x%04x]\n",
8590 __func__, len, i, sum, sumr);
8591 /* NOTREACHED */
8592 }
8593
8594 /* Alignment test by offset (fixed data pointer) */
8595 m->m_data = (caddr_t)buf;
8596 m->m_len = i + len;
8597 sum = m_sum16(m, i, len);
8598
8599 /* Something is horribly broken; stop now */
8600 if (sum != sumr) {
8601 panic_plain("\n%s: broken m_sum16() for len=%d "
8602 "offset=%d sum=0x%04x [expected=0x%04x]\n",
8603 __func__, len, i, sum, sumr);
8604 /* NOTREACHED */
8605 }
8606 #if INET
8607 /* Simple sum16 contiguous buffer test by alignment */
8608 sum = b_sum16(c, len);
8609
8610 /* Something is horribly broken; stop now */
8611 if (sum != sumr) {
8612 panic_plain("\n%s: broken b_sum16() for len=%d "
8613 "align=%d sum=0x%04x [expected=0x%04x]\n",
8614 __func__, len, i, sum, sumr);
8615 /* NOTREACHED */
8616 }
8617 #endif /* INET */
8618 }
8619 }
8620 m_freem(m);
8621
8622 kprintf("PASSED\n");
8623 }
8624 #endif /* DEBUG || DEVELOPMENT */
8625
8626 #define CASE_STRINGIFY(x) case x: return #x
8627
8628 __private_extern__ const char *
8629 dlil_kev_dl_code_str(u_int32_t event_code)
8630 {
8631 switch (event_code) {
8632 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
8633 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
8634 CASE_STRINGIFY(KEV_DL_SIFMTU);
8635 CASE_STRINGIFY(KEV_DL_SIFPHYS);
8636 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
8637 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
8638 CASE_STRINGIFY(KEV_DL_ADDMULTI);
8639 CASE_STRINGIFY(KEV_DL_DELMULTI);
8640 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
8641 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
8642 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
8643 CASE_STRINGIFY(KEV_DL_LINK_OFF);
8644 CASE_STRINGIFY(KEV_DL_LINK_ON);
8645 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
8646 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
8647 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
8648 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
8649 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
8650 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
8651 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
8652 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
8653 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
8654 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
8655 CASE_STRINGIFY(KEV_DL_ISSUES);
8656 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
8657 default:
8658 break;
8659 }
8660 return ("");
8661 }
8662
8663 /*
8664 * The sysctl name mirrors the arguments of ifnet_get_local_ports_extended():
8665 * name[0]: ifindex
8666 * name[1]: protocol
8667 * name[2]: flags
8668 */
8669 static int
8670 sysctl_get_ports_used SYSCTL_HANDLER_ARGS
8671 {
8672 #pragma unused(oidp)
8673 int *name = (int *)arg1;
8674 int namelen = arg2;
8675 int error = 0;
8676 int idx;
8677 protocol_family_t protocol;
8678 u_int32_t flags;
8679 ifnet_t ifp = NULL;
8680 u_int8_t *bitfield = NULL;
8681
8682 if (req->newptr != USER_ADDR_NULL) {
8683 error = EPERM;
8684 goto done;
8685 }
8686 if (namelen != 3) {
8687 error = ENOENT;
8688 goto done;
8689 }
8690
8691 if (req->oldptr == USER_ADDR_NULL) {
8692 req->oldidx = bitstr_size(65536);
8693 goto done;
8694 }
8695 if (req->oldlen < bitstr_size(65536)) {
8696 error = ENOMEM;
8697 goto done;
8698 }
8699
8700 idx = name[0];
8701 protocol = name[1];
8702 flags = name[2];
8703
8704 ifnet_head_lock_shared();
8705 if (!IF_INDEX_IN_RANGE(idx)) {
8706 ifnet_head_done();
8707 error = ENOENT;
8708 goto done;
8709 }
8710 ifp = ifindex2ifnet[idx];
8711 ifnet_head_done();
8712
8713 bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK | M_ZERO);
8714 if (bitfield == NULL) {
8715 error = ENOMEM;
8716 goto done;
8717 }
8718 error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
8719 if (error != 0) {
8720 printf("%s: ifnet_get_local_ports_extended() error %d\n",
8721 __func__, error);
8722 goto done;
8723 }
8724 error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
8725 done:
8726 if (bitfield != NULL)
8727 _FREE(bitfield, M_TEMP);
8728 return (error);
8729 }
8730
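/*
 * Thread call scheduled by ifnet_notify_data_threshold(); invokes
 * nstat_ifnet_threshold_reached() for the interface if it is still
 * attached.
 */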
8731 static void
8732 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
8733 {
8734 #pragma unused(arg1)
8735 struct ifnet *ifp = arg0;
8736
8737 if (ifnet_is_attached(ifp, 1)) {
8738 nstat_ifnet_threshold_reached(ifp->if_index);
8739 ifnet_decr_iorefcnt(ifp);
8740 }
8741 }
8742
8743 void
8744 ifnet_notify_data_threshold(struct ifnet *ifp)
8745 {
8746 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
8747 uint64_t oldbytes = ifp->if_dt_bytes;
8748
8749 ASSERT(ifp->if_dt_tcall != NULL);
8750
8751 /*
8752 * If we went over the threshold, notify NetworkStatistics.
8753 * We rate-limit it based on the threshold interval value.
8754 */
8755 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
8756 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
8757 !thread_call_isactive(ifp->if_dt_tcall)) {
8758 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
8759 uint64_t now = mach_absolute_time(), deadline = now;
8760 uint64_t ival;
8761
8762 if (tival != 0) {
8763 nanoseconds_to_absolutetime(tival, &ival);
8764 clock_deadline_for_periodic_event(ival, now, &deadline);
8765 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
8766 deadline);
8767 } else {
8768 (void) thread_call_enter(ifp->if_dt_tcall);
8769 }
8770 }
8771 }
8772
8773 #if (DEVELOPMENT || DEBUG)
8774 /*
8775 * The sysctl variable name contains the input parameters of
8776 * ifnet_get_keepalive_offload_frames()
8777 * ifp (interface index): name[0]
8778 * frames_array_count: name[1]
8779 * frame_data_offset: name[2]
8780 * The return length gives used_frames_count
8781 */
8782 static int
8783 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
8784 {
8785 #pragma unused(oidp)
8786 int *name = (int *)arg1;
8787 u_int namelen = arg2;
8788 int idx;
8789 ifnet_t ifp = NULL;
8790 u_int32_t frames_array_count;
8791 size_t frame_data_offset;
8792 u_int32_t used_frames_count;
8793 struct ifnet_keepalive_offload_frame *frames_array = NULL;
8794 int error = 0;
8795 u_int32_t i;
8796
8797 /*
8798 * Only root is allowed to look at other processes' TCP frames.
8799 */
8800 error = proc_suser(current_proc());
8801 if (error != 0)
8802 goto done;
8803 /*
8804 * Validate the input parameters
8805 */
8806 if (req->newptr != USER_ADDR_NULL) {
8807 error = EPERM;
8808 goto done;
8809 }
8810 if (namelen != 3) {
8811 error = EINVAL;
8812 goto done;
8813 }
8814 if (req->oldptr == USER_ADDR_NULL) {
8815 error = EINVAL;
8816 goto done;
8817 }
8818 if (req->oldlen == 0) {
8819 error = EINVAL;
8820 goto done;
8821 }
8822 idx = name[0];
8823 frames_array_count = name[1];
8824 frame_data_offset = name[2];
8825
8826 /* Make sure the passed buffer is large enough */
8827 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
8828 req->oldlen) {
8829 error = ENOMEM;
8830 goto done;
8831 }
8832
8833 ifnet_head_lock_shared();
8834 if (!IF_INDEX_IN_RANGE(idx)) {
8835 ifnet_head_done();
8836 error = ENOENT;
8837 goto done;
8838 }
8839 ifp = ifindex2ifnet[idx];
8840 ifnet_head_done();
8841
8842 frames_array = _MALLOC(frames_array_count *
8843 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
8844 if (frames_array == NULL) {
8845 error = ENOMEM;
8846 goto done;
8847 }
8848
8849 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
8850 frames_array_count, frame_data_offset, &used_frames_count);
8851 if (error != 0) {
8852 printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
8853 __func__, error);
8854 goto done;
8855 }
8856
8857 for (i = 0; i < used_frames_count; i++) {
8858 error = SYSCTL_OUT(req, frames_array + i,
8859 sizeof(struct ifnet_keepalive_offload_frame));
8860 if (error != 0) {
8861 goto done;
8862 }
8863 }
8864 done:
8865 if (frames_array != NULL)
8866 _FREE(frames_array, M_TEMP);
8867 return (error);
8868 }
8869 #endif /* DEVELOPMENT || DEBUG */
8870
8871 void
8872 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
8873 struct ifnet *ifp)
8874 {
8875 tcp_update_stats_per_flow(ifs, ifp);
8876 }
8877
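/*
 * Input mitigation thread call: wake up the DLIL input thread for the
 * interface if it is not already running or if packets are pending in
 * its receive queue.
 */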
8878 static void
8879 dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
8880 {
8881 #pragma unused(arg1)
8882 struct ifnet *ifp = (struct ifnet *)arg0;
8883 struct dlil_threading_info *inp = ifp->if_inp;
8884
8885 ifnet_lock_shared(ifp);
8886 if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
8887 ifnet_lock_done(ifp);
8888 return;
8889 }
8890
8891 lck_mtx_lock_spin(&inp->input_lck);
8892 inp->input_waiting |= DLIL_INPUT_WAITING;
8893 if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
8894 !qempty(&inp->rcvq_pkts)) {
8895 inp->wtot++;
8896 wakeup_one((caddr_t)&inp->input_waiting);
8897 }
8898 lck_mtx_unlock(&inp->input_lck);
8899 ifnet_lock_done(ifp);
8900 }