1 /*
2 * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_llreach.h>
71 #include <net/kpi_interfacefilter.h>
72 #include <net/classq/classq.h>
73 #include <net/classq/classq_sfb.h>
74 #include <net/flowhash.h>
75 #include <net/ntstat.h>
76
77 #if INET
78 #include <netinet/in_var.h>
79 #include <netinet/igmp_var.h>
80 #include <netinet/ip_var.h>
81 #include <netinet/tcp.h>
82 #include <netinet/tcp_var.h>
83 #include <netinet/udp.h>
84 #include <netinet/udp_var.h>
85 #include <netinet/if_ether.h>
86 #include <netinet/in_pcb.h>
87 #endif /* INET */
88
89 #if INET6
90 #include <netinet6/in6_var.h>
91 #include <netinet6/nd6.h>
92 #include <netinet6/mld6_var.h>
93 #include <netinet6/scope6_var.h>
94 #endif /* INET6 */
95
96 #include <libkern/OSAtomic.h>
97 #include <libkern/tree.h>
98
99 #include <dev/random/randomdev.h>
100 #include <machine/machine_routines.h>
101
102 #include <mach/thread_act.h>
103 #include <mach/sdt.h>
104
105 #if CONFIG_MACF
106 #include <sys/kauth.h>
107 #include <security/mac_framework.h>
108 #include <net/ethernet.h>
109 #include <net/firewire.h>
110 #endif
111
112 #if PF
113 #include <net/pfvar.h>
114 #endif /* PF */
115 #if PF_ALTQ
116 #include <net/altq/altq.h>
117 #endif /* PF_ALTQ */
118 #include <net/pktsched/pktsched.h>
119
120 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
121 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
122 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
123 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
124 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
125
126 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
127 #define MAX_LINKADDR 4 /* LONGWORDS */
128 #define M_NKE M_IFADDR
129
130 #if 1
131 #define DLIL_PRINTF printf
132 #else
133 #define DLIL_PRINTF kprintf
134 #endif
135
136 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
137 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
138
139 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
140 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
141
142 enum {
143 kProtoKPI_v1 = 1,
144 kProtoKPI_v2 = 2
145 };
146
147 /*
148 * List of if_proto structures in if_proto_hash[] is protected by
149 * the ifnet lock. The rest of the fields are initialized at protocol
150 * attach time and never change, thus no lock required as long as
151 * a reference to it is valid, via if_proto_ref().
152 */
153 struct if_proto {
154 SLIST_ENTRY(if_proto) next_hash;
155 u_int32_t refcount;
156 u_int32_t detached;
157 struct ifnet *ifp;
158 protocol_family_t protocol_family;
159 int proto_kpi;
160 union {
161 struct {
162 proto_media_input input;
163 proto_media_preout pre_output;
164 proto_media_event event;
165 proto_media_ioctl ioctl;
166 proto_media_detached detached;
167 proto_media_resolve_multi resolve_multi;
168 proto_media_send_arp send_arp;
169 } v1;
170 struct {
171 proto_media_input_v2 input;
172 proto_media_preout pre_output;
173 proto_media_event event;
174 proto_media_ioctl ioctl;
175 proto_media_detached detached;
176 proto_media_resolve_multi resolve_multi;
177 proto_media_send_arp send_arp;
178 } v2;
179 } kpi;
180 };
181
182 SLIST_HEAD(proto_hash_entry, if_proto);
183
184 #define DLIL_SDLMAXLEN 64
185 #define DLIL_SDLDATALEN \
186 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
187
188 struct dlil_ifnet {
189 struct ifnet dl_if; /* public ifnet */
190 /*
191 * DLIL private fields, protected by dl_if_lock
192 */
193 decl_lck_mtx_data(, dl_if_lock);
194 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
195 u_int32_t dl_if_flags; /* flags (below) */
196 u_int32_t dl_if_refcnt; /* refcnt */
197 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
198 void *dl_if_uniqueid; /* unique interface id */
199 size_t dl_if_uniqueid_len; /* length of the unique id */
200 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
201 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
202 struct {
203 struct ifaddr ifa; /* lladdr ifa */
204 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
205 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
206 } dl_if_lladdr;
207 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
208 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
209 ctrace_t dl_if_attach; /* attach PC stacktrace */
210 ctrace_t dl_if_detach; /* detach PC stacktrace */
211 };
212
213 /* Values for dl_if_flags (private to DLIL) */
214 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
215 #define DLIF_REUSE 0x2 /* DLIL ifnet recycler, ifnet is not new */
216 #define DLIF_DEBUG 0x4 /* has debugging info */
217
218 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
219
220 /* For gdb */
221 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
222
223 struct dlil_ifnet_dbg {
224 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
225 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
226 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
227 /*
228 * Circular lists of ifnet_{reference,release} callers.
229 */
230 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
231 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
232 };
233
234 #define DLIL_TO_IFP(s) (&s->dl_if)
235 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
236
237 struct ifnet_filter {
238 TAILQ_ENTRY(ifnet_filter) filt_next;
239 u_int32_t filt_skip;
240 u_int32_t filt_flags;
241 ifnet_t filt_ifp;
242 const char *filt_name;
243 void *filt_cookie;
244 protocol_family_t filt_protocol;
245 iff_input_func filt_input;
246 iff_output_func filt_output;
247 iff_event_func filt_event;
248 iff_ioctl_func filt_ioctl;
249 iff_detached_func filt_detached;
250 };
251
252 struct proto_input_entry;
253
254 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
255 static lck_grp_t *dlil_lock_group;
256 lck_grp_t *ifnet_lock_group;
257 static lck_grp_t *ifnet_head_lock_group;
258 static lck_grp_t *ifnet_snd_lock_group;
259 static lck_grp_t *ifnet_rcv_lock_group;
260 lck_attr_t *ifnet_lock_attr;
261 decl_lck_rw_data(static, ifnet_head_lock);
262 decl_lck_mtx_data(static, dlil_ifnet_lock);
263 u_int32_t dlil_filter_disable_tso_count = 0;
264
265 #if DEBUG
266 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
267 #else
268 static unsigned int ifnet_debug; /* debugging (disabled) */
269 #endif /* !DEBUG */
270 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
271 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
272 static struct zone *dlif_zone; /* zone for dlil_ifnet */
273
274 #define DLIF_ZONE_MAX 64 /* maximum elements in zone */
275 #define DLIF_ZONE_NAME "ifnet" /* zone name */
276
277 static unsigned int dlif_filt_size; /* size of ifnet_filter */
278 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
279
280 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
281 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
282
283 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
284 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
285
286 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
287 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
288
289 static unsigned int dlif_proto_size; /* size of if_proto */
290 static struct zone *dlif_proto_zone; /* zone for if_proto */
291
292 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
293 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
294
295 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
296 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
297 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
298
299 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
300 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
301
302 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
303 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
304 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
305
306 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
307 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
308
309 /*
310 * Updating this variable should be done by first acquiring the global
311 * radix node head (rnh_lock), in tandem with setting/clearing the
312 * PR_AGGDRAIN flag for the routing domain.
313 */
314 u_int32_t ifnet_aggressive_drainers;
315 static u_int32_t net_rtref;
316
317 static struct dlil_main_threading_info dlil_main_input_thread_info;
318 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
319 (struct dlil_threading_info *)&dlil_main_input_thread_info;
320
321 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
322 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
323 static void dlil_if_trace(struct dlil_ifnet *, int);
324 static void if_proto_ref(struct if_proto *);
325 static void if_proto_free(struct if_proto *);
326 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
327 static int dlil_ifp_proto_count(struct ifnet *);
328 static void if_flt_monitor_busy(struct ifnet *);
329 static void if_flt_monitor_unbusy(struct ifnet *);
330 static void if_flt_monitor_enter(struct ifnet *);
331 static void if_flt_monitor_leave(struct ifnet *);
332 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
333 char **, protocol_family_t);
334 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
335 protocol_family_t);
336 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
337 const struct sockaddr_dl *);
338 static int ifnet_lookup(struct ifnet *);
339 static void if_purgeaddrs(struct ifnet *);
340
341 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
342 struct mbuf *, char *);
343 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
344 struct mbuf *);
345 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
346 mbuf_t *, const struct sockaddr *, void *, char *, char *);
347 static void ifproto_media_event(struct ifnet *, protocol_family_t,
348 const struct kev_msg *);
349 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
350 unsigned long, void *);
351 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
352 struct sockaddr_dl *, size_t);
353 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
354 const struct sockaddr_dl *, const struct sockaddr *,
355 const struct sockaddr_dl *, const struct sockaddr *);
356
357 static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
358 static void ifp_if_start(struct ifnet *);
359 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
360 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
361 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
362 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
363 protocol_family_t *);
364 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
365 const struct ifnet_demux_desc *, u_int32_t);
366 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
367 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
368 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
369 const struct sockaddr *, const char *, const char *);
370 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
371 const struct sockaddr *, const char *, const char *,
372 u_int32_t *, u_int32_t *);
373 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
374 static void ifp_if_free(struct ifnet *);
375 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
376 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
377 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
378
379 static void dlil_main_input_thread_func(void *, wait_result_t);
380 static void dlil_input_thread_func(void *, wait_result_t);
381 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
382 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
383 static void dlil_terminate_input_thread(struct dlil_threading_info *);
384 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
385 struct dlil_threading_info *, boolean_t);
386 static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
387 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
388 u_int32_t, ifnet_model_t, boolean_t);
389 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
390 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
391
392 #if DEBUG
393 static void dlil_verify_sum16(void);
394 #endif /* DEBUG */
395 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
396 protocol_family_t);
397 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
398 protocol_family_t);
399
400 static void ifnet_detacher_thread_func(void *, wait_result_t);
401 static int ifnet_detacher_thread_cont(int);
402 static void ifnet_detach_final(struct ifnet *);
403 static void ifnet_detaching_enqueue(struct ifnet *);
404 static struct ifnet *ifnet_detaching_dequeue(void);
405
406 static void ifnet_start_thread_fn(void *, wait_result_t);
407 static void ifnet_poll_thread_fn(void *, wait_result_t);
408 static void ifnet_poll(struct ifnet *);
409
410 static void ifp_src_route_copyout(struct ifnet *, struct route *);
411 static void ifp_src_route_copyin(struct ifnet *, struct route *);
412 #if INET6
413 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
414 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
415 #endif /* INET6 */
416
417 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
418 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
419 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
420 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
421 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
422 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
423 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
424 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
425 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
426 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
427 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
428 static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;
429
430 struct chain_len_stats tx_chain_len_stats;
431 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
432
433 /* The following are protected by dlil_ifnet_lock */
434 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
435 static u_int32_t ifnet_detaching_cnt;
436 static void *ifnet_delayed_run; /* wait channel for detaching thread */
437
438 decl_lck_mtx_data(static, ifnet_fc_lock);
439
440 static uint32_t ifnet_flowhash_seed;
441
442 struct ifnet_flowhash_key {
443 char ifk_name[IFNAMSIZ];
444 uint32_t ifk_unit;
445 uint32_t ifk_flags;
446 uint32_t ifk_eflags;
447 uint32_t ifk_capabilities;
448 uint32_t ifk_capenable;
449 uint32_t ifk_output_sched_model;
450 uint32_t ifk_rand1;
451 uint32_t ifk_rand2;
452 };
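/*
 * Sketch (an assumption shown for illustration; the actual code lives in
 * ifnet_calc_flowhash(), defined later in this file): the key above is
 * filled from the interface's identity and capability bits and hashed
 * with the seed declared above, roughly:
 *
 *	struct ifnet_flowhash_key fh;
 *	bzero(&fh, sizeof (fh));
 *	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
 *	fh.ifk_unit = ifp->if_unit;
 *	fh.ifk_flags = ifp->if_flags;
 *	...
 *	hash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
 */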
453
454 /* Flow control entry per interface */
455 struct ifnet_fc_entry {
456 RB_ENTRY(ifnet_fc_entry) ifce_entry;
457 u_int32_t ifce_flowhash;
458 struct ifnet *ifce_ifp;
459 };
460
461 static uint32_t ifnet_calc_flowhash(struct ifnet *);
462 static int ifce_cmp(const struct ifnet_fc_entry *,
463 const struct ifnet_fc_entry *);
464 static int ifnet_fc_add(struct ifnet *);
465 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
466 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
467
468 /* protected by ifnet_fc_lock */
469 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
470 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
471 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
472
473 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
474 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
475
476 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
477 #define IFNET_FC_ZONE_MAX 32
478
479 extern void bpfdetach(struct ifnet*);
480 extern void proto_input_run(void);
481
482 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
483 u_int32_t flags);
484 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
485 u_int32_t flags);
486
487 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
488
489 #if CONFIG_MACF
490 int dlil_lladdr_ckreq = 0;
491 #endif
492
493 #if DEBUG
494 int dlil_verbose = 1;
495 #else
496 int dlil_verbose = 0;
497 #endif /* DEBUG */
498 #if IFNET_INPUT_SANITY_CHK
499 /* sanity checking of input packet lists received */
500 static u_int32_t dlil_input_sanity_check = 0;
501 #endif /* IFNET_INPUT_SANITY_CHK */
502 /* rate limit debug messages */
503 struct timespec dlil_dbgrate = { 1, 0 };
504
505 SYSCTL_DECL(_net_link_generic_system);
506
507 #if CONFIG_MACF
508 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
509 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
510 "Require MACF system info check to expose link-layer address");
511 #endif
512
513 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
514 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
515
516 #define IF_SNDQ_MINLEN 32
517 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
518 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
519 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
520 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
521
522 #define IF_RCVQ_MINLEN 32
523 #define IF_RCVQ_MAXLEN 256
524 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
525 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
526 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
527 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
528
529 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
530 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
531 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
532 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
533 "ilog2 of EWMA decay rate of avg inbound packets");
534
535 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
536 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
537 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
538 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
539 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
540 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
541 "Q", "input poll mode freeze time");
542
543 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
544 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
545 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
546 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
547 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
548 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
549 "Q", "input poll sampling time");
550
551 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
552 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
553 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
554 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
555 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
556 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
557 "Q", "input poll interval (time)");
558
559 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
560 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
561 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
562 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
563 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
564
565 #define IF_RXPOLL_WLOWAT 10
566 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
567 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
568 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
569 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
570 "I", "input poll wakeup low watermark");
571
572 #define IF_RXPOLL_WHIWAT 100
573 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
574 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
575 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
576 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
577 "I", "input poll wakeup high watermark");
578
579 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
580 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
581 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
582 "max packets per poll call");
583
584 static u_int32_t if_rxpoll = 1;
585 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
586 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
587 sysctl_rxpoll, "I", "enable opportunistic input polling");
588
589 u_int32_t if_bw_smoothing_val = 3;
590 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
591 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");
592
593 u_int32_t if_bw_measure_size = 10;
594 SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
595 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
596
597 static u_int32_t cur_dlil_input_threads = 0;
598 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
599 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
600 "Current number of DLIL input threads");
601
602 #if IFNET_INPUT_SANITY_CHK
603 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
604 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
605 "Turn on sanity checking in DLIL input");
606 #endif /* IFNET_INPUT_SANITY_CHK */
607
608 static u_int32_t if_flowadv = 1;
609 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
610 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
611 "enable flow-advisory mechanism");
612
613 static u_int32_t if_delaybased_queue = 1;
614 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
615 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
616 "enable delay based dynamic queue sizing");
617
618 static uint64_t hwcksum_in_invalidated = 0;
619 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
620 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
621 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
622
623 uint32_t hwcksum_dbg = 0;
624 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
625 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
626 "enable hardware cksum debugging");
627
628 u_int32_t ifnet_start_delayed = 0;
629 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
630 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
631 "number of times start was delayed");
632
633 u_int32_t ifnet_delay_start_disabled = 0;
634 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
635 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
636 "number of times start was delayed");
637
638 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
639 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
640 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
641 #define HWCKSUM_DBG_MASK \
642 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
643 HWCKSUM_DBG_FINALIZE_FORCED)
644
645 static uint32_t hwcksum_dbg_mode = 0;
646 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
647 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
648 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
649
650 static uint64_t hwcksum_dbg_partial_forced = 0;
651 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
652 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
653 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
654
655 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
656 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
657 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
658 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
659
660 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
661 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
662 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
663 &hwcksum_dbg_partial_rxoff_forced, 0,
664 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
665 "forced partial cksum rx offset");
666
667 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
668 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
669 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
670 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
671 "adjusted partial cksum rx offset");
672
673 static uint64_t hwcksum_dbg_verified = 0;
674 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
675 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
676 &hwcksum_dbg_verified, "packets verified for having good checksum");
677
678 static uint64_t hwcksum_dbg_bad_cksum = 0;
679 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
680 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
681 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
682
683 static uint64_t hwcksum_dbg_bad_rxoff = 0;
684 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
685 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
686 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
687
688 static uint64_t hwcksum_dbg_adjusted = 0;
689 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
690 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
691 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
692
693 static uint64_t hwcksum_dbg_finalized_hdr = 0;
694 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
695 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
696 &hwcksum_dbg_finalized_hdr, "finalized headers");
697
698 static uint64_t hwcksum_dbg_finalized_data = 0;
699 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
700 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
701 &hwcksum_dbg_finalized_data, "finalized payloads");
702
703 uint32_t hwcksum_tx = 1;
704 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
705 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
706 "enable transmit hardware checksum offload");
707
708 uint32_t hwcksum_rx = 1;
709 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
710 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
711 "enable receive hardware checksum offload");
712
713 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
714 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
715 sysctl_tx_chain_len_stats, "S", "");
716
717 uint32_t tx_chain_len_count = 0;
718 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
719 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0,
720 "");
721
722 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
723 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
724
725 unsigned int net_rxpoll = 1;
726 unsigned int net_affinity = 1;
727 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
728
729 extern u_int32_t inject_buckets;
730
731 static lck_grp_attr_t *dlil_grp_attributes = NULL;
732 static lck_attr_t *dlil_lck_attributes = NULL;
733
734
735 #define DLIL_INPUT_CHECK(m, ifp) { \
736 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
737 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
738 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
739 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
740 /* NOTREACHED */ \
741 } \
742 }
743
744 #define DLIL_EWMA(old, new, decay) do { \
745 u_int32_t _avg; \
746 if ((_avg = (old)) > 0) \
747 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
748 else \
749 _avg = (new); \
750 (old) = _avg; \
751 } while (0)
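/*
 * Worked example of DLIL_EWMA (derived directly from the macro above),
 * with decay = 2, i.e. the new sample is weighted 1/4:
 *
 *	old = 100, new = 20:
 *	_avg = ((100 << 2) - 100 + 20) >> 2 = (400 - 100 + 20) >> 2 = 80
 *
 * which is the integer form of old * 3/4 + new * 1/4.
 */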
752
753 #define MBPS (1ULL * 1000 * 1000)
754 #define GBPS (MBPS * 1000)
755
756 struct rxpoll_time_tbl {
757 u_int64_t speed; /* downlink speed */
758 u_int32_t plowat; /* packets low watermark */
759 u_int32_t phiwat; /* packets high watermark */
760 u_int32_t blowat; /* bytes low watermark */
761 u_int32_t bhiwat; /* bytes high watermark */
762 };
763
764 static struct rxpoll_time_tbl rxpoll_tbl[] = {
765 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
766 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
767 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
768 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
769 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
770 { 0, 0, 0, 0, 0 }
771 };
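/*
 * Illustrative sketch only (an assumption; the real selection is done in
 * dlil_rxpoll_set_params(), later in this file): the table row matching
 * the interface's downlink speed supplies the packet/byte watermarks used
 * to switch between interrupt and polling mode, along the lines of:
 *
 *	i = 0;
 *	while (rxpoll_tbl[i + 1].speed != 0 &&
 *	    rxpoll_tbl[i + 1].speed <= link_speed)
 *		i++;
 *	plowat = rxpoll_tbl[i].plowat;
 *	phiwat = rxpoll_tbl[i].phiwat;
 *
 * where `link_speed` stands in for the downlink rate in bits per second.
 */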
772
773 int
774 proto_hash_value(u_int32_t protocol_family)
775 {
776 /*
777 * dlil_proto_unplumb_all() depends on the mapping between
778 * the hash bucket index and the protocol family defined
779 * here; future changes must be applied there as well.
780 */
781 switch (protocol_family) {
782 case PF_INET:
783 return (0);
784 case PF_INET6:
785 return (1);
786 case PF_VLAN:
787 return (2);
788 case PF_UNSPEC:
789 default:
790 return (3);
791 }
792 }
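/*
 * Example: a protocol attached for PF_INET6 is kept on the list at
 * ifp->if_proto_hash[proto_hash_value(PF_INET6)], i.e. hash slot 1.
 */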
793
794 /*
795 * Caller must already be holding ifnet lock.
796 */
797 static struct if_proto *
798 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
799 {
800 struct if_proto *proto = NULL;
801 u_int32_t i = proto_hash_value(protocol_family);
802
803 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
804
805 if (ifp->if_proto_hash != NULL)
806 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
807
808 while (proto != NULL && proto->protocol_family != protocol_family)
809 proto = SLIST_NEXT(proto, next_hash);
810
811 if (proto != NULL)
812 if_proto_ref(proto);
813
814 return (proto);
815 }
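/*
 * Usage sketch (illustrative, not copied from a particular caller): look
 * the protocol up while holding the ifnet lock, then release the
 * reference that find_attached_proto() took once done:
 *
 *	ifnet_lock_shared(ifp);
 *	proto = find_attached_proto(ifp, PF_INET);
 *	ifnet_lock_done(ifp);
 *	if (proto != NULL) {
 *		... use proto ...
 *		if_proto_free(proto);
 *	}
 */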
816
817 static void
818 if_proto_ref(struct if_proto *proto)
819 {
820 atomic_add_32(&proto->refcount, 1);
821 }
822
823 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
824
825 static void
826 if_proto_free(struct if_proto *proto)
827 {
828 u_int32_t oldval;
829 struct ifnet *ifp = proto->ifp;
830 u_int32_t proto_family = proto->protocol_family;
831 struct kev_dl_proto_data ev_pr_data;
832
833 oldval = atomic_add_32_ov(&proto->refcount, -1);
834 if (oldval > 1)
835 return;
836
837 /* No more reference on this, protocol must have been detached */
838 VERIFY(proto->detached);
839
840 if (proto->proto_kpi == kProtoKPI_v1) {
841 if (proto->kpi.v1.detached)
842 proto->kpi.v1.detached(ifp, proto->protocol_family);
843 }
844 if (proto->proto_kpi == kProtoKPI_v2) {
845 if (proto->kpi.v2.detached)
846 proto->kpi.v2.detached(ifp, proto->protocol_family);
847 }
848
849 /*
850 * Cleanup routes that may still be in the routing table for that
851 * interface/protocol pair.
852 */
853 if_rtproto_del(ifp, proto_family);
854
855 /*
856 * The reserved field carries the number of protocols still attached
857 * (subject to change)
858 */
859 ifnet_lock_shared(ifp);
860 ev_pr_data.proto_family = proto_family;
861 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
862 ifnet_lock_done(ifp);
863
864 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
865 (struct net_event_data *)&ev_pr_data,
866 sizeof(struct kev_dl_proto_data));
867
868 zfree(dlif_proto_zone, proto);
869 }
870
871 __private_extern__ void
872 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
873 {
874 unsigned int type = 0;
875 int ass = 1;
876
877 switch (what) {
878 case IFNET_LCK_ASSERT_EXCLUSIVE:
879 type = LCK_RW_ASSERT_EXCLUSIVE;
880 break;
881
882 case IFNET_LCK_ASSERT_SHARED:
883 type = LCK_RW_ASSERT_SHARED;
884 break;
885
886 case IFNET_LCK_ASSERT_OWNED:
887 type = LCK_RW_ASSERT_HELD;
888 break;
889
890 case IFNET_LCK_ASSERT_NOTOWNED:
891 /* nothing to do here for RW lock; bypass assert */
892 ass = 0;
893 break;
894
895 default:
896 panic("bad ifnet assert type: %d", what);
897 /* NOTREACHED */
898 }
899 if (ass)
900 lck_rw_assert(&ifp->if_lock, type);
901 }
902
903 __private_extern__ void
904 ifnet_lock_shared(struct ifnet *ifp)
905 {
906 lck_rw_lock_shared(&ifp->if_lock);
907 }
908
909 __private_extern__ void
910 ifnet_lock_exclusive(struct ifnet *ifp)
911 {
912 lck_rw_lock_exclusive(&ifp->if_lock);
913 }
914
915 __private_extern__ void
916 ifnet_lock_done(struct ifnet *ifp)
917 {
918 lck_rw_done(&ifp->if_lock);
919 }
920
921 #if INET
922 __private_extern__ void
923 if_inetdata_lock_shared(struct ifnet *ifp)
924 {
925 lck_rw_lock_shared(&ifp->if_inetdata_lock);
926 }
927
928 __private_extern__ void
929 if_inetdata_lock_exclusive(struct ifnet *ifp)
930 {
931 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
932 }
933
934 __private_extern__ void
935 if_inetdata_lock_done(struct ifnet *ifp)
936 {
937 lck_rw_done(&ifp->if_inetdata_lock);
938 }
939 #endif
940
941 #if INET6
942 __private_extern__ void
943 if_inet6data_lock_shared(struct ifnet *ifp)
944 {
945 lck_rw_lock_shared(&ifp->if_inet6data_lock);
946 }
947
948 __private_extern__ void
949 if_inet6data_lock_exclusive(struct ifnet *ifp)
950 {
951 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
952 }
953
954 __private_extern__ void
955 if_inet6data_lock_done(struct ifnet *ifp)
956 {
957 lck_rw_done(&ifp->if_inet6data_lock);
958 }
959 #endif
960
961 __private_extern__ void
962 ifnet_head_lock_shared(void)
963 {
964 lck_rw_lock_shared(&ifnet_head_lock);
965 }
966
967 __private_extern__ void
968 ifnet_head_lock_exclusive(void)
969 {
970 lck_rw_lock_exclusive(&ifnet_head_lock);
971 }
972
973 __private_extern__ void
974 ifnet_head_done(void)
975 {
976 lck_rw_done(&ifnet_head_lock);
977 }
978
979 /*
980 * Caller must already be holding ifnet lock.
981 */
982 static int
983 dlil_ifp_proto_count(struct ifnet * ifp)
984 {
985 int i, count = 0;
986
987 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
988
989 if (ifp->if_proto_hash == NULL)
990 goto done;
991
992 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
993 struct if_proto *proto;
994 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
995 count++;
996 }
997 }
998 done:
999 return (count);
1000 }
1001
1002 __private_extern__ void
1003 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1004 u_int32_t event_code, struct net_event_data *event_data,
1005 u_int32_t event_data_len)
1006 {
1007 struct net_event_data ev_data;
1008 struct kev_msg ev_msg;
1009
1010 bzero(&ev_msg, sizeof (ev_msg));
1011 bzero(&ev_data, sizeof (ev_data));
1012 /*
1013 * A net event always starts with a net_event_data structure;
1014 * the caller can either post a simple net event or provide a
1015 * longer, event-specific structure.
1016 */
1017 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1018 ev_msg.kev_class = KEV_NETWORK_CLASS;
1019 ev_msg.kev_subclass = event_subclass;
1020 ev_msg.event_code = event_code;
1021
1022 if (event_data == NULL) {
1023 event_data = &ev_data;
1024 event_data_len = sizeof(struct net_event_data);
1025 }
1026
1027 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1028 event_data->if_family = ifp->if_family;
1029 event_data->if_unit = (u_int32_t) ifp->if_unit;
1030
1031 ev_msg.dv[0].data_length = event_data_len;
1032 ev_msg.dv[0].data_ptr = event_data;
1033 ev_msg.dv[1].data_length = 0;
1034
1035 dlil_event_internal(ifp, &ev_msg);
1036 }
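/*
 * Usage sketch (illustrative): posting a simple event with no payload
 * beyond the common header; dlil_post_msg() supplies its own
 * net_event_data and fills in the interface name, family and unit:
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * KEV_DL_LINK_ON is used here purely as an example event code.
 */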
1037
1038 __private_extern__ int
1039 dlil_alloc_local_stats(struct ifnet *ifp)
1040 {
1041 int ret = EINVAL;
1042 void *buf, *base, **pbuf;
1043
1044 if (ifp == NULL)
1045 goto end;
1046
1047 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1048 /* allocate tcpstat_local structure */
1049 buf = zalloc(dlif_tcpstat_zone);
1050 if (buf == NULL) {
1051 ret = ENOMEM;
1052 goto end;
1053 }
1054 bzero(buf, dlif_tcpstat_bufsize);
1055
1056 /* Get the 64-bit aligned base address for this object */
1057 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1058 sizeof (u_int64_t));
1059 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1060 ((intptr_t)buf + dlif_tcpstat_bufsize));
1061
1062 /*
1063 * Wind back a pointer size from the aligned base and
1064 * save the original address so we can free it later.
1065 */
1066 pbuf = (void **)((intptr_t)base - sizeof (void *));
1067 *pbuf = buf;
1068 ifp->if_tcp_stat = base;
1069
1070 /* allocate udpstat_local structure */
1071 buf = zalloc(dlif_udpstat_zone);
1072 if (buf == NULL) {
1073 ret = ENOMEM;
1074 goto end;
1075 }
1076 bzero(buf, dlif_udpstat_bufsize);
1077
1078 /* Get the 64-bit aligned base address for this object */
1079 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1080 sizeof (u_int64_t));
1081 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1082 ((intptr_t)buf + dlif_udpstat_bufsize));
1083
1084 /*
1085 * Wind back a pointer size from the aligned base and
1086 * save the original address so we can free it later.
1087 */
1088 pbuf = (void **)((intptr_t)base - sizeof (void *));
1089 *pbuf = buf;
1090 ifp->if_udp_stat = base;
1091
1092 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
1093 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
1094
1095 ret = 0;
1096 }
1097
1098 if (ifp->if_ipv4_stat == NULL) {
1099 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1100 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1101 if (ifp->if_ipv4_stat == NULL) {
1102 ret = ENOMEM;
1103 goto end;
1104 }
1105 }
1106
1107 if (ifp->if_ipv6_stat == NULL) {
1108 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1109 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1110 if (ifp->if_ipv6_stat == NULL) {
1111 ret = ENOMEM;
1112 goto end;
1113 }
1114 }
1115 end:
1116 if (ret != 0) {
1117 if (ifp->if_tcp_stat != NULL) {
1118 pbuf = (void **)
1119 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
1120 zfree(dlif_tcpstat_zone, *pbuf);
1121 ifp->if_tcp_stat = NULL;
1122 }
1123 if (ifp->if_udp_stat != NULL) {
1124 pbuf = (void **)
1125 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
1126 zfree(dlif_udpstat_zone, *pbuf);
1127 ifp->if_udp_stat = NULL;
1128 }
1129 if (ifp->if_ipv4_stat != NULL) {
1130 FREE(ifp->if_ipv4_stat, M_TEMP);
1131 ifp->if_ipv4_stat = NULL;
1132 }
1133 if (ifp->if_ipv6_stat != NULL) {
1134 FREE(ifp->if_ipv6_stat, M_TEMP);
1135 ifp->if_ipv6_stat = NULL;
1136 }
1137 }
1138
1139 return (ret);
1140 }
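/*
 * Layout of the tcpstat_local/udpstat_local buffers allocated above
 * (derived from the code, shown for illustration):
 *
 *	buf (from zalloc)         base (64-bit aligned)
 *	 |                         |
 *	 v                         v
 *	+---------+---------------+-------------------------------+
 *	| padding | saved "buf"   | tcpstat_local / udpstat_local |
 *	+---------+---------------+-------------------------------+
 *
 * The pointer-sized slot just before the aligned base stores the original
 * zalloc() address, which is what the error path above hands back to
 * zfree().
 */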
1141
1142 static int
1143 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1144 {
1145 thread_continue_t func;
1146 u_int32_t limit;
1147 int error;
1148
1149 /* NULL ifp indicates the main input thread, called at dlil_init time */
1150 if (ifp == NULL) {
1151 func = dlil_main_input_thread_func;
1152 VERIFY(inp == dlil_main_input_thread);
1153 (void) strlcat(inp->input_name,
1154 "main_input", DLIL_THREADNAME_LEN);
1155 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1156 func = dlil_rxpoll_input_thread_func;
1157 VERIFY(inp != dlil_main_input_thread);
1158 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1159 "%s_input_poll", if_name(ifp));
1160 } else {
1161 func = dlil_input_thread_func;
1162 VERIFY(inp != dlil_main_input_thread);
1163 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1164 "%s_input", if_name(ifp));
1165 }
1166 VERIFY(inp->input_thr == THREAD_NULL);
1167
1168 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1169 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1170
1171 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1172 inp->ifp = ifp; /* NULL for main input thread */
1173
1174 net_timerclear(&inp->mode_holdtime);
1175 net_timerclear(&inp->mode_lasttime);
1176 net_timerclear(&inp->sample_holdtime);
1177 net_timerclear(&inp->sample_lasttime);
1178 net_timerclear(&inp->dbg_lasttime);
1179
1180 /*
1181 * For interfaces that support opportunistic polling, set the
1182 * low and high watermarks for outstanding inbound packets/bytes.
1183 * Also define freeze times for transitioning between modes
1184 * and updating the average.
1185 */
1186 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1187 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1188 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1189 } else {
1190 limit = (u_int32_t)-1;
1191 }
1192
1193 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
1194 if (inp == dlil_main_input_thread) {
1195 struct dlil_main_threading_info *inpm =
1196 (struct dlil_main_threading_info *)inp;
1197 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
1198 }
1199
1200 error = kernel_thread_start(func, inp, &inp->input_thr);
1201 if (error == KERN_SUCCESS) {
1202 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1203 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
1204 /*
1205 * We create an affinity set so that the matching workloop
1206 * thread or the starter thread (for loopback) can be
1207 * scheduled on the same processor set as the input thread.
1208 */
1209 if (net_affinity) {
1210 struct thread *tp = inp->input_thr;
1211 u_int32_t tag;
1212 /*
1213 * Randomize to reduce the probability
1214 * of affinity tag namespace collision.
1215 */
1216 read_random(&tag, sizeof (tag));
1217 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1218 thread_reference(tp);
1219 inp->tag = tag;
1220 inp->net_affinity = TRUE;
1221 }
1222 }
1223 } else if (inp == dlil_main_input_thread) {
1224 panic_plain("%s: couldn't create main input thread", __func__);
1225 /* NOTREACHED */
1226 } else {
1227 panic_plain("%s: couldn't create %s input thread", __func__,
1228 if_name(ifp));
1229 /* NOTREACHED */
1230 }
1231 OSAddAtomic(1, &cur_dlil_input_threads);
1232
1233 return (error);
1234 }
1235
1236 static void
1237 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1238 {
1239 struct ifnet *ifp;
1240
1241 VERIFY(current_thread() == inp->input_thr);
1242 VERIFY(inp != dlil_main_input_thread);
1243
1244 OSAddAtomic(-1, &cur_dlil_input_threads);
1245
1246 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1247 lck_grp_free(inp->lck_grp);
1248
1249 inp->input_waiting = 0;
1250 inp->wtot = 0;
1251 bzero(inp->input_name, sizeof (inp->input_name));
1252 ifp = inp->ifp;
1253 inp->ifp = NULL;
1254 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1255 qlimit(&inp->rcvq_pkts) = 0;
1256 bzero(&inp->stats, sizeof (inp->stats));
1257
1258 VERIFY(!inp->net_affinity);
1259 inp->input_thr = THREAD_NULL;
1260 VERIFY(inp->wloop_thr == THREAD_NULL);
1261 VERIFY(inp->poll_thr == THREAD_NULL);
1262 VERIFY(inp->tag == 0);
1263
1264 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1265 bzero(&inp->tstats, sizeof (inp->tstats));
1266 bzero(&inp->pstats, sizeof (inp->pstats));
1267 bzero(&inp->sstats, sizeof (inp->sstats));
1268
1269 net_timerclear(&inp->mode_holdtime);
1270 net_timerclear(&inp->mode_lasttime);
1271 net_timerclear(&inp->sample_holdtime);
1272 net_timerclear(&inp->sample_lasttime);
1273 net_timerclear(&inp->dbg_lasttime);
1274
1275 #if IFNET_INPUT_SANITY_CHK
1276 inp->input_mbuf_cnt = 0;
1277 #endif /* IFNET_INPUT_SANITY_CHK */
1278
1279 if (dlil_verbose) {
1280 printf("%s: input thread terminated\n",
1281 if_name(ifp));
1282 }
1283
1284 /* for the extra refcnt from kernel_thread_start() */
1285 thread_deallocate(current_thread());
1286
1287 /* this is the end */
1288 thread_terminate(current_thread());
1289 /* NOTREACHED */
1290 }
1291
1292 static kern_return_t
1293 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1294 {
1295 thread_affinity_policy_data_t policy;
1296
1297 bzero(&policy, sizeof (policy));
1298 policy.affinity_tag = tag;
1299 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1300 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1301 }
1302
1303 void
1304 dlil_init(void)
1305 {
1306 thread_t thread = THREAD_NULL;
1307
1308 /*
1309 * The following fields must be 64-bit aligned for atomic operations.
1310 */
1311 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1312 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1313 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1314 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1315 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1316 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1317 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1318 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1319 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1320 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1321 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1322 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1323 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1324 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1325 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1326
1327 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1328 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1329 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1330 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1331 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1332 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1333 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1334 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1335 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1336 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1337 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1338 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1339 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1340 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1341 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1342
1343 /*
1344 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1345 */
1346 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1347 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1348 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1349 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1350 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1351 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1352 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1353 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1354 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1355 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1356 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1357 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1358 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1359
1360 /*
1361 * ... as well as the mbuf checksum flags counterparts.
1362 */
1363 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1364 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1365 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1366 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1367 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1368 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1369 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1370 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1371 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1372 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1373
1374 /*
1375 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1376 */
1377 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1378 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1379
1380 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1381 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1382 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1383 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1384
1385 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1386 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1387 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1388
1389 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1390 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1391 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1392 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1393 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1394 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1395 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1396 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1397 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1398 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1399 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1400 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1401 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1402 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1403 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1404 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1405
1406 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1407 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1408 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1409 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1410 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1411 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1412
1413 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1414 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1415
1416 PE_parse_boot_argn("net_affinity", &net_affinity,
1417 sizeof (net_affinity));
1418
1419 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1420
1421 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
1422
1423 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1424
1425 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1426 sizeof (struct dlil_ifnet_dbg);
1427 /* Enforce 64-bit alignment for dlil_ifnet structure */
1428 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1429 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1430 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1431 0, DLIF_ZONE_NAME);
1432 if (dlif_zone == NULL) {
1433 panic_plain("%s: failed allocating %s", __func__,
1434 DLIF_ZONE_NAME);
1435 /* NOTREACHED */
1436 }
1437 zone_change(dlif_zone, Z_EXPAND, TRUE);
1438 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1439
1440 dlif_filt_size = sizeof (struct ifnet_filter);
1441 dlif_filt_zone = zinit(dlif_filt_size,
1442 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1443 if (dlif_filt_zone == NULL) {
1444 panic_plain("%s: failed allocating %s", __func__,
1445 DLIF_FILT_ZONE_NAME);
1446 /* NOTREACHED */
1447 }
1448 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1449 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1450
1451 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1452 dlif_phash_zone = zinit(dlif_phash_size,
1453 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1454 if (dlif_phash_zone == NULL) {
1455 panic_plain("%s: failed allocating %s", __func__,
1456 DLIF_PHASH_ZONE_NAME);
1457 /* NOTREACHED */
1458 }
1459 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1460 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1461
1462 dlif_proto_size = sizeof (struct if_proto);
1463 dlif_proto_zone = zinit(dlif_proto_size,
1464 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1465 if (dlif_proto_zone == NULL) {
1466 panic_plain("%s: failed allocating %s", __func__,
1467 DLIF_PROTO_ZONE_NAME);
1468 /* NOTREACHED */
1469 }
1470 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1471 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1472
1473 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1474 /* Enforce 64-bit alignment for tcpstat_local structure */
1475 dlif_tcpstat_bufsize =
1476 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1477 dlif_tcpstat_bufsize =
1478 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1479 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1480 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1481 DLIF_TCPSTAT_ZONE_NAME);
1482 if (dlif_tcpstat_zone == NULL) {
1483 panic_plain("%s: failed allocating %s", __func__,
1484 DLIF_TCPSTAT_ZONE_NAME);
1485 /* NOTREACHED */
1486 }
1487 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1488 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1489
1490 dlif_udpstat_size = sizeof (struct udpstat_local);
1491 /* Enforce 64-bit alignment for udpstat_local structure */
1492 dlif_udpstat_bufsize =
1493 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1494 dlif_udpstat_bufsize =
1495 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1496 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1497 DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1498 DLIF_UDPSTAT_ZONE_NAME);
1499 if (dlif_udpstat_zone == NULL) {
1500 panic_plain("%s: failed allocating %s", __func__,
1501 DLIF_UDPSTAT_ZONE_NAME);
1502 /* NOTREACHED */
1503 }
1504 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1505 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1506
1507 ifnet_llreach_init();
1508
1509 TAILQ_INIT(&dlil_ifnet_head);
1510 TAILQ_INIT(&ifnet_head);
1511 TAILQ_INIT(&ifnet_detaching_head);
1512
1513 /* Setup the lock groups we will use */
1514 dlil_grp_attributes = lck_grp_attr_alloc_init();
1515
1516 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1517 dlil_grp_attributes);
1518 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1519 dlil_grp_attributes);
1520 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1521 dlil_grp_attributes);
1522 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1523 dlil_grp_attributes);
1524 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1525 dlil_grp_attributes);
1526
1527 /* Setup the lock attributes we will use */
1528 dlil_lck_attributes = lck_attr_alloc_init();
1529
1530 ifnet_lock_attr = lck_attr_alloc_init();
1531
1532 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1533 dlil_lck_attributes);
1534 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1535
1536 /* Setup interface flow control related items */
1537 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1538
1539 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1540 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1541 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1542 if (ifnet_fc_zone == NULL) {
1543 panic_plain("%s: failed allocating %s", __func__,
1544 IFNET_FC_ZONE_NAME);
1545 /* NOTREACHED */
1546 }
1547 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1548 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1549
1550 /* Initialize interface address subsystem */
1551 ifa_init();
1552
1553 #if PF
1554 /* Initialize the packet filter */
1555 pfinit();
1556 #endif /* PF */
1557
1558 /* Initialize queue algorithms */
1559 classq_init();
1560
1561 /* Initialize packet schedulers */
1562 pktsched_init();
1563
1564 /* Initialize flow advisory subsystem */
1565 flowadv_init();
1566
1567 /* Initialize the pktap virtual interface */
1568 pktap_init();
1569
1570 #if DEBUG
1571 /* Run self-tests */
1572 dlil_verify_sum16();
1573 #endif /* DEBUG */
1574
1575 /*
1576 * Create and start up the main DLIL input thread and the interface
1577 * detacher thread once everything is initialized.
1578 */
1579 dlil_create_input_thread(NULL, dlil_main_input_thread);
1580
1581 if (kernel_thread_start(ifnet_detacher_thread_func,
1582 NULL, &thread) != KERN_SUCCESS) {
1583 panic_plain("%s: couldn't create detacher thread", __func__);
1584 /* NOTREACHED */
1585 }
1586 thread_deallocate(thread);
1587 }
1588
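/*
 * The if_flt_monitor_{busy,unbusy,enter,leave} helpers below form a
 * small monitor around the interface filter list: if_flt_busy counts
 * threads currently walking if_flt_head with if_flt_lock temporarily
 * dropped, while if_flt_waiters counts threads sleeping in
 * if_flt_monitor_enter() until the list is quiescent again.  All four
 * routines must be called with if_flt_lock held.
 */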
1589 static void
1590 if_flt_monitor_busy(struct ifnet *ifp)
1591 {
1592 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1593
1594 ++ifp->if_flt_busy;
1595 VERIFY(ifp->if_flt_busy != 0);
1596 }
1597
1598 static void
1599 if_flt_monitor_unbusy(struct ifnet *ifp)
1600 {
1601 if_flt_monitor_leave(ifp);
1602 }
1603
1604 static void
1605 if_flt_monitor_enter(struct ifnet *ifp)
1606 {
1607 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1608
1609 while (ifp->if_flt_busy) {
1610 ++ifp->if_flt_waiters;
1611 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1612 (PZERO - 1), "if_flt_monitor", NULL);
1613 }
1614 if_flt_monitor_busy(ifp);
1615 }
1616
1617 static void
1618 if_flt_monitor_leave(struct ifnet *ifp)
1619 {
1620 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1621
1622 VERIFY(ifp->if_flt_busy != 0);
1623 --ifp->if_flt_busy;
1624
1625 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1626 ifp->if_flt_waiters = 0;
1627 wakeup(&ifp->if_flt_head);
1628 }
1629 }
1630
1631 __private_extern__ int
1632 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1633 interface_filter_t *filter_ref, u_int32_t flags)
1634 {
1635 int retval = 0;
1636 struct ifnet_filter *filter = NULL;
1637
1638 ifnet_head_lock_shared();
1639 /* Check that the interface is in the global list */
1640 if (!ifnet_lookup(ifp)) {
1641 retval = ENXIO;
1642 goto done;
1643 }
1644
1645 filter = zalloc(dlif_filt_zone);
1646 if (filter == NULL) {
1647 retval = ENOMEM;
1648 goto done;
1649 }
1650 bzero(filter, dlif_filt_size);
1651
1652 /* refcnt held above during lookup */
1653 filter->filt_flags = flags;
1654 filter->filt_ifp = ifp;
1655 filter->filt_cookie = if_filter->iff_cookie;
1656 filter->filt_name = if_filter->iff_name;
1657 filter->filt_protocol = if_filter->iff_protocol;
1658 filter->filt_input = if_filter->iff_input;
1659 filter->filt_output = if_filter->iff_output;
1660 filter->filt_event = if_filter->iff_event;
1661 filter->filt_ioctl = if_filter->iff_ioctl;
1662 filter->filt_detached = if_filter->iff_detached;
1663
1664 lck_mtx_lock(&ifp->if_flt_lock);
1665 if_flt_monitor_enter(ifp);
1666
1667 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1668 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1669
1670 if_flt_monitor_leave(ifp);
1671 lck_mtx_unlock(&ifp->if_flt_lock);
1672
1673 *filter_ref = filter;
1674
1675 /*
1676 * Bump filter count and route_generation ID to let TCP
1677 * know it shouldn't do TSO on this connection
1678 */
1679 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1680 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1681 routegenid_update();
1682 }
1683 if (dlil_verbose) {
1684 printf("%s: %s filter attached\n", if_name(ifp),
1685 if_filter->iff_name);
1686 }
1687 done:
1688 ifnet_head_done();
1689 if (retval != 0 && ifp != NULL) {
1690 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1691 if_name(ifp), if_filter->iff_name, retval);
1692 }
1693 if (retval != 0 && filter != NULL)
1694 zfree(dlif_filt_zone, filter);
1695
1696 return (retval);
1697 }
1698
1699 static int
1700 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1701 {
1702 int retval = 0;
1703
1704 if (detached == 0) {
1705 ifnet_t ifp = NULL;
1706
1707 ifnet_head_lock_shared();
1708 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1709 interface_filter_t entry = NULL;
1710
1711 lck_mtx_lock(&ifp->if_flt_lock);
1712 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1713 if (entry != filter || entry->filt_skip)
1714 continue;
1715 /*
1716 * We've found a match; since it's possible
1717 * that the thread gets blocked in the monitor,
1718 * we do the lock dance. Interface should
1719 * not be detached since we still have a use
1720 * count held during filter attach.
1721 */
1722 entry->filt_skip = 1; /* skip input/output */
1723 lck_mtx_unlock(&ifp->if_flt_lock);
1724 ifnet_head_done();
1725
1726 lck_mtx_lock(&ifp->if_flt_lock);
1727 if_flt_monitor_enter(ifp);
1728 lck_mtx_assert(&ifp->if_flt_lock,
1729 LCK_MTX_ASSERT_OWNED);
1730
1731 /* Remove the filter from the list */
1732 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1733 filt_next);
1734
1735 if_flt_monitor_leave(ifp);
1736 lck_mtx_unlock(&ifp->if_flt_lock);
1737 if (dlil_verbose) {
1738 printf("%s: %s filter detached\n",
1739 if_name(ifp), filter->filt_name);
1740 }
1741 goto destroy;
1742 }
1743 lck_mtx_unlock(&ifp->if_flt_lock);
1744 }
1745 ifnet_head_done();
1746
1747 /* filter parameter is not a valid filter ref */
1748 retval = EINVAL;
1749 goto done;
1750 }
1751
1752 if (dlil_verbose)
1753 printf("%s filter detached\n", filter->filt_name);
1754
1755 destroy:
1756
1757 /* Call the detached function if there is one */
1758 if (filter->filt_detached)
1759 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1760
1761 /*
1762 * Decrease filter count and bump route_generation ID to let TCP
1763 * know it should reevaluate doing TSO or not
1764 */
1765 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1766 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1767 routegenid_update();
1768 }
1769
1770 /* Free the filter only after we are done reading its flags */
1771 zfree(dlif_filt_zone, filter);
1772 done:
1773 if (retval != 0) {
1774 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1775 filter->filt_name, retval);
1776 }
1777 return (retval);
1778 }
1779
1780 __private_extern__ void
1781 dlil_detach_filter(interface_filter_t filter)
1782 {
1783 if (filter == NULL)
1784 return;
1785 dlil_detach_filter_internal(filter, 0);
1786 }
1787
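/*
 * Illustrative sketch (not part of this file): a network extension
 * would normally reach dlil_attach_filter()/dlil_detach_filter()
 * through the public iflt_attach()/iflt_detach() KPI, supplying the
 * iff_* callbacks that are copied above.  Names are hypothetical;
 * returning 0 from the input callback passes the packet along, and an
 * iff_protocol of 0 matches every protocol family.
 *
 *	static errno_t
 *	my_filt_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
 *	    mbuf_t *data, char **frame_ptr)
 *	{
 *		return (0);
 *	}
 *
 *	static const struct iff_filter my_filt = {
 *		.iff_cookie	= NULL,
 *		.iff_name	= "com.example.filter",
 *		.iff_protocol	= 0,
 *		.iff_input	= my_filt_input,
 *	};
 *
 *	interface_filter_t my_ref;
 *	errno_t err = iflt_attach(ifp, &my_filt, &my_ref);
 *
 * and, once the filter is no longer needed, iflt_detach(my_ref), which
 * eventually invokes iff_detached through the detach path above.
 */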
1788 /*
1789 * Main input thread:
1790 *
1791 * a) handles all inbound packets for lo0
1792 * b) handles all inbound packets for interfaces with no dedicated
1793 * input thread (e.g. anything but Ethernet/PDP or those that support
1794 * opportunistic polling.)
1795 * c) protocol registrations
1796 * d) packet injections
1797 */
1798 static void
1799 dlil_main_input_thread_func(void *v, wait_result_t w)
1800 {
1801 #pragma unused(w)
1802 struct dlil_main_threading_info *inpm = v;
1803 struct dlil_threading_info *inp = v;
1804
1805 VERIFY(inp == dlil_main_input_thread);
1806 VERIFY(inp->ifp == NULL);
1807 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1808
1809 while (1) {
1810 struct mbuf *m = NULL, *m_loop = NULL;
1811 u_int32_t m_cnt, m_cnt_loop;
1812 boolean_t proto_req;
1813
1814 lck_mtx_lock_spin(&inp->input_lck);
1815
1816 /* Wait until there is work to be done */
1817 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1818 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1819 (void) msleep(&inp->input_waiting, &inp->input_lck,
1820 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1821 }
1822
1823 inp->input_waiting |= DLIL_INPUT_RUNNING;
1824 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1825
1826 /* Main input thread cannot be terminated */
1827 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
1828
1829 proto_req = (inp->input_waiting &
1830 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
1831
1832 /* Packets for non-dedicated interfaces other than lo0 */
1833 m_cnt = qlen(&inp->rcvq_pkts);
1834 m = _getq_all(&inp->rcvq_pkts);
1835
1836 /* Packets exclusive to lo0 */
1837 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
1838 m_loop = _getq_all(&inpm->lo_rcvq_pkts);
1839
1840 inp->wtot = 0;
1841
1842 lck_mtx_unlock(&inp->input_lck);
1843
1844 /*
1845 * NOTE warning %%% attention !!!!
1846 * We should think about putting some thread starvation
1847 * safeguards if we deal with long chains of packets.
1848 */
1849 if (m_loop != NULL)
1850 dlil_input_packet_list_extended(lo_ifp, m_loop,
1851 m_cnt_loop, inp->mode);
1852
1853 if (m != NULL)
1854 dlil_input_packet_list_extended(NULL, m,
1855 m_cnt, inp->mode);
1856
1857 if (proto_req)
1858 proto_input_run();
1859 }
1860
1861 /* NOTREACHED */
1862 VERIFY(0); /* we should never get here */
1863 }
1864
1865 /*
1866 * Input thread for interfaces with legacy input model.
1867 */
1868 static void
1869 dlil_input_thread_func(void *v, wait_result_t w)
1870 {
1871 #pragma unused(w)
1872 struct dlil_threading_info *inp = v;
1873 struct ifnet *ifp = inp->ifp;
1874
1875 VERIFY(inp != dlil_main_input_thread);
1876 VERIFY(ifp != NULL);
1877 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
1878 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1879
1880 while (1) {
1881 struct mbuf *m = NULL;
1882 u_int32_t m_cnt;
1883
1884 lck_mtx_lock_spin(&inp->input_lck);
1885
1886 /* Wait until there is work to be done */
1887 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1888 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1889 (void) msleep(&inp->input_waiting, &inp->input_lck,
1890 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1891 }
1892
1893 inp->input_waiting |= DLIL_INPUT_RUNNING;
1894 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1895
1896 /*
1897 * Protocol registration and injection must always use
1898 * the main input thread; in theory the latter can utilize
1899 * the corresponding input thread on which the packet arrived,
1900 * but that requires our knowing the interface in advance
1901 * (and the benefits might not be worth the trouble.)
1902 */
1903 VERIFY(!(inp->input_waiting &
1904 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
1905
1906 /* Packets for this interface */
1907 m_cnt = qlen(&inp->rcvq_pkts);
1908 m = _getq_all(&inp->rcvq_pkts);
1909
1910 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1911 lck_mtx_unlock(&inp->input_lck);
1912
1913 /* Free up pending packets */
1914 if (m != NULL)
1915 mbuf_freem_list(m);
1916
1917 dlil_terminate_input_thread(inp);
1918 /* NOTREACHED */
1919 return;
1920 }
1921
1922 inp->wtot = 0;
1923
1924 dlil_input_stats_sync(ifp, inp);
1925
1926 lck_mtx_unlock(&inp->input_lck);
1927
1928 /*
1929 * NOTE warning %%% attention !!!!
1930 * We should think about putting some thread starvation
1931 * safeguards if we deal with long chains of packets.
1932 */
1933 if (m != NULL)
1934 dlil_input_packet_list_extended(NULL, m,
1935 m_cnt, inp->mode);
1936 }
1937
1938 /* NOTREACHED */
1939 VERIFY(0); /* we should never get here */
1940 }
1941
1942 /*
1943 * Input thread for interfaces with opportunistic polling input model.
1944 */
1945 static void
1946 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
1947 {
1948 #pragma unused(w)
1949 struct dlil_threading_info *inp = v;
1950 struct ifnet *ifp = inp->ifp;
1951 struct timespec ts;
1952
1953 VERIFY(inp != dlil_main_input_thread);
1954 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
1955
1956 while (1) {
1957 struct mbuf *m = NULL;
1958 u_int32_t m_cnt, m_size, poll_req = 0;
1959 ifnet_model_t mode;
1960 struct timespec now, delta;
1961 u_int64_t ival;
1962
1963 lck_mtx_lock_spin(&inp->input_lck);
1964
1965 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
1966 ival = IF_RXPOLL_INTERVALTIME_MIN;
1967
1968 /* Link parameters changed? */
1969 if (ifp->if_poll_update != 0) {
1970 ifp->if_poll_update = 0;
1971 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
1972 }
1973
1974 /* Current operating mode */
1975 mode = inp->mode;
1976
1977 /* Wait until there is work to be done */
1978 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1979 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1980 (void) msleep(&inp->input_waiting, &inp->input_lck,
1981 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1982 }
1983
1984 inp->input_waiting |= DLIL_INPUT_RUNNING;
1985 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1986
1987 /*
1988 * Protocol registration and injection must always use
1989 * the main input thread; in theory the latter can utilize
1990 * the corresponding input thread on which the packet arrived,
1991 * but that requires our knowing the interface in advance
1992 * (and the benefits might not be worth the trouble.)
1993 */
1994 VERIFY(!(inp->input_waiting &
1995 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
1996
1997 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1998 /* Free up pending packets */
1999 _flushq(&inp->rcvq_pkts);
2000 lck_mtx_unlock(&inp->input_lck);
2001
2002 dlil_terminate_input_thread(inp);
2003 /* NOTREACHED */
2004 return;
2005 }
2006
2007 /* Total count of all packets */
2008 m_cnt = qlen(&inp->rcvq_pkts);
2009
2010 /* Total bytes of all packets */
2011 m_size = qsize(&inp->rcvq_pkts);
2012
2013 /* Packets for this interface */
2014 m = _getq_all(&inp->rcvq_pkts);
2015 VERIFY(m != NULL || m_cnt == 0);
2016
2017 nanouptime(&now);
2018 if (!net_timerisset(&inp->sample_lasttime))
2019 *(&inp->sample_lasttime) = *(&now);
2020
2021 net_timersub(&now, &inp->sample_lasttime, &delta);
2022 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2023 u_int32_t ptot, btot;
2024
2025 /* Accumulate statistics for current sampling */
2026 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2027
2028 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2029 goto skip;
2030
2031 *(&inp->sample_lasttime) = *(&now);
2032
2033 /* Calculate min/max of inbound bytes */
2034 btot = (u_int32_t)inp->sstats.bytes;
2035 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2036 inp->rxpoll_bmin = btot;
2037 if (btot > inp->rxpoll_bmax)
2038 inp->rxpoll_bmax = btot;
2039
2040 /* Calculate EWMA of inbound bytes */
2041 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2042
2043 /* Calculate min/max of inbound packets */
2044 ptot = (u_int32_t)inp->sstats.packets;
2045 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2046 inp->rxpoll_pmin = ptot;
2047 if (ptot > inp->rxpoll_pmax)
2048 inp->rxpoll_pmax = ptot;
2049
2050 /* Calculate EWMA of inbound packets */
2051 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2052
2053 /* Reset sampling statistics */
2054 PKTCNTR_CLEAR(&inp->sstats);
2055
2056 /* Calculate EWMA of wakeup requests */
2057 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2058 inp->wtot = 0;
2059
2060 if (dlil_verbose) {
2061 if (!net_timerisset(&inp->dbg_lasttime))
2062 *(&inp->dbg_lasttime) = *(&now);
2063 net_timersub(&now, &inp->dbg_lasttime, &delta);
2064 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2065 *(&inp->dbg_lasttime) = *(&now);
2066 printf("%s: [%s] pkts avg %d max %d "
2067 "limits [%d/%d], wreq avg %d "
2068 "limits [%d/%d], bytes avg %d "
2069 "limits [%d/%d]\n", if_name(ifp),
2070 (inp->mode ==
2071 IFNET_MODEL_INPUT_POLL_ON) ?
2072 "ON" : "OFF", inp->rxpoll_pavg,
2073 inp->rxpoll_pmax,
2074 inp->rxpoll_plowat,
2075 inp->rxpoll_phiwat,
2076 inp->rxpoll_wavg,
2077 inp->rxpoll_wlowat,
2078 inp->rxpoll_whiwat,
2079 inp->rxpoll_bavg,
2080 inp->rxpoll_blowat,
2081 inp->rxpoll_bhiwat);
2082 }
2083 }
2084
2085 /* Perform mode transition, if necessary */
2086 if (!net_timerisset(&inp->mode_lasttime))
2087 *(&inp->mode_lasttime) = *(&now);
2088
2089 net_timersub(&now, &inp->mode_lasttime, &delta);
2090 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2091 goto skip;
2092
2093 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2094 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2095 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2096 mode = IFNET_MODEL_INPUT_POLL_OFF;
2097 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2098 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2099 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2100 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2101 mode = IFNET_MODEL_INPUT_POLL_ON;
2102 }
2103
2104 if (mode != inp->mode) {
2105 inp->mode = mode;
2106 *(&inp->mode_lasttime) = *(&now);
2107 poll_req++;
2108 }
2109 }
2110 skip:
2111 dlil_input_stats_sync(ifp, inp);
2112
2113 lck_mtx_unlock(&inp->input_lck);
2114
2115 /*
2116 * If there's a mode change and interface is still attached,
2117 * perform a downcall to the driver for the new mode. Also
2118 * hold an IO refcnt on the interface to prevent it from
2119 * being detached (will be released below.)
2120 */
2121 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2122 struct ifnet_model_params p = { mode, { 0 } };
2123 errno_t err;
2124
2125 if (dlil_verbose) {
2126 printf("%s: polling is now %s, "
2127 "pkts avg %d max %d limits [%d/%d], "
2128 "wreq avg %d limits [%d/%d], "
2129 "bytes avg %d limits [%d/%d]\n",
2130 if_name(ifp),
2131 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2132 "ON" : "OFF", inp->rxpoll_pavg,
2133 inp->rxpoll_pmax, inp->rxpoll_plowat,
2134 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2135 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2136 inp->rxpoll_bavg, inp->rxpoll_blowat,
2137 inp->rxpoll_bhiwat);
2138 }
2139
2140 if ((err = ((*ifp->if_input_ctl)(ifp,
2141 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
2142 printf("%s: error setting polling mode "
2143 "to %s (%d)\n", if_name(ifp),
2144 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2145 "ON" : "OFF", err);
2146 }
2147
2148 switch (mode) {
2149 case IFNET_MODEL_INPUT_POLL_OFF:
2150 ifnet_set_poll_cycle(ifp, NULL);
2151 inp->rxpoll_offreq++;
2152 if (err != 0)
2153 inp->rxpoll_offerr++;
2154 break;
2155
2156 case IFNET_MODEL_INPUT_POLL_ON:
2157 net_nsectimer(&ival, &ts);
2158 ifnet_set_poll_cycle(ifp, &ts);
2159 ifnet_poll(ifp);
2160 inp->rxpoll_onreq++;
2161 if (err != 0)
2162 inp->rxpoll_onerr++;
2163 break;
2164
2165 default:
2166 VERIFY(0);
2167 /* NOTREACHED */
2168 }
2169
2170 /* Release the IO refcnt */
2171 ifnet_decr_iorefcnt(ifp);
2172 }
2173
2174 /*
2175 * NOTE warning %%% attention !!!!
2176 * We should think about putting some thread starvation
2177 * safeguards if we deal with long chains of packets.
2178 */
2179 if (m != NULL)
2180 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2181 }
2182
2183 /* NOTREACHED */
2184 VERIFY(0); /* we should never get here */
2185 }
2186
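/*
 * To summarize the transition logic above: the thread drops back to
 * IFNET_MODEL_INPUT_POLL_OFF once both the packet EWMA and the byte
 * EWMA are at or below their low watermarks, and switches to
 * IFNET_MODEL_INPUT_POLL_ON once the packet EWMA reaches its high
 * watermark and either the byte EWMA or the wakeup-request EWMA does
 * too; each transition is further rate limited by mode_holdtime.
 * Purely illustrative numbers: with plowat/phiwat of 10/100 packets,
 * an interface averaging 150 packets per sampling period (and meeting
 * a byte or wakeup high watermark) is put into polling mode, and stays
 * there until the averages decay back down to the low watermarks.
 */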
2187 /*
2188 * Must be called on an attached ifnet (caller is expected to check.)
2189 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2190 */
2191 errno_t
2192 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2193 boolean_t locked)
2194 {
2195 struct dlil_threading_info *inp;
2196 u_int64_t sample_holdtime, inbw;
2197
2198 VERIFY(ifp != NULL);
2199 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2200 return (ENXIO);
2201
2202 if (p != NULL) {
2203 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2204 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2205 return (EINVAL);
2206 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2207 p->packets_lowat >= p->packets_hiwat)
2208 return (EINVAL);
2209 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2210 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2211 return (EINVAL);
2212 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2213 p->bytes_lowat >= p->bytes_hiwat)
2214 return (EINVAL);
2215 if (p->interval_time != 0 &&
2216 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2217 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2218 }
2219
2220 if (!locked)
2221 lck_mtx_lock(&inp->input_lck);
2222
2223 lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2224
2225 /*
2226 * Normally, we'd reset the parameters to the auto-tuned values
2227 * if the input thread detects a change in link rate. If the
2228 * driver provides its own parameters right after a link rate
2229 * change, but before the input thread gets to run, we want to
2230 * make sure to keep the driver's values. Clearing if_poll_update
2231 * will achieve that.
2232 */
2233 if (p != NULL && !locked && ifp->if_poll_update != 0)
2234 ifp->if_poll_update = 0;
2235
2236 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2237 sample_holdtime = 0; /* polling is disabled */
2238 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2239 inp->rxpoll_blowat = 0;
2240 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2241 inp->rxpoll_bhiwat = (u_int32_t)-1;
2242 inp->rxpoll_plim = 0;
2243 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2244 } else {
2245 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2246 u_int64_t ival;
2247 unsigned int n, i;
2248
2249 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2250 if (inbw < rxpoll_tbl[i].speed)
2251 break;
2252 n = i;
2253 }
2254 /* auto-tune if caller didn't specify a value */
2255 plowat = ((p == NULL || p->packets_lowat == 0) ?
2256 rxpoll_tbl[n].plowat : p->packets_lowat);
2257 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2258 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2259 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2260 rxpoll_tbl[n].blowat : p->bytes_lowat);
2261 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2262 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2263 plim = ((p == NULL || p->packets_limit == 0) ?
2264 if_rxpoll_max : p->packets_limit);
2265 ival = ((p == NULL || p->interval_time == 0) ?
2266 if_rxpoll_interval_time : p->interval_time);
2267
2268 VERIFY(plowat != 0 && phiwat != 0);
2269 VERIFY(blowat != 0 && bhiwat != 0);
2270 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2271
2272 sample_holdtime = if_rxpoll_sample_holdtime;
2273 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2274 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2275 inp->rxpoll_plowat = plowat;
2276 inp->rxpoll_phiwat = phiwat;
2277 inp->rxpoll_blowat = blowat;
2278 inp->rxpoll_bhiwat = bhiwat;
2279 inp->rxpoll_plim = plim;
2280 inp->rxpoll_ival = ival;
2281 }
2282
2283 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2284 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2285
2286 if (dlil_verbose) {
2287 printf("%s: speed %llu bps, sample per %llu nsec, "
2288 "poll interval %llu nsec, pkts per poll %u, "
2289 "pkt limits [%u/%u], wreq limits [%u/%u], "
2290 "bytes limits [%u/%u]\n", if_name(ifp),
2291 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2292 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2293 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2294 }
2295
2296 if (!locked)
2297 lck_mtx_unlock(&inp->input_lck);
2298
2299 return (0);
2300 }
2301
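/*
 * A minimal sketch of a driver-supplied parameter block for the routine
 * above (values are hypothetical; any zero field falls back to the
 * auto-tuned defaults chosen from rxpoll_tbl, and each non-zero lowat
 * must be strictly less than its hiwat):
 *
 *	struct ifnet_poll_params p = {
 *		.packets_lowat	= 8,
 *		.packets_hiwat	= 64,
 *		.bytes_lowat	= 8 * 1024,
 *		.bytes_hiwat	= 64 * 1024,
 *		.packets_limit	= 0,
 *		.interval_time	= 0,
 *	};
 *	errno_t err = dlil_rxpoll_set_params(ifp, &p, FALSE);
 *
 * where ifp is an attached interface with IFEF_RXPOLL set, per the
 * checks at the top of the function.
 */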
2302 /*
2303 * Must be called on an attached ifnet (caller is expected to check.)
2304 */
2305 errno_t
2306 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2307 {
2308 struct dlil_threading_info *inp;
2309
2310 VERIFY(ifp != NULL && p != NULL);
2311 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2312 return (ENXIO);
2313
2314 bzero(p, sizeof (*p));
2315
2316 lck_mtx_lock(&inp->input_lck);
2317 p->packets_limit = inp->rxpoll_plim;
2318 p->packets_lowat = inp->rxpoll_plowat;
2319 p->packets_hiwat = inp->rxpoll_phiwat;
2320 p->bytes_lowat = inp->rxpoll_blowat;
2321 p->bytes_hiwat = inp->rxpoll_bhiwat;
2322 p->interval_time = inp->rxpoll_ival;
2323 lck_mtx_unlock(&inp->input_lck);
2324
2325 return (0);
2326 }
2327
2328 errno_t
2329 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2330 const struct ifnet_stat_increment_param *s)
2331 {
2332 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2333 }
2334
2335 errno_t
2336 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2337 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2338 {
2339 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2340 }
2341
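/*
 * A minimal sketch of the receive-side usage, assuming a hypothetical
 * driver that has assembled a chain of received mbufs linked through
 * m_nextpkt.  For the extended variant the stat block is mandatory and,
 * per the assertion in ifnet_input_common(), packets_in must equal the
 * actual number of packets in the chain:
 *
 *	struct ifnet_stat_increment_param s;
 *
 *	bzero(&s, sizeof (s));
 *	s.packets_in = pkt_count;
 *	s.bytes_in = byte_count;
 *	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
 *
 * Callers without a precomputed tail and count can use plain
 * ifnet_input(ifp, m_head, &s), or pass NULL stats, and let DLIL walk
 * the chain itself.
 */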
2342 static errno_t
2343 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2344 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2345 {
2346 struct thread *tp = current_thread();
2347 struct mbuf *last;
2348 struct dlil_threading_info *inp;
2349 u_int32_t m_cnt = 0, m_size = 0;
2350
2351 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2352 if (m_head != NULL)
2353 mbuf_freem_list(m_head);
2354 return (EINVAL);
2355 }
2356
2357 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2358 VERIFY(m_tail == NULL || ext);
2359 VERIFY(s != NULL || !ext);
2360
2361 /*
2362 * Drop the packet(s) if the parameters are invalid, or if the
2363 * interface is no longer attached; else hold an IO refcnt to
2364 * prevent it from being detached (will be released below.)
2365 */
2366 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2367 if (m_head != NULL)
2368 mbuf_freem_list(m_head);
2369 return (EINVAL);
2370 }
2371
2372 if (m_tail == NULL) {
2373 last = m_head;
2374 while (m_head != NULL) {
2375 #if IFNET_INPUT_SANITY_CHK
2376 if (dlil_input_sanity_check != 0)
2377 DLIL_INPUT_CHECK(last, ifp);
2378 #endif /* IFNET_INPUT_SANITY_CHK */
2379 m_cnt++;
2380 m_size += m_length(last);
2381 if (mbuf_nextpkt(last) == NULL)
2382 break;
2383 last = mbuf_nextpkt(last);
2384 }
2385 m_tail = last;
2386 } else {
2387 #if IFNET_INPUT_SANITY_CHK
2388 if (dlil_input_sanity_check != 0) {
2389 last = m_head;
2390 while (1) {
2391 DLIL_INPUT_CHECK(last, ifp);
2392 m_cnt++;
2393 m_size += m_length(last);
2394 if (mbuf_nextpkt(last) == NULL)
2395 break;
2396 last = mbuf_nextpkt(last);
2397 }
2398 } else {
2399 m_cnt = s->packets_in;
2400 m_size = s->bytes_in;
2401 last = m_tail;
2402 }
2403 #else
2404 m_cnt = s->packets_in;
2405 m_size = s->bytes_in;
2406 last = m_tail;
2407 #endif /* IFNET_INPUT_SANITY_CHK */
2408 }
2409
2410 if (last != m_tail) {
2411 panic_plain("%s: invalid input packet chain for %s, "
2412 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2413 m_tail, last);
2414 }
2415
2416 /*
2417 * Assert packet count only for the extended variant, for backwards
2418 * compatibility, since this came directly from the device driver.
2419 * Relax this assertion for input bytes, as the driver may have
2420 * included the link-layer headers in the computation; hence
2421 * m_size is just an approximation.
2422 */
2423 if (ext && s->packets_in != m_cnt) {
2424 panic_plain("%s: input packet count mismatch for %s, "
2425 "%d instead of %d\n", __func__, if_name(ifp),
2426 s->packets_in, m_cnt);
2427 }
2428
2429 if ((inp = ifp->if_inp) == NULL)
2430 inp = dlil_main_input_thread;
2431
2432 /*
2433 * If there is a matching DLIL input thread associated with an
2434 * affinity set, associate this thread with the same set. We
2435 * will only do this once.
2436 */
2437 lck_mtx_lock_spin(&inp->input_lck);
2438 if (inp != dlil_main_input_thread && inp->net_affinity &&
2439 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2440 (poll && inp->poll_thr == THREAD_NULL))) {
2441 u_int32_t tag = inp->tag;
2442
2443 if (poll) {
2444 VERIFY(inp->poll_thr == THREAD_NULL);
2445 inp->poll_thr = tp;
2446 } else {
2447 VERIFY(inp->wloop_thr == THREAD_NULL);
2448 inp->wloop_thr = tp;
2449 }
2450 lck_mtx_unlock(&inp->input_lck);
2451
2452 /* Associate the current thread with the new affinity tag */
2453 (void) dlil_affinity_set(tp, tag);
2454
2455 /*
2456 * Take a reference on the current thread; during detach,
2457 * we will need to refer to it in order to tear down its
2458 * affinity.
2459 */
2460 thread_reference(tp);
2461 lck_mtx_lock_spin(&inp->input_lck);
2462 }
2463
2464 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2465
2466 /*
2467 * Because of loopbacked multicast we cannot stuff the ifp in
2468 * the rcvif of the packet header: loopback (lo0) packets use a
2469 * dedicated list so that we can later associate them with lo_ifp
2470 * on their way up the stack. Packets for other interfaces without
2471 * dedicated input threads go to the regular list.
2472 */
2473 if (m_head != NULL) {
2474 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2475 struct dlil_main_threading_info *inpm =
2476 (struct dlil_main_threading_info *)inp;
2477 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2478 m_cnt, m_size);
2479 } else {
2480 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2481 m_cnt, m_size);
2482 }
2483 }
2484
2485 #if IFNET_INPUT_SANITY_CHK
2486 if (dlil_input_sanity_check != 0) {
2487 u_int32_t count;
2488 struct mbuf *m0;
2489
2490 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2491 count++;
2492
2493 if (count != m_cnt) {
2494 panic_plain("%s: invalid packet count %d "
2495 "(expected %d)\n", if_name(ifp),
2496 count, m_cnt);
2497 /* NOTREACHED */
2498 }
2499
2500 inp->input_mbuf_cnt += m_cnt;
2501 }
2502 #endif /* IFNET_INPUT_SANITY_CHK */
2503
2504 if (s != NULL) {
2505 dlil_input_stats_add(s, inp, poll);
2506 /*
2507 * If we're using the main input thread, synchronize the
2508 * stats now since we have the interface context. All
2509 * other cases involving dedicated input threads will
2510 * have their stats synchronized there.
2511 */
2512 if (inp == dlil_main_input_thread)
2513 dlil_input_stats_sync(ifp, inp);
2514 }
2515
2516 inp->input_waiting |= DLIL_INPUT_WAITING;
2517 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2518 inp->wtot++;
2519 wakeup_one((caddr_t)&inp->input_waiting);
2520 }
2521 lck_mtx_unlock(&inp->input_lck);
2522
2523 if (ifp != lo_ifp) {
2524 /* Release the IO refcnt */
2525 ifnet_decr_iorefcnt(ifp);
2526 }
2527
2528 return (0);
2529 }
2530
2531 static void
2532 ifnet_start_common(struct ifnet *ifp, int resetfc)
2533 {
2534 if (!(ifp->if_eflags & IFEF_TXSTART))
2535 return;
2536 /*
2537 * If the starter thread is inactive, signal it to do work,
2538 * unless the interface is being flow controlled from below,
2539 * e.g. a virtual interface being flow controlled by a real
2540 * network interface beneath it.
2541 */
2542 lck_mtx_lock_spin(&ifp->if_start_lock);
2543 if (resetfc) {
2544 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2545 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2546 lck_mtx_unlock(&ifp->if_start_lock);
2547 return;
2548 }
2549 ifp->if_start_req++;
2550 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2551 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2552 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen
2553 || ifp->if_start_delayed == 0)) {
2554 wakeup_one((caddr_t)&ifp->if_start_thread);
2555 }
2556 lck_mtx_unlock(&ifp->if_start_lock);
2557 }
2558
2559 void
2560 ifnet_start(struct ifnet *ifp)
2561 {
2562 ifnet_start_common(ifp, 0);
2563 }
2564
2565 static void
2566 ifnet_start_thread_fn(void *v, wait_result_t w)
2567 {
2568 #pragma unused(w)
2569 struct ifnet *ifp = v;
2570 char ifname[IFNAMSIZ + 1];
2571 struct timespec *ts = NULL;
2572 struct ifclassq *ifq = &ifp->if_snd;
2573 struct timespec delay_start_ts;
2574
2575 /*
2576 * Treat the dedicated starter thread for lo0 as equivalent to
2577 * the driver workloop thread; if net_affinity is enabled for
2578 * the main input thread, associate this starter thread with it
2579 * by binding them with the same affinity tag. This is done
2580 * only once (as we only have one lo_ifp which never goes away.)
2581 */
2582 if (ifp == lo_ifp) {
2583 struct dlil_threading_info *inp = dlil_main_input_thread;
2584 struct thread *tp = current_thread();
2585
2586 lck_mtx_lock(&inp->input_lck);
2587 if (inp->net_affinity) {
2588 u_int32_t tag = inp->tag;
2589
2590 VERIFY(inp->wloop_thr == THREAD_NULL);
2591 VERIFY(inp->poll_thr == THREAD_NULL);
2592 inp->wloop_thr = tp;
2593 lck_mtx_unlock(&inp->input_lck);
2594
2595 /* Associate this thread with the affinity tag */
2596 (void) dlil_affinity_set(tp, tag);
2597 } else {
2598 lck_mtx_unlock(&inp->input_lck);
2599 }
2600 }
2601
2602 snprintf(ifname, sizeof (ifname), "%s_starter",
2603 if_name(ifp));
2604
2605 lck_mtx_lock_spin(&ifp->if_start_lock);
2606
2607 for (;;) {
2608 if (ifp->if_start_thread != NULL)
2609 (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
2610 (PZERO - 1) | PSPIN, ifname, ts);
2611
2612 /* interface is detached? */
2613 if (ifp->if_start_thread == THREAD_NULL) {
2614 ifnet_set_start_cycle(ifp, NULL);
2615 lck_mtx_unlock(&ifp->if_start_lock);
2616 ifnet_purge(ifp);
2617
2618 if (dlil_verbose) {
2619 printf("%s: starter thread terminated\n",
2620 if_name(ifp));
2621 }
2622
2623 /* for the extra refcnt from kernel_thread_start() */
2624 thread_deallocate(current_thread());
2625 /* this is the end */
2626 thread_terminate(current_thread());
2627 /* NOTREACHED */
2628 return;
2629 }
2630
2631 ifp->if_start_active = 1;
2632
2633 for (;;) {
2634 u_int32_t req = ifp->if_start_req;
2635 if (!IFCQ_IS_EMPTY(ifq) &&
2636 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2637 ifp->if_start_delayed == 0 &&
2638 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2639 (ifp->if_eflags & IFEF_DELAY_START)) {
2640 ifp->if_start_delayed = 1;
2641 ifnet_start_delayed++;
2642 break;
2643 } else {
2644 ifp->if_start_delayed = 0;
2645 }
2646 lck_mtx_unlock(&ifp->if_start_lock);
2647
2648 /*
2649 * If no longer attached, don't call start because ifp
2650 * is being destroyed; else hold an IO refcnt to
2651 * prevent the interface from being detached (will be
2652 * released below.)
2653 */
2654 if (!ifnet_is_attached(ifp, 1)) {
2655 lck_mtx_lock_spin(&ifp->if_start_lock);
2656 break;
2657 }
2658
2659 /* invoke the driver's start routine */
2660 ((*ifp->if_start)(ifp));
2661
2662 /*
2663 * Release the io ref count taken by ifnet_is_attached.
2664 */
2665 ifnet_decr_iorefcnt(ifp);
2666
2667 lck_mtx_lock_spin(&ifp->if_start_lock);
2668
2669 /* if there's no pending request, we're done */
2670 if (req == ifp->if_start_req)
2671 break;
2672 }
2673
2674 ifp->if_start_req = 0;
2675 ifp->if_start_active = 0;
2676
2677 /*
2678 * Wakeup N ns from now if rate-controlled by TBR, and if
2679 * there are still packets in the send queue which haven't
2680 * been dequeued so far; else sleep indefinitely (ts = NULL)
2681 * until ifnet_start() is called again.
2682 */
2683 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2684 &ifp->if_start_cycle : NULL);
2685
2686 if (ts == NULL && ifp->if_start_delayed == 1) {
2687 delay_start_ts.tv_sec = 0;
2688 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2689 ts = &delay_start_ts;
2690 }
2691
2692 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2693 ts = NULL;
2694 }
2695
2696 /* NOTREACHED */
2697 }
2698
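/*
 * Note on ifnet_start_thread_fn() above: if_start_req acts as a request
 * generation counter.  The snapshot taken before if_start_lock is
 * dropped is compared against the live value once the driver's if_start
 * callback returns; a mismatch means ifnet_start_common() signalled
 * more work while the lock was dropped, so the inner loop invokes the
 * driver again instead of going back to sleep.
 */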
2699 void
2700 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2701 {
2702 if (ts == NULL)
2703 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2704 else
2705 *(&ifp->if_start_cycle) = *ts;
2706
2707 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2708 printf("%s: restart interval set to %lu nsec\n",
2709 if_name(ifp), ts->tv_nsec);
2710 }
2711
2712 static void
2713 ifnet_poll(struct ifnet *ifp)
2714 {
2715 /*
2716 * If the poller thread is inactive, signal it to do work.
2717 */
2718 lck_mtx_lock_spin(&ifp->if_poll_lock);
2719 ifp->if_poll_req++;
2720 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2721 wakeup_one((caddr_t)&ifp->if_poll_thread);
2722 }
2723 lck_mtx_unlock(&ifp->if_poll_lock);
2724 }
2725
2726 static void
2727 ifnet_poll_thread_fn(void *v, wait_result_t w)
2728 {
2729 #pragma unused(w)
2730 struct dlil_threading_info *inp;
2731 struct ifnet *ifp = v;
2732 char ifname[IFNAMSIZ + 1];
2733 struct timespec *ts = NULL;
2734 struct ifnet_stat_increment_param s;
2735
2736 snprintf(ifname, sizeof (ifname), "%s_poller",
2737 if_name(ifp));
2738 bzero(&s, sizeof (s));
2739
2740 lck_mtx_lock_spin(&ifp->if_poll_lock);
2741
2742 inp = ifp->if_inp;
2743 VERIFY(inp != NULL);
2744
2745 for (;;) {
2746 if (ifp->if_poll_thread != THREAD_NULL) {
2747 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2748 (PZERO - 1) | PSPIN, ifname, ts);
2749 }
2750
2751 /* interface is detached (maybe while asleep)? */
2752 if (ifp->if_poll_thread == THREAD_NULL) {
2753 ifnet_set_poll_cycle(ifp, NULL);
2754 lck_mtx_unlock(&ifp->if_poll_lock);
2755
2756 if (dlil_verbose) {
2757 printf("%s: poller thread terminated\n",
2758 if_name(ifp));
2759 }
2760
2761 /* for the extra refcnt from kernel_thread_start() */
2762 thread_deallocate(current_thread());
2763 /* this is the end */
2764 thread_terminate(current_thread());
2765 /* NOTREACHED */
2766 return;
2767 }
2768
2769 ifp->if_poll_active = 1;
2770 for (;;) {
2771 struct mbuf *m_head, *m_tail;
2772 u_int32_t m_lim, m_cnt, m_totlen;
2773 u_int16_t req = ifp->if_poll_req;
2774
2775 lck_mtx_unlock(&ifp->if_poll_lock);
2776
2777 /*
2778 * If no longer attached, there's nothing to do;
2779 * else hold an IO refcnt to prevent the interface
2780 * from being detached (will be released below.)
2781 */
2782 if (!ifnet_is_attached(ifp, 1)) {
2783 lck_mtx_lock_spin(&ifp->if_poll_lock);
2784 break;
2785 }
2786
2787 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
2788 MAX((qlimit(&inp->rcvq_pkts)),
2789 (inp->rxpoll_phiwat << 2));
2790
2791 if (dlil_verbose > 1) {
2792 printf("%s: polling up to %d pkts, "
2793 "pkts avg %d max %d, wreq avg %d, "
2794 "bytes avg %d\n",
2795 if_name(ifp), m_lim,
2796 inp->rxpoll_pavg, inp->rxpoll_pmax,
2797 inp->rxpoll_wavg, inp->rxpoll_bavg);
2798 }
2799
2800 /* invoke the driver's input poll routine */
2801 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
2802 &m_cnt, &m_totlen));
2803
2804 if (m_head != NULL) {
2805 VERIFY(m_tail != NULL && m_cnt > 0);
2806
2807 if (dlil_verbose > 1) {
2808 printf("%s: polled %d pkts, "
2809 "pkts avg %d max %d, wreq avg %d, "
2810 "bytes avg %d\n",
2811 if_name(ifp), m_cnt,
2812 inp->rxpoll_pavg, inp->rxpoll_pmax,
2813 inp->rxpoll_wavg, inp->rxpoll_bavg);
2814 }
2815
2816 /* stats are required for extended variant */
2817 s.packets_in = m_cnt;
2818 s.bytes_in = m_totlen;
2819
2820 (void) ifnet_input_common(ifp, m_head, m_tail,
2821 &s, TRUE, TRUE);
2822 } else {
2823 if (dlil_verbose > 1) {
2824 printf("%s: no packets, "
2825 "pkts avg %d max %d, wreq avg %d, "
2826 "bytes avg %d\n",
2827 if_name(ifp), inp->rxpoll_pavg,
2828 inp->rxpoll_pmax, inp->rxpoll_wavg,
2829 inp->rxpoll_bavg);
2830 }
2831
2832 (void) ifnet_input_common(ifp, NULL, NULL,
2833 NULL, FALSE, TRUE);
2834 }
2835
2836 /* Release the io ref count */
2837 ifnet_decr_iorefcnt(ifp);
2838
2839 lck_mtx_lock_spin(&ifp->if_poll_lock);
2840
2841 /* if there's no pending request, we're done */
2842 if (req == ifp->if_poll_req)
2843 break;
2844 }
2845 ifp->if_poll_req = 0;
2846 ifp->if_poll_active = 0;
2847
2848 /*
2849 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
2850 * until ifnet_poll() is called again.
2851 */
2852 ts = &ifp->if_poll_cycle;
2853 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
2854 ts = NULL;
2855 }
2856
2857 /* NOTREACHED */
2858 }
2859
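/*
 * For reference, a driver-side poll callback paired with the poller
 * thread above would have the following shape; this is only a sketch
 * (names and the ring-harvesting step are hypothetical), mirroring the
 * (*ifp->if_input_poll) invocation made earlier:
 *
 *	static void
 *	mydrv_input_poll(ifnet_t ifp, u_int32_t flags, u_int32_t max_cnt,
 *	    mbuf_t *m_head, mbuf_t *m_tail, u_int32_t *cnt, u_int32_t *len)
 *	{
 *		mbuf_t head = NULL, tail = NULL;
 *		u_int32_t npkts = 0, nbytes = 0;
 *
 *		harvest up to max_cnt packets from the RX ring into a
 *		chain linked via m_nextpkt, updating the four locals,
 *		then report the results:
 *
 *		*m_head = head;
 *		*m_tail = tail;
 *		*cnt = npkts;
 *		*len = nbytes;
 *	}
 *
 * The poller then hands the chain back in through ifnet_input_common()
 * with the poll flag set, as done above.
 */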
2860 void
2861 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2862 {
2863 if (ts == NULL)
2864 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
2865 else
2866 *(&ifp->if_poll_cycle) = *ts;
2867
2868 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2869 printf("%s: poll interval set to %lu nsec\n",
2870 if_name(ifp), ts->tv_nsec);
2871 }
2872
2873 void
2874 ifnet_purge(struct ifnet *ifp)
2875 {
2876 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
2877 if_qflush(ifp, 0);
2878 }
2879
2880 void
2881 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2882 {
2883 IFCQ_LOCK_ASSERT_HELD(ifq);
2884
2885 if (!(IFCQ_IS_READY(ifq)))
2886 return;
2887
2888 if (IFCQ_TBR_IS_ENABLED(ifq)) {
2889 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
2890 ifq->ifcq_tbr.tbr_percent, 0 };
2891 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
2892 }
2893
2894 ifclassq_update(ifq, ev);
2895 }
2896
2897 void
2898 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2899 {
2900 switch (ev) {
2901 case CLASSQ_EV_LINK_BANDWIDTH:
2902 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
2903 ifp->if_poll_update++;
2904 break;
2905
2906 default:
2907 break;
2908 }
2909 }
2910
2911 errno_t
2912 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2913 {
2914 struct ifclassq *ifq;
2915 u_int32_t omodel;
2916 errno_t err;
2917
2918 if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
2919 model != IFNET_SCHED_MODEL_NORMAL))
2920 return (EINVAL);
2921 else if (!(ifp->if_eflags & IFEF_TXSTART))
2922 return (ENXIO);
2923
2924 ifq = &ifp->if_snd;
2925 IFCQ_LOCK(ifq);
2926 omodel = ifp->if_output_sched_model;
2927 ifp->if_output_sched_model = model;
2928 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
2929 ifp->if_output_sched_model = omodel;
2930 IFCQ_UNLOCK(ifq);
2931
2932 return (err);
2933 }
2934
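/*
 * The scheduling model gates which dequeue KPIs a driver may use:
 * IFNET_SCHED_MODEL_NORMAL permits ifnet_dequeue() and
 * ifnet_dequeue_multi(), while IFNET_SCHED_MODEL_DRIVER_MANAGED is
 * required for ifnet_dequeue_service_class() and
 * ifnet_dequeue_service_class_multi(); see the checks in those
 * routines below.
 */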
2935 errno_t
2936 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2937 {
2938 if (ifp == NULL)
2939 return (EINVAL);
2940 else if (!(ifp->if_eflags & IFEF_TXSTART))
2941 return (ENXIO);
2942
2943 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
2944
2945 return (0);
2946 }
2947
2948 errno_t
2949 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2950 {
2951 if (ifp == NULL || maxqlen == NULL)
2952 return (EINVAL);
2953 else if (!(ifp->if_eflags & IFEF_TXSTART))
2954 return (ENXIO);
2955
2956 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
2957
2958 return (0);
2959 }
2960
2961 errno_t
2962 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
2963 {
2964 errno_t err;
2965
2966 if (ifp == NULL || pkts == NULL)
2967 err = EINVAL;
2968 else if (!(ifp->if_eflags & IFEF_TXSTART))
2969 err = ENXIO;
2970 else
2971 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
2972 pkts, NULL);
2973
2974 return (err);
2975 }
2976
2977 errno_t
2978 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2979 u_int32_t *pkts, u_int32_t *bytes)
2980 {
2981 errno_t err;
2982
2983 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2984 (pkts == NULL && bytes == NULL))
2985 err = EINVAL;
2986 else if (!(ifp->if_eflags & IFEF_TXSTART))
2987 err = ENXIO;
2988 else
2989 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
2990
2991 return (err);
2992 }
2993
2994 errno_t
2995 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2996 {
2997 struct dlil_threading_info *inp;
2998
2999 if (ifp == NULL)
3000 return (EINVAL);
3001 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3002 return (ENXIO);
3003
3004 if (maxqlen == 0)
3005 maxqlen = if_rcvq_maxlen;
3006 else if (maxqlen < IF_RCVQ_MINLEN)
3007 maxqlen = IF_RCVQ_MINLEN;
3008
3009 inp = ifp->if_inp;
3010 lck_mtx_lock(&inp->input_lck);
3011 qlimit(&inp->rcvq_pkts) = maxqlen;
3012 lck_mtx_unlock(&inp->input_lck);
3013
3014 return (0);
3015 }
3016
3017 errno_t
3018 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3019 {
3020 struct dlil_threading_info *inp;
3021
3022 if (ifp == NULL || maxqlen == NULL)
3023 return (EINVAL);
3024 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3025 return (ENXIO);
3026
3027 inp = ifp->if_inp;
3028 lck_mtx_lock(&inp->input_lck);
3029 *maxqlen = qlimit(&inp->rcvq_pkts);
3030 lck_mtx_unlock(&inp->input_lck);
3031 return (0);
3032 }
3033
3034 errno_t
3035 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3036 {
3037 int error;
3038 struct timespec now;
3039 u_int64_t now_nsec;
3040
3041 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3042 m->m_nextpkt != NULL) {
3043 if (m != NULL)
3044 m_freem_list(m);
3045 return (EINVAL);
3046 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3047 !(ifp->if_refflags & IFRF_ATTACHED)) {
3048 /* flag tested without lock for performance */
3049 m_freem(m);
3050 return (ENXIO);
3051 } else if (!(ifp->if_flags & IFF_UP)) {
3052 m_freem(m);
3053 return (ENETDOWN);
3054 }
3055
3056 nanouptime(&now);
3057 net_timernsec(&now, &now_nsec);
3058 m->m_pkthdr.pkt_enqueue_ts = now_nsec;
3059
3060 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3061 /*
3062 * If the driver chose to delay start callback for
3063 * coalescing multiple packets, then use the following
3064 * heuristics to make sure that start callback will
3065 * be delayed only when bulk data transfer is detected.
3066 * 1. number of packets enqueued in (delay_win * 2) is
3067 * greater than or equal to the delay qlen.
3068 * 2. If delay_start is enabled it will stay enabled for
3069 * another 10 idle windows. This is to take into account
3070 * variable RTT and burst traffic.
3071 * 3. If the time elapsed since last enqueue is more
3072 * than 200ms we disable delaying start callback. This is
3073 * to take idle time into account.
3074 */
3075 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3076 if (ifp->if_start_delay_swin > 0) {
3077 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3078 ifp->if_start_delay_cnt++;
3079 } else if ((now_nsec - ifp->if_start_delay_swin)
3080 >= (200 * 1000 * 1000)) {
3081 ifp->if_start_delay_swin = now_nsec;
3082 ifp->if_start_delay_cnt = 1;
3083 ifp->if_start_delay_idle = 0;
3084 if (ifp->if_eflags & IFEF_DELAY_START) {
3085 ifp->if_eflags &=
3086 ~(IFEF_DELAY_START);
3087 ifnet_delay_start_disabled++;
3088 }
3089 } else {
3090 if (ifp->if_start_delay_cnt >=
3091 ifp->if_start_delay_qlen) {
3092 ifp->if_eflags |= IFEF_DELAY_START;
3093 ifp->if_start_delay_idle = 0;
3094 } else {
3095 if (ifp->if_start_delay_idle >= 10) {
3096 ifp->if_eflags &= ~(IFEF_DELAY_START);
3097 ifnet_delay_start_disabled++;
3098 } else {
3099 ifp->if_start_delay_idle++;
3100 }
3101 }
3102 ifp->if_start_delay_swin = now_nsec;
3103 ifp->if_start_delay_cnt = 1;
3104 }
3105 } else {
3106 ifp->if_start_delay_swin = now_nsec;
3107 ifp->if_start_delay_cnt = 1;
3108 ifp->if_start_delay_idle = 0;
3109 ifp->if_eflags &= ~(IFEF_DELAY_START);
3110 }
3111 } else {
3112 ifp->if_eflags &= ~(IFEF_DELAY_START);
3113 }
3114
3115 /* enqueue the packet */
3116 error = ifclassq_enqueue(&ifp->if_snd, m);
3117
3118 /*
3119 * Tell the driver to start dequeueing; do this even when the queue
3120 * for the packet is suspended (EQSUSPENDED), as the driver could still
3121 * be dequeueing from other unsuspended queues.
3122 */
3123 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3124 (error == 0 || error == EQFULL || error == EQSUSPENDED))
3125 ifnet_start(ifp);
3126
3127 return (error);
3128 }
3129
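/*
 * A minimal sketch of the transmit-side contract for ifnet_enqueue()
 * above, assuming the caller hands over exactly one M_PKTHDR packet
 * (m_nextpkt not set) destined for an IFEF_TXSTART interface:
 *
 *	errno_t err = ifnet_enqueue(ifp, m);
 *	if (err != 0 && err != EQFULL && err != EQSUSPENDED) {
 *		the mbuf has already been freed above on
 *		EINVAL/ENXIO/ENETDOWN; only account for the drop here
 *	}
 *
 * For success, EQFULL and EQSUSPENDED, ifnet_enqueue() has already
 * kicked the starter thread via ifnet_start(), unless IFEF_ENQUEUE_MULTI
 * is set, in which case start coalescing follows the delay-start
 * heuristic above.
 */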
3130 errno_t
3131 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3132 {
3133 errno_t rc;
3134 if (ifp == NULL || mp == NULL)
3135 return (EINVAL);
3136 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3137 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
3138 return (ENXIO);
3139 if (!ifnet_is_attached(ifp, 1))
3140 return (ENXIO);
3141 rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL);
3142 ifnet_decr_iorefcnt(ifp);
3143
3144 return (rc);
3145 }
3146
3147 errno_t
3148 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3149 struct mbuf **mp)
3150 {
3151 errno_t rc;
3152 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3153 return (EINVAL);
3154 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3155 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
3156 return (ENXIO);
3157 if (!ifnet_is_attached(ifp, 1))
3158 return (ENXIO);
3159
3160 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
3161 ifnet_decr_iorefcnt(ifp);
3162 return (rc);
3163 }
3164
3165 errno_t
3166 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
3167 struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3168 {
3169 errno_t rc;
3170 if (ifp == NULL || head == NULL || limit < 1)
3171 return (EINVAL);
3172 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3173 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
3174 return (ENXIO);
3175 if (!ifnet_is_attached(ifp, 1))
3176 return (ENXIO);
3177
3178 rc = ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len);
3179 ifnet_decr_iorefcnt(ifp);
3180 return (rc);
3181 }
3182
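/*
 * A hypothetical driver if_start routine built on the multi-packet
 * dequeue KPI above might look like this (purely a sketch; the batch
 * size and the hardware submit step are placeholders):
 *
 *	static void
 *	mydrv_start(ifnet_t ifp)
 *	{
 *		struct mbuf *head, *tail;
 *		u_int32_t cnt, len;
 *
 *		while (ifnet_dequeue_multi(ifp, MYDRV_TX_BATCH, &head,
 *		    &tail, &cnt, &len) == 0) {
 *			hand the chain of cnt packets / len bytes to
 *			the hardware transmit ring
 *		}
 *	}
 *
 * The routine runs in the per-interface starter thread
 * (ifnet_start_thread_fn) whenever ifnet_start() signals work, and it
 * requires IFNET_SCHED_MODEL_NORMAL per the check above.
 */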
3183 errno_t
3184 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3185 u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3186 u_int32_t *len)
3187 {
3188 errno_t rc;
3189 if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
3190 return (EINVAL);
3191 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3192 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
3193 return (ENXIO);
3194 if (!ifnet_is_attached(ifp, 1))
3195 return (ENXIO);
3196 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
3197 tail, cnt, len);
3198 ifnet_decr_iorefcnt(ifp);
3199 return (rc);
3200 }
3201
3202 errno_t
3203 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3204 const struct sockaddr *dest, const char *dest_linkaddr,
3205 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3206 {
3207 if (pre != NULL)
3208 *pre = 0;
3209 if (post != NULL)
3210 *post = 0;
3211
3212 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3213 }
3214
3215 static int
3216 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3217 char **frame_header_p, protocol_family_t protocol_family)
3218 {
3219 struct ifnet_filter *filter;
3220
3221 /*
3222 * Pass the inbound packet to the interface filters
3223 */
3224 lck_mtx_lock_spin(&ifp->if_flt_lock);
3225 /* prevent filter list from changing in case we drop the lock */
3226 if_flt_monitor_busy(ifp);
3227 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3228 int result;
3229
3230 if (!filter->filt_skip && filter->filt_input != NULL &&
3231 (filter->filt_protocol == 0 ||
3232 filter->filt_protocol == protocol_family)) {
3233 lck_mtx_unlock(&ifp->if_flt_lock);
3234
3235 result = (*filter->filt_input)(filter->filt_cookie,
3236 ifp, protocol_family, m_p, frame_header_p);
3237
3238 lck_mtx_lock_spin(&ifp->if_flt_lock);
3239 if (result != 0) {
3240 /* we're done with the filter list */
3241 if_flt_monitor_unbusy(ifp);
3242 lck_mtx_unlock(&ifp->if_flt_lock);
3243 return (result);
3244 }
3245 }
3246 }
3247 /* we're done with the filter list */
3248 if_flt_monitor_unbusy(ifp);
3249 lck_mtx_unlock(&ifp->if_flt_lock);
3250
3251 /*
3252 * Strip away the M_PROTO1 bit prior to sending the packet up the
3253 * stack, as it is meant to be local to a subsystem -- if_bridge
3254 * uses M_PROTO1
3254 */
3255 if (*m_p != NULL)
3256 (*m_p)->m_flags &= ~M_PROTO1;
3257
3258 return (0);
3259 }
3260
3261 static int
3262 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3263 protocol_family_t protocol_family)
3264 {
3265 struct ifnet_filter *filter;
3266
3267 /*
3268 * Pass the outbound packet to the interface filters
3269 */
3270 lck_mtx_lock_spin(&ifp->if_flt_lock);
3271 /* prevent filter list from changing in case we drop the lock */
3272 if_flt_monitor_busy(ifp);
3273 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3274 int result;
3275
3276 if (!filter->filt_skip && filter->filt_output != NULL &&
3277 (filter->filt_protocol == 0 ||
3278 filter->filt_protocol == protocol_family)) {
3279 lck_mtx_unlock(&ifp->if_flt_lock);
3280
3281 result = filter->filt_output(filter->filt_cookie, ifp,
3282 protocol_family, m_p);
3283
3284 lck_mtx_lock_spin(&ifp->if_flt_lock);
3285 if (result != 0) {
3286 /* we're done with the filter list */
3287 if_flt_monitor_unbusy(ifp);
3288 lck_mtx_unlock(&ifp->if_flt_lock);
3289 return (result);
3290 }
3291 }
3292 }
3293 /* we're done with the filter list */
3294 if_flt_monitor_unbusy(ifp);
3295 lck_mtx_unlock(&ifp->if_flt_lock);
3296
3297 return (0);
3298 }
3299
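/*
 * For both walks above, a nonzero filter result terminates the walk and
 * is propagated to the caller.  On input, EJUSTRETURN indicates the
 * filter took ownership of the packet, while any other error causes
 * dlil_input_packet_list_common() to free it; a similar convention
 * applies on the output path.
 */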
3300 static void
3301 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3302 {
3303 int error;
3304
3305 if (ifproto->proto_kpi == kProtoKPI_v1) {
3306 /* Version 1 protocols get one packet at a time */
3307 while (m != NULL) {
3308 char * frame_header;
3309 mbuf_t next_packet;
3310
3311 next_packet = m->m_nextpkt;
3312 m->m_nextpkt = NULL;
3313 frame_header = m->m_pkthdr.pkt_hdr;
3314 m->m_pkthdr.pkt_hdr = NULL;
3315 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3316 ifproto->protocol_family, m, frame_header);
3317 if (error != 0 && error != EJUSTRETURN)
3318 m_freem(m);
3319 m = next_packet;
3320 }
3321 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3322 /* Version 2 protocols support packet lists */
3323 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3324 ifproto->protocol_family, m);
3325 if (error != 0 && error != EJUSTRETURN)
3326 m_freem_list(m);
3327 }
3328 return;
3329 }
3330
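/*
 * For reference, a v2 protocol input handler matching the kpi.v2.input
 * call above receives the whole packet list in one call; a hypothetical
 * handler (names illustrative) would look like:
 *
 *	static errno_t
 *	myproto_input_v2(ifnet_t ifp, protocol_family_t protocol,
 *	    mbuf_t packet_list)
 *	{
 *		mbuf_t m, next;
 *
 *		for (m = packet_list; m != NULL; m = next) {
 *			next = m->m_nextpkt;
 *			m->m_nextpkt = NULL;
 *			process or free the packet here
 *		}
 *		return (0);
 *	}
 *
 * Returning 0 (or EJUSTRETURN) means the handler has consumed the list;
 * any other error, returned without touching the list, makes
 * dlil_ifproto_input() free it with m_freem_list().
 */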
3331 static void
3332 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3333 struct dlil_threading_info *inp, boolean_t poll)
3334 {
3335 struct ifnet_stat_increment_param *d = &inp->stats;
3336
3337 if (s->packets_in != 0)
3338 d->packets_in += s->packets_in;
3339 if (s->bytes_in != 0)
3340 d->bytes_in += s->bytes_in;
3341 if (s->errors_in != 0)
3342 d->errors_in += s->errors_in;
3343
3344 if (s->packets_out != 0)
3345 d->packets_out += s->packets_out;
3346 if (s->bytes_out != 0)
3347 d->bytes_out += s->bytes_out;
3348 if (s->errors_out != 0)
3349 d->errors_out += s->errors_out;
3350
3351 if (s->collisions != 0)
3352 d->collisions += s->collisions;
3353 if (s->dropped != 0)
3354 d->dropped += s->dropped;
3355
3356 if (poll)
3357 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3358 }
3359
3360 static void
3361 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3362 {
3363 struct ifnet_stat_increment_param *s = &inp->stats;
3364
3365 /*
3366 * Use of atomic operations is unavoidable here because
3367 * these stats may also be incremented elsewhere via KPIs.
3368 */
3369 if (s->packets_in != 0) {
3370 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3371 s->packets_in = 0;
3372 }
3373 if (s->bytes_in != 0) {
3374 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3375 s->bytes_in = 0;
3376 }
3377 if (s->errors_in != 0) {
3378 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3379 s->errors_in = 0;
3380 }
3381
3382 if (s->packets_out != 0) {
3383 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3384 s->packets_out = 0;
3385 }
3386 if (s->bytes_out != 0) {
3387 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3388 s->bytes_out = 0;
3389 }
3390 if (s->errors_out != 0) {
3391 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3392 s->errors_out = 0;
3393 }
3394
3395 if (s->collisions != 0) {
3396 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3397 s->collisions = 0;
3398 }
3399 if (s->dropped != 0) {
3400 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3401 s->dropped = 0;
3402 }
3403 /*
3404 * If we went over the threshold, notify NetworkStatistics.
3405 */
3406 if (ifp->if_data_threshold &&
3407 (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
3408 ifp->if_data_threshold) {
3409 ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
3410 nstat_ifnet_threshold_reached(ifp->if_index);
3411 }
3412 /*
3413 * No need for atomic operations as they are modified here
3414 * only from within the DLIL input thread context.
3415 */
3416 if (inp->tstats.packets != 0) {
3417 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3418 inp->tstats.packets = 0;
3419 }
3420 if (inp->tstats.bytes != 0) {
3421 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3422 inp->tstats.bytes = 0;
3423 }
3424 }
3425
3426 __private_extern__ void
3427 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3428 {
3429 return (dlil_input_packet_list_common(ifp, m, 0,
3430 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3431 }
3432
3433 __private_extern__ void
3434 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3435 u_int32_t cnt, ifnet_model_t mode)
3436 {
3437 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3438 }
3439
3440 static void
3441 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3442 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3443 {
3444 int error = 0;
3445 protocol_family_t protocol_family;
3446 mbuf_t next_packet;
3447 ifnet_t ifp = ifp_param;
3448 char * frame_header;
3449 struct if_proto * last_ifproto = NULL;
3450 mbuf_t pkt_first = NULL;
3451 mbuf_t * pkt_next = NULL;
3452 u_int32_t poll_thresh = 0, poll_ival = 0;
3453
3454 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
3455
3456 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3457 (poll_ival = if_rxpoll_interval_pkts) > 0)
3458 poll_thresh = cnt;
3459
3460 while (m != NULL) {
3461 struct if_proto *ifproto = NULL;
3462 int iorefcnt = 0;
3463 uint32_t pktf_mask; /* pkt flags to preserve */
3464
3465 if (ifp_param == NULL)
3466 ifp = m->m_pkthdr.rcvif;
3467
3468 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3469 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3470 ifnet_poll(ifp);
3471
3472 /* Check if this mbuf looks valid */
3473 MBUF_INPUT_CHECK(m, ifp);
3474
3475 next_packet = m->m_nextpkt;
3476 m->m_nextpkt = NULL;
3477 frame_header = m->m_pkthdr.pkt_hdr;
3478 m->m_pkthdr.pkt_hdr = NULL;
3479
3480 /*
3481 * Get an IO reference count if the interface is not
3482 * loopback (lo0) and it is attached; lo0 never goes
3483 * away, so optimize for that.
3484 */
3485 if (ifp != lo_ifp) {
3486 if (!ifnet_is_attached(ifp, 1)) {
3487 m_freem(m);
3488 goto next;
3489 }
3490 iorefcnt = 1;
3491 pktf_mask = 0;
3492 } else {
3493 /*
3494 * If this arrived on lo0, preserve interface addr
3495 * info to allow for connectivity between loopback
3496 * and local interface addresses.
3497 */
3498 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3499 }
3500
3501 /* make sure packet comes in clean */
3502 m_classifier_init(m, pktf_mask);
3503
3504 ifp_inc_traffic_class_in(ifp, m);
3505
3506 /* find which protocol family this packet is for */
3507 ifnet_lock_shared(ifp);
3508 error = (*ifp->if_demux)(ifp, m, frame_header,
3509 &protocol_family);
3510 ifnet_lock_done(ifp);
3511 if (error != 0) {
3512 if (error == EJUSTRETURN)
3513 goto next;
3514 protocol_family = 0;
3515 }
3516
3517 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3518 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3519 dlil_input_cksum_dbg(ifp, m, frame_header,
3520 protocol_family);
3521
3522 /*
3523 * For partial checksum offload, we expect the driver to
3524 * set the start offset indicating the start of the span
3525 * that is covered by the hardware-computed checksum;
3526 * adjust this start offset accordingly because the data
3527 * pointer has been advanced beyond the link-layer header.
3528 *
3529 * Don't adjust if the interface is a bridge member, as
3530 * the adjustment will occur from the context of the
3531 * bridge interface during input.
3532 */
3533 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3534 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3535 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3536 int adj;
3537
3538 if (frame_header == NULL ||
3539 frame_header < (char *)mbuf_datastart(m) ||
3540 frame_header > (char *)m->m_data ||
3541 (adj = (m->m_data - frame_header)) >
3542 m->m_pkthdr.csum_rx_start) {
3543 m->m_pkthdr.csum_data = 0;
3544 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3545 hwcksum_in_invalidated++;
3546 } else {
3547 m->m_pkthdr.csum_rx_start -= adj;
3548 }
3549 }
3550
3551 pktap_input(ifp, protocol_family, m, frame_header);
3552
3553 if (m->m_flags & (M_BCAST|M_MCAST))
3554 atomic_add_64(&ifp->if_imcasts, 1);
3555
3556 /* run interface filters, exclude VLAN packets PR-3586856 */
3557 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
3558 error = dlil_interface_filters_input(ifp, &m,
3559 &frame_header, protocol_family);
3560 if (error != 0) {
3561 if (error != EJUSTRETURN)
3562 m_freem(m);
3563 goto next;
3564 }
3565 }
3566 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
3567 m_freem(m);
3568 goto next;
3569 }
3570
3571 /* Lookup the protocol attachment to this interface */
3572 if (protocol_family == 0) {
3573 ifproto = NULL;
3574 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3575 (last_ifproto->protocol_family == protocol_family)) {
3576 VERIFY(ifproto == NULL);
3577 ifproto = last_ifproto;
3578 if_proto_ref(last_ifproto);
3579 } else {
3580 VERIFY(ifproto == NULL);
3581 ifnet_lock_shared(ifp);
3582 /* callee holds a proto refcnt upon success */
3583 ifproto = find_attached_proto(ifp, protocol_family);
3584 ifnet_lock_done(ifp);
3585 }
3586 if (ifproto == NULL) {
3587 /* no protocol for this packet, discard */
3588 m_freem(m);
3589 goto next;
3590 }
3591 if (ifproto != last_ifproto) {
3592 if (last_ifproto != NULL) {
3593 /* pass up the list for the previous protocol */
3594 dlil_ifproto_input(last_ifproto, pkt_first);
3595 pkt_first = NULL;
3596 if_proto_free(last_ifproto);
3597 }
3598 last_ifproto = ifproto;
3599 if_proto_ref(ifproto);
3600 }
3601 /* extend the list */
3602 m->m_pkthdr.pkt_hdr = frame_header;
3603 if (pkt_first == NULL) {
3604 pkt_first = m;
3605 } else {
3606 *pkt_next = m;
3607 }
3608 pkt_next = &m->m_nextpkt;
3609
3610 next:
3611 if (next_packet == NULL && last_ifproto != NULL) {
3612 /* pass up the last list of packets */
3613 dlil_ifproto_input(last_ifproto, pkt_first);
3614 if_proto_free(last_ifproto);
3615 last_ifproto = NULL;
3616 }
3617 if (ifproto != NULL) {
3618 if_proto_free(ifproto);
3619 ifproto = NULL;
3620 }
3621
3622 m = next_packet;
3623
3624 /* update the driver's multicast filter, if needed */
3625 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3626 ifp->if_updatemcasts = 0;
3627 if (iorefcnt == 1)
3628 ifnet_decr_iorefcnt(ifp);
3629 }
3630
3631 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
3632 }
3633
3634 errno_t
3635 if_mcasts_update(struct ifnet *ifp)
3636 {
3637 errno_t err;
3638
3639 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
3640 if (err == EAFNOSUPPORT)
3641 err = 0;
3642 printf("%s: %s %d suspended link-layer multicast membership(s) "
3643 "(err=%d)\n", if_name(ifp),
3644 (err == 0 ? "successfully restored" : "failed to restore"),
3645 ifp->if_updatemcasts, err);
3646
3647 /* just return success */
3648 return (0);
3649 }
3650
3651
3652 #define TMP_IF_PROTO_ARR_SIZE 10
3653 static int
3654 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
3655 {
3656 struct ifnet_filter *filter = NULL;
3657 struct if_proto *proto = NULL;
3658 int if_proto_count = 0;
3659 struct if_proto **tmp_ifproto_arr = NULL;
3660 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
3661 int tmp_ifproto_arr_idx = 0;
3662 bool tmp_malloc = false;
3663
3664 /*
3665 * Pass the event to the interface filters
3666 */
3667 lck_mtx_lock_spin(&ifp->if_flt_lock);
3668 /* prevent filter list from changing in case we drop the lock */
3669 if_flt_monitor_busy(ifp);
3670 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3671 if (filter->filt_event != NULL) {
3672 lck_mtx_unlock(&ifp->if_flt_lock);
3673
3674 filter->filt_event(filter->filt_cookie, ifp,
3675 filter->filt_protocol, event);
3676
3677 lck_mtx_lock_spin(&ifp->if_flt_lock);
3678 }
3679 }
3680 /* we're done with the filter list */
3681 if_flt_monitor_unbusy(ifp);
3682 lck_mtx_unlock(&ifp->if_flt_lock);
3683
3684 /* Get an io ref count if the interface is attached */
3685 if (!ifnet_is_attached(ifp, 1))
3686 goto done;
3687
3688 /*
3689 * An embedded tmp_list_entry in if_proto may still get
3690 * overwritten by another thread after the ifnet lock is dropped,
3691 * so we avoid embedded list pointers here.
3692 */
3693 ifnet_lock_shared(ifp);
3694 if_proto_count = dlil_ifp_proto_count(ifp);
3695 if (if_proto_count) {
3696 int i;
3697 VERIFY(ifp->if_proto_hash != NULL);
3698 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
3699 tmp_ifproto_arr = tmp_ifproto_stack_arr;
3700 } else {
3701 MALLOC(tmp_ifproto_arr, struct if_proto **,
3702 sizeof (*tmp_ifproto_arr) * if_proto_count,
3703 M_TEMP, M_ZERO);
3704 if (tmp_ifproto_arr == NULL) {
3705 ifnet_lock_done(ifp);
3706 goto cleanup;
3707 }
3708 tmp_malloc = true;
3709 }
3710
3711 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
3712 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
3713 next_hash) {
3714 if_proto_ref(proto);
3715 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
3716 tmp_ifproto_arr_idx++;
3717 }
3718 }
3719 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
3720 }
3721 ifnet_lock_done(ifp);
3722
3723 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
3724 tmp_ifproto_arr_idx++) {
3725 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
3726 VERIFY(proto != NULL);
3727 proto_media_event eventp =
3728 (proto->proto_kpi == kProtoKPI_v1 ?
3729 proto->kpi.v1.event :
3730 proto->kpi.v2.event);
3731
3732 if (eventp != NULL) {
3733 eventp(ifp, proto->protocol_family,
3734 event);
3735 }
3736 if_proto_free(proto);
3737 }
3738
3739 cleanup:
3740 if (tmp_malloc) {
3741 FREE(tmp_ifproto_arr, M_TEMP);
3742 }
3743
3744 /* Pass the event to the interface */
3745 if (ifp->if_event != NULL)
3746 ifp->if_event(ifp, event);
3747
3748 /* Release the io ref count */
3749 ifnet_decr_iorefcnt(ifp);
3750 done:
3751 return (kev_post_msg(event));
3752 }
3753
3754 errno_t
3755 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3756 {
3757 struct kev_msg kev_msg;
3758 int result = 0;
3759
3760 if (ifp == NULL || event == NULL)
3761 return (EINVAL);
3762
3763 bzero(&kev_msg, sizeof (kev_msg));
3764 kev_msg.vendor_code = event->vendor_code;
3765 kev_msg.kev_class = event->kev_class;
3766 kev_msg.kev_subclass = event->kev_subclass;
3767 kev_msg.event_code = event->event_code;
3768 kev_msg.dv[0].data_ptr = &event->event_data[0];
3769 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3770 kev_msg.dv[1].data_length = 0;
3771
3772 result = dlil_event_internal(ifp, &kev_msg);
3773
3774 return (result);
3775 }
3776
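/*
 * Illustrative caller sketch (an assumption, not part of the original
 * source): a driver can post a data-link event through the ifnet_event()
 * KPI above. This minimal version carries no payload beyond the header;
 * real drivers typically append a struct net_event_data after it.
 */
static void
example_post_link_up(ifnet_t ifp)
{
	struct kern_event_msg ev;

	bzero(&ev, sizeof (ev));
	ev.total_size = KEV_MSG_HEADER_SIZE;	/* header only, no payload */
	ev.vendor_code = KEV_VENDOR_APPLE;
	ev.kev_class = KEV_NETWORK_CLASS;
	ev.kev_subclass = KEV_DL_SUBCLASS;
	ev.event_code = KEV_DL_LINK_ON;

	(void) ifnet_event(ifp, &ev);
}
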
3777 #if CONFIG_MACF_NET
3778 #include <netinet/ip6.h>
3779 #include <netinet/ip.h>
3780 static int
3781 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
3782 {
3783 struct mbuf *m;
3784 struct ip *ip;
3785 struct ip6_hdr *ip6;
3786 int type = SOCK_RAW;
3787
3788 if (!raw) {
3789 switch (family) {
3790 case PF_INET:
3791 m = m_pullup(*mp, sizeof(struct ip));
3792 if (m == NULL)
3793 break;
3794 *mp = m;
3795 ip = mtod(m, struct ip *);
3796 if (ip->ip_p == IPPROTO_TCP)
3797 type = SOCK_STREAM;
3798 else if (ip->ip_p == IPPROTO_UDP)
3799 type = SOCK_DGRAM;
3800 break;
3801 case PF_INET6:
3802 m = m_pullup(*mp, sizeof(struct ip6_hdr));
3803 if (m == NULL)
3804 break;
3805 *mp = m;
3806 ip6 = mtod(m, struct ip6_hdr *);
3807 if (ip6->ip6_nxt == IPPROTO_TCP)
3808 type = SOCK_STREAM;
3809 else if (ip6->ip6_nxt == IPPROTO_UDP)
3810 type = SOCK_DGRAM;
3811 break;
3812 }
3813 }
3814
3815 return (type);
3816 }
3817 #endif
3818
3819 /*
3820 * This is mostly called from the context of the DLIL input thread;
3821 * because of that there is no need for atomic operations.
3822 */
3823 static __inline void
3824 ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
3825 {
3826 if (!(m->m_flags & M_PKTHDR))
3827 return;
3828
3829 switch (m_get_traffic_class(m)) {
3830 case MBUF_TC_BE:
3831 ifp->if_tc.ifi_ibepackets++;
3832 ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
3833 break;
3834 case MBUF_TC_BK:
3835 ifp->if_tc.ifi_ibkpackets++;
3836 ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
3837 break;
3838 case MBUF_TC_VI:
3839 ifp->if_tc.ifi_ivipackets++;
3840 ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
3841 break;
3842 case MBUF_TC_VO:
3843 ifp->if_tc.ifi_ivopackets++;
3844 ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
3845 break;
3846 default:
3847 break;
3848 }
3849
3850 if (mbuf_is_traffic_class_privileged(m)) {
3851 ifp->if_tc.ifi_ipvpackets++;
3852 ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
3853 }
3854 }
3855
3856 /*
3857 * This is called from DLIL output, hence multiple threads could end
3858 * up modifying the statistics. We trade off accuracy for performance
3859 * by not using atomic operations here.
3860 */
3861 static __inline void
3862 ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
3863 {
3864 if (!(m->m_flags & M_PKTHDR))
3865 return;
3866
3867 switch (m_get_traffic_class(m)) {
3868 case MBUF_TC_BE:
3869 ifp->if_tc.ifi_obepackets++;
3870 ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
3871 break;
3872 case MBUF_TC_BK:
3873 ifp->if_tc.ifi_obkpackets++;
3874 ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
3875 break;
3876 case MBUF_TC_VI:
3877 ifp->if_tc.ifi_ovipackets++;
3878 ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
3879 break;
3880 case MBUF_TC_VO:
3881 ifp->if_tc.ifi_ovopackets++;
3882 ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
3883 break;
3884 default:
3885 break;
3886 }
3887
3888 if (mbuf_is_traffic_class_privileged(m)) {
3889 ifp->if_tc.ifi_opvpackets++;
3890 ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
3891 }
3892 }
3893
3894 static void
3895 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
3896 {
3897 mbuf_t n = m;
3898 int chainlen = 0;
3899
3900 while (n != NULL) {
3901 chainlen++;
3902 n = n->m_next;
3903 }
3904 switch (chainlen) {
3905 case 0:
3906 break;
3907 case 1:
3908 atomic_add_64(&cls->cls_one, 1);
3909 break;
3910 case 2:
3911 atomic_add_64(&cls->cls_two, 1);
3912 break;
3913 case 3:
3914 atomic_add_64(&cls->cls_three, 1);
3915 break;
3916 case 4:
3917 atomic_add_64(&cls->cls_four, 1);
3918 break;
3919 case 5:
3920 default:
3921 atomic_add_64(&cls->cls_five_or_more, 1);
3922 break;
3923 }
3924 }
3925
3926 /*
3927 * dlil_output
3928 *
3929 * Caller should have a lock on the protocol domain if the protocol
3930 * doesn't support finer-grained locking. In most cases, the lock
3931 * will be held from the socket layer and won't be released until
3932 * we return back to the socket layer.
3933 *
3934 * This does mean that we must take a protocol lock before we take
3935 * an interface lock if we're going to take both. This makes sense
3936 * because a protocol is likely to interact with an ifp while it
3937 * is under the protocol lock.
3938 *
3939 * An advisory code will be returned if adv is not null. This
3940 * can be used to provide feedback about interface queues to the
3941 * application.
3942 */
3943 errno_t
3944 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
3945 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
3946 {
3947 char *frame_type = NULL;
3948 char *dst_linkaddr = NULL;
3949 int retval = 0;
3950 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
3951 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
3952 struct if_proto *proto = NULL;
3953 mbuf_t m;
3954 mbuf_t send_head = NULL;
3955 mbuf_t *send_tail = &send_head;
3956 int iorefcnt = 0;
3957 u_int32_t pre = 0, post = 0;
3958 u_int32_t fpkts = 0, fbytes = 0;
3959 int32_t flen = 0;
3960
3961 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3962
3963 /* Get an io refcnt if the interface is attached to prevent ifnet_detach
3964 * from happening while this operation is in progress */
3965 if (!ifnet_is_attached(ifp, 1)) {
3966 retval = ENXIO;
3967 goto cleanup;
3968 }
3969 iorefcnt = 1;
3970
3971 /* update the driver's multicast filter, if needed */
3972 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3973 ifp->if_updatemcasts = 0;
3974
3975 frame_type = frame_type_buffer;
3976 dst_linkaddr = dst_linkaddr_buffer;
3977
3978 if (raw == 0) {
3979 ifnet_lock_shared(ifp);
3980 /* callee holds a proto refcnt upon success */
3981 proto = find_attached_proto(ifp, proto_family);
3982 if (proto == NULL) {
3983 ifnet_lock_done(ifp);
3984 retval = ENXIO;
3985 goto cleanup;
3986 }
3987 ifnet_lock_done(ifp);
3988 }
3989
3990 preout_again:
3991 if (packetlist == NULL)
3992 goto cleanup;
3993
3994 m = packetlist;
3995 packetlist = packetlist->m_nextpkt;
3996 m->m_nextpkt = NULL;
3997
3998 if (raw == 0) {
3999 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4000 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4001 retval = 0;
4002 if (preoutp != NULL) {
4003 retval = preoutp(ifp, proto_family, &m, dest, route,
4004 frame_type, dst_linkaddr);
4005
4006 if (retval != 0) {
4007 if (retval == EJUSTRETURN)
4008 goto preout_again;
4009 m_freem(m);
4010 goto cleanup;
4011 }
4012 }
4013 }
4014
4015 #if CONFIG_MACF_NET
4016 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4017 dlil_get_socket_type(&m, proto_family, raw));
4018 if (retval != 0) {
4019 m_freem(m);
4020 goto cleanup;
4021 }
4022 #endif
4023
4024 do {
4025 #if CONFIG_DTRACE
4026 if (!raw && proto_family == PF_INET) {
4027 struct ip *ip = mtod(m, struct ip*);
4028 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4029 struct ip *, ip, struct ifnet *, ifp,
4030 struct ip *, ip, struct ip6_hdr *, NULL);
4031
4032 } else if (!raw && proto_family == PF_INET6) {
4033 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
4034 DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
4035 struct ip6_hdr *, ip6, struct ifnet*, ifp,
4036 struct ip*, NULL, struct ip6_hdr *, ip6);
4037 }
4038 #endif /* CONFIG_DTRACE */
4039
4040 if (raw == 0 && ifp->if_framer != NULL) {
4041 int rcvif_set = 0;
4042
4043 /*
4044 * If this is a broadcast packet that needs to be
4045 * looped back into the system, set the inbound ifp
4046 * to that of the outbound ifp. This will allow
4047 * us to determine that it is a legitimate packet
4048 * for the system. Only set the ifp if it's not
4049 * already set, just to be safe.
4050 */
4051 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4052 m->m_pkthdr.rcvif == NULL) {
4053 m->m_pkthdr.rcvif = ifp;
4054 rcvif_set = 1;
4055 }
4056
4057 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4058 frame_type, &pre, &post);
4059 if (retval != 0) {
4060 if (retval != EJUSTRETURN)
4061 m_freem(m);
4062 goto next;
4063 }
4064
4065 /*
4066 * For partial checksum offload, adjust the start
4067 * and stuff offsets based on the prepended header.
4068 */
4069 if ((m->m_pkthdr.csum_flags &
4070 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4071 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4072 m->m_pkthdr.csum_tx_stuff += pre;
4073 m->m_pkthdr.csum_tx_start += pre;
4074 }
4075
4076 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4077 dlil_output_cksum_dbg(ifp, m, pre,
4078 proto_family);
4079
4080 /*
4081 * Clear the ifp if it was set above, and to be
4082 * safe, only if it is still the same as the
4083 * outbound ifp we have in context. If it was
4084 * looped back, then a copy of it was sent to the
4085 * loopback interface with the rcvif set, and we
4086 * are clearing the one that will go down to the
4087 * layer below.
4088 */
4089 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4090 m->m_pkthdr.rcvif = NULL;
4091 }
4092
4093 /*
4094 * Let interface filters (if any) do their thing ...
4095 */
4096 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4097 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4098 retval = dlil_interface_filters_output(ifp,
4099 &m, proto_family);
4100 if (retval != 0) {
4101 if (retval != EJUSTRETURN)
4102 m_freem(m);
4103 goto next;
4104 }
4105 }
4106 /*
4107 * Strip away the M_PROTO1 bit prior to sending the packet
4108 * to the driver, as this flag may be used by the driver.
4109 */
4110 m->m_flags &= ~M_PROTO1;
4111
4112 /*
4113 * If the underlying interface is not capable of handling a
4114 * packet whose data portion spans across physically disjoint
4115 * pages, we need to "normalize" the packet so that we pass
4116 * down a chain of mbufs where each mbuf points to a span that
4117 * resides within a single system page. If the packet does
4118 * not cross a page boundary, the following is a no-op.
4119 */
4120 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4121 if ((m = m_normalize(m)) == NULL)
4122 goto next;
4123 }
4124
4125 /*
4126 * If this is a TSO packet, make sure the interface still
4127 * advertises TSO capability.
4128 */
4129 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4130 retval = EMSGSIZE;
4131 m_freem(m);
4132 goto cleanup;
4133 }
4134
4135 /*
4136 * If the packet service class is not background,
4137 * update the timestamp to indicate recent activity
4138 * on a foreground socket.
4139 */
4140 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
4141 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
4142 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
4143 ifp->if_fg_sendts = net_uptime();
4144
4145 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
4146 ifp->if_rt_sendts = net_uptime();
4147 }
4148
4149 ifp_inc_traffic_class_out(ifp, m);
4150 pktap_output(ifp, proto_family, m, pre, post);
4151
4152 /*
4153 * Count the number of elements in the mbuf chain
4154 */
4155 if (tx_chain_len_count) {
4156 dlil_count_chain_len(m, &tx_chain_len_stats);
4157 }
4158
4159 /*
4160 * Finally, call the driver.
4161 */
4162 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4163 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4164 flen += (m_pktlen(m) - (pre + post));
4165 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4166 }
4167 *send_tail = m;
4168 send_tail = &m->m_nextpkt;
4169 } else {
4170 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4171 flen = (m_pktlen(m) - (pre + post));
4172 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4173 } else {
4174 flen = 0;
4175 }
4176 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4177 0, 0, 0, 0, 0);
4178 retval = (*ifp->if_output)(ifp, m);
4179 if (retval == EQFULL || retval == EQSUSPENDED) {
4180 if (adv != NULL && adv->code == FADV_SUCCESS) {
4181 adv->code = (retval == EQFULL ?
4182 FADV_FLOW_CONTROLLED :
4183 FADV_SUSPENDED);
4184 }
4185 retval = 0;
4186 }
4187 if (retval == 0 && flen > 0) {
4188 fbytes += flen;
4189 fpkts++;
4190 }
4191 if (retval != 0 && dlil_verbose) {
4192 printf("%s: output error on %s retval = %d\n",
4193 __func__, if_name(ifp),
4194 retval);
4195 }
4196 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4197 0, 0, 0, 0, 0);
4198 }
4199 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4200
4201 next:
4202 m = packetlist;
4203 if (m != NULL) {
4204 packetlist = packetlist->m_nextpkt;
4205 m->m_nextpkt = NULL;
4206 }
4207 } while (m != NULL);
4208
4209 if (send_head != NULL) {
4210 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4211 0, 0, 0, 0, 0);
4212 if (ifp->if_eflags & IFEF_SENDLIST) {
4213 retval = (*ifp->if_output)(ifp, send_head);
4214 if (retval == EQFULL || retval == EQSUSPENDED) {
4215 if (adv != NULL) {
4216 adv->code = (retval == EQFULL ?
4217 FADV_FLOW_CONTROLLED :
4218 FADV_SUSPENDED);
4219 }
4220 retval = 0;
4221 }
4222 if (retval == 0 && flen > 0) {
4223 fbytes += flen;
4224 fpkts++;
4225 }
4226 if (retval != 0 && dlil_verbose) {
4227 printf("%s: output error on %s retval = %d\n",
4228 __func__, if_name(ifp), retval);
4229 }
4230 } else {
4231 struct mbuf *send_m;
4232 int enq_cnt = 0;
4233 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4234 while (send_head != NULL) {
4235 send_m = send_head;
4236 send_head = send_m->m_nextpkt;
4237 send_m->m_nextpkt = NULL;
4238 retval = (*ifp->if_output)(ifp, send_m);
4239 if (retval == EQFULL || retval == EQSUSPENDED) {
4240 if (adv != NULL) {
4241 adv->code = (retval == EQFULL ?
4242 FADV_FLOW_CONTROLLED :
4243 FADV_SUSPENDED);
4244 }
4245 retval = 0;
4246 }
4247 if (retval == 0) {
4248 enq_cnt++;
4249 if (flen > 0)
4250 fpkts++;
4251 }
4252 if (retval != 0 && dlil_verbose) {
4253 printf("%s: output error on %s retval = %d\n",
4254 __func__, if_name(ifp), retval);
4255 }
4256 }
4257 if (enq_cnt > 0) {
4258 fbytes += flen;
4259 ifnet_start(ifp);
4260 }
4261 }
4262 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4263 }
4264
4265 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4266
4267 cleanup:
4268 if (fbytes > 0)
4269 ifp->if_fbytes += fbytes;
4270 if (fpkts > 0)
4271 ifp->if_fpackets += fpkts;
4272 if (proto != NULL)
4273 if_proto_free(proto);
4274 if (packetlist) /* if any packets are left, clean up */
4275 mbuf_freem_list(packetlist);
4276 if (retval == EJUSTRETURN)
4277 retval = 0;
4278 if (iorefcnt == 1)
4279 ifnet_decr_iorefcnt(ifp);
4280
4281 return (retval);
4282 }
4283
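/*
 * Minimal caller-side sketch for dlil_output() (illustrative only; the
 * function name and the PF_INET choice are assumptions, not from the
 * original source). The caller is expected to hold the protocol domain
 * lock as described above, and can map the flow-advisory code back to
 * the transport/socket layer.
 */
static errno_t
example_proto_send(struct ifnet *ifp, mbuf_t chain,
    const struct sockaddr *dest)
{
	struct flowadv adv;
	errno_t err;

	adv.code = FADV_SUCCESS;
	err = dlil_output(ifp, PF_INET, chain, NULL, dest, 0, &adv);
	if (err == 0 && adv.code == FADV_FLOW_CONTROLLED) {
		/* interface queue is full; ask the flow to back off */
	} else if (err == 0 && adv.code == FADV_SUSPENDED) {
		/* interface queue is suspended; treat similarly */
	}
	return (err);
}
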
4284 errno_t
4285 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4286 void *ioctl_arg)
4287 {
4288 struct ifnet_filter *filter;
4289 int retval = EOPNOTSUPP;
4290 int result = 0;
4291
4292 if (ifp == NULL || ioctl_code == 0)
4293 return (EINVAL);
4294
4295 /* Get an io ref count if the interface is attached */
4296 if (!ifnet_is_attached(ifp, 1))
4297 return (EOPNOTSUPP);
4298
4299 /* Run the interface filters first.
4300 * We want to run all filters before calling the protocol,
4301 * interface family, or interface.
4302 */
4303 lck_mtx_lock_spin(&ifp->if_flt_lock);
4304 /* prevent filter list from changing in case we drop the lock */
4305 if_flt_monitor_busy(ifp);
4306 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4307 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4308 filter->filt_protocol == proto_fam)) {
4309 lck_mtx_unlock(&ifp->if_flt_lock);
4310
4311 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4312 proto_fam, ioctl_code, ioctl_arg);
4313
4314 lck_mtx_lock_spin(&ifp->if_flt_lock);
4315
4316 /* Only update retval if no one has handled the ioctl */
4317 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4318 if (result == ENOTSUP)
4319 result = EOPNOTSUPP;
4320 retval = result;
4321 if (retval != 0 && retval != EOPNOTSUPP) {
4322 /* we're done with the filter list */
4323 if_flt_monitor_unbusy(ifp);
4324 lck_mtx_unlock(&ifp->if_flt_lock);
4325 goto cleanup;
4326 }
4327 }
4328 }
4329 }
4330 /* we're done with the filter list */
4331 if_flt_monitor_unbusy(ifp);
4332 lck_mtx_unlock(&ifp->if_flt_lock);
4333
4334 /* Allow the protocol to handle the ioctl */
4335 if (proto_fam != 0) {
4336 struct if_proto *proto;
4337
4338 /* callee holds a proto refcnt upon success */
4339 ifnet_lock_shared(ifp);
4340 proto = find_attached_proto(ifp, proto_fam);
4341 ifnet_lock_done(ifp);
4342 if (proto != NULL) {
4343 proto_media_ioctl ioctlp =
4344 (proto->proto_kpi == kProtoKPI_v1 ?
4345 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
4346 result = EOPNOTSUPP;
4347 if (ioctlp != NULL)
4348 result = ioctlp(ifp, proto_fam, ioctl_code,
4349 ioctl_arg);
4350 if_proto_free(proto);
4351
4352 /* Only update retval if no one has handled the ioctl */
4353 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4354 if (result == ENOTSUP)
4355 result = EOPNOTSUPP;
4356 retval = result;
4357 if (retval && retval != EOPNOTSUPP)
4358 goto cleanup;
4359 }
4360 }
4361 }
4362
4363 /* retval is either 0 or EOPNOTSUPP */
4364
4365 /*
4366 * Let the interface handle this ioctl.
4367 * If it returns EOPNOTSUPP, ignore that, we may have
4368 * already handled this in the protocol or family.
4369 */
4370 if (ifp->if_ioctl)
4371 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
4372
4373 /* Only update retval if no one has handled the ioctl */
4374 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4375 if (result == ENOTSUP)
4376 result = EOPNOTSUPP;
4377 retval = result;
4378 if (retval && retval != EOPNOTSUPP) {
4379 goto cleanup;
4380 }
4381 }
4382
4383 cleanup:
4384 if (retval == EJUSTRETURN)
4385 retval = 0;
4386
4387 ifnet_decr_iorefcnt(ifp);
4388
4389 return (retval);
4390 }
4391
4392 __private_extern__ errno_t
4393 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4394 {
4395 errno_t error = 0;
4396
4397
4398 if (ifp->if_set_bpf_tap) {
4399 /* Get an io reference on the interface if it is attached */
4400 if (!ifnet_is_attached(ifp, 1))
4401 return (ENXIO);
4402 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4403 ifnet_decr_iorefcnt(ifp);
4404 }
4405 return (error);
4406 }
4407
4408 errno_t
4409 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4410 struct sockaddr *ll_addr, size_t ll_len)
4411 {
4412 errno_t result = EOPNOTSUPP;
4413 struct if_proto *proto;
4414 const struct sockaddr *verify;
4415 proto_media_resolve_multi resolvep;
4416
4417 if (!ifnet_is_attached(ifp, 1))
4418 return (result);
4419
4420 bzero(ll_addr, ll_len);
4421
4422 /* Call the protocol first; callee holds a proto refcnt upon success */
4423 ifnet_lock_shared(ifp);
4424 proto = find_attached_proto(ifp, proto_addr->sa_family);
4425 ifnet_lock_done(ifp);
4426 if (proto != NULL) {
4427 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4428 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
4429 if (resolvep != NULL)
4430 result = resolvep(ifp, proto_addr,
4431 (struct sockaddr_dl*)(void *)ll_addr, ll_len);
4432 if_proto_free(proto);
4433 }
4434
4435 /* Let the interface verify the multicast address */
4436 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4437 if (result == 0)
4438 verify = ll_addr;
4439 else
4440 verify = proto_addr;
4441 result = ifp->if_check_multi(ifp, verify);
4442 }
4443
4444 ifnet_decr_iorefcnt(ifp);
4445 return (result);
4446 }
4447
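/*
 * Illustrative sketch (an assumption, not from the original source):
 * mapping an IPv4 multicast group address to its link-layer form through
 * dlil_resolve_multi(). The ll_addr storage is zeroed by the callee and
 * must be large enough for a sockaddr_dl carrying the interface's
 * hardware address.
 */
static errno_t
example_resolve_group(struct ifnet *ifp, const struct sockaddr_in *group)
{
	struct sockaddr_dl sdl;

	return (dlil_resolve_multi(ifp, (const struct sockaddr *)group,
	    (struct sockaddr *)&sdl, sizeof (sdl)));
}
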
4448 __private_extern__ errno_t
4449 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4450 const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
4451 const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
4452 {
4453 struct if_proto *proto;
4454 errno_t result = 0;
4455
4456 /* callee holds a proto refcnt upon success */
4457 ifnet_lock_shared(ifp);
4458 proto = find_attached_proto(ifp, target_proto->sa_family);
4459 ifnet_lock_done(ifp);
4460 if (proto == NULL) {
4461 result = ENOTSUP;
4462 } else {
4463 proto_media_send_arp arpp;
4464 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4465 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4466 if (arpp == NULL) {
4467 result = ENOTSUP;
4468 } else {
4469 switch (arpop) {
4470 case ARPOP_REQUEST:
4471 arpstat.txrequests++;
4472 if (target_hw != NULL)
4473 arpstat.txurequests++;
4474 break;
4475 case ARPOP_REPLY:
4476 arpstat.txreplies++;
4477 break;
4478 }
4479 result = arpp(ifp, arpop, sender_hw, sender_proto,
4480 target_hw, target_proto);
4481 }
4482 if_proto_free(proto);
4483 }
4484
4485 return (result);
4486 }
4487
4488 struct net_thread_marks { };
4489 static const struct net_thread_marks net_thread_marks_base = { };
4490
4491 __private_extern__ const net_thread_marks_t net_thread_marks_none =
4492 &net_thread_marks_base;
4493
4494 __private_extern__ net_thread_marks_t
4495 net_thread_marks_push(u_int32_t push)
4496 {
4497 static const char *const base = (const void*)&net_thread_marks_base;
4498 u_int32_t pop = 0;
4499
4500 if (push != 0) {
4501 struct uthread *uth = get_bsdthread_info(current_thread());
4502
4503 pop = push & ~uth->uu_network_marks;
4504 if (pop != 0)
4505 uth->uu_network_marks |= pop;
4506 }
4507
4508 return ((net_thread_marks_t)&base[pop]);
4509 }
4510
4511 __private_extern__ net_thread_marks_t
4512 net_thread_unmarks_push(u_int32_t unpush)
4513 {
4514 static const char *const base = (const void*)&net_thread_marks_base;
4515 u_int32_t unpop = 0;
4516
4517 if (unpush != 0) {
4518 struct uthread *uth = get_bsdthread_info(current_thread());
4519
4520 unpop = unpush & uth->uu_network_marks;
4521 if (unpop != 0)
4522 uth->uu_network_marks &= ~unpop;
4523 }
4524
4525 return ((net_thread_marks_t)&base[unpop]);
4526 }
4527
4528 __private_extern__ void
4529 net_thread_marks_pop(net_thread_marks_t popx)
4530 {
4531 static const char *const base = (const void*)&net_thread_marks_base;
4532 const ptrdiff_t pop = (const char *)popx - (const char *)base;
4533
4534 if (pop != 0) {
4535 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4536 struct uthread *uth = get_bsdthread_info(current_thread());
4537
4538 VERIFY((pop & ones) == pop);
4539 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4540 uth->uu_network_marks &= ~pop;
4541 }
4542 }
4543
4544 __private_extern__ void
4545 net_thread_unmarks_pop(net_thread_marks_t unpopx)
4546 {
4547 static const char *const base = (const void*)&net_thread_marks_base;
4548 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
4549
4550 if (unpop != 0) {
4551 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4552 struct uthread *uth = get_bsdthread_info(current_thread());
4553
4554 VERIFY((unpop & ones) == unpop);
4555 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4556 uth->uu_network_marks |= unpop;
4557 }
4558 }
4559
4560 __private_extern__ u_int32_t
4561 net_thread_is_marked(u_int32_t check)
4562 {
4563 if (check != 0) {
4564 struct uthread *uth = get_bsdthread_info(current_thread());
4565 return (uth->uu_network_marks & check);
4566 }
4567 else
4568 return (0);
4569 }
4570
4571 __private_extern__ u_int32_t
4572 net_thread_is_unmarked(u_int32_t check)
4573 {
4574 if (check != 0) {
4575 struct uthread *uth = get_bsdthread_info(current_thread());
4576 return (~uth->uu_network_marks & check);
4577 }
4578 else
4579 return (0);
4580 }
4581
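/*
 * Usage sketch for the thread-mark helpers above (illustrative; the
 * surrounding function is hypothetical, and NET_THREAD_HELD_PF is assumed
 * to be one of the defined mark bits). A caller pushes a mark before
 * doing work that other layers must detect, and pops it afterwards so
 * the thread's previous mark state is restored.
 */
static void
example_marked_work(void)
{
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
	/*
	 * ... work during which net_thread_is_marked(NET_THREAD_HELD_PF)
	 * returns non-zero for this thread ...
	 */
	net_thread_marks_pop(marks);
}
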
4582 static __inline__ int
4583 _is_announcement(const struct sockaddr_in * sender_sin,
4584 const struct sockaddr_in * target_sin)
4585 {
4586 if (sender_sin == NULL) {
4587 return (FALSE);
4588 }
4589 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4590 }
4591
4592 __private_extern__ errno_t
4593 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
4594 const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
4595 const struct sockaddr* target_proto0, u_int32_t rtflags)
4596 {
4597 errno_t result = 0;
4598 const struct sockaddr_in * sender_sin;
4599 const struct sockaddr_in * target_sin;
4600 struct sockaddr_inarp target_proto_sinarp;
4601 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
4602
4603 if (target_proto == NULL || (sender_proto != NULL &&
4604 sender_proto->sa_family != target_proto->sa_family))
4605 return (EINVAL);
4606
4607 /*
4608 * If the target is a (default) router, provide that
4609 * information to the send_arp callback routine.
4610 */
4611 if (rtflags & RTF_ROUTER) {
4612 bcopy(target_proto, &target_proto_sinarp,
4613 sizeof (struct sockaddr_in));
4614 target_proto_sinarp.sin_other |= SIN_ROUTER;
4615 target_proto = (struct sockaddr *)&target_proto_sinarp;
4616 }
4617
4618 /*
4619 * If this is an ARP request and the target IP is IPv4LL,
4620 * send the request on all interfaces. The exception is
4621 * an announcement, which must only appear on the specific
4622 * interface.
4623 */
4624 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4625 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
4626 if (target_proto->sa_family == AF_INET &&
4627 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4628 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4629 !_is_announcement(target_sin, sender_sin)) {
4630 ifnet_t *ifp_list;
4631 u_int32_t count;
4632 u_int32_t ifp_on;
4633
4634 result = ENOTSUP;
4635
4636 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4637 for (ifp_on = 0; ifp_on < count; ifp_on++) {
4638 errno_t new_result;
4639 ifaddr_t source_hw = NULL;
4640 ifaddr_t source_ip = NULL;
4641 struct sockaddr_in source_ip_copy;
4642 struct ifnet *cur_ifp = ifp_list[ifp_on];
4643
4644 /*
4645 * Only ARP on interfaces marked for IPv4LL
4646 * ARPing. This may mean that we don't ARP on
4647 * the interface the subnet route points to.
4648 */
4649 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
4650 continue;
4651
4652 /* Find the source IP address */
4653 ifnet_lock_shared(cur_ifp);
4654 source_hw = cur_ifp->if_lladdr;
4655 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
4656 ifa_link) {
4657 IFA_LOCK(source_ip);
4658 if (source_ip->ifa_addr != NULL &&
4659 source_ip->ifa_addr->sa_family ==
4660 AF_INET) {
4661 /* Copy the source IP address */
4662 source_ip_copy =
4663 *(struct sockaddr_in *)
4664 (void *)source_ip->ifa_addr;
4665 IFA_UNLOCK(source_ip);
4666 break;
4667 }
4668 IFA_UNLOCK(source_ip);
4669 }
4670
4671 /* No IP source address; don't ARP */
4672 if (source_ip == NULL) {
4673 ifnet_lock_done(cur_ifp);
4674 continue;
4675 }
4676
4677 IFA_ADDREF(source_hw);
4678 ifnet_lock_done(cur_ifp);
4679
4680 /* Send the ARP */
4681 new_result = dlil_send_arp_internal(cur_ifp,
4682 arpop, (struct sockaddr_dl *)(void *)
4683 source_hw->ifa_addr,
4684 (struct sockaddr *)&source_ip_copy, NULL,
4685 target_proto);
4686
4687 IFA_REMREF(source_hw);
4688 if (result == ENOTSUP) {
4689 result = new_result;
4690 }
4691 }
4692 ifnet_list_free(ifp_list);
4693 }
4694 } else {
4695 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
4696 sender_proto, target_hw, target_proto);
4697 }
4698
4699 return (result);
4700 }
4701
4702 /*
4703 * Caller must hold ifnet head lock.
4704 */
4705 static int
4706 ifnet_lookup(struct ifnet *ifp)
4707 {
4708 struct ifnet *_ifp;
4709
4710 lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
4711 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4712 if (_ifp == ifp)
4713 break;
4714 }
4715 return (_ifp != NULL);
4716 }
4717 /*
4718 * The caller has to pass a non-zero refio argument to get an
4719 * IO reference count. This will prevent ifnet_detach from
4720 * being called when there are outstanding io reference counts.
4721 */
4722 int
4723 ifnet_is_attached(struct ifnet *ifp, int refio)
4724 {
4725 int ret;
4726
4727 lck_mtx_lock_spin(&ifp->if_ref_lock);
4728 if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
4729 IFRF_ATTACHED))) {
4730 if (refio > 0)
4731 ifp->if_refio++;
4732 }
4733 lck_mtx_unlock(&ifp->if_ref_lock);
4734
4735 return (ret);
4736 }
4737
4738 void
4739 ifnet_decr_iorefcnt(struct ifnet *ifp)
4740 {
4741 lck_mtx_lock_spin(&ifp->if_ref_lock);
4742 VERIFY(ifp->if_refio > 0);
4743 VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
4744 ifp->if_refio--;
4745
4746 /* If there are no more outstanding IO references, wake up the
4747 * ifnet_detach thread if the detaching flag is set.
4748 */
4749 if (ifp->if_refio == 0 &&
4750 (ifp->if_refflags & IFRF_DETACHING) != 0) {
4751 wakeup(&(ifp->if_refio));
4752 }
4753 lck_mtx_unlock(&ifp->if_ref_lock);
4754 }
4755
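/*
 * Sketch of the IO reference pattern built on the two routines above
 * (illustrative; the wrapper function itself is hypothetical). Holding
 * an IO reference keeps ifnet_detach() from completing while the
 * interface is being used, and the reference must be dropped when done.
 */
static errno_t
example_with_io_ref(struct ifnet *ifp)
{
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	/* ... safely dereference ifp state here ... */

	ifnet_decr_iorefcnt(ifp);
	return (0);
}
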
4756 static void
4757 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
4758 {
4759 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
4760 ctrace_t *tr;
4761 u_int32_t idx;
4762 u_int16_t *cnt;
4763
4764 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
4765 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
4766 /* NOTREACHED */
4767 }
4768
4769 if (refhold) {
4770 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
4771 tr = dl_if_dbg->dldbg_if_refhold;
4772 } else {
4773 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
4774 tr = dl_if_dbg->dldbg_if_refrele;
4775 }
4776
4777 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
4778 ctrace_record(&tr[idx]);
4779 }
4780
4781 errno_t
4782 dlil_if_ref(struct ifnet *ifp)
4783 {
4784 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4785
4786 if (dl_if == NULL)
4787 return (EINVAL);
4788
4789 lck_mtx_lock_spin(&dl_if->dl_if_lock);
4790 ++dl_if->dl_if_refcnt;
4791 if (dl_if->dl_if_refcnt == 0) {
4792 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
4793 /* NOTREACHED */
4794 }
4795 if (dl_if->dl_if_trace != NULL)
4796 (*dl_if->dl_if_trace)(dl_if, TRUE);
4797 lck_mtx_unlock(&dl_if->dl_if_lock);
4798
4799 return (0);
4800 }
4801
4802 errno_t
4803 dlil_if_free(struct ifnet *ifp)
4804 {
4805 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4806
4807 if (dl_if == NULL)
4808 return (EINVAL);
4809
4810 lck_mtx_lock_spin(&dl_if->dl_if_lock);
4811 if (dl_if->dl_if_refcnt == 0) {
4812 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
4813 /* NOTREACHED */
4814 }
4815 --dl_if->dl_if_refcnt;
4816 if (dl_if->dl_if_trace != NULL)
4817 (*dl_if->dl_if_trace)(dl_if, FALSE);
4818 lck_mtx_unlock(&dl_if->dl_if_lock);
4819
4820 return (0);
4821 }
4822
4823 static errno_t
4824 dlil_attach_protocol_internal(struct if_proto *proto,
4825 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
4826 {
4827 struct kev_dl_proto_data ev_pr_data;
4828 struct ifnet *ifp = proto->ifp;
4829 int retval = 0;
4830 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
4831 struct if_proto *prev_proto;
4832 struct if_proto *_proto;
4833
4834 /* callee holds a proto refcnt upon success */
4835 ifnet_lock_exclusive(ifp);
4836 _proto = find_attached_proto(ifp, proto->protocol_family);
4837 if (_proto != NULL) {
4838 ifnet_lock_done(ifp);
4839 if_proto_free(_proto);
4840 return (EEXIST);
4841 }
4842
4843 /*
4844 * Call family module add_proto routine so it can refine the
4845 * demux descriptors as it wishes.
4846 */
4847 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
4848 demux_count);
4849 if (retval) {
4850 ifnet_lock_done(ifp);
4851 return (retval);
4852 }
4853
4854 /*
4855 * Insert the protocol in the hash
4856 */
4857 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
4858 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
4859 prev_proto = SLIST_NEXT(prev_proto, next_hash);
4860 if (prev_proto)
4861 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
4862 else
4863 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
4864 proto, next_hash);
4865
4866 /* hold a proto refcnt for attach */
4867 if_proto_ref(proto);
4868
4869 /*
4870 * The reserved field carries the number of protocols still attached
4871 * (subject to change)
4872 */
4873 ev_pr_data.proto_family = proto->protocol_family;
4874 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
4875 ifnet_lock_done(ifp);
4876
4877 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
4878 (struct net_event_data *)&ev_pr_data,
4879 sizeof (struct kev_dl_proto_data));
4880 return (retval);
4881 }
4882
4883 errno_t
4884 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
4885 const struct ifnet_attach_proto_param *proto_details)
4886 {
4887 int retval = 0;
4888 struct if_proto *ifproto = NULL;
4889
4890 ifnet_head_lock_shared();
4891 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
4892 retval = EINVAL;
4893 goto end;
4894 }
4895 /* Check that the interface is in the global list */
4896 if (!ifnet_lookup(ifp)) {
4897 retval = ENXIO;
4898 goto end;
4899 }
4900
4901 ifproto = zalloc(dlif_proto_zone);
4902 if (ifproto == NULL) {
4903 retval = ENOMEM;
4904 goto end;
4905 }
4906 bzero(ifproto, dlif_proto_size);
4907
4908 /* refcnt held above during lookup */
4909 ifproto->ifp = ifp;
4910 ifproto->protocol_family = protocol;
4911 ifproto->proto_kpi = kProtoKPI_v1;
4912 ifproto->kpi.v1.input = proto_details->input;
4913 ifproto->kpi.v1.pre_output = proto_details->pre_output;
4914 ifproto->kpi.v1.event = proto_details->event;
4915 ifproto->kpi.v1.ioctl = proto_details->ioctl;
4916 ifproto->kpi.v1.detached = proto_details->detached;
4917 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
4918 ifproto->kpi.v1.send_arp = proto_details->send_arp;
4919
4920 retval = dlil_attach_protocol_internal(ifproto,
4921 proto_details->demux_list, proto_details->demux_count);
4922
4923 if (dlil_verbose) {
4924 printf("%s: attached v1 protocol %d\n", if_name(ifp),
4925 protocol);
4926 }
4927
4928 end:
4929 if (retval != 0 && retval != EEXIST && ifp != NULL) {
4930 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
4931 if_name(ifp), protocol, retval);
4932 }
4933 ifnet_head_done();
4934 if (retval != 0 && ifproto != NULL)
4935 zfree(dlif_proto_zone, ifproto);
4936 return (retval);
4937 }
4938
4939 errno_t
4940 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
4941 const struct ifnet_attach_proto_param_v2 *proto_details)
4942 {
4943 int retval = 0;
4944 struct if_proto *ifproto = NULL;
4945
4946 ifnet_head_lock_shared();
4947 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
4948 retval = EINVAL;
4949 goto end;
4950 }
4951 /* Check that the interface is in the global list */
4952 if (!ifnet_lookup(ifp)) {
4953 retval = ENXIO;
4954 goto end;
4955 }
4956
4957 ifproto = zalloc(dlif_proto_zone);
4958 if (ifproto == NULL) {
4959 retval = ENOMEM;
4960 goto end;
4961 }
4962 bzero(ifproto, sizeof(*ifproto));
4963
4964 /* refcnt held above during lookup */
4965 ifproto->ifp = ifp;
4966 ifproto->protocol_family = protocol;
4967 ifproto->proto_kpi = kProtoKPI_v2;
4968 ifproto->kpi.v2.input = proto_details->input;
4969 ifproto->kpi.v2.pre_output = proto_details->pre_output;
4970 ifproto->kpi.v2.event = proto_details->event;
4971 ifproto->kpi.v2.ioctl = proto_details->ioctl;
4972 ifproto->kpi.v2.detached = proto_details->detached;
4973 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
4974 ifproto->kpi.v2.send_arp = proto_details->send_arp;
4975
4976 retval = dlil_attach_protocol_internal(ifproto,
4977 proto_details->demux_list, proto_details->demux_count);
4978
4979 if (dlil_verbose) {
4980 printf("%s: attached v2 protocol %d\n", if_name(ifp),
4981 protocol);
4982 }
4983
4984 end:
4985 if (retval != 0 && retval != EEXIST && ifp != NULL) {
4986 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
4987 if_name(ifp), protocol, retval);
4988 }
4989 ifnet_head_done();
4990 if (retval != 0 && ifproto != NULL)
4991 zfree(dlif_proto_zone, ifproto);
4992 return (retval);
4993 }
4994
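/*
 * Minimal v2 attach sketch (illustrative; the callback and function names
 * are assumptions, not from the original source). A client supplies a
 * demux descriptor for its ethertype plus a v2 input handler, leaving the
 * optional callbacks NULL, and registers the protocol on the interface.
 */
static errno_t
example_proto_input_v2(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet)
{
#pragma unused(ifp, protocol)
	/* a real handler would pass the packet up; this stub just drops it */
	mbuf_freem(packet);
	return (0);
}

static errno_t
example_attach_proto_v2(ifnet_t ifp)
{
	struct ifnet_attach_proto_param_v2 param;
	struct ifnet_demux_desc desc;
	u_int16_t ethertype = htons(0x0800);	/* IPv4 ethertype, for example */

	bzero(&desc, sizeof (desc));
	desc.type = DLIL_DESC_ETYPE2;
	desc.data = &ethertype;
	desc.datalen = sizeof (ethertype);

	bzero(&param, sizeof (param));
	param.demux_list = &desc;
	param.demux_count = 1;
	param.input = example_proto_input_v2;

	return (ifnet_attach_protocol_v2(ifp, PF_INET, &param));
}
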
4995 errno_t
4996 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
4997 {
4998 struct if_proto *proto = NULL;
4999 int retval = 0;
5000
5001 if (ifp == NULL || proto_family == 0) {
5002 retval = EINVAL;
5003 goto end;
5004 }
5005
5006 ifnet_lock_exclusive(ifp);
5007 /* callee holds a proto refcnt upon success */
5008 proto = find_attached_proto(ifp, proto_family);
5009 if (proto == NULL) {
5010 retval = ENXIO;
5011 ifnet_lock_done(ifp);
5012 goto end;
5013 }
5014
5015 /* call family module del_proto */
5016 if (ifp->if_del_proto)
5017 ifp->if_del_proto(ifp, proto->protocol_family);
5018
5019 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
5020 proto, if_proto, next_hash);
5021
5022 if (proto->proto_kpi == kProtoKPI_v1) {
5023 proto->kpi.v1.input = ifproto_media_input_v1;
5024 proto->kpi.v1.pre_output = ifproto_media_preout;
5025 proto->kpi.v1.event = ifproto_media_event;
5026 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5027 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5028 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5029 } else {
5030 proto->kpi.v2.input = ifproto_media_input_v2;
5031 proto->kpi.v2.pre_output = ifproto_media_preout;
5032 proto->kpi.v2.event = ifproto_media_event;
5033 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5034 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5035 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5036 }
5037 proto->detached = 1;
5038 ifnet_lock_done(ifp);
5039
5040 if (dlil_verbose) {
5041 printf("%s: detached %s protocol %d\n", if_name(ifp),
5042 (proto->proto_kpi == kProtoKPI_v1) ?
5043 "v1" : "v2", proto_family);
5044 }
5045
5046 /* release proto refcnt held during protocol attach */
5047 if_proto_free(proto);
5048
5049 /*
5050 * Release proto refcnt held during lookup; the rest of
5051 * protocol detach steps will happen when the last proto
5052 * reference is released.
5053 */
5054 if_proto_free(proto);
5055
5056 end:
5057 return (retval);
5058 }
5059
5060
5061 static errno_t
5062 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
5063 struct mbuf *packet, char *header)
5064 {
5065 #pragma unused(ifp, protocol, packet, header)
5066 return (ENXIO);
5067 }
5068
5069 static errno_t
5070 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
5071 struct mbuf *packet)
5072 {
5073 #pragma unused(ifp, protocol, packet)
5074 return (ENXIO);
5075
5076 }
5077
5078 static errno_t
5079 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
5080 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
5081 char *link_layer_dest)
5082 {
5083 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
5084 return (ENXIO);
5085
5086 }
5087
5088 static void
5089 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
5090 const struct kev_msg *event)
5091 {
5092 #pragma unused(ifp, protocol, event)
5093 }
5094
5095 static errno_t
5096 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
5097 unsigned long command, void *argument)
5098 {
5099 #pragma unused(ifp, protocol, command, argument)
5100 return (ENXIO);
5101 }
5102
5103 static errno_t
5104 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
5105 struct sockaddr_dl *out_ll, size_t ll_len)
5106 {
5107 #pragma unused(ifp, proto_addr, out_ll, ll_len)
5108 return (ENXIO);
5109 }
5110
5111 static errno_t
5112 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
5113 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5114 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5115 {
5116 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
5117 return (ENXIO);
5118 }
5119
5120 extern int if_next_index(void);
5121 extern int tcp_ecn_outbound;
5122
5123 errno_t
5124 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
5125 {
5126 struct ifnet *tmp_if;
5127 struct ifaddr *ifa;
5128 struct if_data_internal if_data_saved;
5129 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5130 struct dlil_threading_info *dl_inp;
5131 u_int32_t sflags = 0;
5132 int err;
5133
5134 if (ifp == NULL)
5135 return (EINVAL);
5136
5137 /*
5138 * Serialize ifnet attach using dlil_ifnet_lock, in order to
5139 * prevent the interface from being configured while it is
5140 * embryonic, as ifnet_head_lock is dropped and reacquired
5141 * below prior to marking the ifnet with IFRF_ATTACHED.
5142 */
5143 dlil_if_lock();
5144 ifnet_head_lock_exclusive();
5145 /* Verify we aren't already on the list */
5146 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
5147 if (tmp_if == ifp) {
5148 ifnet_head_done();
5149 dlil_if_unlock();
5150 return (EEXIST);
5151 }
5152 }
5153
5154 lck_mtx_lock_spin(&ifp->if_ref_lock);
5155 if (ifp->if_refflags & IFRF_ATTACHED) {
5156 panic_plain("%s: flags mismatch (attached set) ifp=%p",
5157 __func__, ifp);
5158 /* NOTREACHED */
5159 }
5160 lck_mtx_unlock(&ifp->if_ref_lock);
5161
5162 ifnet_lock_exclusive(ifp);
5163
5164 /* Sanity check */
5165 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5166 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5167
5168 if (ll_addr != NULL) {
5169 if (ifp->if_addrlen == 0) {
5170 ifp->if_addrlen = ll_addr->sdl_alen;
5171 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
5172 ifnet_lock_done(ifp);
5173 ifnet_head_done();
5174 dlil_if_unlock();
5175 return (EINVAL);
5176 }
5177 }
5178
5179 /*
5180 * Allow interfaces without protocol families to attach
5181 * only if they have the necessary fields filled out.
5182 */
5183 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
5184 DLIL_PRINTF("%s: Attempt to attach interface without "
5185 "family module - %d\n", __func__, ifp->if_family);
5186 ifnet_lock_done(ifp);
5187 ifnet_head_done();
5188 dlil_if_unlock();
5189 return (ENODEV);
5190 }
5191
5192 /* Allocate protocol hash table */
5193 VERIFY(ifp->if_proto_hash == NULL);
5194 ifp->if_proto_hash = zalloc(dlif_phash_zone);
5195 if (ifp->if_proto_hash == NULL) {
5196 ifnet_lock_done(ifp);
5197 ifnet_head_done();
5198 dlil_if_unlock();
5199 return (ENOBUFS);
5200 }
5201 bzero(ifp->if_proto_hash, dlif_phash_size);
5202
5203 lck_mtx_lock_spin(&ifp->if_flt_lock);
5204 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5205 TAILQ_INIT(&ifp->if_flt_head);
5206 VERIFY(ifp->if_flt_busy == 0);
5207 VERIFY(ifp->if_flt_waiters == 0);
5208 lck_mtx_unlock(&ifp->if_flt_lock);
5209
5210 VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
5211 TAILQ_INIT(&ifp->if_prefixhead);
5212
5213 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
5214 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
5215 LIST_INIT(&ifp->if_multiaddrs);
5216 }
5217
5218 VERIFY(ifp->if_allhostsinm == NULL);
5219 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5220 TAILQ_INIT(&ifp->if_addrhead);
5221
5222 if (ifp->if_index == 0) {
5223 int idx = if_next_index();
5224
5225 if (idx == -1) {
5226 ifp->if_index = 0;
5227 ifnet_lock_done(ifp);
5228 ifnet_head_done();
5229 dlil_if_unlock();
5230 return (ENOBUFS);
5231 }
5232 ifp->if_index = idx;
5233 }
5234 /* There should not be anything occupying this slot */
5235 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5236
5237 /* allocate (if needed) and initialize a link address */
5238 VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
5239 ifa = dlil_alloc_lladdr(ifp, ll_addr);
5240 if (ifa == NULL) {
5241 ifnet_lock_done(ifp);
5242 ifnet_head_done();
5243 dlil_if_unlock();
5244 return (ENOBUFS);
5245 }
5246
5247 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
5248 ifnet_addrs[ifp->if_index - 1] = ifa;
5249
5250 /* make this address the first on the list */
5251 IFA_LOCK(ifa);
5252 /* hold a reference for ifnet_addrs[] */
5253 IFA_ADDREF_LOCKED(ifa);
5254 /* if_attach_link_ifa() holds a reference for ifa_link */
5255 if_attach_link_ifa(ifp, ifa);
5256 IFA_UNLOCK(ifa);
5257
5258 #if CONFIG_MACF_NET
5259 mac_ifnet_label_associate(ifp);
5260 #endif
5261
5262 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
5263 ifindex2ifnet[ifp->if_index] = ifp;
5264
5265 /* Hold a reference to the underlying dlil_ifnet */
5266 ifnet_reference(ifp);
5267
5268 /* Clear stats (save and restore other fields that we care about) */
5269 if_data_saved = ifp->if_data;
5270 bzero(&ifp->if_data, sizeof (ifp->if_data));
5271 ifp->if_data.ifi_type = if_data_saved.ifi_type;
5272 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
5273 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
5274 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
5275 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
5276 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
5277 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
5278 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
5279 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
5280 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
5281 ifnet_touch_lastchange(ifp);
5282
5283 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
5284 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);
5285
5286 /* By default, use SFB and enable flow advisory */
5287 sflags = PKTSCHEDF_QALG_SFB;
5288 if (if_flowadv)
5289 sflags |= PKTSCHEDF_QALG_FLOWCTL;
5290
5291 if (if_delaybased_queue)
5292 sflags |= PKTSCHEDF_QALG_DELAYBASED;
5293
5294 /* Initialize transmit queue(s) */
5295 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
5296 if (err != 0) {
5297 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
5298 "err=%d", __func__, ifp, err);
5299 /* NOTREACHED */
5300 }
5301
5302 /* Sanity checks on the input thread storage */
5303 dl_inp = &dl_if->dl_if_inpstorage;
5304 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
5305 VERIFY(dl_inp->input_waiting == 0);
5306 VERIFY(dl_inp->wtot == 0);
5307 VERIFY(dl_inp->ifp == NULL);
5308 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
5309 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
5310 VERIFY(!dl_inp->net_affinity);
5311 VERIFY(ifp->if_inp == NULL);
5312 VERIFY(dl_inp->input_thr == THREAD_NULL);
5313 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
5314 VERIFY(dl_inp->poll_thr == THREAD_NULL);
5315 VERIFY(dl_inp->tag == 0);
5316 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
5317 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
5318 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
5319 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
5320 #if IFNET_INPUT_SANITY_CHK
5321 VERIFY(dl_inp->input_mbuf_cnt == 0);
5322 #endif /* IFNET_INPUT_SANITY_CHK */
5323
5324 /*
5325 * A specific DLIL input thread is created per Ethernet/cellular
5326 * interface or for an interface which supports opportunistic
5327 * input polling. Pseudo interfaces or other types of interfaces
5328 * use the main input thread instead.
5329 */
5330 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
5331 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
5332 ifp->if_inp = dl_inp;
5333 err = dlil_create_input_thread(ifp, ifp->if_inp);
5334 if (err != 0) {
5335 panic_plain("%s: ifp=%p couldn't get an input thread; "
5336 "err=%d", __func__, ifp, err);
5337 /* NOTREACHED */
5338 }
5339 }
5340
5341 /*
5342 * If the driver supports the new transmit model, calculate flow hash
5343 * and create a workloop starter thread to invoke the if_start callback
5344 * where the packets may be dequeued and transmitted.
5345 */
5346 if (ifp->if_eflags & IFEF_TXSTART) {
5347 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
5348 VERIFY(ifp->if_flowhash != 0);
5349
5350 VERIFY(ifp->if_start != NULL);
5351 VERIFY(ifp->if_start_thread == THREAD_NULL);
5352
5353 ifnet_set_start_cycle(ifp, NULL);
5354 ifp->if_start_active = 0;
5355 ifp->if_start_req = 0;
5356 ifp->if_start_flags = 0;
5357 if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
5358 &ifp->if_start_thread)) != KERN_SUCCESS) {
5359 panic_plain("%s: ifp=%p couldn't get a start thread; "
5360 "err=%d", __func__, ifp, err);
5361 /* NOTREACHED */
5362 }
5363 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
5364 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5365 } else {
5366 ifp->if_flowhash = 0;
5367 }
5368
5369 /*
5370 * If the driver supports the new receive model, create a poller
5371 * thread to invoke if_input_poll callback where the packets may
5372 * be dequeued from the driver and processed for reception.
5373 */
5374 if (ifp->if_eflags & IFEF_RXPOLL) {
5375 VERIFY(ifp->if_input_poll != NULL);
5376 VERIFY(ifp->if_input_ctl != NULL);
5377 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5378
5379 ifnet_set_poll_cycle(ifp, NULL);
5380 ifp->if_poll_update = 0;
5381 ifp->if_poll_active = 0;
5382 ifp->if_poll_req = 0;
5383 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
5384 &ifp->if_poll_thread)) != KERN_SUCCESS) {
5385 panic_plain("%s: ifp=%p couldn't get a poll thread; "
5386 "err=%d", __func__, ifp, err);
5387 /* NOTREACHED */
5388 }
5389 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
5390 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5391 }
5392
5393 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5394 VERIFY(ifp->if_desc.ifd_len == 0);
5395 VERIFY(ifp->if_desc.ifd_desc != NULL);
5396
5397 /* Record attach PC stacktrace */
5398 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
5399
5400 ifp->if_updatemcasts = 0;
5401 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
5402 struct ifmultiaddr *ifma;
5403 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
5404 IFMA_LOCK(ifma);
5405 if (ifma->ifma_addr->sa_family == AF_LINK ||
5406 ifma->ifma_addr->sa_family == AF_UNSPEC)
5407 ifp->if_updatemcasts++;
5408 IFMA_UNLOCK(ifma);
5409 }
5410
5411 printf("%s: attached with %d suspended link-layer multicast "
5412 "membership(s)\n", if_name(ifp),
5413 ifp->if_updatemcasts);
5414 }
5415
5416 /* Clear logging parameters */
5417 bzero(&ifp->if_log, sizeof (ifp->if_log));
5418 ifp->if_fg_sendts = 0;
5419
5420 VERIFY(ifp->if_delegated.ifp == NULL);
5421 VERIFY(ifp->if_delegated.type == 0);
5422 VERIFY(ifp->if_delegated.family == 0);
5423 VERIFY(ifp->if_delegated.subfamily == 0);
5424 VERIFY(ifp->if_delegated.expensive == 0);
5425
5426 bzero(&ifp->if_agentids, sizeof(ifp->if_agentids));
5427
5428 /* Reset interface state */
5429 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5430 ifp->if_interface_state.valid_bitmask |=
5431 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
5432 ifp->if_interface_state.interface_availability =
5433 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
5434
5435 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
5436 if (ifp == lo_ifp) {
5437 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
5438 ifp->if_interface_state.valid_bitmask |=
5439 IF_INTERFACE_STATE_LQM_STATE_VALID;
5440 } else {
5441 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
5442 }
5443
5444 /*
5445 * Enable ECN capability on this interface depending on the
5446 * value of ECN global setting
5447 */
5448 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
5449 ifp->if_eflags |= IFEF_ECN_ENABLE;
5450 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5451 }
5452
5453 ifnet_lock_done(ifp);
5454 ifnet_head_done();
5455
5456 lck_mtx_lock(&ifp->if_cached_route_lock);
5457 /* Enable forwarding cached route */
5458 ifp->if_fwd_cacheok = 1;
5459 /* Clean up any existing cached routes */
5460 ROUTE_RELEASE(&ifp->if_fwd_route);
5461 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
5462 ROUTE_RELEASE(&ifp->if_src_route);
5463 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
5464 ROUTE_RELEASE(&ifp->if_src_route6);
5465 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5466 lck_mtx_unlock(&ifp->if_cached_route_lock);
5467
5468 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
5469
5470 /*
5471 * Allocate and attach IGMPv3/MLDv2 interface specific variables
5472 * and trees; do this before the ifnet is marked as attached.
5473 * The ifnet keeps the reference to the info structures even after
5474 * the ifnet is detached, since the network-layer records still
5475 * refer to the info structures even after that. This also
5476 * makes it possible for them to still function after the ifnet
5477 * is recycled or reattached.
5478 */
5479 #if INET
5480 if (IGMP_IFINFO(ifp) == NULL) {
5481 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
5482 VERIFY(IGMP_IFINFO(ifp) != NULL);
5483 } else {
5484 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
5485 igmp_domifreattach(IGMP_IFINFO(ifp));
5486 }
5487 #endif /* INET */
5488 #if INET6
5489 if (MLD_IFINFO(ifp) == NULL) {
5490 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
5491 VERIFY(MLD_IFINFO(ifp) != NULL);
5492 } else {
5493 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
5494 mld_domifreattach(MLD_IFINFO(ifp));
5495 }
5496 #endif /* INET6 */
5497
5498 VERIFY(ifp->if_data_threshold == 0);
5499
5500 /*
5501 * Finally, mark this ifnet as attached.
5502 */
5503 lck_mtx_lock(rnh_lock);
5504 ifnet_lock_exclusive(ifp);
5505 lck_mtx_lock_spin(&ifp->if_ref_lock);
5506 ifp->if_refflags = IFRF_ATTACHED;
5507 lck_mtx_unlock(&ifp->if_ref_lock);
5508 if (net_rtref) {
5509 /* boot-args override; enable idle notification */
5510 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
5511 IFRF_IDLE_NOTIFY);
5512 } else {
5513 /* apply previous request(s) to set the idle flags, if any */
5514 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
5515 ifp->if_idle_new_flags_mask);
5516
5517 }
5518 ifnet_lock_done(ifp);
5519 lck_mtx_unlock(rnh_lock);
5520 dlil_if_unlock();
5521
5522 #if PF
5523 /*
5524 * Attach packet filter to this interface, if enabled.
5525 */
5526 pf_ifnet_hook(ifp, 1);
5527 #endif /* PF */
5528
5529 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
5530
5531 if (dlil_verbose) {
5532 printf("%s: attached%s\n", if_name(ifp),
5533 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
5534 }
5535
5536 return (0);
5537 }
5538
5539 /*
5540 * Prepare the storage for the first/permanent link address, which
5541 * must have the same lifetime as the ifnet itself. Although the link
5542 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
5543 * its location in memory must never change as it may still be referred
5544 * to by some parts of the system afterwards (unfortunate implementation
5545 * artifacts inherited from BSD.)
5546 *
5547 * Caller must hold ifnet lock as writer.
5548 */
5549 static struct ifaddr *
5550 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
5551 {
5552 struct ifaddr *ifa, *oifa;
5553 struct sockaddr_dl *asdl, *msdl;
5554 char workbuf[IFNAMSIZ*2];
5555 int namelen, masklen, socksize;
5556 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5557
5558 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
5559 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
5560
5561 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
5562 if_name(ifp));
5563 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
5564 socksize = masklen + ifp->if_addrlen;
5565 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
5566 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
5567 socksize = sizeof(struct sockaddr_dl);
5568 socksize = ROUNDUP(socksize);
5569 #undef ROUNDUP
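/*
 * Worked example (illustrative; exact offsets depend on the local
 * struct sockaddr_dl layout): for an interface named "en0" with a
 * 6-byte link-layer address, namelen is 3, masklen is
 * offsetof(struct sockaddr_dl, sdl_data[0]) + 3 and socksize is
 * masklen + 6.  On the typical layout that is smaller than
 * sizeof (struct sockaddr_dl), so socksize is bumped to the full
 * structure size and then rounded up to a multiple of
 * sizeof (u_int32_t).
 */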
5570
5571 ifa = ifp->if_lladdr;
5572 if (socksize > DLIL_SDLMAXLEN ||
5573 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
5574 /*
5575 * Rare, but in the event that the link address requires
5576 * more storage space than DLIL_SDLMAXLEN, allocate the
5577 * largest possible storage for address and mask, so that
5578 * the same space can be reused if if_addrlen later grows
5579 * or shrinks.
5580 */
5581 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
5582 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
5583 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
5584 if (ifa == NULL)
5585 return (NULL);
5586 ifa_lock_init(ifa);
5587 /* Don't set IFD_ALLOC, as this is permanent */
5588 ifa->ifa_debug = IFD_LINK;
5589 }
5590 IFA_LOCK(ifa);
5591 /* address and mask sockaddr_dl locations */
5592 asdl = (struct sockaddr_dl *)(ifa + 1);
5593 bzero(asdl, SOCK_MAXADDRLEN);
5594 msdl = (struct sockaddr_dl *)(void *)
5595 ((char *)asdl + SOCK_MAXADDRLEN);
5596 bzero(msdl, SOCK_MAXADDRLEN);
5597 } else {
5598 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
5599 /*
5600 * Use the storage areas for address and mask within the
5601 * dlil_ifnet structure. This is the most common case.
5602 */
5603 if (ifa == NULL) {
5604 ifa = &dl_if->dl_if_lladdr.ifa;
5605 ifa_lock_init(ifa);
5606 /* Don't set IFD_ALLOC, as this is permanent */
5607 ifa->ifa_debug = IFD_LINK;
5608 }
5609 IFA_LOCK(ifa);
5610 /* address and mask sockaddr_dl locations */
5611 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
5612 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
5613 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
5614 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
5615 }
5616
5617 /* hold a permanent reference for the ifnet itself */
5618 IFA_ADDREF_LOCKED(ifa);
5619 oifa = ifp->if_lladdr;
5620 ifp->if_lladdr = ifa;
5621
5622 VERIFY(ifa->ifa_debug == IFD_LINK);
5623 ifa->ifa_ifp = ifp;
5624 ifa->ifa_rtrequest = link_rtrequest;
5625 ifa->ifa_addr = (struct sockaddr *)asdl;
5626 asdl->sdl_len = socksize;
5627 asdl->sdl_family = AF_LINK;
5628 bcopy(workbuf, asdl->sdl_data, namelen);
5629 asdl->sdl_nlen = namelen;
5630 asdl->sdl_index = ifp->if_index;
5631 asdl->sdl_type = ifp->if_type;
5632 if (ll_addr != NULL) {
5633 asdl->sdl_alen = ll_addr->sdl_alen;
5634 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
5635 } else {
5636 asdl->sdl_alen = 0;
5637 }
5638 ifa->ifa_netmask = (struct sockaddr*)msdl;
5639 msdl->sdl_len = masklen;
5640 while (namelen != 0)
5641 msdl->sdl_data[--namelen] = 0xff;
5642 IFA_UNLOCK(ifa);
5643
5644 if (oifa != NULL)
5645 IFA_REMREF(oifa);
5646
5647 return (ifa);
5648 }
5649
5650 static void
5651 if_purgeaddrs(struct ifnet *ifp)
5652 {
5653 #if INET
5654 in_purgeaddrs(ifp);
5655 #endif /* INET */
5656 #if INET6
5657 in6_purgeaddrs(ifp);
5658 #endif /* INET6 */
5659 }
5660
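/*
 * ifnet_detach() only begins the teardown: the interface is removed
 * from the global lists and marked IFRF_DETACHING here, and the rest
 * of the work is deferred to the detacher thread, which runs
 * ifnet_detach_final() and eventually invokes the driver's detached
 * callback (if_free).  A rough driver-side usage sketch (names are
 * hypothetical):
 *
 *    ifnet_detach(sc->sc_ifp);
 *    ... wait for the ifnet_detached_func supplied at allocation
 *        time to be called before freeing driver-private state ...
 *    ifnet_release(sc->sc_ifp);
 */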
5661 errno_t
5662 ifnet_detach(ifnet_t ifp)
5663 {
5664 struct ifnet *delegated_ifp;
5665
5666 if (ifp == NULL)
5667 return (EINVAL);
5668
5669 lck_mtx_lock(rnh_lock);
5670 ifnet_head_lock_exclusive();
5671 ifnet_lock_exclusive(ifp);
5672
5673 /*
5674 * Check to see if this interface has previously triggered
5675 * aggressive protocol draining; if so, decrement the global
5676 * refcnt and clear PR_AGGDRAIN on the route domain if
5677 * no such interfaces remain.
5678 */
5679 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
5680
5681 lck_mtx_lock_spin(&ifp->if_ref_lock);
5682 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
5683 lck_mtx_unlock(&ifp->if_ref_lock);
5684 ifnet_lock_done(ifp);
5685 ifnet_head_done();
5686 lck_mtx_unlock(rnh_lock);
5687 return (EINVAL);
5688 } else if (ifp->if_refflags & IFRF_DETACHING) {
5689 /* Interface has already been detached */
5690 lck_mtx_unlock(&ifp->if_ref_lock);
5691 ifnet_lock_done(ifp);
5692 ifnet_head_done();
5693 lck_mtx_unlock(rnh_lock);
5694 return (ENXIO);
5695 }
5696 /* Indicate this interface is being detached */
5697 ifp->if_refflags &= ~IFRF_ATTACHED;
5698 ifp->if_refflags |= IFRF_DETACHING;
5699 lck_mtx_unlock(&ifp->if_ref_lock);
5700
5701 if (dlil_verbose)
5702 printf("%s: detaching\n", if_name(ifp));
5703
5704 /* Reset ECN enable/disable flags */
5705 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5706 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
5707
5708 /*
5709 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
5710 * no longer be visible during lookups from this point.
5711 */
5712 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
5713 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
5714 ifp->if_link.tqe_next = NULL;
5715 ifp->if_link.tqe_prev = NULL;
5716 ifindex2ifnet[ifp->if_index] = NULL;
5717
5718 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
5719 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
5720
5721 /* Record detach PC stacktrace */
5722 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
5723
5724 /* Clear logging parameters */
5725 bzero(&ifp->if_log, sizeof (ifp->if_log));
5726
5727 /* Clear delegated interface info (reference released below) */
5728 delegated_ifp = ifp->if_delegated.ifp;
5729 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
5730
5731 /* Reset interface state */
5732 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5733
5734 ifnet_lock_done(ifp);
5735 ifnet_head_done();
5736 lck_mtx_unlock(rnh_lock);
5737
5738 /* Release reference held on the delegated interface */
5739 if (delegated_ifp != NULL)
5740 ifnet_release(delegated_ifp);
5741
5742 /* Reset Link Quality Metric (unless loopback [lo0]) */
5743 if (ifp != lo_ifp)
5744 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
5745
5746 /* Reset TCP local statistics */
5747 if (ifp->if_tcp_stat != NULL)
5748 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
5749
5750 /* Reset UDP local statistics */
5751 if (ifp->if_udp_stat != NULL)
5752 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
5753
5754 /* Reset ifnet IPv4 stats */
5755 if (ifp->if_ipv4_stat != NULL)
5756 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
5757
5758 /* Reset ifnet IPv6 stats */
5759 if (ifp->if_ipv6_stat != NULL)
5760 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
5761
5762 /* Release memory held for interface link status report */
5763 if (ifp->if_link_status != NULL) {
5764 FREE(ifp->if_link_status, M_TEMP);
5765 ifp->if_link_status = NULL;
5766 }
5767
5768 /* Let BPF know we're detaching */
5769 bpfdetach(ifp);
5770
5771 /* Mark the interface as DOWN */
5772 if_down(ifp);
5773
5774 /* Disable forwarding cached route */
5775 lck_mtx_lock(&ifp->if_cached_route_lock);
5776 ifp->if_fwd_cacheok = 0;
5777 lck_mtx_unlock(&ifp->if_cached_route_lock);
5778
5779 ifp->if_data_threshold = 0;
5780 /*
5781 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
5782 * references to the info structures and leave them attached to
5783 * this ifnet.
5784 */
5785 #if INET
5786 igmp_domifdetach(ifp);
5787 #endif /* INET */
5788 #if INET6
5789 mld_domifdetach(ifp);
5790 #endif /* INET6 */
5791
5792 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
5793
5794 /* Let worker thread take care of the rest, to avoid reentrancy */
5795 dlil_if_lock();
5796 ifnet_detaching_enqueue(ifp);
5797 dlil_if_unlock();
5798
5799 return (0);
5800 }
5801
5802 static void
5803 ifnet_detaching_enqueue(struct ifnet *ifp)
5804 {
5805 dlil_if_lock_assert();
5806
5807 ++ifnet_detaching_cnt;
5808 VERIFY(ifnet_detaching_cnt != 0);
5809 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
5810 wakeup((caddr_t)&ifnet_delayed_run);
5811 }
5812
5813 static struct ifnet *
5814 ifnet_detaching_dequeue(void)
5815 {
5816 struct ifnet *ifp;
5817
5818 dlil_if_lock_assert();
5819
5820 ifp = TAILQ_FIRST(&ifnet_detaching_head);
5821 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5822 if (ifp != NULL) {
5823 VERIFY(ifnet_detaching_cnt != 0);
5824 --ifnet_detaching_cnt;
5825 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5826 ifp->if_detaching_link.tqe_next = NULL;
5827 ifp->if_detaching_link.tqe_prev = NULL;
5828 }
5829 return (ifp);
5830 }
5831
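/*
 * The detacher thread blocks in msleep0() with a continuation: when
 * the sleep is satisfied, the thread does not resume after the
 * msleep0() call (its stack is discarded); instead the scheduler
 * re-enters ifnet_detacher_thread_cont() from the top.  That is why
 * the function is written as an infinite loop and the msleep0() calls
 * are annotated NOTREACHED.
 */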
5832 static int
5833 ifnet_detacher_thread_cont(int err)
5834 {
5835 #pragma unused(err)
5836 struct ifnet *ifp;
5837
5838 for (;;) {
5839 dlil_if_lock_assert();
5840 while (ifnet_detaching_cnt == 0) {
5841 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
5842 (PZERO - 1), "ifnet_detacher_cont", 0,
5843 ifnet_detacher_thread_cont);
5844 /* NOTREACHED */
5845 }
5846
5847 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
5848
5849 /* Take care of detaching ifnet */
5850 ifp = ifnet_detaching_dequeue();
5851 if (ifp != NULL) {
5852 dlil_if_unlock();
5853 ifnet_detach_final(ifp);
5854 dlil_if_lock();
5855 }
5856 }
5857 /* NOTREACHED */
5858 return (0);
5859 }
5860
5861 static void
5862 ifnet_detacher_thread_func(void *v, wait_result_t w)
5863 {
5864 #pragma unused(v, w)
5865 dlil_if_lock();
5866 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
5867 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
5868 /*
5869 * msleep0() shouldn't have returned as PCATCH was not set;
5870 * therefore assert in this case.
5871 */
5872 dlil_if_unlock();
5873 VERIFY(0);
5874 }
5875
5876 static void
5877 ifnet_detach_final(struct ifnet *ifp)
5878 {
5879 struct ifnet_filter *filter, *filter_next;
5880 struct ifnet_filter_head fhead;
5881 struct dlil_threading_info *inp;
5882 struct ifaddr *ifa;
5883 ifnet_detached_func if_free;
5884 int i;
5885
5886 lck_mtx_lock(&ifp->if_ref_lock);
5887 if (!(ifp->if_refflags & IFRF_DETACHING)) {
5888 panic("%s: flags mismatch (detaching not set) ifp=%p",
5889 __func__, ifp);
5890 /* NOTREACHED */
5891 }
5892
5893 /*
5894 * Wait until the existing IO references get released
5895 * before we proceed with ifnet_detach. This is not a
5896 * common case, so block without using a continuation.
5897 */
5898 while (ifp->if_refio > 0) {
5899 printf("%s: Waiting for IO references on %s interface "
5900 "to be released\n", __func__, if_name(ifp));
5901 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
5902 (PZERO - 1), "ifnet_ioref_wait", NULL);
5903 }
5904 lck_mtx_unlock(&ifp->if_ref_lock);
5905
5906 /* Drain and destroy send queue */
5907 ifclassq_teardown(ifp);
5908
5909 /* Detach interface filters */
5910 lck_mtx_lock(&ifp->if_flt_lock);
5911 if_flt_monitor_enter(ifp);
5912
5913 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
5914 fhead = ifp->if_flt_head;
5915 TAILQ_INIT(&ifp->if_flt_head);
5916
5917 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
5918 filter_next = TAILQ_NEXT(filter, filt_next);
5919 lck_mtx_unlock(&ifp->if_flt_lock);
5920
5921 dlil_detach_filter_internal(filter, 1);
5922 lck_mtx_lock(&ifp->if_flt_lock);
5923 }
5924 if_flt_monitor_leave(ifp);
5925 lck_mtx_unlock(&ifp->if_flt_lock);
5926
5927 /* Tell upper layers to drop their network addresses */
5928 if_purgeaddrs(ifp);
5929
5930 ifnet_lock_exclusive(ifp);
5931
5932 /* Unplumb all protocols */
5933 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5934 struct if_proto *proto;
5935
5936 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5937 while (proto != NULL) {
5938 protocol_family_t family = proto->protocol_family;
5939 ifnet_lock_done(ifp);
5940 proto_unplumb(family, ifp);
5941 ifnet_lock_exclusive(ifp);
5942 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5943 }
5944 /* There should not be any protocols left */
5945 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
5946 }
5947 zfree(dlif_phash_zone, ifp->if_proto_hash);
5948 ifp->if_proto_hash = NULL;
5949
5950 /* Detach (permanent) link address from if_addrhead */
5951 ifa = TAILQ_FIRST(&ifp->if_addrhead);
5952 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
5953 IFA_LOCK(ifa);
5954 if_detach_link_ifa(ifp, ifa);
5955 IFA_UNLOCK(ifa);
5956
5957 /* Remove (permanent) link address from ifnet_addrs[] */
5958 IFA_REMREF(ifa);
5959 ifnet_addrs[ifp->if_index - 1] = NULL;
5960
5961 /* This interface should not be on {ifnet_head,detaching} */
5962 VERIFY(ifp->if_link.tqe_next == NULL);
5963 VERIFY(ifp->if_link.tqe_prev == NULL);
5964 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5965 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5966
5967 /* Prefix list should be empty by now */
5968 VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
5969
5970 /* The slot should have been emptied */
5971 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5972
5973 /* There should not be any addresses left */
5974 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5975
5976 /*
5977 * Signal the starter thread to terminate itself.
5978 */
5979 if (ifp->if_start_thread != THREAD_NULL) {
5980 lck_mtx_lock_spin(&ifp->if_start_lock);
5981 ifp->if_start_flags = 0;
5982 ifp->if_start_thread = THREAD_NULL;
5983 wakeup_one((caddr_t)&ifp->if_start_thread);
5984 lck_mtx_unlock(&ifp->if_start_lock);
5985 }
5986
5987 /*
5988 * Signal the poller thread to terminate itself.
5989 */
5990 if (ifp->if_poll_thread != THREAD_NULL) {
5991 lck_mtx_lock_spin(&ifp->if_poll_lock);
5992 ifp->if_poll_thread = THREAD_NULL;
5993 wakeup_one((caddr_t)&ifp->if_poll_thread);
5994 lck_mtx_unlock(&ifp->if_poll_lock);
5995 }
5996
5997 /*
5998 * If thread affinity was set for the workloop thread, we will need
5999 * to tear down the affinity and release the extra reference count
6000 * taken at attach time. Does not apply to lo0 or other interfaces
6001 * without dedicated input threads.
6002 */
6003 if ((inp = ifp->if_inp) != NULL) {
6004 VERIFY(inp != dlil_main_input_thread);
6005
6006 if (inp->net_affinity) {
6007 struct thread *tp, *wtp, *ptp;
6008
6009 lck_mtx_lock_spin(&inp->input_lck);
6010 wtp = inp->wloop_thr;
6011 inp->wloop_thr = THREAD_NULL;
6012 ptp = inp->poll_thr;
6013 inp->poll_thr = THREAD_NULL;
6014 tp = inp->input_thr; /* don't nullify now */
6015 inp->tag = 0;
6016 inp->net_affinity = FALSE;
6017 lck_mtx_unlock(&inp->input_lck);
6018
6019 /* Tear down poll thread affinity */
6020 if (ptp != NULL) {
6021 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
6022 (void) dlil_affinity_set(ptp,
6023 THREAD_AFFINITY_TAG_NULL);
6024 thread_deallocate(ptp);
6025 }
6026
6027 /* Tear down workloop thread affinity */
6028 if (wtp != NULL) {
6029 (void) dlil_affinity_set(wtp,
6030 THREAD_AFFINITY_TAG_NULL);
6031 thread_deallocate(wtp);
6032 }
6033
6034 /* Tear down DLIL input thread affinity */
6035 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
6036 thread_deallocate(tp);
6037 }
6038
6039 /* disassociate ifp DLIL input thread */
6040 ifp->if_inp = NULL;
6041
6042 lck_mtx_lock_spin(&inp->input_lck);
6043 inp->input_waiting |= DLIL_INPUT_TERMINATE;
6044 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
6045 wakeup_one((caddr_t)&inp->input_waiting);
6046 }
6047 lck_mtx_unlock(&inp->input_lck);
6048 }
6049
6050 /* The driver might unload, so point these to ourselves */
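/*
 * Each ifp_if_* stub installed below is a safe no-op: the output and
 * framer entry points free the packet, demux and the protocol hooks
 * return an error, and the control/ioctl callbacks return EOPNOTSUPP.
 * Any straggler still calling through this ifnet after the driver is
 * gone therefore fails gracefully instead of jumping into unloaded
 * code.
 */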
6051 if_free = ifp->if_free;
6052 ifp->if_output = ifp_if_output;
6053 ifp->if_pre_enqueue = ifp_if_output;
6054 ifp->if_start = ifp_if_start;
6055 ifp->if_output_ctl = ifp_if_ctl;
6056 ifp->if_input_poll = ifp_if_input_poll;
6057 ifp->if_input_ctl = ifp_if_ctl;
6058 ifp->if_ioctl = ifp_if_ioctl;
6059 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
6060 ifp->if_free = ifp_if_free;
6061 ifp->if_demux = ifp_if_demux;
6062 ifp->if_event = ifp_if_event;
6063 ifp->if_framer_legacy = ifp_if_framer;
6064 ifp->if_framer = ifp_if_framer_extended;
6065 ifp->if_add_proto = ifp_if_add_proto;
6066 ifp->if_del_proto = ifp_if_del_proto;
6067 ifp->if_check_multi = ifp_if_check_multi;
6068
6069 /* wipe out interface description */
6070 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6071 ifp->if_desc.ifd_len = 0;
6072 VERIFY(ifp->if_desc.ifd_desc != NULL);
6073 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
6074
6075 /* there shouldn't be any delegation by now */
6076 VERIFY(ifp->if_delegated.ifp == NULL);
6077 VERIFY(ifp->if_delegated.type == 0);
6078 VERIFY(ifp->if_delegated.family == 0);
6079 VERIFY(ifp->if_delegated.subfamily == 0);
6080 VERIFY(ifp->if_delegated.expensive == 0);
6081
6082 ifnet_lock_done(ifp);
6083
6084 #if PF
6085 /*
6086 * Detach this interface from packet filter, if enabled.
6087 */
6088 pf_ifnet_hook(ifp, 0);
6089 #endif /* PF */
6090
6091 /* Filter list should be empty */
6092 lck_mtx_lock_spin(&ifp->if_flt_lock);
6093 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6094 VERIFY(ifp->if_flt_busy == 0);
6095 VERIFY(ifp->if_flt_waiters == 0);
6096 lck_mtx_unlock(&ifp->if_flt_lock);
6097
6098 /* Last chance to drain send queue */
6099 if_qflush(ifp, 0);
6100
6101 /* Last chance to cleanup any cached route */
6102 lck_mtx_lock(&ifp->if_cached_route_lock);
6103 VERIFY(!ifp->if_fwd_cacheok);
6104 ROUTE_RELEASE(&ifp->if_fwd_route);
6105 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
6106 ROUTE_RELEASE(&ifp->if_src_route);
6107 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
6108 ROUTE_RELEASE(&ifp->if_src_route6);
6109 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
6110 lck_mtx_unlock(&ifp->if_cached_route_lock);
6111
6112 VERIFY(ifp->if_data_threshold == 0);
6113
6114 ifnet_llreach_ifdetach(ifp);
6115
6116 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
6117
6118 if (if_free != NULL)
6119 if_free(ifp);
6120
6121 /*
6122 * Finally, mark this ifnet as detached.
6123 */
6124 lck_mtx_lock_spin(&ifp->if_ref_lock);
6125 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6126 panic("%s: flags mismatch (detaching not set) ifp=%p",
6127 __func__, ifp);
6128 /* NOTREACHED */
6129 }
6130 ifp->if_refflags &= ~IFRF_DETACHING;
6131 lck_mtx_unlock(&ifp->if_ref_lock);
6132
6133 if (dlil_verbose)
6134 printf("%s: detached\n", if_name(ifp));
6135
6136 /* Release reference held during ifnet attach */
6137 ifnet_release(ifp);
6138 }
6139
6140 static errno_t
6141 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6142 {
6143 #pragma unused(ifp)
6144 m_freem(m);
6145 return (0);
6146 }
6147
6148 static void
6149 ifp_if_start(struct ifnet *ifp)
6150 {
6151 ifnet_purge(ifp);
6152 }
6153
6154 static void
6155 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6156 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6157 {
6158 #pragma unused(ifp, flags, max_cnt)
6159 if (m_head != NULL)
6160 *m_head = NULL;
6161 if (m_tail != NULL)
6162 *m_tail = NULL;
6163 if (cnt != NULL)
6164 *cnt = 0;
6165 if (len != NULL)
6166 *len = 0;
6167 }
6168
6169 static errno_t
6170 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6171 {
6172 #pragma unused(ifp, cmd, arglen, arg)
6173 return (EOPNOTSUPP);
6174 }
6175
6176 static errno_t
6177 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6178 {
6179 #pragma unused(ifp, fh, pf)
6180 m_freem(m);
6181 return (EJUSTRETURN);
6182 }
6183
6184 static errno_t
6185 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6186 const struct ifnet_demux_desc *da, u_int32_t dc)
6187 {
6188 #pragma unused(ifp, pf, da, dc)
6189 return (EINVAL);
6190 }
6191
6192 static errno_t
6193 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6194 {
6195 #pragma unused(ifp, pf)
6196 return (EINVAL);
6197 }
6198
6199 static errno_t
6200 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6201 {
6202 #pragma unused(ifp, sa)
6203 return (EOPNOTSUPP);
6204 }
6205
6206 static errno_t
6207 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6208 const struct sockaddr *sa, const char *ll, const char *t)
6209 {
6210 #pragma unused(ifp, m, sa, ll, t)
6211 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
6212 }
6213
6214 static errno_t
6215 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6216 const struct sockaddr *sa, const char *ll, const char *t,
6217 u_int32_t *pre, u_int32_t *post)
6218 {
6219 #pragma unused(ifp, sa, ll, t)
6220 m_freem(*m);
6221 *m = NULL;
6222
6223 if (pre != NULL)
6224 *pre = 0;
6225 if (post != NULL)
6226 *post = 0;
6227
6228 return (EJUSTRETURN);
6229 }
6230
6231 errno_t
6232 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6233 {
6234 #pragma unused(ifp, cmd, arg)
6235 return (EOPNOTSUPP);
6236 }
6237
6238 static errno_t
6239 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6240 {
6241 #pragma unused(ifp, tm, f)
6242 /* XXX not sure what to do here */
6243 return (0);
6244 }
6245
6246 static void
6247 ifp_if_free(struct ifnet *ifp)
6248 {
6249 #pragma unused(ifp)
6250 }
6251
6252 static void
6253 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
6254 {
6255 #pragma unused(ifp, e)
6256 }
6257
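/*
 * dlil_if_acquire() either recycles a previously detached dlil_ifnet
 * of the same family and uniqueid (marking it DLIF_INUSE|DLIF_REUSE),
 * or, when no free match exists, carves a fresh 64-bit aligned
 * dlil_ifnet out of dlif_zone and initializes its locks and embedded
 * storage.  ifnet_allocate() is the expected caller.
 */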
6258 __private_extern__
6259 int dlil_if_acquire(u_int32_t family, const void *uniqueid,
6260 size_t uniqueid_len, struct ifnet **ifp)
6261 {
6262 struct ifnet *ifp1 = NULL;
6263 struct dlil_ifnet *dlifp1 = NULL;
6264 void *buf, *base, **pbuf;
6265 int ret = 0;
6266
6267 dlil_if_lock();
6268 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
6269 ifp1 = (struct ifnet *)dlifp1;
6270
6271 if (ifp1->if_family != family)
6272 continue;
6273
6274 lck_mtx_lock(&dlifp1->dl_if_lock);
6275 /* matching uniqueid (same length and bytes), or no uniqueid specified */
6276 if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
6277 !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
6278 /* check for matching interface in use */
6279 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6280 if (uniqueid_len) {
6281 ret = EBUSY;
6282 lck_mtx_unlock(&dlifp1->dl_if_lock);
6283 goto end;
6284 }
6285 } else {
6286 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
6287 lck_mtx_unlock(&dlifp1->dl_if_lock);
6288 *ifp = ifp1;
6289 goto end;
6290 }
6291 }
6292 lck_mtx_unlock(&dlifp1->dl_if_lock);
6293 }
6294
6295 /* no interface found, allocate a new one */
6296 buf = zalloc(dlif_zone);
6297 if (buf == NULL) {
6298 ret = ENOMEM;
6299 goto end;
6300 }
6301 bzero(buf, dlif_bufsize);
6302
6303 /* Get the 64-bit aligned base address for this object */
6304 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
6305 sizeof (u_int64_t));
6306 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
6307
6308 /*
6309 * Wind back a pointer size from the aligned base and
6310 * save the original address so we can free it later.
6311 */
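/*
 * Illustrative layout of the allocation (not to scale):
 *
 *   buf                        pbuf         base (64-bit aligned)
 *    |<---- alignment pad ---->|<- void * ->|<----- dlif_size ----->|
 *    +-------------------------+------------+-----------------------+
 *    |         unused          | saved  buf |   struct dlil_ifnet   |
 *    +-------------------------+------------+-----------------------+
 */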
6312 pbuf = (void **)((intptr_t)base - sizeof (void *));
6313 *pbuf = buf;
6314 dlifp1 = base;
6315
6316 if (uniqueid_len) {
6317 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
6318 M_NKE, M_WAITOK);
6319 if (dlifp1->dl_if_uniqueid == NULL) {
6320 zfree(dlif_zone, dlifp1);
6321 ret = ENOMEM;
6322 goto end;
6323 }
6324 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
6325 dlifp1->dl_if_uniqueid_len = uniqueid_len;
6326 }
6327
6328 ifp1 = (struct ifnet *)dlifp1;
6329 dlifp1->dl_if_flags = DLIF_INUSE;
6330 if (ifnet_debug) {
6331 dlifp1->dl_if_flags |= DLIF_DEBUG;
6332 dlifp1->dl_if_trace = dlil_if_trace;
6333 }
6334 ifp1->if_name = dlifp1->dl_if_namestorage;
6335 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
6336
6337 /* initialize interface description */
6338 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
6339 ifp1->if_desc.ifd_len = 0;
6340 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
6341
6342 #if CONFIG_MACF_NET
6343 mac_ifnet_label_init(ifp1);
6344 #endif
6345
6346 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
6347 DLIL_PRINTF("%s: failed to allocate if local stats, "
6348 "error: %d\n", __func__, ret);
6349 /* This probably shouldn't be fatal */
6350 ret = 0;
6351 }
6352
6353 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
6354 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
6355 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
6356 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6357 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
6358 ifnet_lock_attr);
6359 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
6360 #if INET
6361 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
6362 ifnet_lock_attr);
6363 ifp1->if_inetdata = NULL;
6364 #endif
6365 #if INET6
6366 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
6367 ifnet_lock_attr);
6368 ifp1->if_inet6data = NULL;
6369 #endif
6370 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
6371 ifnet_lock_attr);
6372 ifp1->if_link_status = NULL;
6373
6374 /* for send data paths */
6375 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
6376 ifnet_lock_attr);
6377 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
6378 ifnet_lock_attr);
6379 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
6380 ifnet_lock_attr);
6381
6382 /* for receive data paths */
6383 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
6384 ifnet_lock_attr);
6385
6386 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
6387
6388 *ifp = ifp1;
6389
6390 end:
6391 dlil_if_unlock();
6392
6393 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
6394 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
6395
6396 return (ret);
6397 }
6398
6399 __private_extern__ void
6400 dlil_if_release(ifnet_t ifp)
6401 {
6402 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
6403
6404 ifnet_lock_exclusive(ifp);
6405 lck_mtx_lock(&dlifp->dl_if_lock);
6406 dlifp->dl_if_flags &= ~DLIF_INUSE;
6407 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6408 ifp->if_name = dlifp->dl_if_namestorage;
6409 /* Reset external name (name + unit) */
6410 ifp->if_xname = dlifp->dl_if_xnamestorage;
6411 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
6412 "%s?", ifp->if_name);
6413 lck_mtx_unlock(&dlifp->dl_if_lock);
6414 #if CONFIG_MACF_NET
6415 /*
6416 * We can either recycle the MAC label here or in dlil_if_acquire().
6417 * It seems logical to do it here but this means that anything that
6418 * still has a handle on ifp will now see it as unlabeled.
6419 * Since the interface is "dead" that may be OK. Revisit later.
6420 */
6421 mac_ifnet_label_recycle(ifp);
6422 #endif
6423 ifnet_lock_done(ifp);
6424 }
6425
6426 __private_extern__ void
6427 dlil_if_lock(void)
6428 {
6429 lck_mtx_lock(&dlil_ifnet_lock);
6430 }
6431
6432 __private_extern__ void
6433 dlil_if_unlock(void)
6434 {
6435 lck_mtx_unlock(&dlil_ifnet_lock);
6436 }
6437
6438 __private_extern__ void
6439 dlil_if_lock_assert(void)
6440 {
6441 lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
6442 }
6443
6444 __private_extern__ void
6445 dlil_proto_unplumb_all(struct ifnet *ifp)
6446 {
6447 /*
6448 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6449 * each bucket contains exactly one entry; PF_VLAN does not need an
6450 * explicit unplumb.
6451 *
6452 * if_proto_hash[3] is for other protocols; we expect anything
6453 * in this bucket to respond to the DETACHING event (which would
6454 * have happened by now) and do the unplumb then.
6455 */
6456 (void) proto_unplumb(PF_INET, ifp);
6457 #if INET6
6458 (void) proto_unplumb(PF_INET6, ifp);
6459 #endif /* INET6 */
6460 }
6461
6462 static void
6463 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
6464 {
6465 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6466 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6467
6468 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
6469
6470 lck_mtx_unlock(&ifp->if_cached_route_lock);
6471 }
6472
6473 static void
6474 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
6475 {
6476 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6477 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6478
6479 if (ifp->if_fwd_cacheok) {
6480 route_copyin(src, &ifp->if_src_route, sizeof (*src));
6481 } else {
6482 ROUTE_RELEASE(src);
6483 }
6484 lck_mtx_unlock(&ifp->if_cached_route_lock);
6485 }
6486
6487 #if INET6
6488 static void
6489 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
6490 {
6491 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6492 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6493
6494 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
6495 sizeof (*dst));
6496
6497 lck_mtx_unlock(&ifp->if_cached_route_lock);
6498 }
6499
6500 static void
6501 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
6502 {
6503 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6504 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6505
6506 if (ifp->if_fwd_cacheok) {
6507 route_copyin((struct route *)src,
6508 (struct route *)&ifp->if_src_route6, sizeof (*src));
6509 } else {
6510 ROUTE_RELEASE(src);
6511 }
6512 lck_mtx_unlock(&ifp->if_cached_route_lock);
6513 }
6514 #endif /* INET6 */
6515
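/*
 * Per-ifnet cached source-route lookup.  If the cached route is still
 * usable and its destination matches src_ip it is returned directly;
 * otherwise a scoped rtalloc1_scoped() lookup is done and, when
 * forwarding caching is enabled on the interface, the result is
 * copied back into the cache.  The returned rtentry (if any) carries
 * a reference that the caller is expected to drop.
 */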
6516 struct rtentry *
6517 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6518 {
6519 struct route src_rt;
6520 struct sockaddr_in *dst;
6521
6522 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6523
6524 ifp_src_route_copyout(ifp, &src_rt);
6525
6526 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
6527 ROUTE_RELEASE(&src_rt);
6528 if (dst->sin_family != AF_INET) {
6529 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6530 dst->sin_len = sizeof (src_rt.ro_dst);
6531 dst->sin_family = AF_INET;
6532 }
6533 dst->sin_addr = src_ip;
6534
6535 if (src_rt.ro_rt == NULL) {
6536 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
6537 0, 0, ifp->if_index);
6538
6539 if (src_rt.ro_rt != NULL) {
6540 /* retain a ref, copyin consumes one */
6541 struct rtentry *rte = src_rt.ro_rt;
6542 RT_ADDREF(rte);
6543 ifp_src_route_copyin(ifp, &src_rt);
6544 src_rt.ro_rt = rte;
6545 }
6546 }
6547 }
6548
6549 return (src_rt.ro_rt);
6550 }
6551
6552 #if INET6
6553 struct rtentry*
6554 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
6555 {
6556 struct route_in6 src_rt;
6557
6558 ifp_src_route6_copyout(ifp, &src_rt);
6559
6560 if (ROUTE_UNUSABLE(&src_rt) ||
6561 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
6562 ROUTE_RELEASE(&src_rt);
6563 if (src_rt.ro_dst.sin6_family != AF_INET6) {
6564 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6565 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
6566 src_rt.ro_dst.sin6_family = AF_INET6;
6567 }
6568 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
6569 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
6570 sizeof (src_rt.ro_dst.sin6_addr));
6571
6572 if (src_rt.ro_rt == NULL) {
6573 src_rt.ro_rt = rtalloc1_scoped(
6574 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
6575 ifp->if_index);
6576
6577 if (src_rt.ro_rt != NULL) {
6578 /* retain a ref, copyin consumes one */
6579 struct rtentry *rte = src_rt.ro_rt;
6580 RT_ADDREF(rte);
6581 ifp_src_route6_copyin(ifp, &src_rt);
6582 src_rt.ro_rt = rte;
6583 }
6584 }
6585 }
6586
6587 return (src_rt.ro_rt);
6588 }
6589 #endif /* INET6 */
6590
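/*
 * Update the interface link quality metric and post a kernel event
 * when it changes.  'locked' indicates whether the caller already
 * holds the ifnet lock exclusively; the lock is dropped around
 * dlil_post_msg() and the caller's lock state is restored on return.
 */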
6591 void
6592 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
6593 {
6594 struct kev_dl_link_quality_metric_data ev_lqm_data;
6595
6596 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
6597
6598 /* Normalize to edge */
6599 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD)
6600 lqm = IFNET_LQM_THRESH_BAD;
6601 else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
6602 lqm = IFNET_LQM_THRESH_POOR;
6603 else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
6604 lqm = IFNET_LQM_THRESH_GOOD;
6605
6606 /*
6607 * Take the lock if needed
6608 */
6609 if (!locked)
6610 ifnet_lock_exclusive(ifp);
6611
6612 if (lqm == ifp->if_interface_state.lqm_state &&
6613 (ifp->if_interface_state.valid_bitmask &
6614 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
6615 /*
6616 * Release the lock if it was not held by the caller
6617 */
6618 if (!locked)
6619 ifnet_lock_done(ifp);
6620 return; /* nothing to update */
6621 }
6622 ifp->if_interface_state.valid_bitmask |=
6623 IF_INTERFACE_STATE_LQM_STATE_VALID;
6624 ifp->if_interface_state.lqm_state = lqm;
6625
6626 /*
6627 * Don't want to hold the lock when issuing kernel events
6628 */
6629 ifnet_lock_done(ifp);
6630
6631 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
6632 ev_lqm_data.link_quality_metric = lqm;
6633
6634 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
6635 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
6636
6637 /*
6638 * Reacquire the lock for the caller
6639 */
6640 if (locked)
6641 ifnet_lock_exclusive(ifp);
6642 }
6643
6644 static void
6645 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
6646 {
6647 struct kev_dl_rrc_state kev;
6648
6649 if (rrc_state == ifp->if_interface_state.rrc_state &&
6650 (ifp->if_interface_state.valid_bitmask &
6651 IF_INTERFACE_STATE_RRC_STATE_VALID))
6652 return;
6653
6654 ifp->if_interface_state.valid_bitmask |=
6655 IF_INTERFACE_STATE_RRC_STATE_VALID;
6656
6657 ifp->if_interface_state.rrc_state = rrc_state;
6658
6659 /*
6660 * Don't want to hold the lock when issuing kernel events
6661 */
6662 ifnet_lock_done(ifp);
6663
6664 bzero(&kev, sizeof(struct kev_dl_rrc_state));
6665 kev.rrc_state = rrc_state;
6666
6667 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
6668 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
6669
6670 ifnet_lock_exclusive(ifp);
6671 }
6672
6673 errno_t
6674 if_state_update(struct ifnet *ifp,
6675 struct if_interface_state* if_interface_state)
6676 {
6677 u_short if_index_available = 0;
6678
6679 ifnet_lock_exclusive(ifp);
6680
6681 if ((ifp->if_type != IFT_CELLULAR) &&
6682 (if_interface_state->valid_bitmask &
6683 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
6684 ifnet_lock_done(ifp);
6685 return (ENOTSUP);
6686 }
6687 if ((if_interface_state->valid_bitmask &
6688 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
6689 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
6690 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
6691 ifnet_lock_done(ifp);
6692 return (EINVAL);
6693 }
6694 if ((if_interface_state->valid_bitmask &
6695 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
6696 if_interface_state->rrc_state !=
6697 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
6698 if_interface_state->rrc_state !=
6699 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
6700 ifnet_lock_done(ifp);
6701 return (EINVAL);
6702 }
6703
6704 if (if_interface_state->valid_bitmask &
6705 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6706 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
6707 }
6708 if (if_interface_state->valid_bitmask &
6709 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6710 if_rrc_state_update(ifp, if_interface_state->rrc_state);
6711 }
6712 if (if_interface_state->valid_bitmask &
6713 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6714 ifp->if_interface_state.valid_bitmask |=
6715 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6716 ifp->if_interface_state.interface_availability =
6717 if_interface_state->interface_availability;
6718
6719 if (ifp->if_interface_state.interface_availability ==
6720 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
6721 if_index_available = ifp->if_index;
6722 }
6723 }
6724 ifnet_lock_done(ifp);
6725
6726 /*
6727 * Check if the TCP connections going over this interface should be
6728 * forced to send probe packets instead of waiting for TCP timers
6729 * to fire. This will be done when there is an explicit
6730 * notification that the interface became available.
6731 */
6732 if (if_index_available > 0)
6733 tcp_interface_send_probe(if_index_available);
6734
6735 return (0);
6736 }
6737
6738 void
6739 if_get_state(struct ifnet *ifp,
6740 struct if_interface_state* if_interface_state)
6741 {
6742 ifnet_lock_shared(ifp);
6743
6744 if_interface_state->valid_bitmask = 0;
6745
6746 if (ifp->if_interface_state.valid_bitmask &
6747 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6748 if_interface_state->valid_bitmask |=
6749 IF_INTERFACE_STATE_RRC_STATE_VALID;
6750 if_interface_state->rrc_state =
6751 ifp->if_interface_state.rrc_state;
6752 }
6753 if (ifp->if_interface_state.valid_bitmask &
6754 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6755 if_interface_state->valid_bitmask |=
6756 IF_INTERFACE_STATE_LQM_STATE_VALID;
6757 if_interface_state->lqm_state =
6758 ifp->if_interface_state.lqm_state;
6759 }
6760 if (ifp->if_interface_state.valid_bitmask &
6761 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6762 if_interface_state->valid_bitmask |=
6763 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6764 if_interface_state->interface_availability =
6765 ifp->if_interface_state.interface_availability;
6766 }
6767
6768 ifnet_lock_done(ifp);
6769 }
6770
6771 errno_t
6772 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6773 {
6774 ifnet_lock_exclusive(ifp);
6775 if (conn_probe > 1) {
6776 ifnet_lock_done(ifp);
6777 return (EINVAL);
6778 }
6779 if (conn_probe == 0)
6780 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
6781 else
6782 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
6783 ifnet_lock_done(ifp);
6784
6785 tcp_probe_connectivity(ifp, conn_probe);
6786 return (0);
6787 }
6788
6789 /* for uuid.c */
6790 int
6791 uuid_get_ethernet(u_int8_t *node)
6792 {
6793 struct ifnet *ifp;
6794 struct sockaddr_dl *sdl;
6795
6796 ifnet_head_lock_shared();
6797 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6798 ifnet_lock_shared(ifp);
6799 IFA_LOCK_SPIN(ifp->if_lladdr);
6800 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
6801 if (sdl->sdl_type == IFT_ETHER) {
6802 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
6803 IFA_UNLOCK(ifp->if_lladdr);
6804 ifnet_lock_done(ifp);
6805 ifnet_head_done();
6806 return (0);
6807 }
6808 IFA_UNLOCK(ifp->if_lladdr);
6809 ifnet_lock_done(ifp);
6810 }
6811 ifnet_head_done();
6812
6813 return (-1);
6814 }
6815
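/*
 * The sysctl handlers below share one pattern: copy the current value
 * into a local, let sysctl_handle_int()/sysctl_handle_quad() do the
 * user copyin/copyout, return early on error or on a read-only
 * request (newptr == USER_ADDR_NULL), and only then validate and
 * commit the new value.
 */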
6816 static int
6817 sysctl_rxpoll SYSCTL_HANDLER_ARGS
6818 {
6819 #pragma unused(arg1, arg2)
6820 uint32_t i;
6821 int err;
6822
6823 i = if_rxpoll;
6824
6825 err = sysctl_handle_int(oidp, &i, 0, req);
6826 if (err != 0 || req->newptr == USER_ADDR_NULL)
6827 return (err);
6828
6829 if (net_rxpoll == 0)
6830 return (ENXIO);
6831
6832 if_rxpoll = i;
6833 return (err);
6834 }
6835
6836 static int
6837 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
6838 {
6839 #pragma unused(arg1, arg2)
6840 uint64_t q;
6841 int err;
6842
6843 q = if_rxpoll_mode_holdtime;
6844
6845 err = sysctl_handle_quad(oidp, &q, 0, req);
6846 if (err != 0 || req->newptr == USER_ADDR_NULL)
6847 return (err);
6848
6849 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
6850 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
6851
6852 if_rxpoll_mode_holdtime = q;
6853
6854 return (err);
6855 }
6856
6857 static int
6858 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
6859 {
6860 #pragma unused(arg1, arg2)
6861 uint64_t q;
6862 int err;
6863
6864 q = if_rxpoll_sample_holdtime;
6865
6866 err = sysctl_handle_quad(oidp, &q, 0, req);
6867 if (err != 0 || req->newptr == USER_ADDR_NULL)
6868 return (err);
6869
6870 if (q < IF_RXPOLL_SAMPLETIME_MIN)
6871 q = IF_RXPOLL_SAMPLETIME_MIN;
6872
6873 if_rxpoll_sample_holdtime = q;
6874
6875 return (err);
6876 }
6877
6878 static int
6879 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
6880 {
6881 #pragma unused(arg1, arg2)
6882 uint64_t q;
6883 int err;
6884
6885 q = if_rxpoll_interval_time;
6886
6887 err = sysctl_handle_quad(oidp, &q, 0, req);
6888 if (err != 0 || req->newptr == USER_ADDR_NULL)
6889 return (err);
6890
6891 if (q < IF_RXPOLL_INTERVALTIME_MIN)
6892 q = IF_RXPOLL_INTERVALTIME_MIN;
6893
6894 if_rxpoll_interval_time = q;
6895
6896 return (err);
6897 }
6898
6899 static int
6900 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
6901 {
6902 #pragma unused(arg1, arg2)
6903 uint32_t i;
6904 int err;
6905
6906 i = if_rxpoll_wlowat;
6907
6908 err = sysctl_handle_int(oidp, &i, 0, req);
6909 if (err != 0 || req->newptr == USER_ADDR_NULL)
6910 return (err);
6911
6912 if (i == 0 || i >= if_rxpoll_whiwat)
6913 return (EINVAL);
6914
6915 if_rxpoll_wlowat = i;
6916 return (err);
6917 }
6918
6919 static int
6920 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
6921 {
6922 #pragma unused(arg1, arg2)
6923 uint32_t i;
6924 int err;
6925
6926 i = if_rxpoll_whiwat;
6927
6928 err = sysctl_handle_int(oidp, &i, 0, req);
6929 if (err != 0 || req->newptr == USER_ADDR_NULL)
6930 return (err);
6931
6932 if (i <= if_rxpoll_wlowat)
6933 return (EINVAL);
6934
6935 if_rxpoll_whiwat = i;
6936 return (err);
6937 }
6938
6939 static int
6940 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
6941 {
6942 #pragma unused(arg1, arg2)
6943 int i, err;
6944
6945 i = if_sndq_maxlen;
6946
6947 err = sysctl_handle_int(oidp, &i, 0, req);
6948 if (err != 0 || req->newptr == USER_ADDR_NULL)
6949 return (err);
6950
6951 if (i < IF_SNDQ_MINLEN)
6952 i = IF_SNDQ_MINLEN;
6953
6954 if_sndq_maxlen = i;
6955 return (err);
6956 }
6957
6958 static int
6959 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
6960 {
6961 #pragma unused(arg1, arg2)
6962 int i, err;
6963
6964 i = if_rcvq_maxlen;
6965
6966 err = sysctl_handle_int(oidp, &i, 0, req);
6967 if (err != 0 || req->newptr == USER_ADDR_NULL)
6968 return (err);
6969
6970 if (i < IF_RCVQ_MINLEN)
6971 i = IF_RCVQ_MINLEN;
6972
6973 if_rcvq_maxlen = i;
6974 return (err);
6975 }
6976
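/*
 * dlil_node_present()/dlil_node_absent() let a link layer report that
 * a peer node has appeared or disappeared: the sockaddr is decomposed
 * into link-layer and IPv6 forms for ND6, and a KEV_DL_NODE_PRESENCE
 * or KEV_DL_NODE_ABSENCE event carrying the RSSI and quality metrics
 * is posted for interested listeners.
 */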
6977 void
6978 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6979 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6980 {
6981 struct kev_dl_node_presence kev;
6982 struct sockaddr_dl *sdl;
6983 struct sockaddr_in6 *sin6;
6984
6985 VERIFY(ifp);
6986 VERIFY(sa);
6987 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6988
6989 bzero(&kev, sizeof (kev));
6990 sin6 = &kev.sin6_node_address;
6991 sdl = &kev.sdl_node_address;
6992 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6993 kev.rssi = rssi;
6994 kev.link_quality_metric = lqm;
6995 kev.node_proximity_metric = npm;
6996 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
6997
6998 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6999 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
7000 &kev.link_data, sizeof (kev));
7001 }
7002
7003 void
7004 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
7005 {
7006 struct kev_dl_node_absence kev;
7007 struct sockaddr_in6 *sin6;
7008 struct sockaddr_dl *sdl;
7009
7010 VERIFY(ifp);
7011 VERIFY(sa);
7012 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7013
7014 bzero(&kev, sizeof (kev));
7015 sin6 = &kev.sin6_node_address;
7016 sdl = &kev.sdl_node_address;
7017 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7018
7019 nd6_alt_node_absent(ifp, sin6);
7020 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
7021 &kev.link_data, sizeof (kev));
7022 }
7023
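/*
 * Return the raw link-layer address bytes for sdl.  When link-address
 * privacy checks are in effect (dlil_lladdr_ckreq) and the supplied
 * credential fails the "net.link.addr" MAC check, Ethernet and
 * FireWire addresses are replaced with a fixed placeholder whose
 * first octet is 0x02 (locally administered), so unprivileged callers
 * never see the real hardware address.
 */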
7024 const void *
7025 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
7026 kauth_cred_t *credp)
7027 {
7028 const u_int8_t *bytes;
7029 size_t size;
7030
7031 bytes = CONST_LLADDR(sdl);
7032 size = sdl->sdl_alen;
7033
7034 #if CONFIG_MACF
7035 if (dlil_lladdr_ckreq) {
7036 switch (sdl->sdl_type) {
7037 case IFT_ETHER:
7038 case IFT_IEEE1394:
7039 break;
7040 default:
7041 credp = NULL;
7042 break;
7043 };
7044
7045 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
7046 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
7047 [0] = 2
7048 };
7049
7050 switch (sdl->sdl_type) {
7051 case IFT_ETHER:
7052 VERIFY(size == ETHER_ADDR_LEN);
7053 bytes = unspec;
7054 break;
7055 case IFT_IEEE1394:
7056 VERIFY(size == FIREWIRE_EUI64_LEN);
7057 bytes = unspec;
7058 break;
7059 default:
7060 VERIFY(FALSE);
7061 break;
7062 };
7063 }
7064 }
7065 #else
7066 #pragma unused(credp)
7067 #endif
7068
7069 if (sizep != NULL) *sizep = size;
7070 return (bytes);
7071 }
7072
7073 void
7074 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
7075 u_int8_t info[DLIL_MODARGLEN])
7076 {
7077 struct kev_dl_issues kev;
7078 struct timeval tv;
7079
7080 VERIFY(ifp != NULL);
7081 VERIFY(modid != NULL);
7082 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
7083 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
7084
7085 bzero(&kev, sizeof (kev));
7086
7087 microtime(&tv);
7088 kev.timestamp = tv.tv_sec;
7089 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
7090 if (info != NULL)
7091 bcopy(info, &kev.info, DLIL_MODARGLEN);
7092
7093 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
7094 &kev.link_data, sizeof (kev));
7095 }
7096
7097 errno_t
7098 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7099 struct proc *p)
7100 {
7101 u_int32_t level = IFNET_THROTTLE_OFF;
7102 errno_t result = 0;
7103
7104 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
7105
7106 if (cmd == SIOCSIFOPPORTUNISTIC) {
7107 /*
7108 * XXX: Use priv_check_cred() instead of root check?
7109 */
7110 if ((result = proc_suser(p)) != 0)
7111 return (result);
7112
7113 if (ifr->ifr_opportunistic.ifo_flags ==
7114 IFRIFOF_BLOCK_OPPORTUNISTIC)
7115 level = IFNET_THROTTLE_OPPORTUNISTIC;
7116 else if (ifr->ifr_opportunistic.ifo_flags == 0)
7117 level = IFNET_THROTTLE_OFF;
7118 else
7119 result = EINVAL;
7120
7121 if (result == 0)
7122 result = ifnet_set_throttle(ifp, level);
7123 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
7124 ifr->ifr_opportunistic.ifo_flags = 0;
7125 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
7126 ifr->ifr_opportunistic.ifo_flags |=
7127 IFRIFOF_BLOCK_OPPORTUNISTIC;
7128 }
7129 }
7130
7131 /*
7132 * Return the count of current opportunistic connections
7133 * over the interface.
7134 */
7135 if (result == 0) {
7136 uint32_t flags = 0;
7137 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
7138 INPCB_OPPORTUNISTIC_SETCMD : 0;
7139 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
7140 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
7141 ifr->ifr_opportunistic.ifo_inuse =
7142 udp_count_opportunistic(ifp->if_index, flags) +
7143 tcp_count_opportunistic(ifp->if_index, flags);
7144 }
7145
7146 if (result == EALREADY)
7147 result = 0;
7148
7149 return (result);
7150 }
7151
7152 int
7153 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
7154 {
7155 struct ifclassq *ifq;
7156 int err = 0;
7157
7158 if (!(ifp->if_eflags & IFEF_TXSTART))
7159 return (ENXIO);
7160
7161 *level = IFNET_THROTTLE_OFF;
7162
7163 ifq = &ifp->if_snd;
7164 IFCQ_LOCK(ifq);
7165 /* Throttling works only for IFCQ, not ALTQ instances */
7166 if (IFCQ_IS_ENABLED(ifq))
7167 IFCQ_GET_THROTTLE(ifq, *level, err);
7168 IFCQ_UNLOCK(ifq);
7169
7170 return (err);
7171 }
7172
7173 int
7174 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7175 {
7176 struct ifclassq *ifq;
7177 int err = 0;
7178
7179 if (!(ifp->if_eflags & IFEF_TXSTART))
7180 return (ENXIO);
7181
7182 ifq = &ifp->if_snd;
7183
7184 switch (level) {
7185 case IFNET_THROTTLE_OFF:
7186 case IFNET_THROTTLE_OPPORTUNISTIC:
7187 #if PF_ALTQ
7188 /* Throttling works only for IFCQ, not ALTQ instances */
7189 if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
7190 return (ENXIO);
7191 #endif /* PF_ALTQ */
7192 break;
7193 default:
7194 return (EINVAL);
7195 }
7196
7197 IFCQ_LOCK(ifq);
7198 if (IFCQ_IS_ENABLED(ifq))
7199 IFCQ_SET_THROTTLE(ifq, level, err);
7200 IFCQ_UNLOCK(ifq);
7201
7202 if (err == 0) {
7203 printf("%s: throttling level set to %d\n", if_name(ifp),
7204 level);
7205 if (level == IFNET_THROTTLE_OFF)
7206 ifnet_start(ifp);
7207 }
7208
7209 return (err);
7210 }
7211
7212 errno_t
7213 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7214 struct proc *p)
7215 {
7216 #pragma unused(p)
7217 errno_t result = 0;
7218 uint32_t flags;
7219 int level, category, subcategory;
7220
7221 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7222
7223 if (cmd == SIOCSIFLOG) {
7224 if ((result = priv_check_cred(kauth_cred_get(),
7225 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
7226 return (result);
7227
7228 level = ifr->ifr_log.ifl_level;
7229 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
7230 result = EINVAL;
7231
7232 flags = ifr->ifr_log.ifl_flags;
7233 if ((flags &= IFNET_LOGF_MASK) == 0)
7234 result = EINVAL;
7235
7236 category = ifr->ifr_log.ifl_category;
7237 subcategory = ifr->ifr_log.ifl_subcategory;
7238
7239 if (result == 0)
7240 result = ifnet_set_log(ifp, level, flags,
7241 category, subcategory);
7242 } else {
7243 result = ifnet_get_log(ifp, &level, &flags, &category,
7244 &subcategory);
7245 if (result == 0) {
7246 ifr->ifr_log.ifl_level = level;
7247 ifr->ifr_log.ifl_flags = flags;
7248 ifr->ifr_log.ifl_category = category;
7249 ifr->ifr_log.ifl_subcategory = subcategory;
7250 }
7251 }
7252
7253 return (result);
7254 }
7255
7256 int
7257 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
7258 int32_t category, int32_t subcategory)
7259 {
7260 int err = 0;
7261
7262 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
7263 VERIFY(flags & IFNET_LOGF_MASK);
7264
7265 /*
7266 * The logging level applies to all facilities; make sure to
7267 * update them all with the most current level.
7268 */
7269 flags |= ifp->if_log.flags;
7270
7271 if (ifp->if_output_ctl != NULL) {
7272 struct ifnet_log_params l;
7273
7274 bzero(&l, sizeof (l));
7275 l.level = level;
7276 l.flags = flags;
7277 l.flags &= ~IFNET_LOGF_DLIL;
7278 l.category = category;
7279 l.subcategory = subcategory;
7280
7281 /* Send this request to lower layers */
7282 if (l.flags != 0) {
7283 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7284 sizeof (l), &l);
7285 }
7286 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7287 /*
7288 * If targeted to the lower layers without an output
7289 * control callback registered on the interface, just
7290 * silently ignore facilities other than ours.
7291 */
7292 flags &= IFNET_LOGF_DLIL;
7293 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
7294 level = 0;
7295 }
7296
7297 if (err == 0) {
7298 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
7299 ifp->if_log.flags = 0;
7300 else
7301 ifp->if_log.flags |= flags;
7302
7303 log(LOG_INFO, "%s: logging level set to %d flags=%b "
7304 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
7305 ifp->if_log.level, ifp->if_log.flags,
7306 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
7307 category, subcategory);
7308 }
7309
7310 return (err);
7311 }
7312
7313 int
7314 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7315 int32_t *category, int32_t *subcategory)
7316 {
7317 if (level != NULL)
7318 *level = ifp->if_log.level;
7319 if (flags != NULL)
7320 *flags = ifp->if_log.flags;
7321 if (category != NULL)
7322 *category = ifp->if_log.category;
7323 if (subcategory != NULL)
7324 *subcategory = ifp->if_log.subcategory;
7325
7326 return (0);
7327 }
7328
7329 int
7330 ifnet_notify_address(struct ifnet *ifp, int af)
7331 {
7332 struct ifnet_notify_address_params na;
7333
7334 #if PF
7335 (void) pf_ifaddr_hook(ifp);
7336 #endif /* PF */
7337
7338 if (ifp->if_output_ctl == NULL)
7339 return (EOPNOTSUPP);
7340
7341 bzero(&na, sizeof (na));
7342 na.address_family = af;
7343
7344 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7345 sizeof (na), &na));
7346 }
7347
7348 errno_t
7349 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7350 {
7351 if (ifp == NULL || flowid == NULL) {
7352 return (EINVAL);
7353 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7354 !(ifp->if_refflags & IFRF_ATTACHED)) {
7355 return (ENXIO);
7356 }
7357
7358 *flowid = ifp->if_flowhash;
7359
7360 return (0);
7361 }
7362
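/*
 * Flow-control hooks for drivers using the if_start output model:
 * ifnet_disable_output() marks the start path IFSF_FLOW_CONTROLLED
 * and registers the interface flow hash in ifnet_fc_tree;
 * ifnet_flowadv(), called later with that hash, looks the entry up
 * and re-enables output via ifnet_enable_output(), which restarts
 * the starter thread.
 */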
7363 errno_t
7364 ifnet_disable_output(struct ifnet *ifp)
7365 {
7366 int err;
7367
7368 if (ifp == NULL) {
7369 return (EINVAL);
7370 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7371 !(ifp->if_refflags & IFRF_ATTACHED)) {
7372 return (ENXIO);
7373 }
7374
7375 if ((err = ifnet_fc_add(ifp)) == 0) {
7376 lck_mtx_lock_spin(&ifp->if_start_lock);
7377 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7378 lck_mtx_unlock(&ifp->if_start_lock);
7379 }
7380 return (err);
7381 }
7382
7383 errno_t
7384 ifnet_enable_output(struct ifnet *ifp)
7385 {
7386 if (ifp == NULL) {
7387 return (EINVAL);
7388 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7389 !(ifp->if_refflags & IFRF_ATTACHED)) {
7390 return (ENXIO);
7391 }
7392
7393 ifnet_start_common(ifp, 1);
7394 return (0);
7395 }
7396
7397 void
7398 ifnet_flowadv(uint32_t flowhash)
7399 {
7400 struct ifnet_fc_entry *ifce;
7401 struct ifnet *ifp;
7402
7403 ifce = ifnet_fc_get(flowhash);
7404 if (ifce == NULL)
7405 return;
7406
7407 VERIFY(ifce->ifce_ifp != NULL);
7408 ifp = ifce->ifce_ifp;
7409
7410 /* flow hash gets recalculated per attach, so check that it still matches */
7411 if (ifnet_is_attached(ifp, 1)) {
7412 if (ifp->if_flowhash == flowhash)
7413 (void) ifnet_enable_output(ifp);
7414 ifnet_decr_iorefcnt(ifp);
7415 }
7416 ifnet_fc_entry_free(ifce);
7417 }
7418
7419 /*
7420 * Function to compare ifnet_fc_entries in the ifnet flow control tree (three-way compare avoids unsigned wraparound)
7421 */
7422 static inline int
7423 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7424 {
7425 return ((fc1->ifce_flowhash > fc2->ifce_flowhash) - (fc1->ifce_flowhash < fc2->ifce_flowhash));
7426 }
7427
7428 static int
7429 ifnet_fc_add(struct ifnet *ifp)
7430 {
7431 struct ifnet_fc_entry keyfc, *ifce;
7432 uint32_t flowhash;
7433
7434 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
7435 VERIFY(ifp->if_flowhash != 0);
7436 flowhash = ifp->if_flowhash;
7437
7438 bzero(&keyfc, sizeof (keyfc));
7439 keyfc.ifce_flowhash = flowhash;
7440
7441 lck_mtx_lock_spin(&ifnet_fc_lock);
7442 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7443 if (ifce != NULL && ifce->ifce_ifp == ifp) {
7444 /* Entry is already in ifnet_fc_tree, return */
7445 lck_mtx_unlock(&ifnet_fc_lock);
7446 return (0);
7447 }
7448
7449 if (ifce != NULL) {
7450 /*
7451 * There is a different fc entry with the same flow hash
7452 * but different ifp pointer. There can be a collision
7453 * on flow hash but the probability is low. Let's just
7454 * avoid adding a second one when there is a collision.
7455 */
7456 lck_mtx_unlock(&ifnet_fc_lock);
7457 return (EAGAIN);
7458 }
7459
7460 /* become regular mutex */
7461 lck_mtx_convert_spin(&ifnet_fc_lock);
7462
7463 ifce = zalloc_noblock(ifnet_fc_zone);
7464 if (ifce == NULL) {
7465 /* memory allocation failed */
7466 lck_mtx_unlock(&ifnet_fc_lock);
7467 return (ENOMEM);
7468 }
7469 bzero(ifce, ifnet_fc_zone_size);
7470
7471 ifce->ifce_flowhash = flowhash;
7472 ifce->ifce_ifp = ifp;
7473
7474 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7475 lck_mtx_unlock(&ifnet_fc_lock);
7476 return (0);
7477 }
7478
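/*
 * ifnet_fc_add() above and ifnet_fc_get() below share a locking idiom:
 * the mutex is taken in spin mode for the short RB-tree lookup, then
 * converted to a regular (blockable) mutex before anything longer
 * running, such as the zone allocation.  A minimal sketch of that
 * idiom follows; the lock and the work are hypothetical placeholders,
 * not part of dlil.c.
 */
#if 0
static lck_mtx_t example_lock;		/* hypothetical; lck_mtx_init()'d elsewhere */

static void
example_lookup_then_work(void)
{
	lck_mtx_lock_spin(&example_lock);	/* cheap hold for a quick lookup */
	/* ... short, non-blocking lookup goes here ... */
	lck_mtx_convert_spin(&example_lock);	/* upgrade to a full mutex */
	/* ... longer work that may block, e.g. an allocation ... */
	lck_mtx_unlock(&example_lock);
}
#endif
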
7479 static struct ifnet_fc_entry *
7480 ifnet_fc_get(uint32_t flowhash)
7481 {
7482 struct ifnet_fc_entry keyfc, *ifce;
7483 struct ifnet *ifp;
7484
7485 bzero(&keyfc, sizeof (keyfc));
7486 keyfc.ifce_flowhash = flowhash;
7487
7488 lck_mtx_lock_spin(&ifnet_fc_lock);
7489 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7490 if (ifce == NULL) {
7491 /* Entry is not present in ifnet_fc_tree, return */
7492 lck_mtx_unlock(&ifnet_fc_lock);
7493 return (NULL);
7494 }
7495
7496 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7497
7498 VERIFY(ifce->ifce_ifp != NULL);
7499 ifp = ifce->ifce_ifp;
7500
7501 /* become regular mutex */
7502 lck_mtx_convert_spin(&ifnet_fc_lock);
7503
7504 if (!ifnet_is_attached(ifp, 0)) {
7505 /*
7506 * This ifp is either not attached, or it is in the
7507 * process of being detached; just don't process it.
7508 */
7509 ifnet_fc_entry_free(ifce);
7510 ifce = NULL;
7511 }
7512 lck_mtx_unlock(&ifnet_fc_lock);
7513
7514 return (ifce);
7515 }
7516
7517 static void
7518 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
7519 {
7520 zfree(ifnet_fc_zone, ifce);
7521 }
7522
7523 static uint32_t
7524 ifnet_calc_flowhash(struct ifnet *ifp)
7525 {
7526 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7527 uint32_t flowhash = 0;
7528
7529 if (ifnet_flowhash_seed == 0)
7530 ifnet_flowhash_seed = RandomULong();
7531
7532 bzero(&fh, sizeof (fh));
7533
7534 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
7535 fh.ifk_unit = ifp->if_unit;
7536 fh.ifk_flags = ifp->if_flags;
7537 fh.ifk_eflags = ifp->if_eflags;
7538 fh.ifk_capabilities = ifp->if_capabilities;
7539 fh.ifk_capenable = ifp->if_capenable;
7540 fh.ifk_output_sched_model = ifp->if_output_sched_model;
7541 fh.ifk_rand1 = RandomULong();
7542 fh.ifk_rand2 = RandomULong();
7543
7544 try_again:
7545 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
7546 if (flowhash == 0) {
7547 /* try to get a non-zero flowhash */
7548 ifnet_flowhash_seed = RandomULong();
7549 goto try_again;
7550 }
7551
7552 return (flowhash);
7553 }
7554
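/*
 * ifnet_calc_flowhash() above reserves the value 0 to mean "no flow
 * hash", so it re-rolls the seed and rehashes until the result is
 * nonzero.  A minimal sketch of that retry pattern; hash32() is a
 * hypothetical stand-in for net_flowhash(), illustrative only.
 */
#if 0
static uint32_t
nonzero_hash(const void *key, size_t len, uint32_t *seed)
{
	uint32_t h;

	do {
		h = hash32(key, len, *seed);	/* hypothetical hash routine */
		if (h == 0)
			*seed = RandomULong();	/* re-roll the seed and retry */
	} while (h == 0);

	return (h);
}
#endif
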
7555 int
7556 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
7557 uint16_t flags, uint8_t *data)
7558 {
7559 #pragma unused(flags)
7560 int error = 0;
7561
7562 switch (family) {
7563 case AF_INET:
7564 if_inetdata_lock_exclusive(ifp);
7565 if (IN_IFEXTRA(ifp) != NULL) {
7566 if (len == 0) {
7567 /* Allow clearing the signature */
7568 IN_IFEXTRA(ifp)->netsig_len = 0;
7569 bzero(IN_IFEXTRA(ifp)->netsig,
7570 sizeof (IN_IFEXTRA(ifp)->netsig));
7571 if_inetdata_lock_done(ifp);
7572 break;
7573 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
7574 error = EINVAL;
7575 if_inetdata_lock_done(ifp);
7576 break;
7577 }
7578 IN_IFEXTRA(ifp)->netsig_len = len;
7579 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
7580 } else {
7581 error = ENOMEM;
7582 }
7583 if_inetdata_lock_done(ifp);
7584 break;
7585
7586 case AF_INET6:
7587 if_inet6data_lock_exclusive(ifp);
7588 if (IN6_IFEXTRA(ifp) != NULL) {
7589 if (len == 0) {
7590 /* Allow clearing the signature */
7591 IN6_IFEXTRA(ifp)->netsig_len = 0;
7592 bzero(IN6_IFEXTRA(ifp)->netsig,
7593 sizeof (IN6_IFEXTRA(ifp)->netsig));
7594 if_inet6data_lock_done(ifp);
7595 break;
7596 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
7597 error = EINVAL;
7598 if_inet6data_lock_done(ifp);
7599 break;
7600 }
7601 IN6_IFEXTRA(ifp)->netsig_len = len;
7602 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
7603 } else {
7604 error = ENOMEM;
7605 }
7606 if_inet6data_lock_done(ifp);
7607 break;
7608
7609 default:
7610 error = EINVAL;
7611 break;
7612 }
7613
7614 return (error);
7615 }
7616
7617 int
7618 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
7619 uint16_t *flags, uint8_t *data)
7620 {
7621 int error = 0;
7622
7623 if (ifp == NULL || len == NULL || flags == NULL || data == NULL)
7624 return (EINVAL);
7625
7626 switch (family) {
7627 case AF_INET:
7628 if_inetdata_lock_shared(ifp);
7629 if (IN_IFEXTRA(ifp) != NULL) {
7630 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
7631 error = EINVAL;
7632 if_inetdata_lock_done(ifp);
7633 break;
7634 }
7635 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
7636 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
7637 else
7638 error = ENOENT;
7639 } else {
7640 error = ENOMEM;
7641 }
7642 if_inetdata_lock_done(ifp);
7643 break;
7644
7645 case AF_INET6:
7646 if_inet6data_lock_shared(ifp);
7647 if (IN6_IFEXTRA(ifp) != NULL) {
7648 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
7649 error = EINVAL;
7650 if_inet6data_lock_done(ifp);
7651 break;
7652 }
7653 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
7654 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
7655 else
7656 error = ENOENT;
7657 } else {
7658 error = ENOMEM;
7659 }
7660 if_inet6data_lock_done(ifp);
7661 break;
7662
7663 default:
7664 error = EINVAL;
7665 break;
7666 }
7667
7668 if (error == 0)
7669 *flags = 0;
7670
7671 return (error);
7672 }
7673
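/*
 * Callers of ifnet_get_netsignature() pass the capacity of their buffer
 * in *len and get the actual signature length back on success.  A
 * minimal usage sketch; the 64-byte buffer size is only an assumption
 * chosen to exceed the in-kernel signature storage, illustrative only.
 */
#if 0
static int
example_read_netsig(struct ifnet *ifp)
{
	uint8_t sig[64];		/* assumed large enough for the signature */
	uint8_t len = sizeof (sig);
	uint16_t flags;
	int err;

	err = ifnet_get_netsignature(ifp, AF_INET6, &len, &flags, sig);
	if (err == 0) {
		/* sig[0..len-1] now holds the IPv6 network signature */
	}
	return (err);
}
#endif
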
7674 static void
7675 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
7676 protocol_family_t pf)
7677 {
7678 #pragma unused(ifp)
7679 uint32_t did_sw;
7680
7681 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
7682 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
7683 return;
7684
7685 switch (pf) {
7686 case PF_INET:
7687 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
7688 if (did_sw & CSUM_DELAY_IP)
7689 hwcksum_dbg_finalized_hdr++;
7690 if (did_sw & CSUM_DELAY_DATA)
7691 hwcksum_dbg_finalized_data++;
7692 break;
7693 #if INET6
7694 case PF_INET6:
7695 /*
7696 * Checksum offload should not have been enabled when
7697 * extension headers exist; that also means that we
7698 * cannot force-finalize packets with extension headers.
7699 * Indicate to the callee that it should skip such cases
7700 * by setting optlen to -1.
7701 */
7702 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
7703 m->m_pkthdr.csum_flags);
7704 if (did_sw & CSUM_DELAY_IPV6_DATA)
7705 hwcksum_dbg_finalized_data++;
7706 break;
7707 #endif /* INET6 */
7708 default:
7709 return;
7710 }
7711 }
7712
7713 static void
7714 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
7715 protocol_family_t pf)
7716 {
7717 uint16_t sum;
7718 uint32_t hlen;
7719
7720 if (frame_header == NULL ||
7721 frame_header < (char *)mbuf_datastart(m) ||
7722 frame_header > (char *)m->m_data) {
7723 printf("%s: frame header pointer 0x%llx out of range "
7724 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
7725 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
7726 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
7727 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
7728 (uint64_t)VM_KERNEL_ADDRPERM(m));
7729 return;
7730 }
7731 hlen = (m->m_data - frame_header);
7732
7733 switch (pf) {
7734 case PF_INET:
7735 #if INET6
7736 case PF_INET6:
7737 #endif /* INET6 */
7738 break;
7739 default:
7740 return;
7741 }
7742
7743 /*
7744 * Force partial checksum offload; useful to simulate cases
7745 * where the hardware does not support partial checksum offload,
7746 * in order to validate correctness throughout the layers above.
7747 */
7748 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
7749 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
7750
7751 if (foff > (uint32_t)m->m_pkthdr.len)
7752 return;
7753
7754 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
7755
7756 /* Compute 16-bit 1's complement sum from forced offset */
7757 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
7758
7759 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
7760 m->m_pkthdr.csum_rx_val = sum;
7761 m->m_pkthdr.csum_rx_start = (foff + hlen);
7762
7763 hwcksum_dbg_partial_forced++;
7764 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
7765 }
7766
7767 /*
7768 * Partial checksum offload verification (and adjustment);
7769 * useful to validate and test cases where the hardware
7770 * supports partial checksum offload.
7771 */
7772 if ((m->m_pkthdr.csum_flags &
7773 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
7774 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
7775 uint32_t rxoff;
7776
7777 /* Start offset must begin after frame header */
7778 rxoff = m->m_pkthdr.csum_rx_start;
7779 if (hlen > rxoff) {
7780 hwcksum_dbg_bad_rxoff++;
7781 if (dlil_verbose) {
7782 printf("%s: partial cksum start offset %d "
7783 "is less than frame header length %d for "
7784 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
7785 (uint64_t)VM_KERNEL_ADDRPERM(m));
7786 }
7787 return;
7788 }
7789 rxoff -= hlen;
7790
7791 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
7792 /*
7793 * Compute the expected 16-bit 1's complement sum;
7794 * skip this if we've already computed it above
7795 * when partial checksum offload is forced.
7796 */
7797 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
7798
7799 /* Hardware or driver is buggy */
7800 if (sum != m->m_pkthdr.csum_rx_val) {
7801 hwcksum_dbg_bad_cksum++;
7802 if (dlil_verbose) {
7803 printf("%s: bad partial cksum value "
7804 "0x%x (expected 0x%x) for mbuf "
7805 "0x%llx [rx_start %d]\n",
7806 if_name(ifp),
7807 m->m_pkthdr.csum_rx_val, sum,
7808 (uint64_t)VM_KERNEL_ADDRPERM(m),
7809 m->m_pkthdr.csum_rx_start);
7810 }
7811 return;
7812 }
7813 }
7814 hwcksum_dbg_verified++;
7815
7816 /*
7817 * This code allows us to emulate hardware that performs
7818 * the 16-bit 1's complement sum beginning at various
7819 * start offset values.
7820 */
7821 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
7822 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
7823
7824 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
7825 return;
7826
7827 sum = m_adj_sum16(m, rxoff, aoff, sum);
7828
7829 m->m_pkthdr.csum_rx_val = sum;
7830 m->m_pkthdr.csum_rx_start = (aoff + hlen);
7831
7832 hwcksum_dbg_adjusted++;
7833 }
7834 }
7835 }
7836
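/*
 * The HWCKSUM_DBG_PARTIAL_RXOFF_ADJ path above relies on m_adj_sum16()
 * to move the starting point of an existing 16-bit 1's complement sum
 * without rescanning the whole packet: advancing the start offset
 * amounts to subtracting, in 1's complement arithmetic, the sum of the
 * bytes being skipped.  A simplified sketch of that idea, assuming the
 * new offset is ahead of the old one by an even number of bytes and
 * glossing over the 0x0000 vs. 0xffff representation details that the
 * real routine handles; illustrative only.
 */
#if 0
static uint16_t
advance_sum16(uint16_t sum, const uint8_t *skipped, size_t skiplen)
{
	uint32_t delta = 0, acc;
	size_t i;

	/* 1's complement sum of the bytes no longer covered */
	for (i = 0; i + 1 < skiplen; i += 2)
		delta += (uint32_t)((skipped[i] << 8) | skipped[i + 1]);
	while (delta >> 16)
		delta = (delta & 0xffff) + (delta >> 16);

	/* subtract it: in 1's complement arithmetic, -x is ~x */
	acc = (uint32_t)sum + (~delta & 0xffff);
	while (acc >> 16)
		acc = (acc & 0xffff) + (acc >> 16);

	return ((uint16_t)acc);
}
#endif
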
7837 static int
7838 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
7839 {
7840 #pragma unused(arg1, arg2)
7841 u_int32_t i;
7842 int err;
7843
7844 i = hwcksum_dbg_mode;
7845
7846 err = sysctl_handle_int(oidp, &i, 0, req);
7847 if (err != 0 || req->newptr == USER_ADDR_NULL)
7848 return (err);
7849
7850 if (hwcksum_dbg == 0)
7851 return (ENODEV);
7852
7853 if ((i & ~HWCKSUM_DBG_MASK) != 0)
7854 return (EINVAL);
7855
7856 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
7857
7858 return (err);
7859 }
7860
7861 static int
7862 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
7863 {
7864 #pragma unused(arg1, arg2)
7865 u_int32_t i;
7866 int err;
7867
7868 i = hwcksum_dbg_partial_rxoff_forced;
7869
7870 err = sysctl_handle_int(oidp, &i, 0, req);
7871 if (err != 0 || req->newptr == USER_ADDR_NULL)
7872 return (err);
7873
7874 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
7875 return (ENODEV);
7876
7877 hwcksum_dbg_partial_rxoff_forced = i;
7878
7879 return (err);
7880 }
7881
7882 static int
7883 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
7884 {
7885 #pragma unused(arg1, arg2)
7886 u_int32_t i;
7887 int err;
7888
7889 i = hwcksum_dbg_partial_rxoff_adj;
7890
7891 err = sysctl_handle_int(oidp, &i, 0, req);
7892 if (err != 0 || req->newptr == USER_ADDR_NULL)
7893 return (err);
7894
7895 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
7896 return (ENODEV);
7897
7898 hwcksum_dbg_partial_rxoff_adj = i;
7899
7900 return (err);
7901 }
7902
7903 static int
7904 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
7905 {
7906 #pragma unused(oidp, arg1, arg2)
7907 int err;
7908
7909 /* size-only probes (oldptr == NULL) are handled by SYSCTL_OUT below */
7912 if (req->newptr != USER_ADDR_NULL) {
7913 return (EPERM);
7914 }
7915 err = SYSCTL_OUT(req, &tx_chain_len_stats,
7916 sizeof(struct chain_len_stats));
7917
7918 return (err);
7919 }
7920
7921
7922 #if DEBUG
7923 /* Blob for sum16 verification */
7924 static uint8_t sumdata[] = {
7925 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
7926 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
7927 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
7928 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
7929 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
7930 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
7931 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
7932 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
7933 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
7934 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
7935 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
7936 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
7937 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
7938 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
7939 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
7940 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
7941 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
7942 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
7943 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
7944 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
7945 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
7946 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
7947 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
7948 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
7949 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
7950 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
7951 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
7952 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
7953 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
7954 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
7955 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
7956 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
7957 0xc8, 0x28, 0x02, 0x00, 0x00
7958 };
7959
7960 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
7961 static struct {
7962 int len;
7963 uint16_t sum;
7964 } sumtbl[] = {
7965 { 11, 0xcb6d },
7966 { 20, 0x20dd },
7967 { 27, 0xbabd },
7968 { 32, 0xf3e8 },
7969 { 37, 0x197d },
7970 { 43, 0x9eae },
7971 { 64, 0x4678 },
7972 { 127, 0x9399 },
7973 { 256, 0xd147 },
7974 { 325, 0x0358 }
7975 };
7976 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7977
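/*
 * The table above pins down expected 16-bit 1's complement sums over
 * prefixes of sumdata[].  For reference, a minimal contiguous-buffer
 * version of such a sum is sketched below; byte-ordering conventions
 * may differ from the optimized b_sum16()/m_sum16() routines, so this
 * is illustrative rather than a drop-in replacement.
 */
#if 0
static uint16_t
ref_sum16(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)	/* 16-bit words */
		sum += (uint32_t)((buf[i] << 8) | buf[i + 1]);
	if (len & 1)				/* pad odd trailing byte */
		sum += (uint32_t)(buf[len - 1] << 8);
	while (sum >> 16)			/* fold end-around carries */
		sum = (sum & 0xffff) + (sum >> 16);

	return ((uint16_t)sum);
}
#endif
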
7978 static void
7979 dlil_verify_sum16(void)
7980 {
7981 struct mbuf *m;
7982 uint8_t *buf;
7983 int n;
7984
7985 /* Make sure test data plus extra room for alignment fits in cluster */
7986 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
7987
7988 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
7989 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
7990 buf = mtod(m, uint8_t *); /* base address */
7991
7992 for (n = 0; n < SUMTBL_MAX; n++) {
7993 uint16_t len = sumtbl[n].len;
7994 int i;
7995
7996 /* Verify for all possible alignments */
7997 for (i = 0; i < (int)sizeof (uint64_t); i++) {
7998 uint16_t sum;
7999 uint8_t *c;
8000
8001 /* Copy over test data to mbuf */
8002 VERIFY(len <= sizeof (sumdata));
8003 c = buf + i;
8004 bcopy(sumdata, c, len);
8005
8006 /* Zero-offset test (align by data pointer) */
8007 m->m_data = (caddr_t)c;
8008 m->m_len = len;
8009 sum = m_sum16(m, 0, len);
8010
8011 /* Something is horribly broken; stop now */
8012 if (sum != sumtbl[n].sum) {
8013 panic("%s: broken m_sum16 for len=%d align=%d "
8014 "sum=0x%04x [expected=0x%04x]\n", __func__,
8015 len, i, sum, sumtbl[n].sum);
8016 /* NOTREACHED */
8017 }
8018
8019 /* Alignment test by offset (fixed data pointer) */
8020 m->m_data = (caddr_t)buf;
8021 m->m_len = i + len;
8022 sum = m_sum16(m, i, len);
8023
8024 /* Something is horribly broken; stop now */
8025 if (sum != sumtbl[n].sum) {
8026 panic("%s: broken m_sum16 for len=%d offset=%d "
8027 "sum=0x%04x [expected=0x%04x]\n", __func__,
8028 len, i, sum, sumtbl[n].sum);
8029 /* NOTREACHED */
8030 }
8031 #if INET
8032 /* Simple sum16 contiguous buffer test by alignment */
8033 sum = b_sum16(c, len);
8034
8035 /* Something is horribly broken; stop now */
8036 if (sum != sumtbl[n].sum) {
8037 panic("%s: broken b_sum16 for len=%d align=%d "
8038 "sum=0x%04x [expected=0x%04x]\n", __func__,
8039 len, i, sum, sumtbl[n].sum);
8040 /* NOTREACHED */
8041 }
8042 #endif /* INET */
8043 }
8044 }
8045 m_freem(m);
8046
8047 printf("DLIL: SUM16 self-tests PASSED\n");
8048 }
8049 #endif /* DEBUG */
8050
8051 #define CASE_STRINGIFY(x) case x: return #x
8052
8053 __private_extern__ const char *
8054 dlil_kev_dl_code_str(u_int32_t event_code)
8055 {
8056 switch (event_code) {
8057 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
8058 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
8059 CASE_STRINGIFY(KEV_DL_SIFMTU);
8060 CASE_STRINGIFY(KEV_DL_SIFPHYS);
8061 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
8062 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
8063 CASE_STRINGIFY(KEV_DL_ADDMULTI);
8064 CASE_STRINGIFY(KEV_DL_DELMULTI);
8065 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
8066 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
8067 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
8068 CASE_STRINGIFY(KEV_DL_LINK_OFF);
8069 CASE_STRINGIFY(KEV_DL_LINK_ON);
8070 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
8071 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
8072 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
8073 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
8074 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
8075 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
8076 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
8077 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
8078 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
8079 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
8080 CASE_STRINGIFY(KEV_DL_ISSUES);
8081 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
8082 default:
8083 break;
8084 }
8085 return ("");
8086 }
8087
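/*
 * CASE_STRINGIFY(KEV_DL_LINK_ON), for example, expands to
 * case KEV_DL_LINK_ON: return "KEV_DL_LINK_ON", which is what lets
 * dlil_kev_dl_code_str() above map an event code to its name.  A
 * hypothetical caller might use it for logging, as sketched below.
 */
#if 0
static void
example_log_dl_event(struct ifnet *ifp, u_int32_t event_code)
{
	printf("%s: kernel event %s (%u)\n", if_name(ifp),
	    dlil_kev_dl_code_str(event_code), event_code);
}
#endif
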
8088 /*
8089 * The sysctl name arguments mirror those of ifnet_get_local_ports_extended():
8090 * ifindex
8091 * protocol
8092 * flags
8093 */
8094 static int
8095 sysctl_get_ports_used SYSCTL_HANDLER_ARGS
8096 {
8097 #pragma unused(oidp)
8098 int *name = (int *)arg1;
8099 int namelen = arg2;
8100 int error = 0;
8101 int idx;
8102 protocol_family_t protocol;
8103 u_int32_t flags;
8104 ifnet_t ifp = NULL;
8105 u_int8_t *bitfield = NULL;
8106
8107 if (req->newptr) {
8108 error = EPERM;
8109 goto done;
8110 }
8111 if (namelen != 3) {
8112 error = ENOENT;
8113 goto done;
8114 }
8115
8116 if (req->oldptr == USER_ADDR_NULL) {
8117 req->oldidx = bitstr_size(65536);
8118 goto done;
8119 }
8120 if (req->oldlen < bitstr_size(65536)) {
8121 error = ENOMEM;
8122 goto done;
8123 }
8124
8125 idx = name[0];
8126 protocol = name[1];
8127 flags = name[2];
8128
8130 ifnet_head_lock_shared();
8131 if (idx < 0 || idx > if_index) {
8132 ifnet_head_done();
8133 error = ENOENT;
8134 goto done;
8135 }
8136 ifp = ifindex2ifnet[idx];
8137 ifnet_head_done();
8138
8139 bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK);
8140 if (bitfield == NULL) {
8141 error = ENOMEM;
8142 goto done;
8143 }
8144 error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
8145 if (error != 0) {
8146 printf("%s: ifnet_get_local_ports_extended() error %d\n",
8147 __func__, error);
8148 goto done;
8149 }
8150 error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
8151 done:
8152 if (bitfield != NULL)
8153 _FREE(bitfield, M_TEMP);
8154 return (error);
8155 }
8156
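/*
 * The handler above returns a bitmap with one bit per TCP/UDP port
 * (65536 bits).  A minimal sketch of scanning such a bitmap once it
 * has been copied out; the byte/bit layout assumed here matches the
 * bitstring(3) convention behind bitstr_size(), illustrative only.
 */
#if 0
static void
example_walk_port_bitmap(const uint8_t *bitfield)
{
	uint32_t port;

	for (port = 0; port < 65536; port++) {
		if (bitfield[port >> 3] & (1 << (port & 0x7))) {
			/* port is in use on the selected interface */
		}
	}
}
#endif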