1 /*
2 * Copyright (c) 1999-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_llreach.h>
71 #include <net/kpi_interfacefilter.h>
72 #include <net/classq/classq.h>
73 #include <net/classq/classq_sfb.h>
74 #include <net/flowhash.h>
75 #include <net/ntstat.h>
76
77 #if INET
78 #include <netinet/in_var.h>
79 #include <netinet/igmp_var.h>
80 #include <netinet/ip_var.h>
81 #include <netinet/tcp.h>
82 #include <netinet/tcp_var.h>
83 #include <netinet/udp.h>
84 #include <netinet/udp_var.h>
85 #include <netinet/if_ether.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_tclass.h>
88 #endif /* INET */
89
90 #if INET6
91 #include <netinet6/in6_var.h>
92 #include <netinet6/nd6.h>
93 #include <netinet6/mld6_var.h>
94 #include <netinet6/scope6_var.h>
95 #endif /* INET6 */
96
97 #include <libkern/OSAtomic.h>
98 #include <libkern/tree.h>
99
100 #include <dev/random/randomdev.h>
101 #include <machine/machine_routines.h>
102
103 #include <mach/thread_act.h>
104 #include <mach/sdt.h>
105
106 #if CONFIG_MACF
107 #include <sys/kauth.h>
108 #include <security/mac_framework.h>
109 #include <net/ethernet.h>
110 #include <net/firewire.h>
111 #endif
112
113 #if PF
114 #include <net/pfvar.h>
115 #endif /* PF */
116 #if PF_ALTQ
117 #include <net/altq/altq.h>
118 #endif /* PF_ALTQ */
119 #include <net/pktsched/pktsched.h>
120
121 #if NECP
122 #include <net/necp.h>
123 #endif /* NECP */
124
125 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
126 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
127 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
128 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
129 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
130
131 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
132 #define MAX_LINKADDR 4 /* LONGWORDS */
133 #define M_NKE M_IFADDR
134
135 #if 1
136 #define DLIL_PRINTF printf
137 #else
138 #define DLIL_PRINTF kprintf
139 #endif
140
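/*
 * Compile-time checks (via _CASSERT) that the named if_data field sits
 * at an offset that is a multiple of 8 bytes, since those counters are
 * updated with 64-bit atomic operations and must stay aligned.
 */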
141 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
142 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
143
144 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
145 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
146
147 enum {
148 kProtoKPI_v1 = 1,
149 kProtoKPI_v2 = 2
150 };
151
152 /*
153  * The list of if_proto structures in if_proto_hash[] is protected by
154  * the ifnet lock. The remaining fields are initialized at protocol
155  * attach time and never change; no lock is required as long as the
156  * caller holds a valid reference obtained via if_proto_ref().
157  */
158 struct if_proto {
159 SLIST_ENTRY(if_proto) next_hash;
160 u_int32_t refcount;
161 u_int32_t detached;
162 struct ifnet *ifp;
163 protocol_family_t protocol_family;
164 int proto_kpi;
165 union {
166 struct {
167 proto_media_input input;
168 proto_media_preout pre_output;
169 proto_media_event event;
170 proto_media_ioctl ioctl;
171 proto_media_detached detached;
172 proto_media_resolve_multi resolve_multi;
173 proto_media_send_arp send_arp;
174 } v1;
175 struct {
176 proto_media_input_v2 input;
177 proto_media_preout pre_output;
178 proto_media_event event;
179 proto_media_ioctl ioctl;
180 proto_media_detached detached;
181 proto_media_resolve_multi resolve_multi;
182 proto_media_send_arp send_arp;
183 } v2;
184 } kpi;
185 };
186
187 SLIST_HEAD(proto_hash_entry, if_proto);
188
189 #define DLIL_SDLMAXLEN 64
190 #define DLIL_SDLDATALEN \
191 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
192
193 struct dlil_ifnet {
194 struct ifnet dl_if; /* public ifnet */
195 /*
196 * DLIL private fields, protected by dl_if_lock
197 */
198 decl_lck_mtx_data(, dl_if_lock);
199 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
200 u_int32_t dl_if_flags; /* flags (below) */
201 u_int32_t dl_if_refcnt; /* refcnt */
202 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
203 void *dl_if_uniqueid; /* unique interface id */
204 size_t dl_if_uniqueid_len; /* length of the unique id */
205 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
206 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
207 struct {
208 struct ifaddr ifa; /* lladdr ifa */
209 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
210 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
211 } dl_if_lladdr;
212 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
213 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
214 ctrace_t dl_if_attach; /* attach PC stacktrace */
215 ctrace_t dl_if_detach; /* detach PC stacktrace */
216 };
217
218 /* Values for dl_if_flags (private to DLIL) */
219 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
220 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
221 #define DLIF_DEBUG 0x4 /* has debugging info */
222
223 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
224
225 /* For gdb */
226 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
227
228 struct dlil_ifnet_dbg {
229 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
230 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
231 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
232 /*
233 * Circular lists of ifnet_{reference,release} callers.
234 */
235 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
236 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
237 };
238
239 #define DLIL_TO_IFP(s) (&s->dl_if)
240 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
241
242 struct ifnet_filter {
243 TAILQ_ENTRY(ifnet_filter) filt_next;
244 u_int32_t filt_skip;
245 u_int32_t filt_flags;
246 ifnet_t filt_ifp;
247 const char *filt_name;
248 void *filt_cookie;
249 protocol_family_t filt_protocol;
250 iff_input_func filt_input;
251 iff_output_func filt_output;
252 iff_event_func filt_event;
253 iff_ioctl_func filt_ioctl;
254 iff_detached_func filt_detached;
255 };
256
257 struct proto_input_entry;
258
259 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
260 static lck_grp_t *dlil_lock_group;
261 lck_grp_t *ifnet_lock_group;
262 static lck_grp_t *ifnet_head_lock_group;
263 static lck_grp_t *ifnet_snd_lock_group;
264 static lck_grp_t *ifnet_rcv_lock_group;
265 lck_attr_t *ifnet_lock_attr;
266 decl_lck_rw_data(static, ifnet_head_lock);
267 decl_lck_mtx_data(static, dlil_ifnet_lock);
268 u_int32_t dlil_filter_disable_tso_count = 0;
269
270 #if DEBUG
271 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
272 #else
273 static unsigned int ifnet_debug; /* debugging (disabled) */
274 #endif /* !DEBUG */
275 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
276 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
277 static struct zone *dlif_zone; /* zone for dlil_ifnet */
278
279 #define DLIF_ZONE_MAX 64 /* maximum elements in zone */
280 #define DLIF_ZONE_NAME "ifnet" /* zone name */
281
282 static unsigned int dlif_filt_size; /* size of ifnet_filter */
283 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
284
285 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
286 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
287
288 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
289 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
290
291 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
292 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
293
294 static unsigned int dlif_proto_size; /* size of if_proto */
295 static struct zone *dlif_proto_zone; /* zone for if_proto */
296
297 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
298 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
299
300 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
301 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
302 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
303
304 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
305 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
306
307 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
308 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
309 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
310
311 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
312 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
313
314 static u_int32_t net_rtref;
315
316 static struct dlil_main_threading_info dlil_main_input_thread_info;
317 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
318 (struct dlil_threading_info *)&dlil_main_input_thread_info;
319
320 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
321 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
322 static void dlil_if_trace(struct dlil_ifnet *, int);
323 static void if_proto_ref(struct if_proto *);
324 static void if_proto_free(struct if_proto *);
325 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
326 static int dlil_ifp_proto_count(struct ifnet *);
327 static void if_flt_monitor_busy(struct ifnet *);
328 static void if_flt_monitor_unbusy(struct ifnet *);
329 static void if_flt_monitor_enter(struct ifnet *);
330 static void if_flt_monitor_leave(struct ifnet *);
331 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
332 char **, protocol_family_t);
333 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
334 protocol_family_t);
335 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
336 const struct sockaddr_dl *);
337 static int ifnet_lookup(struct ifnet *);
338 static void if_purgeaddrs(struct ifnet *);
339
340 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
341 struct mbuf *, char *);
342 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
343 struct mbuf *);
344 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
345 mbuf_t *, const struct sockaddr *, void *, char *, char *);
346 static void ifproto_media_event(struct ifnet *, protocol_family_t,
347 const struct kev_msg *);
348 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
349 unsigned long, void *);
350 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
351 struct sockaddr_dl *, size_t);
352 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
353 const struct sockaddr_dl *, const struct sockaddr *,
354 const struct sockaddr_dl *, const struct sockaddr *);
355
356 static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
357 static void ifp_if_start(struct ifnet *);
358 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
359 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
360 boolean_t poll, struct thread *tp);
361 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
362 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
363 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
364 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
365 protocol_family_t *);
366 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
367 const struct ifnet_demux_desc *, u_int32_t);
368 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
369 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
370 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
371 const struct sockaddr *, const char *, const char *);
372 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
373 const struct sockaddr *, const char *, const char *,
374 u_int32_t *, u_int32_t *);
375 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
376 static void ifp_if_free(struct ifnet *);
377 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
378 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
379 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
380
381 static void dlil_main_input_thread_func(void *, wait_result_t);
382 static void dlil_input_thread_func(void *, wait_result_t);
383 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
384 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
385 static void dlil_terminate_input_thread(struct dlil_threading_info *);
386 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
387 struct dlil_threading_info *, boolean_t);
388 static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
389 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
390 u_int32_t, ifnet_model_t, boolean_t);
391 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
392 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
393
394 #if DEBUG
395 static void dlil_verify_sum16(void);
396 #endif /* DEBUG */
397 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
398 protocol_family_t);
399 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
400 protocol_family_t);
401
402 static void ifnet_detacher_thread_func(void *, wait_result_t);
403 static int ifnet_detacher_thread_cont(int);
404 static void ifnet_detach_final(struct ifnet *);
405 static void ifnet_detaching_enqueue(struct ifnet *);
406 static struct ifnet *ifnet_detaching_dequeue(void);
407
408 static void ifnet_start_thread_fn(void *, wait_result_t);
409 static void ifnet_poll_thread_fn(void *, wait_result_t);
410 static void ifnet_poll(struct ifnet *);
411
412 static void ifp_src_route_copyout(struct ifnet *, struct route *);
413 static void ifp_src_route_copyin(struct ifnet *, struct route *);
414 #if INET6
415 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
416 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
417 #endif /* INET6 */
418
419 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
420 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
421 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
422 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
423 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
424 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
425 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
426 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
427 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
428 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
429 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
430 static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;
431
432 struct chain_len_stats tx_chain_len_stats;
433 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
434
435 /* The following are protected by dlil_ifnet_lock */
436 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
437 static u_int32_t ifnet_detaching_cnt;
438 static void *ifnet_delayed_run; /* wait channel for detaching thread */
439
440 decl_lck_mtx_data(static, ifnet_fc_lock);
441
442 static uint32_t ifnet_flowhash_seed;
443
444 struct ifnet_flowhash_key {
445 char ifk_name[IFNAMSIZ];
446 uint32_t ifk_unit;
447 uint32_t ifk_flags;
448 uint32_t ifk_eflags;
449 uint32_t ifk_capabilities;
450 uint32_t ifk_capenable;
451 uint32_t ifk_output_sched_model;
452 uint32_t ifk_rand1;
453 uint32_t ifk_rand2;
454 };
455
456 /* Flow control entry per interface */
457 struct ifnet_fc_entry {
458 RB_ENTRY(ifnet_fc_entry) ifce_entry;
459 u_int32_t ifce_flowhash;
460 struct ifnet *ifce_ifp;
461 };
462
463 static uint32_t ifnet_calc_flowhash(struct ifnet *);
464 static int ifce_cmp(const struct ifnet_fc_entry *,
465 const struct ifnet_fc_entry *);
466 static int ifnet_fc_add(struct ifnet *);
467 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
468 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
469
470 /* protected by ifnet_fc_lock */
471 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
472 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
473 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
474
475 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
476 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
477
478 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
479 #define IFNET_FC_ZONE_MAX 32
480
481 extern void bpfdetach(struct ifnet *);
482 extern void proto_input_run(void);
483
484 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
485 u_int32_t flags);
486 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
487 u_int32_t flags);
488
489 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
490
491 #if CONFIG_MACF
492 int dlil_lladdr_ckreq = 0;
493 #endif
494
495 #if DEBUG
496 int dlil_verbose = 1;
497 #else
498 int dlil_verbose = 0;
499 #endif /* DEBUG */
500 #if IFNET_INPUT_SANITY_CHK
501 /* sanity checking of input packet lists received */
502 static u_int32_t dlil_input_sanity_check = 0;
503 #endif /* IFNET_INPUT_SANITY_CHK */
504 /* rate limit debug messages */
505 struct timespec dlil_dbgrate = { 1, 0 };
506
507 SYSCTL_DECL(_net_link_generic_system);
508
509 #if CONFIG_MACF
510 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
511 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
512 "Require MACF system info check to expose link-layer address");
513 #endif
514
515 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
516 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
517
518 #define IF_SNDQ_MINLEN 32
519 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
520 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
521 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
522 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
523
524 #define IF_RCVQ_MINLEN 32
525 #define IF_RCVQ_MAXLEN 256
526 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
527 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
528 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
529 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
530
531 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
532 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
533 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
534 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
535 "ilog2 of EWMA decay rate of avg inbound packets");
536
537 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
538 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
539 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
540 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
541 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
542 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
543 "Q", "input poll mode freeze time");
544
545 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
546 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
547 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
548 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
549 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
550 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
551 "Q", "input poll sampling time");
552
553 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
554 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
555 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
556 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
557 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
558 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
559 "Q", "input poll interval (time)");
560
561 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
562 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
563 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
564 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
565 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
566
567 #define IF_RXPOLL_WLOWAT 10
568 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
569 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
570 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
571 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
572 "I", "input poll wakeup low watermark");
573
574 #define IF_RXPOLL_WHIWAT 100
575 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
576 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
577 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
578 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
579 "I", "input poll wakeup high watermark");
580
581 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
582 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
583 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
584 "max packets per poll call");
585
586 static u_int32_t if_rxpoll = 1;
587 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
588 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
589 sysctl_rxpoll, "I", "enable opportunistic input polling");
590
591 u_int32_t if_bw_smoothing_val = 3;
592 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
593 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");
594
595 u_int32_t if_bw_measure_size = 10;
596 SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
597 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
598
599 static u_int32_t cur_dlil_input_threads = 0;
600 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
601 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
602 "Current number of DLIL input threads");
603
604 #if IFNET_INPUT_SANITY_CHK
605 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
606 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
607 "Turn on sanity checking in DLIL input");
608 #endif /* IFNET_INPUT_SANITY_CHK */
609
610 static u_int32_t if_flowadv = 1;
611 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
612 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
613 "enable flow-advisory mechanism");
614
615 static u_int32_t if_delaybased_queue = 1;
616 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
617 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
618 "enable delay based dynamic queue sizing");
619
620 static uint64_t hwcksum_in_invalidated = 0;
621 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
622 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
623 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
624
625 uint32_t hwcksum_dbg = 0;
626 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
627 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
628 "enable hardware cksum debugging");
629
630 u_int32_t ifnet_start_delayed = 0;
631 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
632 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
633 "number of times start was delayed");
634
635 u_int32_t ifnet_delay_start_disabled = 0;
636 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
637 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
638 "number of times start was delayed");
639
640 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
641 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
642 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
643 #define HWCKSUM_DBG_MASK \
644 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
645 HWCKSUM_DBG_FINALIZE_FORCED)
646
647 static uint32_t hwcksum_dbg_mode = 0;
648 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
649 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
650 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
651
652 static uint64_t hwcksum_dbg_partial_forced = 0;
653 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
654 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
655 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
656
657 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
658 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
659 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
660 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
661
662 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
663 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
664 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
665 &hwcksum_dbg_partial_rxoff_forced, 0,
666 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
667 "forced partial cksum rx offset");
668
669 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
670 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
671 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
672 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
673 "adjusted partial cksum rx offset");
674
675 static uint64_t hwcksum_dbg_verified = 0;
676 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
677 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
678 &hwcksum_dbg_verified, "packets verified for having good checksum");
679
680 static uint64_t hwcksum_dbg_bad_cksum = 0;
681 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
682 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
683 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
684
685 static uint64_t hwcksum_dbg_bad_rxoff = 0;
686 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
687 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
688 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
689
690 static uint64_t hwcksum_dbg_adjusted = 0;
691 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
692 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
693 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
694
695 static uint64_t hwcksum_dbg_finalized_hdr = 0;
696 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
697 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
698 &hwcksum_dbg_finalized_hdr, "finalized headers");
699
700 static uint64_t hwcksum_dbg_finalized_data = 0;
701 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
702 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
703 &hwcksum_dbg_finalized_data, "finalized payloads");
704
705 uint32_t hwcksum_tx = 1;
706 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
707 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
708 "enable transmit hardware checksum offload");
709
710 uint32_t hwcksum_rx = 1;
711 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
712 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
713 "enable receive hardware checksum offload");
714
715 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
716 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
717 sysctl_tx_chain_len_stats, "S", "");
718
719 uint32_t tx_chain_len_count = 0;
720 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
721 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
722
723 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
724 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
725
726 #if (DEVELOPMENT || DEBUG)
727 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
728 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
729 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
730 #endif /* DEVELOPMENT || DEBUG */
731
732 unsigned int net_rxpoll = 1;
733 unsigned int net_affinity = 1;
734 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
735
736 extern u_int32_t inject_buckets;
737
738 static lck_grp_attr_t *dlil_grp_attributes = NULL;
739 static lck_attr_t *dlil_lck_attributes = NULL;
740
741
742 #define DLIL_INPUT_CHECK(m, ifp) { \
743 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
744 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
745 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
746 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
747 /* NOTREACHED */ \
748 } \
749 }
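/*
 * Input-path sanity check: panic if the mbuf is not a packet header or
 * was not stamped with the receiving ifnet (the loopback interface is
 * exempt from the rcvif match).
 */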
750
751 #define DLIL_EWMA(old, new, decay) do { \
752 u_int32_t _avg; \
753 if ((_avg = (old)) > 0) \
754 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
755 else \
756 _avg = (new); \
757 (old) = _avg; \
758 } while (0)
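/*
 * Equivalent to avg = ((2^decay - 1) * avg + new) / 2^decay; with the
 * default decay of 2 (see if_rxpoll_decay) each update computes
 * avg = (3 * avg + new) / 4, so the newest sample carries 1/4 of the
 * weight and older history decays geometrically.
 */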
759
760 #define MBPS (1ULL * 1000 * 1000)
761 #define GBPS (MBPS * 1000)
762
763 struct rxpoll_time_tbl {
764 u_int64_t speed; /* downlink speed */
765 u_int32_t plowat; /* packets low watermark */
766 u_int32_t phiwat; /* packets high watermark */
767 u_int32_t blowat; /* bytes low watermark */
768 u_int32_t bhiwat; /* bytes high watermark */
769 };
770
771 static struct rxpoll_time_tbl rxpoll_tbl[] = {
772 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
773 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
774 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
775 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
776 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
777 { 0, 0, 0, 0, 0 }
778 };
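/*
 * The table above maps a downlink speed to the packet/byte low and
 * high watermarks used when sizing opportunistic input polling; it is
 * presumably scanned for the first row whose speed covers the
 * interface's input bandwidth when the polling parameters are set.
 */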
779
780 int
781 proto_hash_value(u_int32_t protocol_family)
782 {
783 /*
784 * dlil_proto_unplumb_all() depends on the mapping between
785 * the hash bucket index and the protocol family defined
786 * here; future changes must be applied there as well.
787 */
788 switch (protocol_family) {
789 case PF_INET:
790 return (0);
791 case PF_INET6:
792 return (1);
793 case PF_VLAN:
794 return (2);
795 case PF_UNSPEC:
796 default:
797 return (3);
798 }
799 }
800
801 /*
802 * Caller must already be holding ifnet lock.
803 */
804 static struct if_proto *
805 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
806 {
807 struct if_proto *proto = NULL;
808 u_int32_t i = proto_hash_value(protocol_family);
809
810 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
811
812 if (ifp->if_proto_hash != NULL)
813 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
814
815 while (proto != NULL && proto->protocol_family != protocol_family)
816 proto = SLIST_NEXT(proto, next_hash);
817
818 if (proto != NULL)
819 if_proto_ref(proto);
820
821 return (proto);
822 }
823
824 static void
825 if_proto_ref(struct if_proto *proto)
826 {
827 atomic_add_32(&proto->refcount, 1);
828 }
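/*
 * References taken with if_proto_ref() are dropped via if_proto_free()
 * below; the last release is what runs the protocol's detached callback
 * and posts KEV_DL_PROTO_DETACHED.
 */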
829
830 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
831
832 static void
833 if_proto_free(struct if_proto *proto)
834 {
835 u_int32_t oldval;
836 struct ifnet *ifp = proto->ifp;
837 u_int32_t proto_family = proto->protocol_family;
838 struct kev_dl_proto_data ev_pr_data;
839
840 oldval = atomic_add_32_ov(&proto->refcount, -1);
841 if (oldval > 1)
842 return;
843
844 /* No more reference on this, protocol must have been detached */
845 VERIFY(proto->detached);
846
847 if (proto->proto_kpi == kProtoKPI_v1) {
848 if (proto->kpi.v1.detached)
849 proto->kpi.v1.detached(ifp, proto->protocol_family);
850 }
851 if (proto->proto_kpi == kProtoKPI_v2) {
852 if (proto->kpi.v2.detached)
853 proto->kpi.v2.detached(ifp, proto->protocol_family);
854 }
855
856 /*
857 * Cleanup routes that may still be in the routing table for that
858 * interface/protocol pair.
859 */
860 if_rtproto_del(ifp, proto_family);
861
862 /*
863  * The reserved field carries the number of protocols still attached
864  * (subject to change).
865 */
866 ifnet_lock_shared(ifp);
867 ev_pr_data.proto_family = proto_family;
868 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
869 ifnet_lock_done(ifp);
870
871 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
872 (struct net_event_data *)&ev_pr_data,
873 sizeof (struct kev_dl_proto_data));
874
875 zfree(dlif_proto_zone, proto);
876 }
877
878 __private_extern__ void
879 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
880 {
881 unsigned int type = 0;
882 int ass = 1;
883
884 switch (what) {
885 case IFNET_LCK_ASSERT_EXCLUSIVE:
886 type = LCK_RW_ASSERT_EXCLUSIVE;
887 break;
888
889 case IFNET_LCK_ASSERT_SHARED:
890 type = LCK_RW_ASSERT_SHARED;
891 break;
892
893 case IFNET_LCK_ASSERT_OWNED:
894 type = LCK_RW_ASSERT_HELD;
895 break;
896
897 case IFNET_LCK_ASSERT_NOTOWNED:
898 /* nothing to do here for RW lock; bypass assert */
899 ass = 0;
900 break;
901
902 default:
903 panic("bad ifnet assert type: %d", what);
904 /* NOTREACHED */
905 }
906 if (ass)
907 lck_rw_assert(&ifp->if_lock, type);
908 }
909
910 __private_extern__ void
911 ifnet_lock_shared(struct ifnet *ifp)
912 {
913 lck_rw_lock_shared(&ifp->if_lock);
914 }
915
916 __private_extern__ void
917 ifnet_lock_exclusive(struct ifnet *ifp)
918 {
919 lck_rw_lock_exclusive(&ifp->if_lock);
920 }
921
922 __private_extern__ void
923 ifnet_lock_done(struct ifnet *ifp)
924 {
925 lck_rw_done(&ifp->if_lock);
926 }
927
928 #if INET
929 __private_extern__ void
930 if_inetdata_lock_shared(struct ifnet *ifp)
931 {
932 lck_rw_lock_shared(&ifp->if_inetdata_lock);
933 }
934
935 __private_extern__ void
936 if_inetdata_lock_exclusive(struct ifnet *ifp)
937 {
938 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
939 }
940
941 __private_extern__ void
942 if_inetdata_lock_done(struct ifnet *ifp)
943 {
944 lck_rw_done(&ifp->if_inetdata_lock);
945 }
946 #endif
947
948 #if INET6
949 __private_extern__ void
950 if_inet6data_lock_shared(struct ifnet *ifp)
951 {
952 lck_rw_lock_shared(&ifp->if_inet6data_lock);
953 }
954
955 __private_extern__ void
956 if_inet6data_lock_exclusive(struct ifnet *ifp)
957 {
958 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
959 }
960
961 __private_extern__ void
962 if_inet6data_lock_done(struct ifnet *ifp)
963 {
964 lck_rw_done(&ifp->if_inet6data_lock);
965 }
966 #endif
967
968 __private_extern__ void
969 ifnet_head_lock_shared(void)
970 {
971 lck_rw_lock_shared(&ifnet_head_lock);
972 }
973
974 __private_extern__ void
975 ifnet_head_lock_exclusive(void)
976 {
977 lck_rw_lock_exclusive(&ifnet_head_lock);
978 }
979
980 __private_extern__ void
981 ifnet_head_done(void)
982 {
983 lck_rw_done(&ifnet_head_lock);
984 }
985
986 __private_extern__ void
987 ifnet_head_assert_exclusive(void)
988 {
989 lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
990 }
991
992 /*
993 * Caller must already be holding ifnet lock.
994 */
995 static int
996 dlil_ifp_proto_count(struct ifnet *ifp)
997 {
998 int i, count = 0;
999
1000 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1001
1002 if (ifp->if_proto_hash == NULL)
1003 goto done;
1004
1005 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1006 struct if_proto *proto;
1007 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1008 count++;
1009 }
1010 }
1011 done:
1012 return (count);
1013 }
1014
1015 __private_extern__ void
1016 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1017 u_int32_t event_code, struct net_event_data *event_data,
1018 u_int32_t event_data_len)
1019 {
1020 struct net_event_data ev_data;
1021 struct kev_msg ev_msg;
1022
1023 bzero(&ev_msg, sizeof (ev_msg));
1024 bzero(&ev_data, sizeof (ev_data));
1025 /*
1026  * A net event always starts with a net_event_data structure,
1027  * but the caller can generate a simple net event or
1028  * provide a longer event structure to post.
1029 */
1030 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1031 ev_msg.kev_class = KEV_NETWORK_CLASS;
1032 ev_msg.kev_subclass = event_subclass;
1033 ev_msg.event_code = event_code;
1034
1035 if (event_data == NULL) {
1036 event_data = &ev_data;
1037 event_data_len = sizeof (struct net_event_data);
1038 }
1039
1040 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1041 event_data->if_family = ifp->if_family;
1042 event_data->if_unit = (u_int32_t)ifp->if_unit;
1043
1044 ev_msg.dv[0].data_length = event_data_len;
1045 ev_msg.dv[0].data_ptr = event_data;
1046 ev_msg.dv[1].data_length = 0;
1047
1048 /* Don't update interface generation for quality and RRC state changes */
1049 bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
1050 (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
1051 event_code != KEV_DL_RRC_STATE_CHANGED));
1052
1053 dlil_event_internal(ifp, &ev_msg, update_generation);
1054 }
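/*
 * Illustrative call for a simple event with no extra payload (the
 * interface name/family/unit are filled in above); this is only a
 * sketch of how the routine is meant to be used:
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 */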
1055
1056 __private_extern__ int
1057 dlil_alloc_local_stats(struct ifnet *ifp)
1058 {
1059 int ret = EINVAL;
1060 void *buf, *base, **pbuf;
1061
1062 if (ifp == NULL)
1063 goto end;
1064
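/*
 * Each stats structure below is carved out of an oversized zone
 * element laid out as [ padding | saved buf pointer | 64-bit aligned
 * stats ], so the aligned address handed to the caller can be walked
 * back to the original allocation when it is freed (see the end: path).
 */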
1065 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1066 /* allocate tcpstat_local structure */
1067 buf = zalloc(dlif_tcpstat_zone);
1068 if (buf == NULL) {
1069 ret = ENOMEM;
1070 goto end;
1071 }
1072 bzero(buf, dlif_tcpstat_bufsize);
1073
1074 /* Get the 64-bit aligned base address for this object */
1075 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1076 sizeof (u_int64_t));
1077 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1078 ((intptr_t)buf + dlif_tcpstat_bufsize));
1079
1080 /*
1081 * Wind back a pointer size from the aligned base and
1082 * save the original address so we can free it later.
1083 */
1084 pbuf = (void **)((intptr_t)base - sizeof (void *));
1085 *pbuf = buf;
1086 ifp->if_tcp_stat = base;
1087
1088 /* allocate udpstat_local structure */
1089 buf = zalloc(dlif_udpstat_zone);
1090 if (buf == NULL) {
1091 ret = ENOMEM;
1092 goto end;
1093 }
1094 bzero(buf, dlif_udpstat_bufsize);
1095
1096 /* Get the 64-bit aligned base address for this object */
1097 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1098 sizeof (u_int64_t));
1099 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1100 ((intptr_t)buf + dlif_udpstat_bufsize));
1101
1102 /*
1103 * Wind back a pointer size from the aligned base and
1104 * save the original address so we can free it later.
1105 */
1106 pbuf = (void **)((intptr_t)base - sizeof (void *));
1107 *pbuf = buf;
1108 ifp->if_udp_stat = base;
1109
1110 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
1111 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
1112
1113 ret = 0;
1114 }
1115
1116 if (ifp->if_ipv4_stat == NULL) {
1117 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1118 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1119 if (ifp->if_ipv4_stat == NULL) {
1120 ret = ENOMEM;
1121 goto end;
1122 }
1123 }
1124
1125 if (ifp->if_ipv6_stat == NULL) {
1126 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1127 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1128 if (ifp->if_ipv6_stat == NULL) {
1129 ret = ENOMEM;
1130 goto end;
1131 }
1132 }
1133 end:
1134 if (ret != 0) {
1135 if (ifp->if_tcp_stat != NULL) {
1136 pbuf = (void **)
1137 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
1138 zfree(dlif_tcpstat_zone, *pbuf);
1139 ifp->if_tcp_stat = NULL;
1140 }
1141 if (ifp->if_udp_stat != NULL) {
1142 pbuf = (void **)
1143 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
1144 zfree(dlif_udpstat_zone, *pbuf);
1145 ifp->if_udp_stat = NULL;
1146 }
1147 if (ifp->if_ipv4_stat != NULL) {
1148 FREE(ifp->if_ipv4_stat, M_TEMP);
1149 ifp->if_ipv4_stat = NULL;
1150 }
1151 if (ifp->if_ipv6_stat != NULL) {
1152 FREE(ifp->if_ipv6_stat, M_TEMP);
1153 ifp->if_ipv6_stat = NULL;
1154 }
1155 }
1156
1157 return (ret);
1158 }
1159
1160 static int
1161 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1162 {
1163 thread_continue_t func;
1164 u_int32_t limit;
1165 int error;
1166
1167 /* NULL ifp indicates the main input thread, called at dlil_init time */
1168 if (ifp == NULL) {
1169 func = dlil_main_input_thread_func;
1170 VERIFY(inp == dlil_main_input_thread);
1171 (void) strlcat(inp->input_name,
1172 "main_input", DLIL_THREADNAME_LEN);
1173 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1174 func = dlil_rxpoll_input_thread_func;
1175 VERIFY(inp != dlil_main_input_thread);
1176 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1177 "%s_input_poll", if_name(ifp));
1178 } else {
1179 func = dlil_input_thread_func;
1180 VERIFY(inp != dlil_main_input_thread);
1181 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1182 "%s_input", if_name(ifp));
1183 }
1184 VERIFY(inp->input_thr == THREAD_NULL);
1185
1186 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1187 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1188
1189 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1190 inp->ifp = ifp; /* NULL for main input thread */
1191
1192 net_timerclear(&inp->mode_holdtime);
1193 net_timerclear(&inp->mode_lasttime);
1194 net_timerclear(&inp->sample_holdtime);
1195 net_timerclear(&inp->sample_lasttime);
1196 net_timerclear(&inp->dbg_lasttime);
1197
1198 /*
1199 * For interfaces that support opportunistic polling, set the
1200 * low and high watermarks for outstanding inbound packets/bytes.
1201 * Also define freeze times for transitioning between modes
1202 * and updating the average.
1203 */
1204 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1205 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1206 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1207 } else {
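/* main input thread or non-polling interface: effectively unbounded receive queue */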
1208 limit = (u_int32_t)-1;
1209 }
1210
1211 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
1212 if (inp == dlil_main_input_thread) {
1213 struct dlil_main_threading_info *inpm =
1214 (struct dlil_main_threading_info *)inp;
1215 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
1216 }
1217
1218 error = kernel_thread_start(func, inp, &inp->input_thr);
1219 if (error == KERN_SUCCESS) {
1220 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1221 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
1222 /*
1223 * We create an affinity set so that the matching workloop
1224 * thread or the starter thread (for loopback) can be
1225 * scheduled on the same processor set as the input thread.
1226 */
1227 if (net_affinity) {
1228 struct thread *tp = inp->input_thr;
1229 u_int32_t tag;
1230 /*
1231 * Randomize to reduce the probability
1232 * of affinity tag namespace collision.
1233 */
1234 read_random(&tag, sizeof (tag));
1235 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1236 thread_reference(tp);
1237 inp->tag = tag;
1238 inp->net_affinity = TRUE;
1239 }
1240 }
1241 } else if (inp == dlil_main_input_thread) {
1242 panic_plain("%s: couldn't create main input thread", __func__);
1243 /* NOTREACHED */
1244 } else {
1245 panic_plain("%s: couldn't create %s input thread", __func__,
1246 if_name(ifp));
1247 /* NOTREACHED */
1248 }
1249 OSAddAtomic(1, &cur_dlil_input_threads);
1250
1251 return (error);
1252 }
1253
1254 static void
1255 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1256 {
1257 struct ifnet *ifp;
1258
1259 VERIFY(current_thread() == inp->input_thr);
1260 VERIFY(inp != dlil_main_input_thread);
1261
1262 OSAddAtomic(-1, &cur_dlil_input_threads);
1263
1264 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1265 lck_grp_free(inp->lck_grp);
1266
1267 inp->input_waiting = 0;
1268 inp->wtot = 0;
1269 bzero(inp->input_name, sizeof (inp->input_name));
1270 ifp = inp->ifp;
1271 inp->ifp = NULL;
1272 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1273 qlimit(&inp->rcvq_pkts) = 0;
1274 bzero(&inp->stats, sizeof (inp->stats));
1275
1276 VERIFY(!inp->net_affinity);
1277 inp->input_thr = THREAD_NULL;
1278 VERIFY(inp->wloop_thr == THREAD_NULL);
1279 VERIFY(inp->poll_thr == THREAD_NULL);
1280 VERIFY(inp->tag == 0);
1281
1282 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1283 bzero(&inp->tstats, sizeof (inp->tstats));
1284 bzero(&inp->pstats, sizeof (inp->pstats));
1285 bzero(&inp->sstats, sizeof (inp->sstats));
1286
1287 net_timerclear(&inp->mode_holdtime);
1288 net_timerclear(&inp->mode_lasttime);
1289 net_timerclear(&inp->sample_holdtime);
1290 net_timerclear(&inp->sample_lasttime);
1291 net_timerclear(&inp->dbg_lasttime);
1292
1293 #if IFNET_INPUT_SANITY_CHK
1294 inp->input_mbuf_cnt = 0;
1295 #endif /* IFNET_INPUT_SANITY_CHK */
1296
1297 if (dlil_verbose) {
1298 printf("%s: input thread terminated\n",
1299 if_name(ifp));
1300 }
1301
1302 /* for the extra refcnt from kernel_thread_start() */
1303 thread_deallocate(current_thread());
1304
1305 /* this is the end */
1306 thread_terminate(current_thread());
1307 /* NOTREACHED */
1308 }
1309
1310 static kern_return_t
1311 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1312 {
1313 thread_affinity_policy_data_t policy;
1314
1315 bzero(&policy, sizeof (policy));
1316 policy.affinity_tag = tag;
1317 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1318 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1319 }
1320
1321 void
1322 dlil_init(void)
1323 {
1324 thread_t thread = THREAD_NULL;
1325
1326 /*
1327 * The following fields must be 64-bit aligned for atomic operations.
1328 */
1329 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1330 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1331 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1332 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1333 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1334 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1335 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1336 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1337 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1338 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1339 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1340 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1341 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1342 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1343 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1344
1345 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1346 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1347 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1348 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1349 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1350 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1351 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1352 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1353 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1354 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1355 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1356 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1357 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1358 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1359 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1360
1361 /*
1362 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1363 */
1364 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1365 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1366 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1367 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1368 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1369 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1370 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1371 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1372 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1373 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1374 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1375 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1376 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1377
1378 /*
1379 * ... as well as the mbuf checksum flags counterparts.
1380 */
1381 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1382 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1383 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1384 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1385 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1386 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1387 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1388 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1389 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1390 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1391
1392 /*
1393 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1394 */
1395 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1396 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1397
1398 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1399 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1400 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1401 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1402
1403 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1404 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1405 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1406
1407 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1408 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1409 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1410 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1411 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1412 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1413 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1414 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1415 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1416 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1417 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1418 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1419 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1420 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1421 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1422 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1423
1424 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1425 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1426 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1427 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1428 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1429 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1430 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1431
1432 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1433 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1434
1435 PE_parse_boot_argn("net_affinity", &net_affinity,
1436 sizeof (net_affinity));
1437
1438 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1439
1440 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
1441
1442 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1443
1444 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1445 sizeof (struct dlil_ifnet_dbg);
1446 /* Enforce 64-bit alignment for dlil_ifnet structure */
1447 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1448 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1449 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1450 0, DLIF_ZONE_NAME);
1451 if (dlif_zone == NULL) {
1452 panic_plain("%s: failed allocating %s", __func__,
1453 DLIF_ZONE_NAME);
1454 /* NOTREACHED */
1455 }
1456 zone_change(dlif_zone, Z_EXPAND, TRUE);
1457 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1458
1459 dlif_filt_size = sizeof (struct ifnet_filter);
1460 dlif_filt_zone = zinit(dlif_filt_size,
1461 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1462 if (dlif_filt_zone == NULL) {
1463 panic_plain("%s: failed allocating %s", __func__,
1464 DLIF_FILT_ZONE_NAME);
1465 /* NOTREACHED */
1466 }
1467 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1468 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1469
1470 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1471 dlif_phash_zone = zinit(dlif_phash_size,
1472 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1473 if (dlif_phash_zone == NULL) {
1474 panic_plain("%s: failed allocating %s", __func__,
1475 DLIF_PHASH_ZONE_NAME);
1476 /* NOTREACHED */
1477 }
1478 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1479 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1480
1481 dlif_proto_size = sizeof (struct if_proto);
1482 dlif_proto_zone = zinit(dlif_proto_size,
1483 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1484 if (dlif_proto_zone == NULL) {
1485 panic_plain("%s: failed allocating %s", __func__,
1486 DLIF_PROTO_ZONE_NAME);
1487 /* NOTREACHED */
1488 }
1489 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1490 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1491
1492 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1493 /* Enforce 64-bit alignment for tcpstat_local structure */
1494 dlif_tcpstat_bufsize =
1495 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1496 dlif_tcpstat_bufsize =
1497 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1498 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1499 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1500 DLIF_TCPSTAT_ZONE_NAME);
1501 if (dlif_tcpstat_zone == NULL) {
1502 panic_plain("%s: failed allocating %s", __func__,
1503 DLIF_TCPSTAT_ZONE_NAME);
1504 /* NOTREACHED */
1505 }
1506 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1507 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1508
1509 dlif_udpstat_size = sizeof (struct udpstat_local);
1510 /* Enforce 64-bit alignment for udpstat_local structure */
1511 dlif_udpstat_bufsize =
1512 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1513 dlif_udpstat_bufsize =
1514 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1515 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1516 DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1517 DLIF_UDPSTAT_ZONE_NAME);
1518 if (dlif_udpstat_zone == NULL) {
1519 panic_plain("%s: failed allocating %s", __func__,
1520 DLIF_UDPSTAT_ZONE_NAME);
1521 /* NOTREACHED */
1522 }
1523 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1524 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1525
1526 ifnet_llreach_init();
1527
1528 TAILQ_INIT(&dlil_ifnet_head);
1529 TAILQ_INIT(&ifnet_head);
1530 TAILQ_INIT(&ifnet_detaching_head);
1531 TAILQ_INIT(&ifnet_ordered_head);
1532
1533 /* Setup the lock groups we will use */
1534 dlil_grp_attributes = lck_grp_attr_alloc_init();
1535
1536 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1537 dlil_grp_attributes);
1538 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1539 dlil_grp_attributes);
1540 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1541 dlil_grp_attributes);
1542 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1543 dlil_grp_attributes);
1544 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1545 dlil_grp_attributes);
1546
1547 /* Setup the lock attributes we will use */
1548 dlil_lck_attributes = lck_attr_alloc_init();
1549
1550 ifnet_lock_attr = lck_attr_alloc_init();
1551
1552 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1553 dlil_lck_attributes);
1554 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1555
1556 /* Setup interface flow control related items */
1557 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1558
1559 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1560 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1561 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1562 if (ifnet_fc_zone == NULL) {
1563 panic_plain("%s: failed allocating %s", __func__,
1564 IFNET_FC_ZONE_NAME);
1565 /* NOTREACHED */
1566 }
1567 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1568 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1569
1570 /* Initialize interface address subsystem */
1571 ifa_init();
1572
1573 #if PF
1574 /* Initialize the packet filter */
1575 pfinit();
1576 #endif /* PF */
1577
1578 /* Initialize queue algorithms */
1579 classq_init();
1580
1581 /* Initialize packet schedulers */
1582 pktsched_init();
1583
1584 /* Initialize flow advisory subsystem */
1585 flowadv_init();
1586
1587 /* Initialize the pktap virtual interface */
1588 pktap_init();
1589
1590 /* Initialize the service class to dscp map */
1591 net_qos_map_init();
1592
1593 #if DEBUG
1594 /* Run self-tests */
1595 dlil_verify_sum16();
1596 #endif /* DEBUG */
1597
1598 /*
1599 * Create and start up the main DLIL input thread and the interface
1600 * detacher thread once everything is initialized.
1601 */
1602 dlil_create_input_thread(NULL, dlil_main_input_thread);
1603
1604 if (kernel_thread_start(ifnet_detacher_thread_func,
1605 NULL, &thread) != KERN_SUCCESS) {
1606 panic_plain("%s: couldn't create detacher thread", __func__);
1607 /* NOTREACHED */
1608 }
1609 thread_deallocate(thread);
1610 }
1611
1612 static void
1613 if_flt_monitor_busy(struct ifnet *ifp)
1614 {
1615 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1616
1617 ++ifp->if_flt_busy;
1618 VERIFY(ifp->if_flt_busy != 0);
1619 }
1620
1621 static void
1622 if_flt_monitor_unbusy(struct ifnet *ifp)
1623 {
1624 if_flt_monitor_leave(ifp);
1625 }
1626
1627 static void
1628 if_flt_monitor_enter(struct ifnet *ifp)
1629 {
1630 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1631
1632 while (ifp->if_flt_busy) {
1633 ++ifp->if_flt_waiters;
1634 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1635 (PZERO - 1), "if_flt_monitor", NULL);
1636 }
1637 if_flt_monitor_busy(ifp);
1638 }
1639
1640 static void
1641 if_flt_monitor_leave(struct ifnet *ifp)
1642 {
1643 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1644
1645 VERIFY(ifp->if_flt_busy != 0);
1646 --ifp->if_flt_busy;
1647
1648 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1649 ifp->if_flt_waiters = 0;
1650 wakeup(&ifp->if_flt_head);
1651 }
1652 }
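
/*
 * Usage sketch (illustrative, not part of the original sources): the
 * filter monitor is a busy count plus wait channel protected by
 * if_flt_lock.  Walkers of if_flt_head pin the list so it cannot
 * change while they drop the lock to call out to filters; writers
 * wait in if_flt_monitor_enter() until all walkers have left.
 *
 *	lck_mtx_lock_spin(&ifp->if_flt_lock);
 *	if_flt_monitor_busy(ifp);	// walker: pin the filter list
 *	...				// may drop/retake if_flt_lock
 *	if_flt_monitor_unbusy(ifp);	// walker: unpin, wake any waiters
 *	lck_mtx_unlock(&ifp->if_flt_lock);
 *
 *	lck_mtx_lock(&ifp->if_flt_lock);
 *	if_flt_monitor_enter(ifp);	// writer: wait for walkers to leave
 *	...				// safe to insert/remove filters
 *	if_flt_monitor_leave(ifp);
 *	lck_mtx_unlock(&ifp->if_flt_lock);
 */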
1653
1654 __private_extern__ int
1655 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1656 interface_filter_t *filter_ref, u_int32_t flags)
1657 {
1658 int retval = 0;
1659 struct ifnet_filter *filter = NULL;
1660
1661 ifnet_head_lock_shared();
1662 /* Check that the interface is in the global list */
1663 if (!ifnet_lookup(ifp)) {
1664 retval = ENXIO;
1665 goto done;
1666 }
1667
1668 filter = zalloc(dlif_filt_zone);
1669 if (filter == NULL) {
1670 retval = ENOMEM;
1671 goto done;
1672 }
1673 bzero(filter, dlif_filt_size);
1674
1675 /* refcnt held above during lookup */
1676 filter->filt_flags = flags;
1677 filter->filt_ifp = ifp;
1678 filter->filt_cookie = if_filter->iff_cookie;
1679 filter->filt_name = if_filter->iff_name;
1680 filter->filt_protocol = if_filter->iff_protocol;
1681 filter->filt_input = if_filter->iff_input;
1682 filter->filt_output = if_filter->iff_output;
1683 filter->filt_event = if_filter->iff_event;
1684 filter->filt_ioctl = if_filter->iff_ioctl;
1685 filter->filt_detached = if_filter->iff_detached;
1686
1687 lck_mtx_lock(&ifp->if_flt_lock);
1688 if_flt_monitor_enter(ifp);
1689
1690 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1691 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1692
1693 if_flt_monitor_leave(ifp);
1694 lck_mtx_unlock(&ifp->if_flt_lock);
1695
1696 *filter_ref = filter;
1697
1698 /*
1699 * Bump filter count and route_generation ID to let TCP
1700 * know it shouldn't do TSO on this connection
1701 */
1702 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1703 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1704 routegenid_update();
1705 }
1706 if (dlil_verbose) {
1707 printf("%s: %s filter attached\n", if_name(ifp),
1708 if_filter->iff_name);
1709 }
1710 done:
1711 ifnet_head_done();
1712 if (retval != 0 && ifp != NULL) {
1713 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1714 if_name(ifp), if_filter->iff_name, retval);
1715 }
1716 if (retval != 0 && filter != NULL)
1717 zfree(dlif_filt_zone, filter);
1718
1719 return (retval);
1720 }
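
/*
 * Illustrative example (not from the original sources): kernel clients
 * normally reach this routine through the interface filter KPI in
 * kpi_interfacefilter.h; the callback names below are hypothetical.
 *
 *	struct iff_filter flt = {
 *		.iff_cookie   = my_cookie,		// echoed to callbacks
 *		.iff_name     = "com.example.filter",
 *		.iff_protocol = 0,			// 0 matches all protocols
 *		.iff_input    = my_input_cb,		// may be NULL
 *		.iff_output   = my_output_cb,		// may be NULL
 *		.iff_detached = my_detached_cb,
 *	};
 *	interface_filter_t ref;
 *
 *	if (iflt_attach(ifp, &flt, &ref) == 0) {
 *		...
 *		iflt_detach(ref);	// ends up in dlil_detach_filter()
 *	}
 *
 * Note that a filter attached without DLIL_IFF_TSO in its flags bumps
 * dlil_filter_disable_tso_count, so TCP backs off TSO while the filter
 * is present (see above).
 */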
1721
1722 static int
1723 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1724 {
1725 int retval = 0;
1726
1727 if (detached == 0) {
1728 ifnet_t ifp = NULL;
1729
1730 ifnet_head_lock_shared();
1731 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1732 interface_filter_t entry = NULL;
1733
1734 lck_mtx_lock(&ifp->if_flt_lock);
1735 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1736 if (entry != filter || entry->filt_skip)
1737 continue;
1738 /*
1739 * We've found a match; since it's possible
1740 * that the thread gets blocked in the monitor,
1741 * we do the lock dance. Interface should
1742 * not be detached since we still have a use
1743 * count held during filter attach.
1744 */
1745 entry->filt_skip = 1; /* skip input/output */
1746 lck_mtx_unlock(&ifp->if_flt_lock);
1747 ifnet_head_done();
1748
1749 lck_mtx_lock(&ifp->if_flt_lock);
1750 if_flt_monitor_enter(ifp);
1751 lck_mtx_assert(&ifp->if_flt_lock,
1752 LCK_MTX_ASSERT_OWNED);
1753
1754 /* Remove the filter from the list */
1755 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1756 filt_next);
1757
1758 if_flt_monitor_leave(ifp);
1759 lck_mtx_unlock(&ifp->if_flt_lock);
1760 if (dlil_verbose) {
1761 printf("%s: %s filter detached\n",
1762 if_name(ifp), filter->filt_name);
1763 }
1764 goto destroy;
1765 }
1766 lck_mtx_unlock(&ifp->if_flt_lock);
1767 }
1768 ifnet_head_done();
1769
1770 /* filter parameter is not a valid filter ref */
1771 retval = EINVAL;
1772 goto done;
1773 }
1774
1775 if (dlil_verbose)
1776 printf("%s filter detached\n", filter->filt_name);
1777
1778 destroy:
1779
1780 /* Call the detached function if there is one */
1781 if (filter->filt_detached)
1782 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1783
1784 /*
1785 * Decrease filter count and bump route_generation ID to let TCP
1786 * know it should reevaluate doing TSO or not
1787 */
1788 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1789 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1790 routegenid_update();
1791 }
1792
1793 /* Free the filter */
1794 zfree(dlif_filt_zone, filter);
1795 filter = NULL;
1796 done:
1797 if (retval != 0 && filter != NULL) {
1798 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1799 filter->filt_name, retval);
1800 }
1801
1802 return (retval);
1803 }
1804
1805 __private_extern__ void
1806 dlil_detach_filter(interface_filter_t filter)
1807 {
1808 if (filter == NULL)
1809 return;
1810 dlil_detach_filter_internal(filter, 0);
1811 }
1812
1813 /*
1814 * Main input thread:
1815 *
1816 * a) handles all inbound packets for lo0
1817 * b) handles all inbound packets for interfaces with no dedicated
1818 * input thread (e.g. anything but Ethernet/PDP or those that support
1819 * opportunistic polling.)
1820 * c) protocol registrations
1821 * d) packet injections
1822 */
1823 __attribute__((noreturn))
1824 static void
1825 dlil_main_input_thread_func(void *v, wait_result_t w)
1826 {
1827 #pragma unused(w)
1828 struct dlil_main_threading_info *inpm = v;
1829 struct dlil_threading_info *inp = v;
1830
1831 VERIFY(inp == dlil_main_input_thread);
1832 VERIFY(inp->ifp == NULL);
1833 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1834
1835 while (1) {
1836 struct mbuf *m = NULL, *m_loop = NULL;
1837 u_int32_t m_cnt, m_cnt_loop;
1838 boolean_t proto_req;
1839
1840 lck_mtx_lock_spin(&inp->input_lck);
1841
1842 /* Wait until there is work to be done */
1843 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1844 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1845 (void) msleep(&inp->input_waiting, &inp->input_lck,
1846 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1847 }
1848
1849 inp->input_waiting |= DLIL_INPUT_RUNNING;
1850 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1851
1852 /* Main input thread cannot be terminated */
1853 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
1854
1855 proto_req = (inp->input_waiting &
1856 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
1857
1858 /* Packets for non-dedicated interfaces other than lo0 */
1859 m_cnt = qlen(&inp->rcvq_pkts);
1860 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
1861
1862 /* Packets exclusive to lo0 */
1863 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
1864 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
1865
1866 inp->wtot = 0;
1867
1868 lck_mtx_unlock(&inp->input_lck);
1869
1870 /*
1871 * NOTE warning %%% attention !!!!
1872 * We should think about putting some thread starvation
1873 * safeguards if we deal with long chains of packets.
1874 */
1875 if (m_loop != NULL)
1876 dlil_input_packet_list_extended(lo_ifp, m_loop,
1877 m_cnt_loop, inp->mode);
1878
1879 if (m != NULL)
1880 dlil_input_packet_list_extended(NULL, m,
1881 m_cnt, inp->mode);
1882
1883 if (proto_req)
1884 proto_input_run();
1885 }
1886
1887 /* NOTREACHED */
1888 VERIFY(0); /* we should never get here */
1889 }
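
/*
 * Sketch of the wakeup handshake shared by all DLIL input threads
 * (summarized from the code above and dlil_input_handler() below):
 *
 *   1. dlil_input_handler() enqueues packets on rcvq_pkts (or on the
 *      dedicated lo0 queue), sets DLIL_INPUT_WAITING and, if
 *      DLIL_INPUT_RUNNING is not set, issues wakeup_one() on
 *      &inp->input_waiting.
 *   2. The input thread wakes up, sets DLIL_INPUT_RUNNING, clears
 *      DLIL_INPUT_WAITING, drains the queue(s) under input_lck, and
 *      then processes the chain with the lock dropped.
 *   3. If more work arrives while the thread is RUNNING, the WAITING
 *      bit is simply set again and the thread loops without sleeping.
 */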
1890
1891 /*
1892 * Input thread for interfaces with legacy input model.
1893 */
1894 static void
1895 dlil_input_thread_func(void *v, wait_result_t w)
1896 {
1897 #pragma unused(w)
1898 char thread_name[MAXTHREADNAMESIZE];
1899 struct dlil_threading_info *inp = v;
1900 struct ifnet *ifp = inp->ifp;
1901
1902 /* Construct the name for this thread, and then apply it. */
1903 bzero(thread_name, sizeof(thread_name));
1904 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
1905 thread_set_thread_name(inp->input_thr, thread_name);
1906
1907 VERIFY(inp != dlil_main_input_thread);
1908 VERIFY(ifp != NULL);
1909 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
1910 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1911
1912 while (1) {
1913 struct mbuf *m = NULL;
1914 u_int32_t m_cnt;
1915
1916 lck_mtx_lock_spin(&inp->input_lck);
1917
1918 /* Wait until there is work to be done */
1919 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1920 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1921 (void) msleep(&inp->input_waiting, &inp->input_lck,
1922 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1923 }
1924
1925 inp->input_waiting |= DLIL_INPUT_RUNNING;
1926 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1927
1928 /*
1929 * Protocol registration and injection must always use
1930 * the main input thread; in theory the latter can utilize
1931 * the corresponding input thread that the packet arrived on,
1932 * but that requires knowing the interface in advance
1933 * (and the benefits might not be worth the trouble.)
1934 */
1935 VERIFY(!(inp->input_waiting &
1936 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
1937
1938 /* Packets for this interface */
1939 m_cnt = qlen(&inp->rcvq_pkts);
1940 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
1941
1942 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1943 lck_mtx_unlock(&inp->input_lck);
1944
1945 /* Free up pending packets */
1946 if (m != NULL)
1947 mbuf_freem_list(m);
1948
1949 dlil_terminate_input_thread(inp);
1950 /* NOTREACHED */
1951 return;
1952 }
1953
1954 inp->wtot = 0;
1955
1956 dlil_input_stats_sync(ifp, inp);
1957
1958 lck_mtx_unlock(&inp->input_lck);
1959
1960 /*
1961 * NOTE warning %%% attention !!!!
1962 * We should think about putting some thread starvation
1963 * safeguards if we deal with long chains of packets.
1964 */
1965 if (m != NULL)
1966 dlil_input_packet_list_extended(NULL, m,
1967 m_cnt, inp->mode);
1968 }
1969
1970 /* NOTREACHED */
1971 VERIFY(0); /* we should never get here */
1972 }
1973
1974 /*
1975 * Input thread for interfaces with opportunistic polling input model.
1976 */
1977 static void
1978 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
1979 {
1980 #pragma unused(w)
1981 struct dlil_threading_info *inp = v;
1982 struct ifnet *ifp = inp->ifp;
1983 struct timespec ts;
1984
1985 VERIFY(inp != dlil_main_input_thread);
1986 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
1987
1988 while (1) {
1989 struct mbuf *m = NULL;
1990 u_int32_t m_cnt, m_size, poll_req = 0;
1991 ifnet_model_t mode;
1992 struct timespec now, delta;
1993 u_int64_t ival;
1994
1995 lck_mtx_lock_spin(&inp->input_lck);
1996
1997 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
1998 ival = IF_RXPOLL_INTERVALTIME_MIN;
1999
2000 /* Link parameters changed? */
2001 if (ifp->if_poll_update != 0) {
2002 ifp->if_poll_update = 0;
2003 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2004 }
2005
2006 /* Current operating mode */
2007 mode = inp->mode;
2008
2009 /* Wait until there is work to be done */
2010 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2011 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2012 (void) msleep(&inp->input_waiting, &inp->input_lck,
2013 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2014 }
2015
2016 inp->input_waiting |= DLIL_INPUT_RUNNING;
2017 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2018
2019 /*
2020 * Protocol registration and injection must always use
2021 * the main input thread; in theory the latter can utilize
2022 * the corresponding input thread that the packet arrived on,
2023 * but that requires knowing the interface in advance
2024 * (and the benefits might not be worth the trouble.)
2025 */
2026 VERIFY(!(inp->input_waiting &
2027 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2028
2029 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2030 /* Free up pending packets */
2031 _flushq(&inp->rcvq_pkts);
2032 lck_mtx_unlock(&inp->input_lck);
2033
2034 dlil_terminate_input_thread(inp);
2035 /* NOTREACHED */
2036 return;
2037 }
2038
2039 /* Total count of all packets */
2040 m_cnt = qlen(&inp->rcvq_pkts);
2041
2042 /* Total bytes of all packets */
2043 m_size = qsize(&inp->rcvq_pkts);
2044
2045 /* Packets for this interface */
2046 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2047 VERIFY(m != NULL || m_cnt == 0);
2048
2049 nanouptime(&now);
2050 if (!net_timerisset(&inp->sample_lasttime))
2051 *(&inp->sample_lasttime) = *(&now);
2052
2053 net_timersub(&now, &inp->sample_lasttime, &delta);
2054 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2055 u_int32_t ptot, btot;
2056
2057 /* Accumulate statistics for current sampling */
2058 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2059
2060 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2061 goto skip;
2062
2063 *(&inp->sample_lasttime) = *(&now);
2064
2065 /* Calculate min/max of inbound bytes */
2066 btot = (u_int32_t)inp->sstats.bytes;
2067 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2068 inp->rxpoll_bmin = btot;
2069 if (btot > inp->rxpoll_bmax)
2070 inp->rxpoll_bmax = btot;
2071
2072 /* Calculate EWMA of inbound bytes */
2073 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2074
2075 /* Calculate min/max of inbound packets */
2076 ptot = (u_int32_t)inp->sstats.packets;
2077 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2078 inp->rxpoll_pmin = ptot;
2079 if (ptot > inp->rxpoll_pmax)
2080 inp->rxpoll_pmax = ptot;
2081
2082 /* Calculate EWMA of inbound packets */
2083 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2084
2085 /* Reset sampling statistics */
2086 PKTCNTR_CLEAR(&inp->sstats);
2087
2088 /* Calculate EWMA of wakeup requests */
2089 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2090 inp->wtot = 0;
2091
2092 if (dlil_verbose) {
2093 if (!net_timerisset(&inp->dbg_lasttime))
2094 *(&inp->dbg_lasttime) = *(&now);
2095 net_timersub(&now, &inp->dbg_lasttime, &delta);
2096 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2097 *(&inp->dbg_lasttime) = *(&now);
2098 printf("%s: [%s] pkts avg %d max %d "
2099 "limits [%d/%d], wreq avg %d "
2100 "limits [%d/%d], bytes avg %d "
2101 "limits [%d/%d]\n", if_name(ifp),
2102 (inp->mode ==
2103 IFNET_MODEL_INPUT_POLL_ON) ?
2104 "ON" : "OFF", inp->rxpoll_pavg,
2105 inp->rxpoll_pmax,
2106 inp->rxpoll_plowat,
2107 inp->rxpoll_phiwat,
2108 inp->rxpoll_wavg,
2109 inp->rxpoll_wlowat,
2110 inp->rxpoll_whiwat,
2111 inp->rxpoll_bavg,
2112 inp->rxpoll_blowat,
2113 inp->rxpoll_bhiwat);
2114 }
2115 }
2116
2117 /* Perform mode transition, if necessary */
2118 if (!net_timerisset(&inp->mode_lasttime))
2119 *(&inp->mode_lasttime) = *(&now);
2120
2121 net_timersub(&now, &inp->mode_lasttime, &delta);
2122 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2123 goto skip;
2124
2125 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2126 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2127 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2128 mode = IFNET_MODEL_INPUT_POLL_OFF;
2129 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2130 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2131 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2132 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2133 mode = IFNET_MODEL_INPUT_POLL_ON;
2134 }
2135
2136 if (mode != inp->mode) {
2137 inp->mode = mode;
2138 *(&inp->mode_lasttime) = *(&now);
2139 poll_req++;
2140 }
2141 }
2142 skip:
2143 dlil_input_stats_sync(ifp, inp);
2144
2145 lck_mtx_unlock(&inp->input_lck);
2146
2147 /*
2148 * If there's a mode change and interface is still attached,
2149 * perform a downcall to the driver for the new mode. Also
2150 * hold an IO refcnt on the interface to prevent it from
2151 * being detached (will be released below.)
2152 */
2153 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2154 struct ifnet_model_params p = { mode, { 0 } };
2155 errno_t err;
2156
2157 if (dlil_verbose) {
2158 printf("%s: polling is now %s, "
2159 "pkts avg %d max %d limits [%d/%d], "
2160 "wreq avg %d limits [%d/%d], "
2161 "bytes avg %d limits [%d/%d]\n",
2162 if_name(ifp),
2163 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2164 "ON" : "OFF", inp->rxpoll_pavg,
2165 inp->rxpoll_pmax, inp->rxpoll_plowat,
2166 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2167 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2168 inp->rxpoll_bavg, inp->rxpoll_blowat,
2169 inp->rxpoll_bhiwat);
2170 }
2171
2172 if ((err = ((*ifp->if_input_ctl)(ifp,
2173 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
2174 printf("%s: error setting polling mode "
2175 "to %s (%d)\n", if_name(ifp),
2176 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2177 "ON" : "OFF", err);
2178 }
2179
2180 switch (mode) {
2181 case IFNET_MODEL_INPUT_POLL_OFF:
2182 ifnet_set_poll_cycle(ifp, NULL);
2183 inp->rxpoll_offreq++;
2184 if (err != 0)
2185 inp->rxpoll_offerr++;
2186 break;
2187
2188 case IFNET_MODEL_INPUT_POLL_ON:
2189 net_nsectimer(&ival, &ts);
2190 ifnet_set_poll_cycle(ifp, &ts);
2191 ifnet_poll(ifp);
2192 inp->rxpoll_onreq++;
2193 if (err != 0)
2194 inp->rxpoll_onerr++;
2195 break;
2196
2197 default:
2198 VERIFY(0);
2199 /* NOTREACHED */
2200 }
2201
2202 /* Release the IO refcnt */
2203 ifnet_decr_iorefcnt(ifp);
2204 }
2205
2206 /*
2207 * NOTE warning %%% attention !!!!
2208 * We should think about putting some thread starvation
2209 * safeguards if we deal with long chains of packets.
2210 */
2211 if (m != NULL)
2212 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2213 }
2214
2215 /* NOTREACHED */
2216 VERIFY(0); /* we should never get here */
2217 }
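
/*
 * The rxpoll averages above are exponentially weighted moving averages.
 * With DLIL_EWMA(avg, new, decay) the update is roughly
 *
 *	avg = avg + (new - avg) / 2^decay
 *
 * (see the macro definition earlier in this file for the exact integer
 * arithmetic).  Worked example with made-up numbers: with a decay of 2,
 * a packet average of 100 and a new sample of 180, the average becomes
 * 100 + (180 - 100) / 4 = 120.  Polling is turned on only when the
 * averages cross the high watermarks computed by dlil_rxpoll_set_params()
 * below, and off again once they drop under the low watermarks, with
 * mode_holdtime damping how often the mode may flip.
 */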
2218
2219 /*
2220 * Must be called on an attached ifnet (caller is expected to check.)
2221 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2222 */
2223 errno_t
2224 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2225 boolean_t locked)
2226 {
2227 struct dlil_threading_info *inp;
2228 u_int64_t sample_holdtime, inbw;
2229
2230 VERIFY(ifp != NULL);
2231 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2232 return (ENXIO);
2233
2234 if (p != NULL) {
2235 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2236 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2237 return (EINVAL);
2238 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2239 p->packets_lowat >= p->packets_hiwat)
2240 return (EINVAL);
2241 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2242 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2243 return (EINVAL);
2244 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2245 p->bytes_lowat >= p->bytes_hiwat)
2246 return (EINVAL);
2247 if (p->interval_time != 0 &&
2248 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2249 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2250 }
2251
2252 if (!locked)
2253 lck_mtx_lock(&inp->input_lck);
2254
2255 lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2256
2257 /*
2258 * Normally, we'd reset the parameters to the auto-tuned values
2259 * if the input thread detects a change in link rate. If the
2260 * driver provides its own parameters right after the link rate
2261 * changes, but before the input thread gets to run, we want to
2262 * make sure to keep the driver's values. Clearing if_poll_update
2263 * will achieve that.
2264 */
2265 if (p != NULL && !locked && ifp->if_poll_update != 0)
2266 ifp->if_poll_update = 0;
2267
2268 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2269 sample_holdtime = 0; /* polling is disabled */
2270 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2271 inp->rxpoll_blowat = 0;
2272 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2273 inp->rxpoll_bhiwat = (u_int32_t)-1;
2274 inp->rxpoll_plim = 0;
2275 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2276 } else {
2277 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2278 u_int64_t ival;
2279 unsigned int n, i;
2280
2281 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2282 if (inbw < rxpoll_tbl[i].speed)
2283 break;
2284 n = i;
2285 }
2286 /* auto-tune if caller didn't specify a value */
2287 plowat = ((p == NULL || p->packets_lowat == 0) ?
2288 rxpoll_tbl[n].plowat : p->packets_lowat);
2289 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2290 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2291 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2292 rxpoll_tbl[n].blowat : p->bytes_lowat);
2293 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2294 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2295 plim = ((p == NULL || p->packets_limit == 0) ?
2296 if_rxpoll_max : p->packets_limit);
2297 ival = ((p == NULL || p->interval_time == 0) ?
2298 if_rxpoll_interval_time : p->interval_time);
2299
2300 VERIFY(plowat != 0 && phiwat != 0);
2301 VERIFY(blowat != 0 && bhiwat != 0);
2302 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2303
2304 sample_holdtime = if_rxpoll_sample_holdtime;
2305 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2306 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2307 inp->rxpoll_plowat = plowat;
2308 inp->rxpoll_phiwat = phiwat;
2309 inp->rxpoll_blowat = blowat;
2310 inp->rxpoll_bhiwat = bhiwat;
2311 inp->rxpoll_plim = plim;
2312 inp->rxpoll_ival = ival;
2313 }
2314
2315 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2316 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2317
2318 if (dlil_verbose) {
2319 printf("%s: speed %llu bps, sample per %llu nsec, "
2320 "poll interval %llu nsec, pkts per poll %u, "
2321 "pkt limits [%u/%u], wreq limits [%u/%u], "
2322 "bytes limits [%u/%u]\n", if_name(ifp),
2323 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2324 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2325 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2326 }
2327
2328 if (!locked)
2329 lck_mtx_unlock(&inp->input_lck);
2330
2331 return (0);
2332 }
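
/*
 * Illustrative driver-side sketch (values are made up): a driver that
 * wants to override the auto-tuned thresholds fills in only the fields
 * it cares about and leaves the rest zero, which keeps the auto-tuned
 * values for those.  This is typically done through the
 * ifnet_set_poll_params() KPI, which lands in this routine.
 *
 *	struct ifnet_poll_params p;
 *
 *	bzero(&p, sizeof (p));
 *	p.packets_lowat = 8;		// leave polling mode below this
 *	p.packets_hiwat = 64;		// enter polling mode above this
 *	p.packets_limit = 32;		// max packets per poll downcall
 *	p.interval_time = 1000 * 1000;	// poll cycle in nanoseconds (1 ms)
 *	(void) ifnet_set_poll_params(ifp, &p);
 *
 * Per the validation above, lowat/hiwat must be supplied as a pair and
 * lowat must be strictly less than hiwat; interval_time is clamped to
 * IF_RXPOLL_INTERVALTIME_MIN.
 */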
2333
2334 /*
2335 * Must be called on an attached ifnet (caller is expected to check.)
2336 */
2337 errno_t
2338 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2339 {
2340 struct dlil_threading_info *inp;
2341
2342 VERIFY(ifp != NULL && p != NULL);
2343 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2344 return (ENXIO);
2345
2346 bzero(p, sizeof (*p));
2347
2348 lck_mtx_lock(&inp->input_lck);
2349 p->packets_limit = inp->rxpoll_plim;
2350 p->packets_lowat = inp->rxpoll_plowat;
2351 p->packets_hiwat = inp->rxpoll_phiwat;
2352 p->bytes_lowat = inp->rxpoll_blowat;
2353 p->bytes_hiwat = inp->rxpoll_bhiwat;
2354 p->interval_time = inp->rxpoll_ival;
2355 lck_mtx_unlock(&inp->input_lck);
2356
2357 return (0);
2358 }
2359
2360 errno_t
2361 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2362 const struct ifnet_stat_increment_param *s)
2363 {
2364 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2365 }
2366
2367 errno_t
2368 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2369 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2370 {
2371 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2372 }
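
/*
 * Illustrative driver RX sketch (names are hypothetical): a driver hands
 * DLIL a chain of packets linked through m_nextpkt, together with the
 * per-chain statistics that the extended variant requires.
 *
 *	struct ifnet_stat_increment_param s;
 *	struct mbuf *head, *tail;
 *	u_int32_t cnt, bytes;
 *
 *	// mydrv_harvest_rx() stands in for the driver's own routine that
 *	// builds the chain and counts packets/bytes as it goes.
 *	mydrv_harvest_rx(sc, &head, &tail, &cnt, &bytes);
 *
 *	bzero(&s, sizeof (s));
 *	s.packets_in = cnt;
 *	s.bytes_in = bytes;
 *	(void) ifnet_input_extended(sc->sc_ifp, head, tail, &s);
 *
 * ifnet_input() can be used instead when no tail pointer or statistics
 * are available; ifnet_input_common() then walks the chain to compute
 * them.
 */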
2373
2374 static errno_t
2375 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2376 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2377 {
2378 ifnet_input_handler_func handler_func;
2379 struct ifnet_stat_increment_param _s;
2380 u_int32_t m_cnt = 0, m_size = 0;
2381 struct mbuf *last;
2382 errno_t err = 0;
2383
2384 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2385 if (m_head != NULL)
2386 mbuf_freem_list(m_head);
2387 return (EINVAL);
2388 }
2389
2390 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2391 VERIFY(m_tail == NULL || ext);
2392 VERIFY(s != NULL || !ext);
2393
2394 /*
2395 * Drop the packet(s) if the parameters are invalid, or if the
2396 * interface is no longer attached; else hold an IO refcnt to
2397 * prevent it from being detached (will be released below.)
2398 */
2399 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2400 if (m_head != NULL)
2401 mbuf_freem_list(m_head);
2402 return (EINVAL);
2403 }
2404
2405 handler_func = ifp->if_input_handler;
2406 VERIFY(handler_func != NULL);
2407
2408 if (m_tail == NULL) {
2409 last = m_head;
2410 while (m_head != NULL) {
2411 #if IFNET_INPUT_SANITY_CHK
2412 if (dlil_input_sanity_check != 0)
2413 DLIL_INPUT_CHECK(last, ifp);
2414 #endif /* IFNET_INPUT_SANITY_CHK */
2415 m_cnt++;
2416 m_size += m_length(last);
2417 if (mbuf_nextpkt(last) == NULL)
2418 break;
2419 last = mbuf_nextpkt(last);
2420 }
2421 m_tail = last;
2422 } else {
2423 #if IFNET_INPUT_SANITY_CHK
2424 if (dlil_input_sanity_check != 0) {
2425 last = m_head;
2426 while (1) {
2427 DLIL_INPUT_CHECK(last, ifp);
2428 m_cnt++;
2429 m_size += m_length(last);
2430 if (mbuf_nextpkt(last) == NULL)
2431 break;
2432 last = mbuf_nextpkt(last);
2433 }
2434 } else {
2435 m_cnt = s->packets_in;
2436 m_size = s->bytes_in;
2437 last = m_tail;
2438 }
2439 #else
2440 m_cnt = s->packets_in;
2441 m_size = s->bytes_in;
2442 last = m_tail;
2443 #endif /* IFNET_INPUT_SANITY_CHK */
2444 }
2445
2446 if (last != m_tail) {
2447 panic_plain("%s: invalid input packet chain for %s, "
2448 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2449 m_tail, last);
2450 }
2451
2452 /*
2453 * Assert packet count only for the extended variant, for backwards
2454 * compatibility, since this came directly from the device driver.
2455 * Relax this assertion for input bytes, as the driver may have
2456 * included the link-layer headers in the computation; hence
2457 * m_size is just an approximation.
2458 */
2459 if (ext && s->packets_in != m_cnt) {
2460 panic_plain("%s: input packet count mismatch for %s, "
2461 "%d instead of %d\n", __func__, if_name(ifp),
2462 s->packets_in, m_cnt);
2463 }
2464
2465 if (s == NULL) {
2466 bzero(&_s, sizeof (_s));
2467 s = &_s;
2468 } else {
2469 _s = *s;
2470 }
2471 _s.packets_in = m_cnt;
2472 _s.bytes_in = m_size;
2473
2474 err = (*handler_func)(ifp, m_head, m_tail, &_s, poll, current_thread());
2475
2476 if (ifp != lo_ifp) {
2477 /* Release the IO refcnt */
2478 ifnet_decr_iorefcnt(ifp);
2479 }
2480
2481 return (err);
2482 }
2483
2484 errno_t
2485 ifnet_set_input_handler(struct ifnet *ifp, ifnet_input_handler_func fn)
2486 {
2487 return (atomic_test_set_ptr(&ifp->if_input_handler,
2488 dlil_input_handler, fn) ? 0 : EBUSY);
2489 }
2490
2491 void
2492 ifnet_reset_input_handler(struct ifnet *ifp)
2493 {
2494 atomic_set_ptr(&ifp->if_input_handler, dlil_input_handler);
2495 }
2496
2497 errno_t
2498 ifnet_set_output_handler(struct ifnet *ifp, ifnet_output_handler_func fn)
2499 {
2500 return (atomic_test_set_ptr(&ifp->if_output_handler,
2501 dlil_output_handler, fn) ? 0 : EBUSY);
2502 }
2503
2504 void
2505 ifnet_reset_output_handler(struct ifnet *ifp)
2506 {
2507 atomic_set_ptr(&ifp->if_output_handler, dlil_output_handler);
2508 }
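
/*
 * Interposition sketch: the input/output handlers default to
 * dlil_input_handler/dlil_output_handler and only one interposer can
 * be installed at a time; atomic_test_set_ptr() succeeds only if the
 * handler is still the default, so a second caller gets EBUSY.
 *
 *	// my_output_intercept is a hypothetical replacement handler
 *	if (ifnet_set_output_handler(ifp, my_output_intercept) == EBUSY) {
 *		// some other subsystem already interposed on this ifp
 *	}
 *	...
 *	ifnet_reset_output_handler(ifp);	// restore the default
 */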
2509
2510 errno_t
2511 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2512 {
2513 return (ifp->if_output(ifp, m));
2514 }
2515
2516 errno_t
2517 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2518 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2519 boolean_t poll, struct thread *tp)
2520 {
2521 struct dlil_threading_info *inp;
2522 u_int32_t m_cnt = s->packets_in;
2523 u_int32_t m_size = s->bytes_in;
2524
2525 if ((inp = ifp->if_inp) == NULL)
2526 inp = dlil_main_input_thread;
2527
2528 /*
2529 * If there is a matching DLIL input thread associated with an
2530 * affinity set, associate this thread with the same set. We
2531 * will only do this once.
2532 */
2533 lck_mtx_lock_spin(&inp->input_lck);
2534 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2535 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2536 (poll && inp->poll_thr == THREAD_NULL))) {
2537 u_int32_t tag = inp->tag;
2538
2539 if (poll) {
2540 VERIFY(inp->poll_thr == THREAD_NULL);
2541 inp->poll_thr = tp;
2542 } else {
2543 VERIFY(inp->wloop_thr == THREAD_NULL);
2544 inp->wloop_thr = tp;
2545 }
2546 lck_mtx_unlock(&inp->input_lck);
2547
2548 /* Associate the current thread with the new affinity tag */
2549 (void) dlil_affinity_set(tp, tag);
2550
2551 /*
2552 * Take a reference on the current thread; during detach,
2553 * we will need to refer to it in order to tear down its
2554 * affinity.
2555 */
2556 thread_reference(tp);
2557 lck_mtx_lock_spin(&inp->input_lck);
2558 }
2559
2560 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2561
2562 /*
2563 * Because of loopbacked multicast we cannot stuff the ifp in
2564 * the rcvif of the packet header: loopback (lo0) packets use a
2565 * dedicated list so that we can later associate them with lo_ifp
2566 * on their way up the stack. Packets for other interfaces without
2567 * dedicated input threads go to the regular list.
2568 */
2569 if (m_head != NULL) {
2570 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2571 struct dlil_main_threading_info *inpm =
2572 (struct dlil_main_threading_info *)inp;
2573 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2574 m_cnt, m_size);
2575 } else {
2576 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2577 m_cnt, m_size);
2578 }
2579 }
2580
2581 #if IFNET_INPUT_SANITY_CHK
2582 if (dlil_input_sanity_check != 0) {
2583 u_int32_t count;
2584 struct mbuf *m0;
2585
2586 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2587 count++;
2588
2589 if (count != m_cnt) {
2590 panic_plain("%s: invalid packet count %d "
2591 "(expected %d)\n", if_name(ifp),
2592 count, m_cnt);
2593 /* NOTREACHED */
2594 }
2595
2596 inp->input_mbuf_cnt += m_cnt;
2597 }
2598 #endif /* IFNET_INPUT_SANITY_CHK */
2599
2600 dlil_input_stats_add(s, inp, poll);
2601 /*
2602 * If we're using the main input thread, synchronize the
2603 * stats now since we have the interface context. All
2604 * other cases involving dedicated input threads will
2605 * have their stats synchronized there.
2606 */
2607 if (inp == dlil_main_input_thread)
2608 dlil_input_stats_sync(ifp, inp);
2609
2610 inp->input_waiting |= DLIL_INPUT_WAITING;
2611 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2612 inp->wtot++;
2613 wakeup_one((caddr_t)&inp->input_waiting);
2614 }
2615 lck_mtx_unlock(&inp->input_lck);
2616
2617 return (0);
2618 }
2619
2620 static void
2621 ifnet_start_common(struct ifnet *ifp, int resetfc)
2622 {
2623 if (!(ifp->if_eflags & IFEF_TXSTART))
2624 return;
2625 /*
2626 * If the starter thread is inactive, signal it to do work,
2627 * unless the interface is being flow controlled from below,
2628 * e.g. a virtual interface being flow controlled by a real
2629 * network interface beneath it.
2630 */
2631 lck_mtx_lock_spin(&ifp->if_start_lock);
2632 if (resetfc) {
2633 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2634 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2635 lck_mtx_unlock(&ifp->if_start_lock);
2636 return;
2637 }
2638 ifp->if_start_req++;
2639 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2640 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2641 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2642 ifp->if_start_delayed == 0)) {
2643 wakeup_one((caddr_t)&ifp->if_start_thread);
2644 }
2645 lck_mtx_unlock(&ifp->if_start_lock);
2646 }
2647
2648 void
2649 ifnet_start(struct ifnet *ifp)
2650 {
2651 ifnet_start_common(ifp, 0);
2652 }
2653
2654 static void
2655 ifnet_start_thread_fn(void *v, wait_result_t w)
2656 {
2657 #pragma unused(w)
2658 struct ifnet *ifp = v;
2659 char ifname[IFNAMSIZ + 1];
2660 char thread_name[MAXTHREADNAMESIZE];
2661 struct timespec *ts = NULL;
2662 struct ifclassq *ifq = &ifp->if_snd;
2663 struct timespec delay_start_ts;
2664
2665 /* Construct the name for this thread, and then apply it. */
2666 bzero(thread_name, sizeof(thread_name));
2667 snprintf(thread_name, sizeof(thread_name), "ifnet_start_%s", ifp->if_xname);
2668 thread_set_thread_name(ifp->if_start_thread, thread_name);
2669
2670 /*
2671 * Treat the dedicated starter thread for lo0 as equivalent to
2672 * the driver workloop thread; if net_affinity is enabled for
2673 * the main input thread, associate this starter thread to it
2674 * by binding them with the same affinity tag. This is done
2675 * only once (as we only have one lo_ifp which never goes away.)
2676 */
2677 if (ifp == lo_ifp) {
2678 struct dlil_threading_info *inp = dlil_main_input_thread;
2679 struct thread *tp = current_thread();
2680
2681 lck_mtx_lock(&inp->input_lck);
2682 if (inp->net_affinity) {
2683 u_int32_t tag = inp->tag;
2684
2685 VERIFY(inp->wloop_thr == THREAD_NULL);
2686 VERIFY(inp->poll_thr == THREAD_NULL);
2687 inp->wloop_thr = tp;
2688 lck_mtx_unlock(&inp->input_lck);
2689
2690 /* Associate this thread with the affinity tag */
2691 (void) dlil_affinity_set(tp, tag);
2692 } else {
2693 lck_mtx_unlock(&inp->input_lck);
2694 }
2695 }
2696
2697 snprintf(ifname, sizeof (ifname), "%s_starter",
2698 if_name(ifp));
2699
2700 lck_mtx_lock_spin(&ifp->if_start_lock);
2701
2702 for (;;) {
2703 if (ifp->if_start_thread != NULL)
2704 (void) msleep(&ifp->if_start_thread,
2705 &ifp->if_start_lock,
2706 (PZERO - 1) | PSPIN, ifname, ts);
2707
2708 /* interface is detached? */
2709 if (ifp->if_start_thread == THREAD_NULL) {
2710 ifnet_set_start_cycle(ifp, NULL);
2711 lck_mtx_unlock(&ifp->if_start_lock);
2712 ifnet_purge(ifp);
2713
2714 if (dlil_verbose) {
2715 printf("%s: starter thread terminated\n",
2716 if_name(ifp));
2717 }
2718
2719 /* for the extra refcnt from kernel_thread_start() */
2720 thread_deallocate(current_thread());
2721 /* this is the end */
2722 thread_terminate(current_thread());
2723 /* NOTREACHED */
2724 return;
2725 }
2726
2727 ifp->if_start_active = 1;
2728
2729 for (;;) {
2730 u_int32_t req = ifp->if_start_req;
2731 if (!IFCQ_IS_EMPTY(ifq) &&
2732 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2733 ifp->if_start_delayed == 0 &&
2734 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2735 (ifp->if_eflags & IFEF_DELAY_START)) {
2736 ifp->if_start_delayed = 1;
2737 ifnet_start_delayed++;
2738 break;
2739 } else {
2740 ifp->if_start_delayed = 0;
2741 }
2742 lck_mtx_unlock(&ifp->if_start_lock);
2743
2744 /*
2745 * If no longer attached, don't call start because ifp
2746 * is being destroyed; else hold an IO refcnt to
2747 * prevent the interface from being detached (will be
2748 * released below.)
2749 */
2750 if (!ifnet_is_attached(ifp, 1)) {
2751 lck_mtx_lock_spin(&ifp->if_start_lock);
2752 break;
2753 }
2754
2755 /* invoke the driver's start routine */
2756 ((*ifp->if_start)(ifp));
2757
2758 /*
2759 * Release the io ref count taken by ifnet_is_attached.
2760 */
2761 ifnet_decr_iorefcnt(ifp);
2762
2763 lck_mtx_lock_spin(&ifp->if_start_lock);
2764
2765 /* if there's no pending request, we're done */
2766 if (req == ifp->if_start_req)
2767 break;
2768 }
2769
2770 ifp->if_start_req = 0;
2771 ifp->if_start_active = 0;
2772
2773 /*
2774 * Wakeup N ns from now if rate-controlled by TBR, and if
2775 * there are still packets in the send queue which haven't
2776 * been dequeued so far; else sleep indefinitely (ts = NULL)
2777 * until ifnet_start() is called again.
2778 */
2779 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2780 &ifp->if_start_cycle : NULL);
2781
2782 if (ts == NULL && ifp->if_start_delayed == 1) {
2783 delay_start_ts.tv_sec = 0;
2784 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2785 ts = &delay_start_ts;
2786 }
2787
2788 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2789 ts = NULL;
2790 }
2791
2792 /* NOTREACHED */
2793 }
2794
2795 void
2796 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2797 {
2798 if (ts == NULL)
2799 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2800 else
2801 *(&ifp->if_start_cycle) = *ts;
2802
2803 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2804 printf("%s: restart interval set to %lu nsec\n",
2805 if_name(ifp), ts->tv_nsec);
2806 }
2807
2808 static void
2809 ifnet_poll(struct ifnet *ifp)
2810 {
2811 /*
2812 * If the poller thread is inactive, signal it to do work.
2813 */
2814 lck_mtx_lock_spin(&ifp->if_poll_lock);
2815 ifp->if_poll_req++;
2816 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2817 wakeup_one((caddr_t)&ifp->if_poll_thread);
2818 }
2819 lck_mtx_unlock(&ifp->if_poll_lock);
2820 }
2821
2822 static void
2823 ifnet_poll_thread_fn(void *v, wait_result_t w)
2824 {
2825 #pragma unused(w)
2826 struct dlil_threading_info *inp;
2827 struct ifnet *ifp = v;
2828 char ifname[IFNAMSIZ + 1];
2829 struct timespec *ts = NULL;
2830 struct ifnet_stat_increment_param s;
2831
2832 snprintf(ifname, sizeof (ifname), "%s_poller",
2833 if_name(ifp));
2834 bzero(&s, sizeof (s));
2835
2836 lck_mtx_lock_spin(&ifp->if_poll_lock);
2837
2838 inp = ifp->if_inp;
2839 VERIFY(inp != NULL);
2840
2841 for (;;) {
2842 if (ifp->if_poll_thread != THREAD_NULL) {
2843 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2844 (PZERO - 1) | PSPIN, ifname, ts);
2845 }
2846
2847 /* interface is detached (maybe while asleep)? */
2848 if (ifp->if_poll_thread == THREAD_NULL) {
2849 ifnet_set_poll_cycle(ifp, NULL);
2850 lck_mtx_unlock(&ifp->if_poll_lock);
2851
2852 if (dlil_verbose) {
2853 printf("%s: poller thread terminated\n",
2854 if_name(ifp));
2855 }
2856
2857 /* for the extra refcnt from kernel_thread_start() */
2858 thread_deallocate(current_thread());
2859 /* this is the end */
2860 thread_terminate(current_thread());
2861 /* NOTREACHED */
2862 return;
2863 }
2864
2865 ifp->if_poll_active = 1;
2866 for (;;) {
2867 struct mbuf *m_head, *m_tail;
2868 u_int32_t m_lim, m_cnt, m_totlen;
2869 u_int16_t req = ifp->if_poll_req;
2870
2871 lck_mtx_unlock(&ifp->if_poll_lock);
2872
2873 /*
2874 * If no longer attached, there's nothing to do;
2875 * else hold an IO refcnt to prevent the interface
2876 * from being detached (will be released below.)
2877 */
2878 if (!ifnet_is_attached(ifp, 1)) {
2879 lck_mtx_lock_spin(&ifp->if_poll_lock);
2880 break;
2881 }
2882
2883 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
2884 MAX((qlimit(&inp->rcvq_pkts)),
2885 (inp->rxpoll_phiwat << 2));
2886
2887 if (dlil_verbose > 1) {
2888 printf("%s: polling up to %d pkts, "
2889 "pkts avg %d max %d, wreq avg %d, "
2890 "bytes avg %d\n",
2891 if_name(ifp), m_lim,
2892 inp->rxpoll_pavg, inp->rxpoll_pmax,
2893 inp->rxpoll_wavg, inp->rxpoll_bavg);
2894 }
2895
2896 /* invoke the driver's input poll routine */
2897 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
2898 &m_cnt, &m_totlen));
2899
2900 if (m_head != NULL) {
2901 VERIFY(m_tail != NULL && m_cnt > 0);
2902
2903 if (dlil_verbose > 1) {
2904 printf("%s: polled %d pkts, "
2905 "pkts avg %d max %d, wreq avg %d, "
2906 "bytes avg %d\n",
2907 if_name(ifp), m_cnt,
2908 inp->rxpoll_pavg, inp->rxpoll_pmax,
2909 inp->rxpoll_wavg, inp->rxpoll_bavg);
2910 }
2911
2912 /* stats are required for extended variant */
2913 s.packets_in = m_cnt;
2914 s.bytes_in = m_totlen;
2915
2916 (void) ifnet_input_common(ifp, m_head, m_tail,
2917 &s, TRUE, TRUE);
2918 } else {
2919 if (dlil_verbose > 1) {
2920 printf("%s: no packets, "
2921 "pkts avg %d max %d, wreq avg %d, "
2922 "bytes avg %d\n",
2923 if_name(ifp), inp->rxpoll_pavg,
2924 inp->rxpoll_pmax, inp->rxpoll_wavg,
2925 inp->rxpoll_bavg);
2926 }
2927
2928 (void) ifnet_input_common(ifp, NULL, NULL,
2929 NULL, FALSE, TRUE);
2930 }
2931
2932 /* Release the io ref count */
2933 ifnet_decr_iorefcnt(ifp);
2934
2935 lck_mtx_lock_spin(&ifp->if_poll_lock);
2936
2937 /* if there's no pending request, we're done */
2938 if (req == ifp->if_poll_req)
2939 break;
2940 }
2941 ifp->if_poll_req = 0;
2942 ifp->if_poll_active = 0;
2943
2944 /*
2945 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
2946 * until ifnet_poll() is called again.
2947 */
2948 ts = &ifp->if_poll_cycle;
2949 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
2950 ts = NULL;
2951 }
2952
2953 /* NOTREACHED */
2954 }
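
/*
 * Driver contract for the poll path (sketch; parameter names are
 * illustrative): if_input_poll is expected to pull at most m_lim
 * packets off the receive ring and hand back the chain and its counts.
 *
 *	static void
 *	mydrv_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max,
 *	    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
 *	    u_int32_t *len)
 *	{
 *		// dequeue up to 'max' packets, link them via m_nextpkt,
 *		// and fill in *head/*tail/*cnt/*len (NULL/0 when empty).
 *	}
 *
 * The poller thread above then feeds the chain back through
 * ifnet_input_common() with poll == TRUE so that the per-poll
 * statistics are credited.
 */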
2955
2956 void
2957 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2958 {
2959 if (ts == NULL)
2960 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
2961 else
2962 *(&ifp->if_poll_cycle) = *ts;
2963
2964 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2965 printf("%s: poll interval set to %lu nsec\n",
2966 if_name(ifp), ts->tv_nsec);
2967 }
2968
2969 void
2970 ifnet_purge(struct ifnet *ifp)
2971 {
2972 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
2973 if_qflush(ifp, 0);
2974 }
2975
2976 void
2977 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2978 {
2979 IFCQ_LOCK_ASSERT_HELD(ifq);
2980
2981 if (!(IFCQ_IS_READY(ifq)))
2982 return;
2983
2984 if (IFCQ_TBR_IS_ENABLED(ifq)) {
2985 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
2986 ifq->ifcq_tbr.tbr_percent, 0 };
2987 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
2988 }
2989
2990 ifclassq_update(ifq, ev);
2991 }
2992
2993 void
2994 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2995 {
2996 switch (ev) {
2997 case CLASSQ_EV_LINK_BANDWIDTH:
2998 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
2999 ifp->if_poll_update++;
3000 break;
3001
3002 default:
3003 break;
3004 }
3005 }
3006
3007 errno_t
3008 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3009 {
3010 struct ifclassq *ifq;
3011 u_int32_t omodel;
3012 errno_t err;
3013
3014 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
3015 return (EINVAL);
3016 else if (!(ifp->if_eflags & IFEF_TXSTART))
3017 return (ENXIO);
3018
3019 ifq = &ifp->if_snd;
3020 IFCQ_LOCK(ifq);
3021 omodel = ifp->if_output_sched_model;
3022 ifp->if_output_sched_model = model;
3023 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
3024 ifp->if_output_sched_model = omodel;
3025 IFCQ_UNLOCK(ifq);
3026
3027 return (err);
3028 }
3029
3030 errno_t
3031 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3032 {
3033 if (ifp == NULL)
3034 return (EINVAL);
3035 else if (!(ifp->if_eflags & IFEF_TXSTART))
3036 return (ENXIO);
3037
3038 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3039
3040 return (0);
3041 }
3042
3043 errno_t
3044 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3045 {
3046 if (ifp == NULL || maxqlen == NULL)
3047 return (EINVAL);
3048 else if (!(ifp->if_eflags & IFEF_TXSTART))
3049 return (ENXIO);
3050
3051 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3052
3053 return (0);
3054 }
3055
3056 errno_t
3057 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3058 {
3059 errno_t err;
3060
3061 if (ifp == NULL || pkts == NULL)
3062 err = EINVAL;
3063 else if (!(ifp->if_eflags & IFEF_TXSTART))
3064 err = ENXIO;
3065 else
3066 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3067 pkts, NULL);
3068
3069 return (err);
3070 }
3071
3072 errno_t
3073 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3074 u_int32_t *pkts, u_int32_t *bytes)
3075 {
3076 errno_t err;
3077
3078 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3079 (pkts == NULL && bytes == NULL))
3080 err = EINVAL;
3081 else if (!(ifp->if_eflags & IFEF_TXSTART))
3082 err = ENXIO;
3083 else
3084 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3085
3086 return (err);
3087 }
3088
3089 errno_t
3090 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3091 {
3092 struct dlil_threading_info *inp;
3093
3094 if (ifp == NULL)
3095 return (EINVAL);
3096 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3097 return (ENXIO);
3098
3099 if (maxqlen == 0)
3100 maxqlen = if_rcvq_maxlen;
3101 else if (maxqlen < IF_RCVQ_MINLEN)
3102 maxqlen = IF_RCVQ_MINLEN;
3103
3104 inp = ifp->if_inp;
3105 lck_mtx_lock(&inp->input_lck);
3106 qlimit(&inp->rcvq_pkts) = maxqlen;
3107 lck_mtx_unlock(&inp->input_lck);
3108
3109 return (0);
3110 }
3111
3112 errno_t
3113 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3114 {
3115 struct dlil_threading_info *inp;
3116
3117 if (ifp == NULL || maxqlen == NULL)
3118 return (EINVAL);
3119 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3120 return (ENXIO);
3121
3122 inp = ifp->if_inp;
3123 lck_mtx_lock(&inp->input_lck);
3124 *maxqlen = qlimit(&inp->rcvq_pkts);
3125 lck_mtx_unlock(&inp->input_lck);
3126 return (0);
3127 }
3128
3129 errno_t
3130 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3131 {
3132 int error;
3133 struct timespec now;
3134 u_int64_t now_nsec;
3135
3136 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3137 m->m_nextpkt != NULL) {
3138 if (m != NULL)
3139 m_freem_list(m);
3140 return (EINVAL);
3141 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3142 !(ifp->if_refflags & IFRF_ATTACHED)) {
3143 /* flag tested without lock for performance */
3144 m_freem(m);
3145 return (ENXIO);
3146 } else if (!(ifp->if_flags & IFF_UP)) {
3147 m_freem(m);
3148 return (ENETDOWN);
3149 }
3150
3151 nanouptime(&now);
3152 net_timernsec(&now, &now_nsec);
3153 m->m_pkthdr.pkt_timestamp = now_nsec;
3154 m->m_pkthdr.pkt_flags &= ~PKTF_DRV_TS_VALID;
3155
3156 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3157 /*
3158 * If the driver chose to delay start callback for
3159 * coalescing multiple packets, then use the following
3160 * heuristics to make sure that start callback will
3161 * be delayed only when bulk data transfer is detected.
3162 * 1. number of packets enqueued in (delay_win * 2) is
3163 * greater than or equal to the delay qlen.
3164 * 2. If delay_start is enabled it will stay enabled for
3165 * another 10 idle windows. This is to take into account
3166 * variable RTT and burst traffic.
3167 * 3. If the time elapsed since last enqueue is more
3168 * than 200ms we disable delaying start callback. This is
3169 * to take idle time into account.
3170 */
3171 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3172 if (ifp->if_start_delay_swin > 0) {
3173 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3174 ifp->if_start_delay_cnt++;
3175 } else if ((now_nsec - ifp->if_start_delay_swin)
3176 >= (200 * 1000 * 1000)) {
3177 ifp->if_start_delay_swin = now_nsec;
3178 ifp->if_start_delay_cnt = 1;
3179 ifp->if_start_delay_idle = 0;
3180 if (ifp->if_eflags & IFEF_DELAY_START) {
3181 ifp->if_eflags &=
3182 ~(IFEF_DELAY_START);
3183 ifnet_delay_start_disabled++;
3184 }
3185 } else {
3186 if (ifp->if_start_delay_cnt >=
3187 ifp->if_start_delay_qlen) {
3188 ifp->if_eflags |= IFEF_DELAY_START;
3189 ifp->if_start_delay_idle = 0;
3190 } else {
3191 if (ifp->if_start_delay_idle >= 10) {
3192 ifp->if_eflags &= ~(IFEF_DELAY_START);
3193 ifnet_delay_start_disabled++;
3194 } else {
3195 ifp->if_start_delay_idle++;
3196 }
3197 }
3198 ifp->if_start_delay_swin = now_nsec;
3199 ifp->if_start_delay_cnt = 1;
3200 }
3201 } else {
3202 ifp->if_start_delay_swin = now_nsec;
3203 ifp->if_start_delay_cnt = 1;
3204 ifp->if_start_delay_idle = 0;
3205 ifp->if_eflags &= ~(IFEF_DELAY_START);
3206 }
3207 } else {
3208 ifp->if_eflags &= ~(IFEF_DELAY_START);
3209 }
3210
3211 /* enqueue the packet */
3212 error = ifclassq_enqueue(&ifp->if_snd, m);
3213
3214 /*
3215 * Tell the driver to start dequeueing; do this even when the queue
3216 * for the packet is suspended (EQSUSPENDED), as the driver could still
3217 * be dequeueing from other unsuspended queues.
3218 */
3219 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3220 (error == 0 || error == EQFULL || error == EQSUSPENDED))
3221 ifnet_start(ifp);
3222
3223 return (error);
3224 }
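
/*
 * Transmit path sketch for an IFEF_TXSTART interface (illustrative,
 * driver names are hypothetical):
 *
 *	errno_t err = ifnet_enqueue(ifp, m);	// enqueue on if_snd
 *
 * Unless IFEF_ENQUEUE_MULTI is set, ifnet_enqueue() calls ifnet_start(),
 * which wakes the per-interface starter thread; that thread invokes the
 * driver's if_start callback, which typically drains the send queue
 * with one of the dequeue variants below:
 *
 *	static void
 *	mydrv_start(struct ifnet *ifp)
 *	{
 *		struct mbuf *m;
 *
 *		while (ifnet_dequeue(ifp, &m) == 0) {
 *			// program a TX descriptor; free the mbuf on
 *			// completion
 *		}
 *	}
 *
 * Delay-start example with made-up numbers: with a 10 ms delay window
 * and an if_start_delay_qlen of 16, seeing 16 or more enqueues within
 * two windows sets IFEF_DELAY_START so the starter is kicked in batches;
 * ten consecutive windows below that threshold, or a gap of 200 ms or
 * more since the window began, clears it again.
 */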
3225
3226 errno_t
3227 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3228 {
3229 errno_t rc;
3230 if (ifp == NULL || mp == NULL)
3231 return (EINVAL);
3232 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3233 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3234 return (ENXIO);
3235 if (!ifnet_is_attached(ifp, 1))
3236 return (ENXIO);
3237 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3238 mp, NULL, NULL, NULL);
3239 ifnet_decr_iorefcnt(ifp);
3240
3241 return (rc);
3242 }
3243
3244 errno_t
3245 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3246 struct mbuf **mp)
3247 {
3248 errno_t rc;
3249 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3250 return (EINVAL);
3251 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3252 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3253 return (ENXIO);
3254 if (!ifnet_is_attached(ifp, 1))
3255 return (ENXIO);
3256
3257 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
3258 ifnet_decr_iorefcnt(ifp);
3259 return (rc);
3260 }
3261
3262 errno_t
3263 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3264 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3265 {
3266 errno_t rc;
3267 if (ifp == NULL || head == NULL || pkt_limit < 1)
3268 return (EINVAL);
3269 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3270 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3271 return (ENXIO);
3272 if (!ifnet_is_attached(ifp, 1))
3273 return (ENXIO);
3274
3275 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3276 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, head, tail, cnt, len);
3277 ifnet_decr_iorefcnt(ifp);
3278 return (rc);
3279 }
3280
3281 errno_t
3282 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3283 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3284 {
3285 errno_t rc;
3286 if (ifp == NULL || head == NULL || byte_limit < 1)
3287 return (EINVAL);
3288 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3289 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3290 return (ENXIO);
3291 if (!ifnet_is_attached(ifp, 1))
3292 return (ENXIO);
3293
3294 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3295 byte_limit, head, tail, cnt, len);
3296 ifnet_decr_iorefcnt(ifp);
3297 return (rc);
3298 }
3299
3300 errno_t
3301 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3302 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3303 u_int32_t *len)
3304 {
3305 errno_t rc;
3306 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3307 !MBUF_VALID_SC(sc))
3308 return (EINVAL);
3309 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3310 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3311 return (ENXIO);
3312 if (!ifnet_is_attached(ifp, 1))
3313 return (ENXIO);
3314 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit, head,
3315 tail, cnt, len);
3316 ifnet_decr_iorefcnt(ifp);
3317 return (rc);
3318 }
3319
3320 errno_t
3321 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3322 const struct sockaddr *dest, const char *dest_linkaddr,
3323 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3324 {
3325 if (pre != NULL)
3326 *pre = 0;
3327 if (post != NULL)
3328 *post = 0;
3329
3330 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3331 }
3332
3333 static int
3334 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3335 char **frame_header_p, protocol_family_t protocol_family)
3336 {
3337 struct ifnet_filter *filter;
3338
3339 /*
3340 * Pass the inbound packet to the interface filters
3341 */
3342 lck_mtx_lock_spin(&ifp->if_flt_lock);
3343 /* prevent filter list from changing in case we drop the lock */
3344 if_flt_monitor_busy(ifp);
3345 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3346 int result;
3347
3348 if (!filter->filt_skip && filter->filt_input != NULL &&
3349 (filter->filt_protocol == 0 ||
3350 filter->filt_protocol == protocol_family)) {
3351 lck_mtx_unlock(&ifp->if_flt_lock);
3352
3353 result = (*filter->filt_input)(filter->filt_cookie,
3354 ifp, protocol_family, m_p, frame_header_p);
3355
3356 lck_mtx_lock_spin(&ifp->if_flt_lock);
3357 if (result != 0) {
3358 /* we're done with the filter list */
3359 if_flt_monitor_unbusy(ifp);
3360 lck_mtx_unlock(&ifp->if_flt_lock);
3361 return (result);
3362 }
3363 }
3364 }
3365 /* we're done with the filter list */
3366 if_flt_monitor_unbusy(ifp);
3367 lck_mtx_unlock(&ifp->if_flt_lock);
3368
3369 /*
3370 * Strip away M_PROTO1 bit prior to sending packet up the stack as
3371 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
3372 */
3373 if (*m_p != NULL)
3374 (*m_p)->m_flags &= ~M_PROTO1;
3375
3376 return (0);
3377 }
3378
3379 static int
3380 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3381 protocol_family_t protocol_family)
3382 {
3383 struct ifnet_filter *filter;
3384
3385 /*
3386 * Pass the outbound packet to the interface filters
3387 */
3388 lck_mtx_lock_spin(&ifp->if_flt_lock);
3389 /* prevent filter list from changing in case we drop the lock */
3390 if_flt_monitor_busy(ifp);
3391 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3392 int result;
3393
3394 if (!filter->filt_skip && filter->filt_output != NULL &&
3395 (filter->filt_protocol == 0 ||
3396 filter->filt_protocol == protocol_family)) {
3397 lck_mtx_unlock(&ifp->if_flt_lock);
3398
3399 result = filter->filt_output(filter->filt_cookie, ifp,
3400 protocol_family, m_p);
3401
3402 lck_mtx_lock_spin(&ifp->if_flt_lock);
3403 if (result != 0) {
3404 /* we're done with the filter list */
3405 if_flt_monitor_unbusy(ifp);
3406 lck_mtx_unlock(&ifp->if_flt_lock);
3407 return (result);
3408 }
3409 }
3410 }
3411 /* we're done with the filter list */
3412 if_flt_monitor_unbusy(ifp);
3413 lck_mtx_unlock(&ifp->if_flt_lock);
3414
3415 return (0);
3416 }
3417
3418 static void
3419 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3420 {
3421 int error;
3422
3423 if (ifproto->proto_kpi == kProtoKPI_v1) {
3424 /* Version 1 protocols get one packet at a time */
3425 while (m != NULL) {
3426 char * frame_header;
3427 mbuf_t next_packet;
3428
3429 next_packet = m->m_nextpkt;
3430 m->m_nextpkt = NULL;
3431 frame_header = m->m_pkthdr.pkt_hdr;
3432 m->m_pkthdr.pkt_hdr = NULL;
3433 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3434 ifproto->protocol_family, m, frame_header);
3435 if (error != 0 && error != EJUSTRETURN)
3436 m_freem(m);
3437 m = next_packet;
3438 }
3439 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3440 /* Version 2 protocols support packet lists */
3441 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3442 ifproto->protocol_family, m);
3443 if (error != 0 && error != EJUSTRETURN)
3444 m_freem_list(m);
3445 }
3446 }
3447
3448 static void
3449 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3450 struct dlil_threading_info *inp, boolean_t poll)
3451 {
3452 struct ifnet_stat_increment_param *d = &inp->stats;
3453
3454 if (s->packets_in != 0)
3455 d->packets_in += s->packets_in;
3456 if (s->bytes_in != 0)
3457 d->bytes_in += s->bytes_in;
3458 if (s->errors_in != 0)
3459 d->errors_in += s->errors_in;
3460
3461 if (s->packets_out != 0)
3462 d->packets_out += s->packets_out;
3463 if (s->bytes_out != 0)
3464 d->bytes_out += s->bytes_out;
3465 if (s->errors_out != 0)
3466 d->errors_out += s->errors_out;
3467
3468 if (s->collisions != 0)
3469 d->collisions += s->collisions;
3470 if (s->dropped != 0)
3471 d->dropped += s->dropped;
3472
3473 if (poll)
3474 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3475 }
3476
3477 static void
3478 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3479 {
3480 struct ifnet_stat_increment_param *s = &inp->stats;
3481
3482 /*
3483 * Use of atomic operations is unavoidable here because
3484 * these stats may also be incremented elsewhere via KPIs.
3485 */
3486 if (s->packets_in != 0) {
3487 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3488 s->packets_in = 0;
3489 }
3490 if (s->bytes_in != 0) {
3491 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3492 s->bytes_in = 0;
3493 }
3494 if (s->errors_in != 0) {
3495 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3496 s->errors_in = 0;
3497 }
3498
3499 if (s->packets_out != 0) {
3500 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3501 s->packets_out = 0;
3502 }
3503 if (s->bytes_out != 0) {
3504 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3505 s->bytes_out = 0;
3506 }
3507 if (s->errors_out != 0) {
3508 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3509 s->errors_out = 0;
3510 }
3511
3512 if (s->collisions != 0) {
3513 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3514 s->collisions = 0;
3515 }
3516 if (s->dropped != 0) {
3517 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3518 s->dropped = 0;
3519 }
3520 /*
3521 * If we went over the threshold, notify NetworkStatistics.
3522 */
3523 if (ifp->if_data_threshold &&
3524 (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
3525 ifp->if_data_threshold) {
3526 ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
3527
3528 lck_mtx_convert_spin(&inp->input_lck);
3529 nstat_ifnet_threshold_reached(ifp->if_index);
3530 }
3531 /*
3532 * No need for atomic operations as they are modified here
3533 * only from within the DLIL input thread context.
3534 */
3535 if (inp->tstats.packets != 0) {
3536 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3537 inp->tstats.packets = 0;
3538 }
3539 if (inp->tstats.bytes != 0) {
3540 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3541 inp->tstats.bytes = 0;
3542 }
3543 }
3544
3545 __private_extern__ void
3546 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3547 {
3548 return (dlil_input_packet_list_common(ifp, m, 0,
3549 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3550 }
3551
3552 __private_extern__ void
3553 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3554 u_int32_t cnt, ifnet_model_t mode)
3555 {
3556 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3557 }
3558
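/*
 * Common inbound path: for each packet in the chain, validate it, demux it
 * to a protocol family, run the interface filters, and batch consecutive
 * packets destined to the same protocol so that dlil_ifproto_input() is
 * invoked once per run rather than once per packet.
 */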
3559 static void
3560 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3561 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3562 {
3563 int error = 0;
3564 protocol_family_t protocol_family;
3565 mbuf_t next_packet;
3566 ifnet_t ifp = ifp_param;
3567 char * frame_header;
3568 struct if_proto * last_ifproto = NULL;
3569 mbuf_t pkt_first = NULL;
3570 mbuf_t * pkt_next = NULL;
3571 u_int32_t poll_thresh = 0, poll_ival = 0;
3572
3573 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3574
3575 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3576 (poll_ival = if_rxpoll_interval_pkts) > 0)
3577 poll_thresh = cnt;
3578
3579 while (m != NULL) {
3580 struct if_proto *ifproto = NULL;
3581 int iorefcnt = 0;
3582 uint32_t pktf_mask; /* pkt flags to preserve */
3583
3584 if (ifp_param == NULL)
3585 ifp = m->m_pkthdr.rcvif;
3586
3587 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3588 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3589 ifnet_poll(ifp);
3590
3591 /* Check if this mbuf looks valid */
3592 MBUF_INPUT_CHECK(m, ifp);
3593
3594 next_packet = m->m_nextpkt;
3595 m->m_nextpkt = NULL;
3596 frame_header = m->m_pkthdr.pkt_hdr;
3597 m->m_pkthdr.pkt_hdr = NULL;
3598
3599 /*
3600 * Get an IO reference count if the interface is not
3601 * loopback (lo0) and it is attached; lo0 never goes
3602 * away, so optimize for that.
3603 */
3604 if (ifp != lo_ifp) {
3605 if (!ifnet_is_attached(ifp, 1)) {
3606 m_freem(m);
3607 goto next;
3608 }
3609 iorefcnt = 1;
3610 pktf_mask = 0;
3611 } else {
3612 /*
3613 * If this arrived on lo0, preserve interface addr
3614 * info to allow for connectivity between loopback
3615 * and local interface addresses.
3616 */
3617 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3618 }
3619
3620 /* make sure packet comes in clean */
3621 m_classifier_init(m, pktf_mask);
3622
3623 ifp_inc_traffic_class_in(ifp, m);
3624
3625 /* find which protocol family this packet is for */
3626 ifnet_lock_shared(ifp);
3627 error = (*ifp->if_demux)(ifp, m, frame_header,
3628 &protocol_family);
3629 ifnet_lock_done(ifp);
3630 if (error != 0) {
3631 if (error == EJUSTRETURN)
3632 goto next;
3633 protocol_family = 0;
3634 }
3635
3636 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3637 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3638 dlil_input_cksum_dbg(ifp, m, frame_header,
3639 protocol_family);
3640
3641 /*
3642 * For partial checksum offload, we expect the driver to
3643 * set the start offset indicating the start of the span
3644 * that is covered by the hardware-computed checksum;
3645 * adjust this start offset accordingly because the data
3646 * pointer has been advanced beyond the link-layer header.
3647 *
3648 * Don't adjust if the interface is a bridge member, as
3649 * the adjustment will occur from the context of the
3650 * bridge interface during input.
3651 */
3652 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3653 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3654 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3655 int adj;
3656
3657 if (frame_header == NULL ||
3658 frame_header < (char *)mbuf_datastart(m) ||
3659 frame_header > (char *)m->m_data ||
3660 (adj = (m->m_data - frame_header)) >
3661 m->m_pkthdr.csum_rx_start) {
3662 m->m_pkthdr.csum_data = 0;
3663 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3664 hwcksum_in_invalidated++;
3665 } else {
3666 m->m_pkthdr.csum_rx_start -= adj;
3667 }
3668 }
3669
3670 pktap_input(ifp, protocol_family, m, frame_header);
3671
3672 if (m->m_flags & (M_BCAST|M_MCAST))
3673 atomic_add_64(&ifp->if_imcasts, 1);
3674
3675 /* run interface filters, exclude VLAN packets PR-3586856 */
3676 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
3677 error = dlil_interface_filters_input(ifp, &m,
3678 &frame_header, protocol_family);
3679 if (error != 0) {
3680 if (error != EJUSTRETURN)
3681 m_freem(m);
3682 goto next;
3683 }
3684 }
3685 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
3686 m_freem(m);
3687 goto next;
3688 }
3689
3690 /* Lookup the protocol attachment to this interface */
3691 if (protocol_family == 0) {
3692 ifproto = NULL;
3693 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3694 (last_ifproto->protocol_family == protocol_family)) {
3695 VERIFY(ifproto == NULL);
3696 ifproto = last_ifproto;
3697 if_proto_ref(last_ifproto);
3698 } else {
3699 VERIFY(ifproto == NULL);
3700 ifnet_lock_shared(ifp);
3701 /* callee holds a proto refcnt upon success */
3702 ifproto = find_attached_proto(ifp, protocol_family);
3703 ifnet_lock_done(ifp);
3704 }
3705 if (ifproto == NULL) {
3706 /* no protocol for this packet, discard */
3707 m_freem(m);
3708 goto next;
3709 }
3710 if (ifproto != last_ifproto) {
3711 if (last_ifproto != NULL) {
3712 /* pass up the list for the previous protocol */
3713 dlil_ifproto_input(last_ifproto, pkt_first);
3714 pkt_first = NULL;
3715 if_proto_free(last_ifproto);
3716 }
3717 last_ifproto = ifproto;
3718 if_proto_ref(ifproto);
3719 }
3720 /* extend the list */
3721 m->m_pkthdr.pkt_hdr = frame_header;
3722 if (pkt_first == NULL) {
3723 pkt_first = m;
3724 } else {
3725 *pkt_next = m;
3726 }
3727 pkt_next = &m->m_nextpkt;
3728
3729 next:
3730 if (next_packet == NULL && last_ifproto != NULL) {
3731 /* pass up the last list of packets */
3732 dlil_ifproto_input(last_ifproto, pkt_first);
3733 if_proto_free(last_ifproto);
3734 last_ifproto = NULL;
3735 }
3736 if (ifproto != NULL) {
3737 if_proto_free(ifproto);
3738 ifproto = NULL;
3739 }
3740
3741 m = next_packet;
3742
3743 /* update the driver's multicast filter, if needed */
3744 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3745 ifp->if_updatemcasts = 0;
3746 if (iorefcnt == 1)
3747 ifnet_decr_iorefcnt(ifp);
3748 }
3749
3750 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
3751 }
3752
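/*
 * Ask the driver to update its multicast filter (SIOCADDMULTI with a NULL
 * argument) so that suspended link-layer multicast memberships can be
 * restored; EAFNOSUPPORT from the driver is treated as success, the outcome
 * is logged, and the routine always reports success to its caller.
 */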
3753 errno_t
3754 if_mcasts_update(struct ifnet *ifp)
3755 {
3756 errno_t err;
3757
3758 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
3759 if (err == EAFNOSUPPORT)
3760 err = 0;
3761 printf("%s: %s %d suspended link-layer multicast membership(s) "
3762 "(err=%d)\n", if_name(ifp),
3763 (err == 0 ? "successfully restored" : "failed to restore"),
3764 ifp->if_updatemcasts, err);
3765
3766 /* just return success */
3767 return (0);
3768 }
3769
3770 /* If ifp is set, we will increment the generation for the interface */
3771 int
3772 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
3773 {
3774 if (ifp != NULL) {
3775 ifnet_increment_generation(ifp);
3776 }
3777
3778 #if NECP
3779 necp_update_all_clients();
3780 #endif /* NECP */
3781
3782 return (kev_post_msg(event));
3783 }
3784
3785 #define TMP_IF_PROTO_ARR_SIZE 10
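/*
 * Distribute a kernel event to the interface filters, every attached
 * protocol and the interface itself, then post it via the kernel event
 * subsystem (bumping the interface generation when requested).
 */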
3786 static int
3787 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
3788 {
3789 struct ifnet_filter *filter = NULL;
3790 struct if_proto *proto = NULL;
3791 int if_proto_count = 0;
3792 struct if_proto **tmp_ifproto_arr = NULL;
3793 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
3794 int tmp_ifproto_arr_idx = 0;
3795 bool tmp_malloc = false;
3796
3797 /*
3798 * Pass the event to the interface filters
3799 */
3800 lck_mtx_lock_spin(&ifp->if_flt_lock);
3801 /* prevent filter list from changing in case we drop the lock */
3802 if_flt_monitor_busy(ifp);
3803 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3804 if (filter->filt_event != NULL) {
3805 lck_mtx_unlock(&ifp->if_flt_lock);
3806
3807 filter->filt_event(filter->filt_cookie, ifp,
3808 filter->filt_protocol, event);
3809
3810 lck_mtx_lock_spin(&ifp->if_flt_lock);
3811 }
3812 }
3813 /* we're done with the filter list */
3814 if_flt_monitor_unbusy(ifp);
3815 lck_mtx_unlock(&ifp->if_flt_lock);
3816
3817 /* Get an io ref count if the interface is attached */
3818 if (!ifnet_is_attached(ifp, 1))
3819 goto done;
3820
3821 /*
3822 * An embedded tmp_list_entry in if_proto may still get
3823 * over-written by another thread after we give up the ifnet lock,
3824 * so we avoid embedded pointers here.
3825 */
3826 ifnet_lock_shared(ifp);
3827 if_proto_count = dlil_ifp_proto_count(ifp);
3828 if (if_proto_count) {
3829 int i;
3830 VERIFY(ifp->if_proto_hash != NULL);
3831 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
3832 tmp_ifproto_arr = tmp_ifproto_stack_arr;
3833 } else {
3834 MALLOC(tmp_ifproto_arr, struct if_proto **,
3835 sizeof (*tmp_ifproto_arr) * if_proto_count,
3836 M_TEMP, M_ZERO);
3837 if (tmp_ifproto_arr == NULL) {
3838 ifnet_lock_done(ifp);
3839 goto cleanup;
3840 }
3841 tmp_malloc = true;
3842 }
3843
3844 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
3845 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
3846 next_hash) {
3847 if_proto_ref(proto);
3848 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
3849 tmp_ifproto_arr_idx++;
3850 }
3851 }
3852 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
3853 }
3854 ifnet_lock_done(ifp);
3855
3856 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
3857 tmp_ifproto_arr_idx++) {
3858 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
3859 VERIFY(proto != NULL);
3860 proto_media_event eventp =
3861 (proto->proto_kpi == kProtoKPI_v1 ?
3862 proto->kpi.v1.event :
3863 proto->kpi.v2.event);
3864
3865 if (eventp != NULL) {
3866 eventp(ifp, proto->protocol_family,
3867 event);
3868 }
3869 if_proto_free(proto);
3870 }
3871
3872 cleanup:
3873 if (tmp_malloc) {
3874 FREE(tmp_ifproto_arr, M_TEMP);
3875 }
3876
3877 /* Pass the event to the interface */
3878 if (ifp->if_event != NULL)
3879 ifp->if_event(ifp, event);
3880
3881 /* Release the io ref count */
3882 ifnet_decr_iorefcnt(ifp);
3883 done:
3884 return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
3885 }
3886
3887 errno_t
3888 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3889 {
3890 struct kev_msg kev_msg;
3891 int result = 0;
3892
3893 if (ifp == NULL || event == NULL)
3894 return (EINVAL);
3895
3896 bzero(&kev_msg, sizeof (kev_msg));
3897 kev_msg.vendor_code = event->vendor_code;
3898 kev_msg.kev_class = event->kev_class;
3899 kev_msg.kev_subclass = event->kev_subclass;
3900 kev_msg.event_code = event->event_code;
3901 kev_msg.dv[0].data_ptr = &event->event_data[0];
3902 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3903 kev_msg.dv[1].data_length = 0;
3904
3905 result = dlil_event_internal(ifp, &kev_msg, TRUE);
3906
3907 return (result);
3908 }
3909
3910 #if CONFIG_MACF_NET
3911 #include <netinet/ip6.h>
3912 #include <netinet/ip.h>
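/*
 * Infer a socket type (stream, datagram or raw) from the outbound packet so
 * that mac_ifnet_check_transmit() can be given something meaningful to
 * evaluate; anything other than non-raw TCP/UDP is treated as SOCK_RAW.
 */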
3913 static int
3914 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
3915 {
3916 struct mbuf *m;
3917 struct ip *ip;
3918 struct ip6_hdr *ip6;
3919 int type = SOCK_RAW;
3920
3921 if (!raw) {
3922 switch (family) {
3923 case PF_INET:
3924 m = m_pullup(*mp, sizeof(struct ip));
3925 if (m == NULL)
3926 break;
3927 *mp = m;
3928 ip = mtod(m, struct ip *);
3929 if (ip->ip_p == IPPROTO_TCP)
3930 type = SOCK_STREAM;
3931 else if (ip->ip_p == IPPROTO_UDP)
3932 type = SOCK_DGRAM;
3933 break;
3934 case PF_INET6:
3935 m = m_pullup(*mp, sizeof(struct ip6_hdr));
3936 if (m == NULL)
3937 break;
3938 *mp = m;
3939 ip6 = mtod(m, struct ip6_hdr *);
3940 if (ip6->ip6_nxt == IPPROTO_TCP)
3941 type = SOCK_STREAM;
3942 else if (ip6->ip6_nxt == IPPROTO_UDP)
3943 type = SOCK_DGRAM;
3944 break;
3945 }
3946 }
3947
3948 return (type);
3949 }
3950 #endif
3951
3952 /*
3953 * This is mostly called from the context of the DLIL input thread;
3954 * because of that there is no need for atomic operations.
3955 */
3956 static __inline void
3957 ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
3958 {
3959 if (!(m->m_flags & M_PKTHDR))
3960 return;
3961
3962 switch (m_get_traffic_class(m)) {
3963 case MBUF_TC_BE:
3964 ifp->if_tc.ifi_ibepackets++;
3965 ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
3966 break;
3967 case MBUF_TC_BK:
3968 ifp->if_tc.ifi_ibkpackets++;
3969 ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
3970 break;
3971 case MBUF_TC_VI:
3972 ifp->if_tc.ifi_ivipackets++;
3973 ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
3974 break;
3975 case MBUF_TC_VO:
3976 ifp->if_tc.ifi_ivopackets++;
3977 ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
3978 break;
3979 default:
3980 break;
3981 }
3982
3983 if (mbuf_is_traffic_class_privileged(m)) {
3984 ifp->if_tc.ifi_ipvpackets++;
3985 ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
3986 }
3987 }
3988
3989 /*
3990 * This is called from DLIL output, hence multiple threads could end
3991 * up modifying the statistics. We trade off accuracy for performance
3992 * by not using atomic operations here.
3993 */
3994 static __inline void
3995 ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
3996 {
3997 if (!(m->m_flags & M_PKTHDR))
3998 return;
3999
4000 switch (m_get_traffic_class(m)) {
4001 case MBUF_TC_BE:
4002 ifp->if_tc.ifi_obepackets++;
4003 ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
4004 break;
4005 case MBUF_TC_BK:
4006 ifp->if_tc.ifi_obkpackets++;
4007 ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
4008 break;
4009 case MBUF_TC_VI:
4010 ifp->if_tc.ifi_ovipackets++;
4011 ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
4012 break;
4013 case MBUF_TC_VO:
4014 ifp->if_tc.ifi_ovopackets++;
4015 ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
4016 break;
4017 default:
4018 break;
4019 }
4020
4021 if (mbuf_is_traffic_class_privileged(m)) {
4022 ifp->if_tc.ifi_opvpackets++;
4023 ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
4024 }
4025 }
4026
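/*
 * Count the mbufs in this packet's m_next chain and bump the matching
 * bucket of the transmit chain-length histogram.
 */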
4027 static void
4028 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4029 {
4030 mbuf_t n = m;
4031 int chainlen = 0;
4032
4033 while (n != NULL) {
4034 chainlen++;
4035 n = n->m_next;
4036 }
4037 switch (chainlen) {
4038 case 0:
4039 break;
4040 case 1:
4041 atomic_add_64(&cls->cls_one, 1);
4042 break;
4043 case 2:
4044 atomic_add_64(&cls->cls_two, 1);
4045 break;
4046 case 3:
4047 atomic_add_64(&cls->cls_three, 1);
4048 break;
4049 case 4:
4050 atomic_add_64(&cls->cls_four, 1);
4051 break;
4052 case 5:
4053 default:
4054 atomic_add_64(&cls->cls_five_or_more, 1);
4055 break;
4056 }
4057 }
4058
4059 /*
4060 * dlil_output
4061 *
4062 * Caller should have a lock on the protocol domain if the protocol
4063 * doesn't support finer grained locking. In most cases, the lock
4064 * will be held from the socket layer and won't be released until
4065 * we return back to the socket layer.
4066 *
4067 * This does mean that we must take a protocol lock before we take
4068 * an interface lock if we're going to take both. This makes sense
4069 * because a protocol is likely to interact with an ifp while it
4070 * is under the protocol lock.
4071 *
4072 * An advisory code will be returned if adv is not null. This
4073 * can be used to provide feedback about interface queues to the
4074 * application.
4075 */
4076 errno_t
4077 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4078 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4079 {
4080 ifnet_output_handler_func handler_func;
4081 char *frame_type = NULL;
4082 char *dst_linkaddr = NULL;
4083 int retval = 0;
4084 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4085 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4086 struct if_proto *proto = NULL;
4087 mbuf_t m;
4088 mbuf_t send_head = NULL;
4089 mbuf_t *send_tail = &send_head;
4090 int iorefcnt = 0;
4091 u_int32_t pre = 0, post = 0;
4092 u_int32_t fpkts = 0, fbytes = 0;
4093 int32_t flen = 0;
4094
4095 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4096
4097 /*
4098 * Get an io refcnt if the interface is attached, to prevent ifnet_detach
4099 * from happening while this operation is in progress
4100 */
4101 if (!ifnet_is_attached(ifp, 1)) {
4102 retval = ENXIO;
4103 goto cleanup;
4104 }
4105 iorefcnt = 1;
4106
4107 handler_func = ifp->if_output_handler;
4108 VERIFY(handler_func != NULL);
4109
4110 /* update the driver's multicast filter, if needed */
4111 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4112 ifp->if_updatemcasts = 0;
4113
4114 frame_type = frame_type_buffer;
4115 dst_linkaddr = dst_linkaddr_buffer;
4116
4117 if (raw == 0) {
4118 ifnet_lock_shared(ifp);
4119 /* callee holds a proto refcnt upon success */
4120 proto = find_attached_proto(ifp, proto_family);
4121 if (proto == NULL) {
4122 ifnet_lock_done(ifp);
4123 retval = ENXIO;
4124 goto cleanup;
4125 }
4126 ifnet_lock_done(ifp);
4127 }
4128
4129 preout_again:
4130 if (packetlist == NULL)
4131 goto cleanup;
4132
4133 m = packetlist;
4134 packetlist = packetlist->m_nextpkt;
4135 m->m_nextpkt = NULL;
4136
4137 if (raw == 0) {
4138 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4139 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4140 retval = 0;
4141 if (preoutp != NULL) {
4142 retval = preoutp(ifp, proto_family, &m, dest, route,
4143 frame_type, dst_linkaddr);
4144
4145 if (retval != 0) {
4146 if (retval == EJUSTRETURN)
4147 goto preout_again;
4148 m_freem(m);
4149 goto cleanup;
4150 }
4151 }
4152 }
4153
4154 #if CONFIG_MACF_NET
4155 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4156 dlil_get_socket_type(&m, proto_family, raw));
4157 if (retval != 0) {
4158 m_freem(m);
4159 goto cleanup;
4160 }
4161 #endif
4162
4163 do {
4164 #if CONFIG_DTRACE
4165 if (!raw && proto_family == PF_INET) {
4166 struct ip *ip = mtod(m, struct ip *);
4167 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4168 struct ip *, ip, struct ifnet *, ifp,
4169 struct ip *, ip, struct ip6_hdr *, NULL);
4170
4171 } else if (!raw && proto_family == PF_INET6) {
4172 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4173 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4174 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4175 struct ip *, NULL, struct ip6_hdr *, ip6);
4176 }
4177 #endif /* CONFIG_DTRACE */
4178
4179 if (raw == 0 && ifp->if_framer != NULL) {
4180 int rcvif_set = 0;
4181
4182 /*
4183 * If this is a broadcast packet that needs to be
4184 * looped back into the system, set the inbound ifp
4185 * to that of the outbound ifp. This will allow
4186 * us to determine that it is a legitimate packet
4187 * for the system. Only set the ifp if it's not
4188 * already set, just to be safe.
4189 */
4190 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4191 m->m_pkthdr.rcvif == NULL) {
4192 m->m_pkthdr.rcvif = ifp;
4193 rcvif_set = 1;
4194 }
4195
4196 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4197 frame_type, &pre, &post);
4198 if (retval != 0) {
4199 if (retval != EJUSTRETURN)
4200 m_freem(m);
4201 goto next;
4202 }
4203
4204 /*
4205 * For partial checksum offload, adjust the start
4206 * and stuff offsets based on the prepended header.
4207 */
4208 if ((m->m_pkthdr.csum_flags &
4209 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4210 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4211 m->m_pkthdr.csum_tx_stuff += pre;
4212 m->m_pkthdr.csum_tx_start += pre;
4213 }
4214
4215 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4216 dlil_output_cksum_dbg(ifp, m, pre,
4217 proto_family);
4218
4219 /*
4220 * Clear the ifp if it was set above, and to be
4221 * safe, only if it is still the same as the
4222 * outbound ifp we have in context. If it was
4223 * looped back, then a copy of it was sent to the
4224 * loopback interface with the rcvif set, and we
4225 * are clearing the one that will go down to the
4226 * layer below.
4227 */
4228 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4229 m->m_pkthdr.rcvif = NULL;
4230 }
4231
4232 /*
4233 * Let interface filters (if any) do their thing ...
4234 */
4235 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4236 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4237 retval = dlil_interface_filters_output(ifp,
4238 &m, proto_family);
4239 if (retval != 0) {
4240 if (retval != EJUSTRETURN)
4241 m_freem(m);
4242 goto next;
4243 }
4244 }
4245 /*
4246 * Strip away M_PROTO1 bit prior to sending packet
4247 * to the driver as this field may be used by the driver
4248 */
4249 m->m_flags &= ~M_PROTO1;
4250
4251 /*
4252 * If the underlying interface is not capable of handling a
4253 * packet whose data portion spans across physically disjoint
4254 * pages, we need to "normalize" the packet so that we pass
4255 * down a chain of mbufs where each mbuf points to a span that
4256 * resides within a single system page. If the packet does
4257 * not cross a page boundary, the following is a no-op.
4258 */
4259 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4260 if ((m = m_normalize(m)) == NULL)
4261 goto next;
4262 }
4263
4264 /*
4265 * If this is a TSO packet, make sure the interface still
4266 * advertises TSO capability.
4267 */
4268 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4269 retval = EMSGSIZE;
4270 m_freem(m);
4271 goto cleanup;
4272 }
4273
4274 /*
4275 * If the packet service class is not background,
4276 * update the timestamp to indicate recent activity
4277 * on a foreground socket.
4278 */
4279 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
4280 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
4281 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
4282 ifp->if_fg_sendts = net_uptime();
4283
4284 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
4285 ifp->if_rt_sendts = net_uptime();
4286 }
4287
4288 ifp_inc_traffic_class_out(ifp, m);
4289 pktap_output(ifp, proto_family, m, pre, post);
4290
4291 /*
4292 * Count the number of elements in the mbuf chain
4293 */
4294 if (tx_chain_len_count) {
4295 dlil_count_chain_len(m, &tx_chain_len_stats);
4296 }
4297
4298 /*
4299 * Finally, call the driver.
4300 */
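/*
 * If the driver handles packet lists (IFEF_SENDLIST) or multi-packet
 * enqueue (IFEF_ENQUEUE_MULTI), accumulate packets on send_head and
 * hand them over after the loop; otherwise pass each packet to the
 * output handler individually right here.
 */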
4301 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4302 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4303 flen += (m_pktlen(m) - (pre + post));
4304 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4305 }
4306 *send_tail = m;
4307 send_tail = &m->m_nextpkt;
4308 } else {
4309 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4310 flen = (m_pktlen(m) - (pre + post));
4311 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4312 } else {
4313 flen = 0;
4314 }
4315 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4316 0, 0, 0, 0, 0);
4317 retval = (*handler_func)(ifp, m);
4318 if (retval == EQFULL || retval == EQSUSPENDED) {
4319 if (adv != NULL && adv->code == FADV_SUCCESS) {
4320 adv->code = (retval == EQFULL ?
4321 FADV_FLOW_CONTROLLED :
4322 FADV_SUSPENDED);
4323 }
4324 retval = 0;
4325 }
4326 if (retval == 0 && flen > 0) {
4327 fbytes += flen;
4328 fpkts++;
4329 }
4330 if (retval != 0 && dlil_verbose) {
4331 printf("%s: output error on %s retval = %d\n",
4332 __func__, if_name(ifp),
4333 retval);
4334 }
4335 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4336 0, 0, 0, 0, 0);
4337 }
4338 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4339
4340 next:
4341 m = packetlist;
4342 if (m != NULL) {
4343 packetlist = packetlist->m_nextpkt;
4344 m->m_nextpkt = NULL;
4345 }
4346 } while (m != NULL);
4347
4348 if (send_head != NULL) {
4349 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4350 0, 0, 0, 0, 0);
4351 if (ifp->if_eflags & IFEF_SENDLIST) {
4352 retval = (*handler_func)(ifp, send_head);
4353 if (retval == EQFULL || retval == EQSUSPENDED) {
4354 if (adv != NULL) {
4355 adv->code = (retval == EQFULL ?
4356 FADV_FLOW_CONTROLLED :
4357 FADV_SUSPENDED);
4358 }
4359 retval = 0;
4360 }
4361 if (retval == 0 && flen > 0) {
4362 fbytes += flen;
4363 fpkts++;
4364 }
4365 if (retval != 0 && dlil_verbose) {
4366 printf("%s: output error on %s retval = %d\n",
4367 __func__, if_name(ifp), retval);
4368 }
4369 } else {
4370 struct mbuf *send_m;
4371 int enq_cnt = 0;
4372 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4373 while (send_head != NULL) {
4374 send_m = send_head;
4375 send_head = send_m->m_nextpkt;
4376 send_m->m_nextpkt = NULL;
4377 retval = (*handler_func)(ifp, send_m);
4378 if (retval == EQFULL || retval == EQSUSPENDED) {
4379 if (adv != NULL) {
4380 adv->code = (retval == EQFULL ?
4381 FADV_FLOW_CONTROLLED :
4382 FADV_SUSPENDED);
4383 }
4384 retval = 0;
4385 }
4386 if (retval == 0) {
4387 enq_cnt++;
4388 if (flen > 0)
4389 fpkts++;
4390 }
4391 if (retval != 0 && dlil_verbose) {
4392 printf("%s: output error on %s "
4393 "retval = %d\n",
4394 __func__, if_name(ifp), retval);
4395 }
4396 }
4397 if (enq_cnt > 0) {
4398 fbytes += flen;
4399 ifnet_start(ifp);
4400 }
4401 }
4402 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4403 }
4404
4405 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4406
4407 cleanup:
4408 if (fbytes > 0)
4409 ifp->if_fbytes += fbytes;
4410 if (fpkts > 0)
4411 ifp->if_fpackets += fpkts;
4412 if (proto != NULL)
4413 if_proto_free(proto);
4414 if (packetlist) /* if any packets are left, clean up */
4415 mbuf_freem_list(packetlist);
4416 if (retval == EJUSTRETURN)
4417 retval = 0;
4418 if (iorefcnt == 1)
4419 ifnet_decr_iorefcnt(ifp);
4420
4421 return (retval);
4422 }
4423
4424 errno_t
4425 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4426 void *ioctl_arg)
4427 {
4428 struct ifnet_filter *filter;
4429 int retval = EOPNOTSUPP;
4430 int result = 0;
4431
4432 if (ifp == NULL || ioctl_code == 0)
4433 return (EINVAL);
4434
4435 /* Get an io ref count if the interface is attached */
4436 if (!ifnet_is_attached(ifp, 1))
4437 return (EOPNOTSUPP);
4438
4439 /*
4440 * Run the interface filters first.
4441 * We want to run all filters before calling the protocol,
4442 * interface family, or interface.
4443 */
4444 lck_mtx_lock_spin(&ifp->if_flt_lock);
4445 /* prevent filter list from changing in case we drop the lock */
4446 if_flt_monitor_busy(ifp);
4447 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4448 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4449 filter->filt_protocol == proto_fam)) {
4450 lck_mtx_unlock(&ifp->if_flt_lock);
4451
4452 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4453 proto_fam, ioctl_code, ioctl_arg);
4454
4455 lck_mtx_lock_spin(&ifp->if_flt_lock);
4456
4457 /* Only update retval if no one has handled the ioctl */
4458 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4459 if (result == ENOTSUP)
4460 result = EOPNOTSUPP;
4461 retval = result;
4462 if (retval != 0 && retval != EOPNOTSUPP) {
4463 /* we're done with the filter list */
4464 if_flt_monitor_unbusy(ifp);
4465 lck_mtx_unlock(&ifp->if_flt_lock);
4466 goto cleanup;
4467 }
4468 }
4469 }
4470 }
4471 /* we're done with the filter list */
4472 if_flt_monitor_unbusy(ifp);
4473 lck_mtx_unlock(&ifp->if_flt_lock);
4474
4475 /* Allow the protocol to handle the ioctl */
4476 if (proto_fam != 0) {
4477 struct if_proto *proto;
4478
4479 /* callee holds a proto refcnt upon success */
4480 ifnet_lock_shared(ifp);
4481 proto = find_attached_proto(ifp, proto_fam);
4482 ifnet_lock_done(ifp);
4483 if (proto != NULL) {
4484 proto_media_ioctl ioctlp =
4485 (proto->proto_kpi == kProtoKPI_v1 ?
4486 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
4487 result = EOPNOTSUPP;
4488 if (ioctlp != NULL)
4489 result = ioctlp(ifp, proto_fam, ioctl_code,
4490 ioctl_arg);
4491 if_proto_free(proto);
4492
4493 /* Only update retval if no one has handled the ioctl */
4494 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4495 if (result == ENOTSUP)
4496 result = EOPNOTSUPP;
4497 retval = result;
4498 if (retval && retval != EOPNOTSUPP)
4499 goto cleanup;
4500 }
4501 }
4502 }
4503
4504 /* retval is either 0 or EOPNOTSUPP */
4505
4506 /*
4507 * Let the interface handle this ioctl.
4508 * If it returns EOPNOTSUPP, ignore that; we may have
4509 * already handled this in the protocol or family.
4510 */
4511 if (ifp->if_ioctl)
4512 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
4513
4514 /* Only update retval if no one has handled the ioctl */
4515 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4516 if (result == ENOTSUP)
4517 result = EOPNOTSUPP;
4518 retval = result;
4519 if (retval && retval != EOPNOTSUPP) {
4520 goto cleanup;
4521 }
4522 }
4523
4524 cleanup:
4525 if (retval == EJUSTRETURN)
4526 retval = 0;
4527
4528 ifnet_decr_iorefcnt(ifp);
4529
4530 return (retval);
4531 }
4532
4533 __private_extern__ errno_t
4534 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4535 {
4536 errno_t error = 0;
4537
4538
4539 if (ifp->if_set_bpf_tap) {
4540 /* Get an io reference on the interface if it is attached */
4541 if (!ifnet_is_attached(ifp, 1))
4542 return (ENXIO);
4543 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4544 ifnet_decr_iorefcnt(ifp);
4545 }
4546 return (error);
4547 }
4548
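/*
 * Resolve a multicast protocol address to a link-layer address: ask the
 * attached protocol's resolve_multi callback first, then give the interface
 * a chance to verify the (resolved or original) address via if_check_multi.
 */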
4549 errno_t
4550 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4551 struct sockaddr *ll_addr, size_t ll_len)
4552 {
4553 errno_t result = EOPNOTSUPP;
4554 struct if_proto *proto;
4555 const struct sockaddr *verify;
4556 proto_media_resolve_multi resolvep;
4557
4558 if (!ifnet_is_attached(ifp, 1))
4559 return (result);
4560
4561 bzero(ll_addr, ll_len);
4562
4563 /* Call the protocol first; callee holds a proto refcnt upon success */
4564 ifnet_lock_shared(ifp);
4565 proto = find_attached_proto(ifp, proto_addr->sa_family);
4566 ifnet_lock_done(ifp);
4567 if (proto != NULL) {
4568 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4569 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
4570 if (resolvep != NULL)
4571 result = resolvep(ifp, proto_addr,
4572 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
4573 if_proto_free(proto);
4574 }
4575
4576 /* Let the interface verify the multicast address */
4577 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4578 if (result == 0)
4579 verify = ll_addr;
4580 else
4581 verify = proto_addr;
4582 result = ifp->if_check_multi(ifp, verify);
4583 }
4584
4585 ifnet_decr_iorefcnt(ifp);
4586 return (result);
4587 }
4588
4589 __private_extern__ errno_t
4590 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4591 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4592 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4593 {
4594 struct if_proto *proto;
4595 errno_t result = 0;
4596
4597 /* callee holds a proto refcnt upon success */
4598 ifnet_lock_shared(ifp);
4599 proto = find_attached_proto(ifp, target_proto->sa_family);
4600 ifnet_lock_done(ifp);
4601 if (proto == NULL) {
4602 result = ENOTSUP;
4603 } else {
4604 proto_media_send_arp arpp;
4605 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4606 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4607 if (arpp == NULL) {
4608 result = ENOTSUP;
4609 } else {
4610 switch (arpop) {
4611 case ARPOP_REQUEST:
4612 arpstat.txrequests++;
4613 if (target_hw != NULL)
4614 arpstat.txurequests++;
4615 break;
4616 case ARPOP_REPLY:
4617 arpstat.txreplies++;
4618 break;
4619 }
4620 result = arpp(ifp, arpop, sender_hw, sender_proto,
4621 target_hw, target_proto);
4622 }
4623 if_proto_free(proto);
4624 }
4625
4626 return (result);
4627 }
4628
4629 struct net_thread_marks { };
4630 static const struct net_thread_marks net_thread_marks_base = { };
4631
4632 __private_extern__ const net_thread_marks_t net_thread_marks_none =
4633 &net_thread_marks_base;
4634
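/*
 * The thread mark push/pop routines encode, as a byte offset from
 * net_thread_marks_base, the set of bits they actually toggled in the
 * current uthread's uu_network_marks.  The opaque cookie returned to the
 * caller therefore carries exactly the bits that the matching pop routine
 * must undo, without requiring any allocation.
 */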
4635 __private_extern__ net_thread_marks_t
4636 net_thread_marks_push(u_int32_t push)
4637 {
4638 static const char *const base = (const void*)&net_thread_marks_base;
4639 u_int32_t pop = 0;
4640
4641 if (push != 0) {
4642 struct uthread *uth = get_bsdthread_info(current_thread());
4643
4644 pop = push & ~uth->uu_network_marks;
4645 if (pop != 0)
4646 uth->uu_network_marks |= pop;
4647 }
4648
4649 return ((net_thread_marks_t)&base[pop]);
4650 }
4651
4652 __private_extern__ net_thread_marks_t
4653 net_thread_unmarks_push(u_int32_t unpush)
4654 {
4655 static const char *const base = (const void*)&net_thread_marks_base;
4656 u_int32_t unpop = 0;
4657
4658 if (unpush != 0) {
4659 struct uthread *uth = get_bsdthread_info(current_thread());
4660
4661 unpop = unpush & uth->uu_network_marks;
4662 if (unpop != 0)
4663 uth->uu_network_marks &= ~unpop;
4664 }
4665
4666 return ((net_thread_marks_t)&base[unpop]);
4667 }
4668
4669 __private_extern__ void
4670 net_thread_marks_pop(net_thread_marks_t popx)
4671 {
4672 static const char *const base = (const void*)&net_thread_marks_base;
4673 const ptrdiff_t pop = (const char *)popx - (const char *)base;
4674
4675 if (pop != 0) {
4676 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4677 struct uthread *uth = get_bsdthread_info(current_thread());
4678
4679 VERIFY((pop & ones) == pop);
4680 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4681 uth->uu_network_marks &= ~pop;
4682 }
4683 }
4684
4685 __private_extern__ void
4686 net_thread_unmarks_pop(net_thread_marks_t unpopx)
4687 {
4688 static const char *const base = (const void*)&net_thread_marks_base;
4689 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
4690
4691 if (unpop != 0) {
4692 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4693 struct uthread *uth = get_bsdthread_info(current_thread());
4694
4695 VERIFY((unpop & ones) == unpop);
4696 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4697 uth->uu_network_marks |= unpop;
4698 }
4699 }
4700
4701 __private_extern__ u_int32_t
4702 net_thread_is_marked(u_int32_t check)
4703 {
4704 if (check != 0) {
4705 struct uthread *uth = get_bsdthread_info(current_thread());
4706 return (uth->uu_network_marks & check);
4707 }
4708 else
4709 return (0);
4710 }
4711
4712 __private_extern__ u_int32_t
4713 net_thread_is_unmarked(u_int32_t check)
4714 {
4715 if (check != 0) {
4716 struct uthread *uth = get_bsdthread_info(current_thread());
4717 return (~uth->uu_network_marks & check);
4718 }
4719 else
4720 return (0);
4721 }
4722
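/*
 * An ARP announcement (gratuitous ARP) carries identical sender and target
 * IP addresses; dlil_send_arp() uses this check to keep announcements on
 * the originating interface instead of replicating them.
 */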
4723 static __inline__ int
4724 _is_announcement(const struct sockaddr_in * sender_sin,
4725 const struct sockaddr_in * target_sin)
4726 {
4727 if (sender_sin == NULL) {
4728 return (FALSE);
4729 }
4730 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4731 }
4732
4733 __private_extern__ errno_t
4734 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
4735 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
4736 const struct sockaddr *target_proto0, u_int32_t rtflags)
4737 {
4738 errno_t result = 0;
4739 const struct sockaddr_in * sender_sin;
4740 const struct sockaddr_in * target_sin;
4741 struct sockaddr_inarp target_proto_sinarp;
4742 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
4743
4744 if (target_proto == NULL || (sender_proto != NULL &&
4745 sender_proto->sa_family != target_proto->sa_family))
4746 return (EINVAL);
4747
4748 /*
4749 * If the target is a (default) router, provide that
4750 * information to the send_arp callback routine.
4751 */
4752 if (rtflags & RTF_ROUTER) {
4753 bcopy(target_proto, &target_proto_sinarp,
4754 sizeof (struct sockaddr_in));
4755 target_proto_sinarp.sin_other |= SIN_ROUTER;
4756 target_proto = (struct sockaddr *)&target_proto_sinarp;
4757 }
4758
4759 /*
4760 * If this is an ARP request and the target IP is IPv4LL,
4761 * send the request on all interfaces. The exception is
4762 * an announcement, which must only appear on the specific
4763 * interface.
4764 */
4765 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4766 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
4767 if (target_proto->sa_family == AF_INET &&
4768 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4769 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4770 !_is_announcement(target_sin, sender_sin)) {
4771 ifnet_t *ifp_list;
4772 u_int32_t count;
4773 u_int32_t ifp_on;
4774
4775 result = ENOTSUP;
4776
4777 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4778 for (ifp_on = 0; ifp_on < count; ifp_on++) {
4779 errno_t new_result;
4780 ifaddr_t source_hw = NULL;
4781 ifaddr_t source_ip = NULL;
4782 struct sockaddr_in source_ip_copy;
4783 struct ifnet *cur_ifp = ifp_list[ifp_on];
4784
4785 /*
4786 * Only arp on interfaces marked for IPv4LL
4787 * ARPing. This may mean that we don't ARP on
4788 * the interface the subnet route points to.
4789 */
4790 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
4791 continue;
4792
4793 /* Find the source IP address */
4794 ifnet_lock_shared(cur_ifp);
4795 source_hw = cur_ifp->if_lladdr;
4796 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
4797 ifa_link) {
4798 IFA_LOCK(source_ip);
4799 if (source_ip->ifa_addr != NULL &&
4800 source_ip->ifa_addr->sa_family ==
4801 AF_INET) {
4802 /* Copy the source IP address */
4803 source_ip_copy =
4804 *(struct sockaddr_in *)
4805 (void *)source_ip->ifa_addr;
4806 IFA_UNLOCK(source_ip);
4807 break;
4808 }
4809 IFA_UNLOCK(source_ip);
4810 }
4811
4812 /* No IP Source, don't arp */
4813 if (source_ip == NULL) {
4814 ifnet_lock_done(cur_ifp);
4815 continue;
4816 }
4817
4818 IFA_ADDREF(source_hw);
4819 ifnet_lock_done(cur_ifp);
4820
4821 /* Send the ARP */
4822 new_result = dlil_send_arp_internal(cur_ifp,
4823 arpop, (struct sockaddr_dl *)(void *)
4824 source_hw->ifa_addr,
4825 (struct sockaddr *)&source_ip_copy, NULL,
4826 target_proto);
4827
4828 IFA_REMREF(source_hw);
4829 if (result == ENOTSUP) {
4830 result = new_result;
4831 }
4832 }
4833 ifnet_list_free(ifp_list);
4834 }
4835 } else {
4836 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
4837 sender_proto, target_hw, target_proto);
4838 }
4839
4840 return (result);
4841 }
4842
4843 /*
4844 * Caller must hold ifnet head lock.
4845 */
4846 static int
4847 ifnet_lookup(struct ifnet *ifp)
4848 {
4849 struct ifnet *_ifp;
4850
4851 lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
4852 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4853 if (_ifp == ifp)
4854 break;
4855 }
4856 return (_ifp != NULL);
4857 }
4858
4859 /*
4860 * Caller has to pass a non-zero refio argument to get an
4861 * IO reference count. This will prevent ifnet_detach from
4862 * being called when there are outstanding io reference counts.
4863 */
4864 int
4865 ifnet_is_attached(struct ifnet *ifp, int refio)
4866 {
4867 int ret;
4868
4869 lck_mtx_lock_spin(&ifp->if_ref_lock);
4870 if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
4871 IFRF_ATTACHED))) {
4872 if (refio > 0)
4873 ifp->if_refio++;
4874 }
4875 lck_mtx_unlock(&ifp->if_ref_lock);
4876
4877 return (ret);
4878 }
4879
4880 /*
4881 * Caller must ensure the interface is attached; the assumption is that
4882 * there is at least an outstanding IO reference count held already.
4883 * Most callers would call ifnet_is_attached() instead.
4884 */
4885 void
4886 ifnet_incr_iorefcnt(struct ifnet *ifp)
4887 {
4888 lck_mtx_lock_spin(&ifp->if_ref_lock);
4889 VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
4890 IFRF_ATTACHED);
4891 VERIFY(ifp->if_refio > 0);
4892 ifp->if_refio++;
4893 lck_mtx_unlock(&ifp->if_ref_lock);
4894 }
4895
4896 void
4897 ifnet_decr_iorefcnt(struct ifnet *ifp)
4898 {
4899 lck_mtx_lock_spin(&ifp->if_ref_lock);
4900 VERIFY(ifp->if_refio > 0);
4901 VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
4902 ifp->if_refio--;
4903
4904 /*
4905 * If there are no more outstanding io references, wake up the
4906 * ifnet_detach thread if the detaching flag is set.
4907 */
4908 if (ifp->if_refio == 0 &&
4909 (ifp->if_refflags & IFRF_DETACHING) != 0) {
4910 wakeup(&(ifp->if_refio));
4911 }
4912 lck_mtx_unlock(&ifp->if_ref_lock);
4913 }
4914
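/*
 * Record a refhold/refrele event in the dlil_ifnet debug trace ring
 * (IF_REF_TRACE_HIST_SIZE entries); panics if the interface was not
 * allocated with DLIF_DEBUG.
 */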
4915 static void
4916 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
4917 {
4918 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
4919 ctrace_t *tr;
4920 u_int32_t idx;
4921 u_int16_t *cnt;
4922
4923 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
4924 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
4925 /* NOTREACHED */
4926 }
4927
4928 if (refhold) {
4929 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
4930 tr = dl_if_dbg->dldbg_if_refhold;
4931 } else {
4932 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
4933 tr = dl_if_dbg->dldbg_if_refrele;
4934 }
4935
4936 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
4937 ctrace_record(&tr[idx]);
4938 }
4939
4940 errno_t
4941 dlil_if_ref(struct ifnet *ifp)
4942 {
4943 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4944
4945 if (dl_if == NULL)
4946 return (EINVAL);
4947
4948 lck_mtx_lock_spin(&dl_if->dl_if_lock);
4949 ++dl_if->dl_if_refcnt;
4950 if (dl_if->dl_if_refcnt == 0) {
4951 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
4952 /* NOTREACHED */
4953 }
4954 if (dl_if->dl_if_trace != NULL)
4955 (*dl_if->dl_if_trace)(dl_if, TRUE);
4956 lck_mtx_unlock(&dl_if->dl_if_lock);
4957
4958 return (0);
4959 }
4960
4961 errno_t
4962 dlil_if_free(struct ifnet *ifp)
4963 {
4964 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4965
4966 if (dl_if == NULL)
4967 return (EINVAL);
4968
4969 lck_mtx_lock_spin(&dl_if->dl_if_lock);
4970 if (dl_if->dl_if_refcnt == 0) {
4971 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
4972 /* NOTREACHED */
4973 }
4974 --dl_if->dl_if_refcnt;
4975 if (dl_if->dl_if_trace != NULL)
4976 (*dl_if->dl_if_trace)(dl_if, FALSE);
4977 lck_mtx_unlock(&dl_if->dl_if_lock);
4978
4979 return (0);
4980 }
4981
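/*
 * Attach a fully initialized if_proto to its interface: fail with EEXIST
 * if the protocol family is already attached, let the family module refine
 * the demux descriptors, append the proto to the protocol hash chain, take
 * the attach reference, and post KEV_DL_PROTO_ATTACHED.
 */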
4982 static errno_t
4983 dlil_attach_protocol_internal(struct if_proto *proto,
4984 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
4985 {
4986 struct kev_dl_proto_data ev_pr_data;
4987 struct ifnet *ifp = proto->ifp;
4988 int retval = 0;
4989 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
4990 struct if_proto *prev_proto;
4991 struct if_proto *_proto;
4992
4993 /* callee holds a proto refcnt upon success */
4994 ifnet_lock_exclusive(ifp);
4995 _proto = find_attached_proto(ifp, proto->protocol_family);
4996 if (_proto != NULL) {
4997 ifnet_lock_done(ifp);
4998 if_proto_free(_proto);
4999 return (EEXIST);
5000 }
5001
5002 /*
5003 * Call family module add_proto routine so it can refine the
5004 * demux descriptors as it wishes.
5005 */
5006 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5007 demux_count);
5008 if (retval) {
5009 ifnet_lock_done(ifp);
5010 return (retval);
5011 }
5012
5013 /*
5014 * Insert the protocol in the hash
5015 */
5016 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
5017 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
5018 prev_proto = SLIST_NEXT(prev_proto, next_hash);
5019 if (prev_proto)
5020 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
5021 else
5022 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5023 proto, next_hash);
5024
5025 /* hold a proto refcnt for attach */
5026 if_proto_ref(proto);
5027
5028 /*
5029 * The reserved field carries the number of protocols still attached
5030 * (subject to change)
5031 */
5032 ev_pr_data.proto_family = proto->protocol_family;
5033 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
5034 ifnet_lock_done(ifp);
5035
5036 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5037 (struct net_event_data *)&ev_pr_data,
5038 sizeof (struct kev_dl_proto_data));
5039 return (retval);
5040 }
5041
5042 errno_t
5043 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
5044 const struct ifnet_attach_proto_param *proto_details)
5045 {
5046 int retval = 0;
5047 struct if_proto *ifproto = NULL;
5048
5049 ifnet_head_lock_shared();
5050 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5051 retval = EINVAL;
5052 goto end;
5053 }
5054 /* Check that the interface is in the global list */
5055 if (!ifnet_lookup(ifp)) {
5056 retval = ENXIO;
5057 goto end;
5058 }
5059
5060 ifproto = zalloc(dlif_proto_zone);
5061 if (ifproto == NULL) {
5062 retval = ENOMEM;
5063 goto end;
5064 }
5065 bzero(ifproto, dlif_proto_size);
5066
5067 /* refcnt held above during lookup */
5068 ifproto->ifp = ifp;
5069 ifproto->protocol_family = protocol;
5070 ifproto->proto_kpi = kProtoKPI_v1;
5071 ifproto->kpi.v1.input = proto_details->input;
5072 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5073 ifproto->kpi.v1.event = proto_details->event;
5074 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5075 ifproto->kpi.v1.detached = proto_details->detached;
5076 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5077 ifproto->kpi.v1.send_arp = proto_details->send_arp;
5078
5079 retval = dlil_attach_protocol_internal(ifproto,
5080 proto_details->demux_list, proto_details->demux_count);
5081
5082 if (dlil_verbose) {
5083 printf("%s: attached v1 protocol %d\n", if_name(ifp),
5084 protocol);
5085 }
5086
5087 end:
5088 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5089 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5090 if_name(ifp), protocol, retval);
5091 }
5092 ifnet_head_done();
5093 if (retval != 0 && ifproto != NULL)
5094 zfree(dlif_proto_zone, ifproto);
5095 return (retval);
5096 }
5097
5098 errno_t
5099 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
5100 const struct ifnet_attach_proto_param_v2 *proto_details)
5101 {
5102 int retval = 0;
5103 struct if_proto *ifproto = NULL;
5104
5105 ifnet_head_lock_shared();
5106 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5107 retval = EINVAL;
5108 goto end;
5109 }
5110 /* Check that the interface is in the global list */
5111 if (!ifnet_lookup(ifp)) {
5112 retval = ENXIO;
5113 goto end;
5114 }
5115
5116 ifproto = zalloc(dlif_proto_zone);
5117 if (ifproto == NULL) {
5118 retval = ENOMEM;
5119 goto end;
5120 }
5121 bzero(ifproto, sizeof(*ifproto));
5122
5123 /* refcnt held above during lookup */
5124 ifproto->ifp = ifp;
5125 ifproto->protocol_family = protocol;
5126 ifproto->proto_kpi = kProtoKPI_v2;
5127 ifproto->kpi.v2.input = proto_details->input;
5128 ifproto->kpi.v2.pre_output = proto_details->pre_output;
5129 ifproto->kpi.v2.event = proto_details->event;
5130 ifproto->kpi.v2.ioctl = proto_details->ioctl;
5131 ifproto->kpi.v2.detached = proto_details->detached;
5132 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
5133 ifproto->kpi.v2.send_arp = proto_details->send_arp;
5134
5135 retval = dlil_attach_protocol_internal(ifproto,
5136 proto_details->demux_list, proto_details->demux_count);
5137
5138 if (dlil_verbose) {
5139 printf("%s: attached v2 protocol %d\n", if_name(ifp),
5140 protocol);
5141 }
5142
5143 end:
5144 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5145 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
5146 if_name(ifp), protocol, retval);
5147 }
5148 ifnet_head_done();
5149 if (retval != 0 && ifproto != NULL)
5150 zfree(dlif_proto_zone, ifproto);
5151 return (retval);
5152 }
5153
5154 errno_t
5155 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
5156 {
5157 struct if_proto *proto = NULL;
5158 int retval = 0;
5159
5160 if (ifp == NULL || proto_family == 0) {
5161 retval = EINVAL;
5162 goto end;
5163 }
5164
5165 ifnet_lock_exclusive(ifp);
5166 /* callee holds a proto refcnt upon success */
5167 proto = find_attached_proto(ifp, proto_family);
5168 if (proto == NULL) {
5169 retval = ENXIO;
5170 ifnet_lock_done(ifp);
5171 goto end;
5172 }
5173
5174 /* call family module del_proto */
5175 if (ifp->if_del_proto)
5176 ifp->if_del_proto(ifp, proto->protocol_family);
5177
5178 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
5179 proto, if_proto, next_hash);
5180
5181 if (proto->proto_kpi == kProtoKPI_v1) {
5182 proto->kpi.v1.input = ifproto_media_input_v1;
5183 proto->kpi.v1.pre_output = ifproto_media_preout;
5184 proto->kpi.v1.event = ifproto_media_event;
5185 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5186 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5187 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5188 } else {
5189 proto->kpi.v2.input = ifproto_media_input_v2;
5190 proto->kpi.v2.pre_output = ifproto_media_preout;
5191 proto->kpi.v2.event = ifproto_media_event;
5192 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5193 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5194 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5195 }
5196 proto->detached = 1;
5197 ifnet_lock_done(ifp);
5198
5199 if (dlil_verbose) {
5200 printf("%s: detached %s protocol %d\n", if_name(ifp),
5201 (proto->proto_kpi == kProtoKPI_v1) ?
5202 "v1" : "v2", proto_family);
5203 }
5204
5205 /* release proto refcnt held during protocol attach */
5206 if_proto_free(proto);
5207
5208 /*
5209 * Release the proto refcnt held during lookup; the rest of the
5210 * protocol detach steps will happen when the last proto
5211 * reference is released.
5212 */
5213 if_proto_free(proto);
5214
5215 end:
5216 return (retval);
5217 }
5218
5219
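/*
 * The ifproto_media_* stubs below replace a detached protocol's callbacks
 * (see ifnet_detach_protocol) so that any straggling callers harmlessly
 * get ENXIO until the last protocol reference is dropped.
 */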
5220 static errno_t
5221 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
5222 struct mbuf *packet, char *header)
5223 {
5224 #pragma unused(ifp, protocol, packet, header)
5225 return (ENXIO);
5226 }
5227
5228 static errno_t
5229 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
5230 struct mbuf *packet)
5231 {
5232 #pragma unused(ifp, protocol, packet)
5233 return (ENXIO);
5234
5235 }
5236
5237 static errno_t
5238 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
5239 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
5240 char *link_layer_dest)
5241 {
5242 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
5243 return (ENXIO);
5244
5245 }
5246
5247 static void
5248 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
5249 const struct kev_msg *event)
5250 {
5251 #pragma unused(ifp, protocol, event)
5252 }
5253
5254 static errno_t
5255 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
5256 unsigned long command, void *argument)
5257 {
5258 #pragma unused(ifp, protocol, command, argument)
5259 return (ENXIO);
5260 }
5261
5262 static errno_t
5263 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
5264 struct sockaddr_dl *out_ll, size_t ll_len)
5265 {
5266 #pragma unused(ifp, proto_addr, out_ll, ll_len)
5267 return (ENXIO);
5268 }
5269
5270 static errno_t
5271 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
5272 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5273 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5274 {
5275 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
5276 return (ENXIO);
5277 }
5278
5279 extern int if_next_index(void);
5280 extern int tcp_ecn_outbound;
5281
5282 errno_t
5283 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
5284 {
5285 struct ifnet *tmp_if;
5286 struct ifaddr *ifa;
5287 struct if_data_internal if_data_saved;
5288 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5289 struct dlil_threading_info *dl_inp;
5290 u_int32_t sflags = 0;
5291 int err;
5292
5293 if (ifp == NULL)
5294 return (EINVAL);
5295
5296 /*
5297 * Serialize ifnet attach using dlil_ifnet_lock, in order to
5298 * prevent the interface from being configured while it is
5299 * embryonic, as ifnet_head_lock is dropped and reacquired
5300 * below prior to marking the ifnet with IFRF_ATTACHED.
5301 */
5302 dlil_if_lock();
5303 ifnet_head_lock_exclusive();
5304 /* Verify we aren't already on the list */
5305 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
5306 if (tmp_if == ifp) {
5307 ifnet_head_done();
5308 dlil_if_unlock();
5309 return (EEXIST);
5310 }
5311 }
5312
5313 lck_mtx_lock_spin(&ifp->if_ref_lock);
5314 if (ifp->if_refflags & IFRF_ATTACHED) {
5315 panic_plain("%s: flags mismatch (attached set) ifp=%p",
5316 __func__, ifp);
5317 /* NOTREACHED */
5318 }
5319 lck_mtx_unlock(&ifp->if_ref_lock);
5320
5321 ifnet_lock_exclusive(ifp);
5322
5323 /* Sanity check */
5324 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5325 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5326
5327 if (ll_addr != NULL) {
5328 if (ifp->if_addrlen == 0) {
5329 ifp->if_addrlen = ll_addr->sdl_alen;
5330 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
5331 ifnet_lock_done(ifp);
5332 ifnet_head_done();
5333 dlil_if_unlock();
5334 return (EINVAL);
5335 }
5336 }
5337
5338 /*
5339 * Allow interfaces without protocol families to attach
5340 * only if they have the necessary fields filled out.
5341 */
5342 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
5343 DLIL_PRINTF("%s: Attempt to attach interface without "
5344 "family module - %d\n", __func__, ifp->if_family);
5345 ifnet_lock_done(ifp);
5346 ifnet_head_done();
5347 dlil_if_unlock();
5348 return (ENODEV);
5349 }
5350
5351 /* Allocate protocol hash table */
5352 VERIFY(ifp->if_proto_hash == NULL);
5353 ifp->if_proto_hash = zalloc(dlif_phash_zone);
5354 if (ifp->if_proto_hash == NULL) {
5355 ifnet_lock_done(ifp);
5356 ifnet_head_done();
5357 dlil_if_unlock();
5358 return (ENOBUFS);
5359 }
5360 bzero(ifp->if_proto_hash, dlif_phash_size);
5361
5362 lck_mtx_lock_spin(&ifp->if_flt_lock);
5363 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5364 TAILQ_INIT(&ifp->if_flt_head);
5365 VERIFY(ifp->if_flt_busy == 0);
5366 VERIFY(ifp->if_flt_waiters == 0);
5367 lck_mtx_unlock(&ifp->if_flt_lock);
5368
5369 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
5370 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
5371 LIST_INIT(&ifp->if_multiaddrs);
5372 }
5373
5374 VERIFY(ifp->if_allhostsinm == NULL);
5375 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5376 TAILQ_INIT(&ifp->if_addrhead);
5377
5378 if (ifp->if_index == 0) {
5379 int idx = if_next_index();
5380
5381 if (idx == -1) {
5382 ifp->if_index = 0;
5383 ifnet_lock_done(ifp);
5384 ifnet_head_done();
5385 dlil_if_unlock();
5386 return (ENOBUFS);
5387 }
5388 ifp->if_index = idx;
5389 }
5390 /* There should not be anything occupying this slot */
5391 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5392
5393 /* allocate (if needed) and initialize a link address */
5394 VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
5395 ifa = dlil_alloc_lladdr(ifp, ll_addr);
5396 if (ifa == NULL) {
5397 ifnet_lock_done(ifp);
5398 ifnet_head_done();
5399 dlil_if_unlock();
5400 return (ENOBUFS);
5401 }
5402
5403 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
5404 ifnet_addrs[ifp->if_index - 1] = ifa;
5405
5406 /* make this address the first on the list */
5407 IFA_LOCK(ifa);
5408 /* hold a reference for ifnet_addrs[] */
5409 IFA_ADDREF_LOCKED(ifa);
5410 /* if_attach_link_ifa() holds a reference for ifa_link */
5411 if_attach_link_ifa(ifp, ifa);
5412 IFA_UNLOCK(ifa);
5413
5414 #if CONFIG_MACF_NET
5415 mac_ifnet_label_associate(ifp);
5416 #endif
5417
5418 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
5419 ifindex2ifnet[ifp->if_index] = ifp;
5420
5421 /* Hold a reference to the underlying dlil_ifnet */
5422 ifnet_reference(ifp);
5423
5424 /* Clear stats (save and restore other fields that we care about) */
5425 if_data_saved = ifp->if_data;
5426 bzero(&ifp->if_data, sizeof (ifp->if_data));
5427 ifp->if_data.ifi_type = if_data_saved.ifi_type;
5428 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
5429 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
5430 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
5431 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
5432 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
5433 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
5434 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
5435 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
5436 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
5437 ifnet_touch_lastchange(ifp);
5438
5439 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
5440 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
5441 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
5442
5443 /* By default, use SFB and enable flow advisory */
5444 sflags = PKTSCHEDF_QALG_SFB;
5445 if (if_flowadv)
5446 sflags |= PKTSCHEDF_QALG_FLOWCTL;
5447
5448 if (if_delaybased_queue)
5449 sflags |= PKTSCHEDF_QALG_DELAYBASED;
5450
5451 /* Initialize transmit queue(s) */
5452 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
5453 if (err != 0) {
5454 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
5455 "err=%d", __func__, ifp, err);
5456 /* NOTREACHED */
5457 }
5458
5459 /* Sanity checks on the input thread storage */
5460 dl_inp = &dl_if->dl_if_inpstorage;
5461 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
5462 VERIFY(dl_inp->input_waiting == 0);
5463 VERIFY(dl_inp->wtot == 0);
5464 VERIFY(dl_inp->ifp == NULL);
5465 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
5466 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
5467 VERIFY(!dl_inp->net_affinity);
5468 VERIFY(ifp->if_inp == NULL);
5469 VERIFY(dl_inp->input_thr == THREAD_NULL);
5470 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
5471 VERIFY(dl_inp->poll_thr == THREAD_NULL);
5472 VERIFY(dl_inp->tag == 0);
5473 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
5474 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
5475 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
5476 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
5477 #if IFNET_INPUT_SANITY_CHK
5478 VERIFY(dl_inp->input_mbuf_cnt == 0);
5479 #endif /* IFNET_INPUT_SANITY_CHK */
5480
5481 /*
5482 * A specific DLIL input thread is created per Ethernet/cellular
5483 * interface or for an interface which supports opportunistic
5484 * input polling. Pseudo interfaces or other types of interfaces
5485 * use the main input thread instead.
5486 */
5487 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
5488 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
5489 ifp->if_inp = dl_inp;
5490 err = dlil_create_input_thread(ifp, ifp->if_inp);
5491 if (err != 0) {
5492 panic_plain("%s: ifp=%p couldn't get an input thread; "
5493 "err=%d", __func__, ifp, err);
5494 /* NOTREACHED */
5495 }
5496 }
5497
5498 /*
5499 * If the driver supports the new transmit model, calculate flow hash
5500 * and create a workloop starter thread to invoke the if_start callback
5501 * where the packets may be dequeued and transmitted.
5502 */
5503 if (ifp->if_eflags & IFEF_TXSTART) {
5504 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
5505 VERIFY(ifp->if_flowhash != 0);
5506
5507 VERIFY(ifp->if_start != NULL);
5508 VERIFY(ifp->if_start_thread == THREAD_NULL);
5509
5510 ifnet_set_start_cycle(ifp, NULL);
5511 ifp->if_start_active = 0;
5512 ifp->if_start_req = 0;
5513 ifp->if_start_flags = 0;
5514 if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
5515 &ifp->if_start_thread)) != KERN_SUCCESS) {
5516 panic_plain("%s: ifp=%p couldn't get a start thread; "
5517 "err=%d", __func__, ifp, err);
5518 /* NOTREACHED */
5519 }
5520 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
5521 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5522 } else {
5523 ifp->if_flowhash = 0;
5524 }
5525
5526 /*
5527 * If the driver supports the new receive model, create a poller
5528 * thread to invoke if_input_poll callback where the packets may
5529 * be dequeued from the driver and processed for reception.
5530 */
5531 if (ifp->if_eflags & IFEF_RXPOLL) {
5532 VERIFY(ifp->if_input_poll != NULL);
5533 VERIFY(ifp->if_input_ctl != NULL);
5534 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5535
5536 ifnet_set_poll_cycle(ifp, NULL);
5537 ifp->if_poll_update = 0;
5538 ifp->if_poll_active = 0;
5539 ifp->if_poll_req = 0;
5540 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
5541 &ifp->if_poll_thread)) != KERN_SUCCESS) {
5542 panic_plain("%s: ifp=%p couldn't get a poll thread; "
5543 "err=%d", __func__, ifp, err);
5544 /* NOTREACHED */
5545 }
5546 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
5547 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5548 }
5549
5550 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5551 VERIFY(ifp->if_desc.ifd_len == 0);
5552 VERIFY(ifp->if_desc.ifd_desc != NULL);
5553
5554 /* Record attach PC stacktrace */
5555 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
5556
5557 ifp->if_updatemcasts = 0;
5558 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
5559 struct ifmultiaddr *ifma;
5560 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
5561 IFMA_LOCK(ifma);
5562 if (ifma->ifma_addr->sa_family == AF_LINK ||
5563 ifma->ifma_addr->sa_family == AF_UNSPEC)
5564 ifp->if_updatemcasts++;
5565 IFMA_UNLOCK(ifma);
5566 }
5567
5568 printf("%s: attached with %d suspended link-layer multicast "
5569 "membership(s)\n", if_name(ifp),
5570 ifp->if_updatemcasts);
5571 }
5572
5573 /* Clear logging parameters */
5574 bzero(&ifp->if_log, sizeof (ifp->if_log));
5575 ifp->if_fg_sendts = 0;
5576
5577 VERIFY(ifp->if_delegated.ifp == NULL);
5578 VERIFY(ifp->if_delegated.type == 0);
5579 VERIFY(ifp->if_delegated.family == 0);
5580 VERIFY(ifp->if_delegated.subfamily == 0);
5581 VERIFY(ifp->if_delegated.expensive == 0);
5582
5583 VERIFY(ifp->if_agentids == NULL);
5584 VERIFY(ifp->if_agentcount == 0);
5585
5586 /* Reset interface state */
5587 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5588 ifp->if_interface_state.valid_bitmask |=
5589 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
5590 ifp->if_interface_state.interface_availability =
5591 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
5592
5593 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
5594 if (ifp == lo_ifp) {
5595 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
5596 ifp->if_interface_state.valid_bitmask |=
5597 IF_INTERFACE_STATE_LQM_STATE_VALID;
5598 } else {
5599 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
5600 }
5601
5602 /*
5603 * Enable ECN capability on this interface depending on the
5604 * value of the ECN global setting
5605 */
5606 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
5607 ifp->if_eflags |= IFEF_ECN_ENABLE;
5608 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5609 }
5610
5611 /*
5612 * Built-in Cyclops always on policy for WiFi infra
5613 */
5614 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
5615 errno_t error;
5616
5617 error = if_set_qosmarking_mode(ifp,
5618 IFRTYPE_QOSMARKING_FASTLANE);
5619 if (error != 0) {
5620 printf("%s if_set_qosmarking_mode(%s) error %d\n",
5621 __func__, ifp->if_xname, error);
5622 } else {
5623 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
5624 #if (DEVELOPMENT || DEBUG)
5625 printf("%s fastlane enabled on %s\n",
5626 __func__, ifp->if_xname);
5627 #endif /* (DEVELOPMENT || DEBUG) */
5628 }
5629 }
5630
5631 ifnet_lock_done(ifp);
5632 ifnet_head_done();
5633
5634 lck_mtx_lock(&ifp->if_cached_route_lock);
5635 /* Enable forwarding cached route */
5636 ifp->if_fwd_cacheok = 1;
5637 /* Clean up any existing cached routes */
5638 ROUTE_RELEASE(&ifp->if_fwd_route);
5639 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
5640 ROUTE_RELEASE(&ifp->if_src_route);
5641 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
5642 ROUTE_RELEASE(&ifp->if_src_route6);
5643 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5644 lck_mtx_unlock(&ifp->if_cached_route_lock);
5645
5646 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
5647
5648 /*
5649 * Allocate and attach IGMPv3/MLDv2 interface specific variables
5650 * and trees; do this before the ifnet is marked as attached.
5651 * The ifnet keeps the reference to the info structures even after
5652 * the ifnet is detached, since the network-layer records still
5653 * refer to the info structures even after that. This also
5654 * makes it possible for them to still function after the ifnet
5655 * is recycled or reattached.
5656 */
5657 #if INET
5658 if (IGMP_IFINFO(ifp) == NULL) {
5659 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
5660 VERIFY(IGMP_IFINFO(ifp) != NULL);
5661 } else {
5662 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
5663 igmp_domifreattach(IGMP_IFINFO(ifp));
5664 }
5665 #endif /* INET */
5666 #if INET6
5667 if (MLD_IFINFO(ifp) == NULL) {
5668 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
5669 VERIFY(MLD_IFINFO(ifp) != NULL);
5670 } else {
5671 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
5672 mld_domifreattach(MLD_IFINFO(ifp));
5673 }
5674 #endif /* INET6 */
5675
5676 VERIFY(ifp->if_data_threshold == 0);
5677
5678 /*
5679 * Finally, mark this ifnet as attached.
5680 */
5681 lck_mtx_lock(rnh_lock);
5682 ifnet_lock_exclusive(ifp);
5683 lck_mtx_lock_spin(&ifp->if_ref_lock);
5684 ifp->if_refflags = IFRF_ATTACHED;
5685 lck_mtx_unlock(&ifp->if_ref_lock);
5686 if (net_rtref) {
5687 /* boot-args override; enable idle notification */
5688 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
5689 IFRF_IDLE_NOTIFY);
5690 } else {
5691 /* apply previous request(s) to set the idle flags, if any */
5692 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
5693 ifp->if_idle_new_flags_mask);
5694
5695 }
5696 ifnet_lock_done(ifp);
5697 lck_mtx_unlock(rnh_lock);
5698 dlil_if_unlock();
5699
5700 #if PF
5701 /*
5702 * Attach packet filter to this interface, if enabled.
5703 */
5704 pf_ifnet_hook(ifp, 1);
5705 #endif /* PF */
5706
5707 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
5708
5709 if (dlil_verbose) {
5710 printf("%s: attached%s\n", if_name(ifp),
5711 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
5712 }
5713
5714 return (0);
5715 }
5716
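/*
 * A minimal sketch of how a native driver would typically reach the
 * attach path above through the public KPIs in <net/kpi_interface.h>;
 * every "foo_*" name below is illustrative only, not part of this file:
 *
 *	struct ifnet_init_params init;
 *	ifnet_t foo_ifp;
 *
 *	bzero(&init, sizeof (init));
 *	init.name = "foo";
 *	init.unit = 0;
 *	init.family = IFNET_FAMILY_ETHERNET;
 *	init.type = IFT_ETHER;
 *	init.output = foo_output;		-- driver-supplied callbacks
 *	init.demux = foo_demux;
 *	init.add_proto = foo_add_proto;
 *	init.del_proto = foo_del_proto;
 *	init.softc = foo_softc;
 *	init.detach = foo_detach;
 *
 *	if (ifnet_allocate(&init, &foo_ifp) == 0)
 *		(void) ifnet_attach(foo_ifp, foo_lladdr);
 */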
5717 /*
5718 * Prepare the storage for the first/permanent link address, which
5719 * must have the same lifetime as the ifnet itself. Although the link
5720 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
5721 * its location in memory must never change as it may still be referred
5722 * to by some parts of the system afterwards (unfortunate implementation
5723 * artifacts inherited from BSD.)
5724 *
5725 * Caller must hold ifnet lock as writer.
5726 */
5727 static struct ifaddr *
5728 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
5729 {
5730 struct ifaddr *ifa, *oifa;
5731 struct sockaddr_dl *asdl, *msdl;
5732 char workbuf[IFNAMSIZ*2];
5733 int namelen, masklen, socksize;
5734 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5735
5736 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
5737 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
5738
5739 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
5740 if_name(ifp));
5741 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
5742 + ((namelen > 0) ? namelen : 0);
5743 socksize = masklen + ifp->if_addrlen;
5744 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
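	/* ROUNDUP() pads the computed sockaddr_dl length to a u_int32_t (4-byte) boundary */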
5745 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
5746 socksize = sizeof(struct sockaddr_dl);
5747 socksize = ROUNDUP(socksize);
5748 #undef ROUNDUP
5749
5750 ifa = ifp->if_lladdr;
5751 if (socksize > DLIL_SDLMAXLEN ||
5752 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
5753 /*
5754 * Rare, but in the event that the link address requires
5755 * more storage space than DLIL_SDLMAXLEN, allocate the
5756 * largest possible storage for address and mask, such
5757 * that we can reuse the same space when if_addrlen grows.
5758 * This same space will be used when if_addrlen shrinks.
5759 */
5760 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
5761 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
5762 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
5763 if (ifa == NULL)
5764 return (NULL);
5765 ifa_lock_init(ifa);
5766 /* Don't set IFD_ALLOC, as this is permanent */
5767 ifa->ifa_debug = IFD_LINK;
5768 }
5769 IFA_LOCK(ifa);
5770 /* address and mask sockaddr_dl locations */
5771 asdl = (struct sockaddr_dl *)(ifa + 1);
5772 bzero(asdl, SOCK_MAXADDRLEN);
5773 msdl = (struct sockaddr_dl *)(void *)
5774 ((char *)asdl + SOCK_MAXADDRLEN);
5775 bzero(msdl, SOCK_MAXADDRLEN);
5776 } else {
5777 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
5778 /*
5779 * Use the storage areas for address and mask within the
5780 * dlil_ifnet structure. This is the most common case.
5781 */
5782 if (ifa == NULL) {
5783 ifa = &dl_if->dl_if_lladdr.ifa;
5784 ifa_lock_init(ifa);
5785 /* Don't set IFD_ALLOC, as this is permanent */
5786 ifa->ifa_debug = IFD_LINK;
5787 }
5788 IFA_LOCK(ifa);
5789 /* address and mask sockaddr_dl locations */
5790 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
5791 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
5792 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
5793 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
5794 }
5795
5796 /* hold a permanent reference for the ifnet itself */
5797 IFA_ADDREF_LOCKED(ifa);
5798 oifa = ifp->if_lladdr;
5799 ifp->if_lladdr = ifa;
5800
5801 VERIFY(ifa->ifa_debug == IFD_LINK);
5802 ifa->ifa_ifp = ifp;
5803 ifa->ifa_rtrequest = link_rtrequest;
5804 ifa->ifa_addr = (struct sockaddr *)asdl;
5805 asdl->sdl_len = socksize;
5806 asdl->sdl_family = AF_LINK;
5807 if (namelen > 0) {
5808 bcopy(workbuf, asdl->sdl_data, min(namelen,
5809 sizeof (asdl->sdl_data)));
5810 asdl->sdl_nlen = namelen;
5811 } else {
5812 asdl->sdl_nlen = 0;
5813 }
5814 asdl->sdl_index = ifp->if_index;
5815 asdl->sdl_type = ifp->if_type;
5816 if (ll_addr != NULL) {
5817 asdl->sdl_alen = ll_addr->sdl_alen;
5818 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
5819 } else {
5820 asdl->sdl_alen = 0;
5821 }
5822 ifa->ifa_netmask = (struct sockaddr *)msdl;
5823 msdl->sdl_len = masklen;
5824 while (namelen > 0)
5825 msdl->sdl_data[--namelen] = 0xff;
5826 IFA_UNLOCK(ifa);
5827
5828 if (oifa != NULL)
5829 IFA_REMREF(oifa);
5830
5831 return (ifa);
5832 }
5833
5834 static void
5835 if_purgeaddrs(struct ifnet *ifp)
5836 {
5837 #if INET
5838 in_purgeaddrs(ifp);
5839 #endif /* INET */
5840 #if INET6
5841 in6_purgeaddrs(ifp);
5842 #endif /* INET6 */
5843 }
5844
5845 errno_t
5846 ifnet_detach(ifnet_t ifp)
5847 {
5848 struct ifnet *delegated_ifp;
5849 struct nd_ifinfo *ndi = NULL;
5850
5851 if (ifp == NULL)
5852 return (EINVAL);
5853
5854 ndi = ND_IFINFO(ifp);
5855 if (NULL != ndi)
5856 ndi->cga_initialized = FALSE;
5857
5858 lck_mtx_lock(rnh_lock);
5859 ifnet_head_lock_exclusive();
5860 ifnet_lock_exclusive(ifp);
5861
5862 /*
5863 * Check to see if this interface has previously triggered
5864 * aggressive protocol draining; if so, decrement the global
5865 * refcnt and clear PR_AGGDRAIN on the route domain if
5866 * there are no more of such an interface around.
5867 */
5868 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
5869
5870 lck_mtx_lock_spin(&ifp->if_ref_lock);
5871 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
5872 lck_mtx_unlock(&ifp->if_ref_lock);
5873 ifnet_lock_done(ifp);
5874 ifnet_head_done();
5875 lck_mtx_unlock(rnh_lock);
5876 return (EINVAL);
5877 } else if (ifp->if_refflags & IFRF_DETACHING) {
5878 /* Interface has already been detached */
5879 lck_mtx_unlock(&ifp->if_ref_lock);
5880 ifnet_lock_done(ifp);
5881 ifnet_head_done();
5882 lck_mtx_unlock(rnh_lock);
5883 return (ENXIO);
5884 }
5885 /* Indicate this interface is being detached */
5886 ifp->if_refflags &= ~IFRF_ATTACHED;
5887 ifp->if_refflags |= IFRF_DETACHING;
5888 lck_mtx_unlock(&ifp->if_ref_lock);
5889
5890 if (dlil_verbose)
5891 printf("%s: detaching\n", if_name(ifp));
5892
5893 /* Reset ECN enable/disable flags */
5894 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5895 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
5896
5897 /*
5898 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
5899 * no longer be visible during lookups from this point.
5900 */
5901 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
5902 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
5903 ifp->if_link.tqe_next = NULL;
5904 ifp->if_link.tqe_prev = NULL;
5905 if (ifp->if_ordered_link.tqe_next != NULL ||
5906 ifp->if_ordered_link.tqe_prev != NULL) {
5907 ifnet_remove_from_ordered_list(ifp);
5908 }
5909 ifindex2ifnet[ifp->if_index] = NULL;
5910
5911 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
5912 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
5913
5914 /* Record detach PC stacktrace */
5915 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
5916
5917 /* Clear logging parameters */
5918 bzero(&ifp->if_log, sizeof (ifp->if_log));
5919
5920 /* Clear delegated interface info (reference released below) */
5921 delegated_ifp = ifp->if_delegated.ifp;
5922 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
5923
5924 /* Reset interface state */
5925 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5926
5927 ifnet_lock_done(ifp);
5928 ifnet_head_done();
5929 lck_mtx_unlock(rnh_lock);
5930
5931 /* Release reference held on the delegated interface */
5932 if (delegated_ifp != NULL)
5933 ifnet_release(delegated_ifp);
5934
5935 /* Reset Link Quality Metric (unless loopback [lo0]) */
5936 if (ifp != lo_ifp)
5937 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
5938
5939 /* Reset TCP local statistics */
5940 if (ifp->if_tcp_stat != NULL)
5941 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
5942
5943 /* Reset UDP local statistics */
5944 if (ifp->if_udp_stat != NULL)
5945 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
5946
5947 /* Reset ifnet IPv4 stats */
5948 if (ifp->if_ipv4_stat != NULL)
5949 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
5950
5951 /* Reset ifnet IPv6 stats */
5952 if (ifp->if_ipv6_stat != NULL)
5953 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
5954
5955 /* Release memory held for interface link status report */
5956 if (ifp->if_link_status != NULL) {
5957 FREE(ifp->if_link_status, M_TEMP);
5958 ifp->if_link_status = NULL;
5959 }
5960
5961 /* Clear agent IDs */
5962 if (ifp->if_agentids != NULL) {
5963 FREE(ifp->if_agentids, M_NETAGENT);
5964 ifp->if_agentids = NULL;
5965 }
5966 ifp->if_agentcount = 0;
5967
5968
5969 /* Let BPF know we're detaching */
5970 bpfdetach(ifp);
5971
5972 /* Mark the interface as DOWN */
5973 if_down(ifp);
5974
5975 /* Disable forwarding cached route */
5976 lck_mtx_lock(&ifp->if_cached_route_lock);
5977 ifp->if_fwd_cacheok = 0;
5978 lck_mtx_unlock(&ifp->if_cached_route_lock);
5979
5980 ifp->if_data_threshold = 0;
5981 /*
5982 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
5983 * references to the info structures and leave them attached to
5984 * this ifnet.
5985 */
5986 #if INET
5987 igmp_domifdetach(ifp);
5988 #endif /* INET */
5989 #if INET6
5990 mld_domifdetach(ifp);
5991 #endif /* INET6 */
5992
5993 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
5994
5995 /* Let worker thread take care of the rest, to avoid reentrancy */
5996 dlil_if_lock();
5997 ifnet_detaching_enqueue(ifp);
5998 dlil_if_unlock();
5999
6000 return (0);
6001 }
6002
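/*
 * Note that ifnet_detach() above only unlinks the interface and marks it
 * IFRF_DETACHING; the heavyweight teardown (filters, protocols, worker
 * threads, and the driver's if_free callback) is deferred to the detacher
 * thread via ifnet_detaching_enqueue()/ifnet_detach_final() below, which
 * avoids re-entering the driver from its own detach call.
 */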
6003 static void
6004 ifnet_detaching_enqueue(struct ifnet *ifp)
6005 {
6006 dlil_if_lock_assert();
6007
6008 ++ifnet_detaching_cnt;
6009 VERIFY(ifnet_detaching_cnt != 0);
6010 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6011 wakeup((caddr_t)&ifnet_delayed_run);
6012 }
6013
6014 static struct ifnet *
6015 ifnet_detaching_dequeue(void)
6016 {
6017 struct ifnet *ifp;
6018
6019 dlil_if_lock_assert();
6020
6021 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6022 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6023 if (ifp != NULL) {
6024 VERIFY(ifnet_detaching_cnt != 0);
6025 --ifnet_detaching_cnt;
6026 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6027 ifp->if_detaching_link.tqe_next = NULL;
6028 ifp->if_detaching_link.tqe_prev = NULL;
6029 }
6030 return (ifp);
6031 }
6032
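/*
 * Detacher worker: blocks on ifnet_delayed_run using a continuation, so
 * no kernel stack is pinned while the thread is idle, and finalizes each
 * queued ifnet with dlil_ifnet_lock dropped across ifnet_detach_final().
 */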
6033 static int
6034 ifnet_detacher_thread_cont(int err)
6035 {
6036 #pragma unused(err)
6037 struct ifnet *ifp;
6038
6039 for (;;) {
6040 dlil_if_lock_assert();
6041 while (ifnet_detaching_cnt == 0) {
6042 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6043 (PZERO - 1), "ifnet_detacher_cont", 0,
6044 ifnet_detacher_thread_cont);
6045 /* NOTREACHED */
6046 }
6047
6048 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
6049
6050 /* Take care of detaching ifnet */
6051 ifp = ifnet_detaching_dequeue();
6052 if (ifp != NULL) {
6053 dlil_if_unlock();
6054 ifnet_detach_final(ifp);
6055 dlil_if_lock();
6056 }
6057 }
6058 }
6059
6060 static void
6061 ifnet_detacher_thread_func(void *v, wait_result_t w)
6062 {
6063 #pragma unused(v, w)
6064 dlil_if_lock();
6065 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6066 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
6067 /*
6068 * msleep0() should not have returned, since PCATCH was not set;
6069 * assert if it ever does.
6070 */
6071 dlil_if_unlock();
6072 VERIFY(0);
6073 }
6074
6075 static void
6076 ifnet_detach_final(struct ifnet *ifp)
6077 {
6078 struct ifnet_filter *filter, *filter_next;
6079 struct ifnet_filter_head fhead;
6080 struct dlil_threading_info *inp;
6081 struct ifaddr *ifa;
6082 ifnet_detached_func if_free;
6083 int i;
6084
6085 lck_mtx_lock(&ifp->if_ref_lock);
6086 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6087 panic("%s: flags mismatch (detaching not set) ifp=%p",
6088 __func__, ifp);
6089 /* NOTREACHED */
6090 }
6091
6092 /*
6093 * Wait until the existing IO references get released
6094 * before we proceed with ifnet_detach. This is not a
6095 * common case, so block without using a continuation.
6096 */
6097 while (ifp->if_refio > 0) {
6098 printf("%s: Waiting for IO references on %s interface "
6099 "to be released\n", __func__, if_name(ifp));
6100 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
6101 (PZERO - 1), "ifnet_ioref_wait", NULL);
6102 }
6103 lck_mtx_unlock(&ifp->if_ref_lock);
6104
6105 /* Drain and destroy send queue */
6106 ifclassq_teardown(ifp);
6107
6108 /* Detach interface filters */
6109 lck_mtx_lock(&ifp->if_flt_lock);
6110 if_flt_monitor_enter(ifp);
6111
6112 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6113 fhead = ifp->if_flt_head;
6114 TAILQ_INIT(&ifp->if_flt_head);
6115
6116 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
6117 filter_next = TAILQ_NEXT(filter, filt_next);
6118 lck_mtx_unlock(&ifp->if_flt_lock);
6119
6120 dlil_detach_filter_internal(filter, 1);
6121 lck_mtx_lock(&ifp->if_flt_lock);
6122 }
6123 if_flt_monitor_leave(ifp);
6124 lck_mtx_unlock(&ifp->if_flt_lock);
6125
6126 /* Tell upper layers to drop their network addresses */
6127 if_purgeaddrs(ifp);
6128
6129 ifnet_lock_exclusive(ifp);
6130
6131 /* Unplumb all protocols */
6132 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6133 struct if_proto *proto;
6134
6135 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6136 while (proto != NULL) {
6137 protocol_family_t family = proto->protocol_family;
6138 ifnet_lock_done(ifp);
6139 proto_unplumb(family, ifp);
6140 ifnet_lock_exclusive(ifp);
6141 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6142 }
6143 /* There should not be any protocols left */
6144 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
6145 }
6146 zfree(dlif_phash_zone, ifp->if_proto_hash);
6147 ifp->if_proto_hash = NULL;
6148
6149 /* Detach (permanent) link address from if_addrhead */
6150 ifa = TAILQ_FIRST(&ifp->if_addrhead);
6151 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
6152 IFA_LOCK(ifa);
6153 if_detach_link_ifa(ifp, ifa);
6154 IFA_UNLOCK(ifa);
6155
6156 /* Remove (permanent) link address from ifnet_addrs[] */
6157 IFA_REMREF(ifa);
6158 ifnet_addrs[ifp->if_index - 1] = NULL;
6159
6160 /* This interface should not be on {ifnet_head,detaching} */
6161 VERIFY(ifp->if_link.tqe_next == NULL);
6162 VERIFY(ifp->if_link.tqe_prev == NULL);
6163 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6164 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6165 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
6166 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6167
6168 /* The slot should have been emptied */
6169 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6170
6171 /* There should not be any addresses left */
6172 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6173
6174 /*
6175 * Signal the starter thread to terminate itself.
6176 */
6177 if (ifp->if_start_thread != THREAD_NULL) {
6178 lck_mtx_lock_spin(&ifp->if_start_lock);
6179 ifp->if_start_flags = 0;
6180 ifp->if_start_thread = THREAD_NULL;
6181 wakeup_one((caddr_t)&ifp->if_start_thread);
6182 lck_mtx_unlock(&ifp->if_start_lock);
6183 }
6184
6185 /*
6186 * Signal the poller thread to terminate itself.
6187 */
6188 if (ifp->if_poll_thread != THREAD_NULL) {
6189 lck_mtx_lock_spin(&ifp->if_poll_lock);
6190 ifp->if_poll_thread = THREAD_NULL;
6191 wakeup_one((caddr_t)&ifp->if_poll_thread);
6192 lck_mtx_unlock(&ifp->if_poll_lock);
6193 }
6194
6195 /*
6196 * If thread affinity was set for the workloop thread, we will need
6197 * to tear down the affinity and release the extra reference count
6198 * taken at attach time. Does not apply to lo0 or other interfaces
6199 * without dedicated input threads.
6200 */
6201 if ((inp = ifp->if_inp) != NULL) {
6202 VERIFY(inp != dlil_main_input_thread);
6203
6204 if (inp->net_affinity) {
6205 struct thread *tp, *wtp, *ptp;
6206
6207 lck_mtx_lock_spin(&inp->input_lck);
6208 wtp = inp->wloop_thr;
6209 inp->wloop_thr = THREAD_NULL;
6210 ptp = inp->poll_thr;
6211 inp->poll_thr = THREAD_NULL;
6212 tp = inp->input_thr; /* don't nullify now */
6213 inp->tag = 0;
6214 inp->net_affinity = FALSE;
6215 lck_mtx_unlock(&inp->input_lck);
6216
6217 /* Tear down poll thread affinity */
6218 if (ptp != NULL) {
6219 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
6220 (void) dlil_affinity_set(ptp,
6221 THREAD_AFFINITY_TAG_NULL);
6222 thread_deallocate(ptp);
6223 }
6224
6225 /* Tear down workloop thread affinity */
6226 if (wtp != NULL) {
6227 (void) dlil_affinity_set(wtp,
6228 THREAD_AFFINITY_TAG_NULL);
6229 thread_deallocate(wtp);
6230 }
6231
6232 /* Tear down DLIL input thread affinity */
6233 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
6234 thread_deallocate(tp);
6235 }
6236
6237 /* disassociate ifp DLIL input thread */
6238 ifp->if_inp = NULL;
6239
6240 lck_mtx_lock_spin(&inp->input_lck);
6241 inp->input_waiting |= DLIL_INPUT_TERMINATE;
6242 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
6243 wakeup_one((caddr_t)&inp->input_waiting);
6244 }
6245 lck_mtx_unlock(&inp->input_lck);
6246 }
6247
6248 /* The driver might unload, so point these to ourselves */
6249 if_free = ifp->if_free;
6250 ifp->if_output_handler = ifp_if_output;
6251 ifp->if_output = ifp_if_output;
6252 ifp->if_pre_enqueue = ifp_if_output;
6253 ifp->if_start = ifp_if_start;
6254 ifp->if_output_ctl = ifp_if_ctl;
6255 ifp->if_input_handler = ifp_if_input;
6256 ifp->if_input_poll = ifp_if_input_poll;
6257 ifp->if_input_ctl = ifp_if_ctl;
6258 ifp->if_ioctl = ifp_if_ioctl;
6259 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
6260 ifp->if_free = ifp_if_free;
6261 ifp->if_demux = ifp_if_demux;
6262 ifp->if_event = ifp_if_event;
6263 ifp->if_framer_legacy = ifp_if_framer;
6264 ifp->if_framer = ifp_if_framer_extended;
6265 ifp->if_add_proto = ifp_if_add_proto;
6266 ifp->if_del_proto = ifp_if_del_proto;
6267 ifp->if_check_multi = ifp_if_check_multi;
6268
6269 /* wipe out interface description */
6270 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6271 ifp->if_desc.ifd_len = 0;
6272 VERIFY(ifp->if_desc.ifd_desc != NULL);
6273 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
6274
6275 /* there shouldn't be any delegation by now */
6276 VERIFY(ifp->if_delegated.ifp == NULL);
6277 VERIFY(ifp->if_delegated.type == 0);
6278 VERIFY(ifp->if_delegated.family == 0);
6279 VERIFY(ifp->if_delegated.subfamily == 0);
6280 VERIFY(ifp->if_delegated.expensive == 0);
6281
6282 /* QoS marking gets cleared */
6283 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
6284 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
6285
6286 ifnet_lock_done(ifp);
6287
6288 #if PF
6289 /*
6290 * Detach this interface from packet filter, if enabled.
6291 */
6292 pf_ifnet_hook(ifp, 0);
6293 #endif /* PF */
6294
6295 /* Filter list should be empty */
6296 lck_mtx_lock_spin(&ifp->if_flt_lock);
6297 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6298 VERIFY(ifp->if_flt_busy == 0);
6299 VERIFY(ifp->if_flt_waiters == 0);
6300 lck_mtx_unlock(&ifp->if_flt_lock);
6301
6302 /* Last chance to drain send queue */
6303 if_qflush(ifp, 0);
6304
6305 /* Last chance to cleanup any cached route */
6306 lck_mtx_lock(&ifp->if_cached_route_lock);
6307 VERIFY(!ifp->if_fwd_cacheok);
6308 ROUTE_RELEASE(&ifp->if_fwd_route);
6309 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
6310 ROUTE_RELEASE(&ifp->if_src_route);
6311 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
6312 ROUTE_RELEASE(&ifp->if_src_route6);
6313 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
6314 lck_mtx_unlock(&ifp->if_cached_route_lock);
6315
6316 VERIFY(ifp->if_data_threshold == 0);
6317
6318 ifnet_llreach_ifdetach(ifp);
6319
6320 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
6321
6322 /*
6323 * Finally, mark this ifnet as detached.
6324 */
6325 lck_mtx_lock_spin(&ifp->if_ref_lock);
6326 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6327 panic("%s: flags mismatch (detaching not set) ifp=%p",
6328 __func__, ifp);
6329 /* NOTREACHED */
6330 }
6331 ifp->if_refflags &= ~IFRF_DETACHING;
6332 lck_mtx_unlock(&ifp->if_ref_lock);
6333 if (if_free != NULL)
6334 if_free(ifp);
6335
6336 if (dlil_verbose)
6337 printf("%s: detached\n", if_name(ifp));
6338
6339 /* Release reference held during ifnet attach */
6340 ifnet_release(ifp);
6341 }
6342
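/*
 * The ifp_if_* handlers below are the inert stand-ins that
 * ifnet_detach_final() installs in place of the driver's callbacks, so
 * that any straggling callers simply drop their packets or get
 * EOPNOTSUPP/EINVAL after the driver may have unloaded.
 */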
6343 static errno_t
6344 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6345 {
6346 #pragma unused(ifp)
6347 m_freem_list(m);
6348 return (0);
6349 }
6350
6351 static void
6352 ifp_if_start(struct ifnet *ifp)
6353 {
6354 ifnet_purge(ifp);
6355 }
6356
6357 static errno_t
6358 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6359 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6360 boolean_t poll, struct thread *tp)
6361 {
6362 #pragma unused(ifp, m_tail, s, poll, tp)
6363 m_freem_list(m_head);
6364 return (ENXIO);
6365 }
6366
6367 static void
6368 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6369 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6370 {
6371 #pragma unused(ifp, flags, max_cnt)
6372 if (m_head != NULL)
6373 *m_head = NULL;
6374 if (m_tail != NULL)
6375 *m_tail = NULL;
6376 if (cnt != NULL)
6377 *cnt = 0;
6378 if (len != NULL)
6379 *len = 0;
6380 }
6381
6382 static errno_t
6383 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6384 {
6385 #pragma unused(ifp, cmd, arglen, arg)
6386 return (EOPNOTSUPP);
6387 }
6388
6389 static errno_t
6390 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6391 {
6392 #pragma unused(ifp, fh, pf)
6393 m_freem(m);
6394 return (EJUSTRETURN);
6395 }
6396
6397 static errno_t
6398 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6399 const struct ifnet_demux_desc *da, u_int32_t dc)
6400 {
6401 #pragma unused(ifp, pf, da, dc)
6402 return (EINVAL);
6403 }
6404
6405 static errno_t
6406 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6407 {
6408 #pragma unused(ifp, pf)
6409 return (EINVAL);
6410 }
6411
6412 static errno_t
6413 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6414 {
6415 #pragma unused(ifp, sa)
6416 return (EOPNOTSUPP);
6417 }
6418
6419 static errno_t
6420 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6421 const struct sockaddr *sa, const char *ll, const char *t)
6422 {
6423 #pragma unused(ifp, m, sa, ll, t)
6424 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
6425 }
6426
6427 static errno_t
6428 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6429 const struct sockaddr *sa, const char *ll, const char *t,
6430 u_int32_t *pre, u_int32_t *post)
6431 {
6432 #pragma unused(ifp, sa, ll, t)
6433 m_freem(*m);
6434 *m = NULL;
6435
6436 if (pre != NULL)
6437 *pre = 0;
6438 if (post != NULL)
6439 *post = 0;
6440
6441 return (EJUSTRETURN);
6442 }
6443
6444 errno_t
6445 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6446 {
6447 #pragma unused(ifp, cmd, arg)
6448 return (EOPNOTSUPP);
6449 }
6450
6451 static errno_t
6452 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6453 {
6454 #pragma unused(ifp, tm, f)
6455 /* XXX not sure what to do here */
6456 return (0);
6457 }
6458
6459 static void
6460 ifp_if_free(struct ifnet *ifp)
6461 {
6462 #pragma unused(ifp)
6463 }
6464
6465 static void
6466 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
6467 {
6468 #pragma unused(ifp, e)
6469 }
6470
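/*
 * dlil_if_acquire(): recycle a previously detached dlil_ifnet of the same
 * family and uniqueid (marking it DLIF_INUSE|DLIF_REUSE), or carve a new
 * one out of dlif_zone with the ifnet placed at a 64-bit aligned offset
 * and the original allocation address stashed just below it for freeing.
 */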
6471 __private_extern__
6472 int dlil_if_acquire(u_int32_t family, const void *uniqueid,
6473 size_t uniqueid_len, struct ifnet **ifp)
6474 {
6475 struct ifnet *ifp1 = NULL;
6476 struct dlil_ifnet *dlifp1 = NULL;
6477 void *buf, *base, **pbuf;
6478 int ret = 0;
6479
6480 dlil_if_lock();
6481 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
6482 ifp1 = (struct ifnet *)dlifp1;
6483
6484 if (ifp1->if_family != family)
6485 continue;
6486
6487 lck_mtx_lock(&dlifp1->dl_if_lock);
6488 /* matching uniqueid and length, or no uniqueid specified */
6489 if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
6490 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
6491 /* check for matching interface in use */
6492 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6493 if (uniqueid_len) {
6494 ret = EBUSY;
6495 lck_mtx_unlock(&dlifp1->dl_if_lock);
6496 goto end;
6497 }
6498 } else {
6499 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
6500 lck_mtx_unlock(&dlifp1->dl_if_lock);
6501 *ifp = ifp1;
6502 goto end;
6503 }
6504 }
6505 lck_mtx_unlock(&dlifp1->dl_if_lock);
6506 }
6507
6508 /* no interface found, allocate a new one */
6509 buf = zalloc(dlif_zone);
6510 if (buf == NULL) {
6511 ret = ENOMEM;
6512 goto end;
6513 }
6514 bzero(buf, dlif_bufsize);
6515
6516 /* Get the 64-bit aligned base address for this object */
6517 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
6518 sizeof (u_int64_t));
6519 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
6520
6521 /*
6522 * Wind back a pointer size from the aligned base and
6523 * save the original address so we can free it later.
6524 */
6525 pbuf = (void **)((intptr_t)base - sizeof (void *));
6526 *pbuf = buf;
6527 dlifp1 = base;
6528
6529 if (uniqueid_len) {
6530 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
6531 M_NKE, M_WAITOK);
6532 if (dlifp1->dl_if_uniqueid == NULL) {
6533 zfree(dlif_zone, dlifp1);
6534 ret = ENOMEM;
6535 goto end;
6536 }
6537 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
6538 dlifp1->dl_if_uniqueid_len = uniqueid_len;
6539 }
6540
6541 ifp1 = (struct ifnet *)dlifp1;
6542 dlifp1->dl_if_flags = DLIF_INUSE;
6543 if (ifnet_debug) {
6544 dlifp1->dl_if_flags |= DLIF_DEBUG;
6545 dlifp1->dl_if_trace = dlil_if_trace;
6546 }
6547 ifp1->if_name = dlifp1->dl_if_namestorage;
6548 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
6549
6550 /* initialize interface description */
6551 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
6552 ifp1->if_desc.ifd_len = 0;
6553 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
6554
6555 #if CONFIG_MACF_NET
6556 mac_ifnet_label_init(ifp1);
6557 #endif
6558
6559 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
6560 DLIL_PRINTF("%s: failed to allocate if local stats, "
6561 "error: %d\n", __func__, ret);
6562 /* This probably shouldn't be fatal */
6563 ret = 0;
6564 }
6565
6566 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
6567 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
6568 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
6569 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6570 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
6571 ifnet_lock_attr);
6572 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
6573 #if INET
6574 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
6575 ifnet_lock_attr);
6576 ifp1->if_inetdata = NULL;
6577 #endif
6578 #if INET6
6579 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
6580 ifnet_lock_attr);
6581 ifp1->if_inet6data = NULL;
6582 #endif
6583 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
6584 ifnet_lock_attr);
6585 ifp1->if_link_status = NULL;
6586
6587 /* for send data paths */
6588 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
6589 ifnet_lock_attr);
6590 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
6591 ifnet_lock_attr);
6592 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
6593 ifnet_lock_attr);
6594
6595 /* for receive data paths */
6596 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
6597 ifnet_lock_attr);
6598
6599 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
6600
6601 *ifp = ifp1;
6602
6603 end:
6604 dlil_if_unlock();
6605
6606 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
6607 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
6608
6609 return (ret);
6610 }
6611
6612 __private_extern__ void
6613 dlil_if_release(ifnet_t ifp)
6614 {
6615 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
6616
6617 ifnet_lock_exclusive(ifp);
6618 lck_mtx_lock(&dlifp->dl_if_lock);
6619 dlifp->dl_if_flags &= ~DLIF_INUSE;
6620 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6621 ifp->if_name = dlifp->dl_if_namestorage;
6622 /* Reset external name (name + unit) */
6623 ifp->if_xname = dlifp->dl_if_xnamestorage;
6624 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
6625 "%s?", ifp->if_name);
6626 lck_mtx_unlock(&dlifp->dl_if_lock);
6627 #if CONFIG_MACF_NET
6628 /*
6629 * We can either recycle the MAC label here or in dlil_if_acquire().
6630 * It seems logical to do it here but this means that anything that
6631 * still has a handle on ifp will now see it as unlabeled.
6632 * Since the interface is "dead" that may be OK. Revisit later.
6633 */
6634 mac_ifnet_label_recycle(ifp);
6635 #endif
6636 ifnet_lock_done(ifp);
6637 }
6638
6639 __private_extern__ void
6640 dlil_if_lock(void)
6641 {
6642 lck_mtx_lock(&dlil_ifnet_lock);
6643 }
6644
6645 __private_extern__ void
6646 dlil_if_unlock(void)
6647 {
6648 lck_mtx_unlock(&dlil_ifnet_lock);
6649 }
6650
6651 __private_extern__ void
6652 dlil_if_lock_assert(void)
6653 {
6654 lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
6655 }
6656
6657 __private_extern__ void
6658 dlil_proto_unplumb_all(struct ifnet *ifp)
6659 {
6660 /*
6661 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6662 * each bucket contains exactly one entry; PF_VLAN does not need an
6663 * explicit unplumb.
6664 *
6665 * if_proto_hash[3] is for other protocols; we expect anything
6666 * in this bucket to respond to the DETACHING event (which would
6667 * have happened by now) and do the unplumb then.
6668 */
6669 (void) proto_unplumb(PF_INET, ifp);
6670 #if INET6
6671 (void) proto_unplumb(PF_INET6, ifp);
6672 #endif /* INET6 */
6673 }
6674
6675 static void
6676 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
6677 {
6678 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6679 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6680
6681 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
6682
6683 lck_mtx_unlock(&ifp->if_cached_route_lock);
6684 }
6685
6686 static void
6687 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
6688 {
6689 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6690 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6691
6692 if (ifp->if_fwd_cacheok) {
6693 route_copyin(src, &ifp->if_src_route, sizeof (*src));
6694 } else {
6695 ROUTE_RELEASE(src);
6696 }
6697 lck_mtx_unlock(&ifp->if_cached_route_lock);
6698 }
6699
6700 #if INET6
6701 static void
6702 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
6703 {
6704 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6705 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6706
6707 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
6708 sizeof (*dst));
6709
6710 lck_mtx_unlock(&ifp->if_cached_route_lock);
6711 }
6712
6713 static void
6714 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
6715 {
6716 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6717 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6718
6719 if (ifp->if_fwd_cacheok) {
6720 route_copyin((struct route *)src,
6721 (struct route *)&ifp->if_src_route6, sizeof (*src));
6722 } else {
6723 ROUTE_RELEASE(src);
6724 }
6725 lck_mtx_unlock(&ifp->if_cached_route_lock);
6726 }
6727 #endif /* INET6 */
6728
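/*
 * Cached source-route lookup: take a snapshot of the per-ifnet cached
 * route and, if it is unusable or keyed to a different source address,
 * re-resolve with rtalloc1_scoped() and write the fresh entry back into
 * the cache (taking an extra reference, since the copyin consumes one).
 * The returned rtentry, if any, carries a reference for the caller.
 */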
6729 struct rtentry *
6730 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6731 {
6732 struct route src_rt;
6733 struct sockaddr_in *dst;
6734
6735 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6736
6737 ifp_src_route_copyout(ifp, &src_rt);
6738
6739 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
6740 ROUTE_RELEASE(&src_rt);
6741 if (dst->sin_family != AF_INET) {
6742 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6743 dst->sin_len = sizeof (src_rt.ro_dst);
6744 dst->sin_family = AF_INET;
6745 }
6746 dst->sin_addr = src_ip;
6747
6748 if (src_rt.ro_rt == NULL) {
6749 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
6750 0, 0, ifp->if_index);
6751
6752 if (src_rt.ro_rt != NULL) {
6753 /* retain a ref, copyin consumes one */
6754 struct rtentry *rte = src_rt.ro_rt;
6755 RT_ADDREF(rte);
6756 ifp_src_route_copyin(ifp, &src_rt);
6757 src_rt.ro_rt = rte;
6758 }
6759 }
6760 }
6761
6762 return (src_rt.ro_rt);
6763 }
6764
6765 #if INET6
6766 struct rtentry *
6767 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
6768 {
6769 struct route_in6 src_rt;
6770
6771 ifp_src_route6_copyout(ifp, &src_rt);
6772
6773 if (ROUTE_UNUSABLE(&src_rt) ||
6774 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
6775 ROUTE_RELEASE(&src_rt);
6776 if (src_rt.ro_dst.sin6_family != AF_INET6) {
6777 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6778 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
6779 src_rt.ro_dst.sin6_family = AF_INET6;
6780 }
6781 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
6782 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
6783 sizeof (src_rt.ro_dst.sin6_addr));
6784
6785 if (src_rt.ro_rt == NULL) {
6786 src_rt.ro_rt = rtalloc1_scoped(
6787 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
6788 ifp->if_index);
6789
6790 if (src_rt.ro_rt != NULL) {
6791 /* retain a ref, copyin consumes one */
6792 struct rtentry *rte = src_rt.ro_rt;
6793 RT_ADDREF(rte);
6794 ifp_src_route6_copyin(ifp, &src_rt);
6795 src_rt.ro_rt = rte;
6796 }
6797 }
6798 }
6799
6800 return (src_rt.ro_rt);
6801 }
6802 #endif /* INET6 */
6803
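/*
 * if_lqm_update(): normalize the raw link quality metric to one of the
 * edge thresholds and, if it changed, post
 * KEV_DL_LINK_QUALITY_METRIC_CHANGED.  The ifnet lock is dropped while
 * the kernel event is posted and is returned to the caller in the same
 * state (held or not) indicated by 'locked'.
 */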
6804 void
6805 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
6806 {
6807 struct kev_dl_link_quality_metric_data ev_lqm_data;
6808
6809 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
6810
6811 /* Normalize to edge */
6812 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD)
6813 lqm = IFNET_LQM_THRESH_BAD;
6814 else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
6815 lqm = IFNET_LQM_THRESH_POOR;
6816 else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
6817 lqm = IFNET_LQM_THRESH_GOOD;
6818
6819 /*
6820 * Take the lock if needed
6821 */
6822 if (!locked)
6823 ifnet_lock_exclusive(ifp);
6824
6825 if (lqm == ifp->if_interface_state.lqm_state &&
6826 (ifp->if_interface_state.valid_bitmask &
6827 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
6828 /*
6829 * Release the lock if it was not held by the caller
6830 */
6831 if (!locked)
6832 ifnet_lock_done(ifp);
6833 return; /* nothing to update */
6834 }
6835 ifp->if_interface_state.valid_bitmask |=
6836 IF_INTERFACE_STATE_LQM_STATE_VALID;
6837 ifp->if_interface_state.lqm_state = lqm;
6838
6839 /*
6840 * Don't want to hold the lock when issuing kernel events
6841 */
6842 ifnet_lock_done(ifp);
6843
6844 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
6845 ev_lqm_data.link_quality_metric = lqm;
6846
6847 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
6848 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
6849
6850 /*
6851 * Reacquire the lock for the caller
6852 */
6853 if (locked)
6854 ifnet_lock_exclusive(ifp);
6855 }
6856
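/*
 * if_rrc_state_update(): caller must hold the ifnet lock exclusively;
 * the lock is dropped around the KEV_DL_RRC_STATE_CHANGED event and
 * reacquired before returning.
 */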
6857 static void
6858 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
6859 {
6860 struct kev_dl_rrc_state kev;
6861
6862 if (rrc_state == ifp->if_interface_state.rrc_state &&
6863 (ifp->if_interface_state.valid_bitmask &
6864 IF_INTERFACE_STATE_RRC_STATE_VALID))
6865 return;
6866
6867 ifp->if_interface_state.valid_bitmask |=
6868 IF_INTERFACE_STATE_RRC_STATE_VALID;
6869
6870 ifp->if_interface_state.rrc_state = rrc_state;
6871
6872 /*
6873 * Don't want to hold the lock when issuing kernel events
6874 */
6875 ifnet_lock_done(ifp);
6876
6877 bzero(&kev, sizeof(struct kev_dl_rrc_state));
6878 kev.rrc_state = rrc_state;
6879
6880 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
6881 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
6882
6883 ifnet_lock_exclusive(ifp);
6884 }
6885
6886 errno_t
6887 if_state_update(struct ifnet *ifp,
6888 struct if_interface_state *if_interface_state)
6889 {
6890 u_short if_index_available = 0;
6891
6892 ifnet_lock_exclusive(ifp);
6893
6894 if ((ifp->if_type != IFT_CELLULAR) &&
6895 (if_interface_state->valid_bitmask &
6896 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
6897 ifnet_lock_done(ifp);
6898 return (ENOTSUP);
6899 }
6900 if ((if_interface_state->valid_bitmask &
6901 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
6902 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
6903 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
6904 ifnet_lock_done(ifp);
6905 return (EINVAL);
6906 }
6907 if ((if_interface_state->valid_bitmask &
6908 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
6909 if_interface_state->rrc_state !=
6910 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
6911 if_interface_state->rrc_state !=
6912 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
6913 ifnet_lock_done(ifp);
6914 return (EINVAL);
6915 }
6916
6917 if (if_interface_state->valid_bitmask &
6918 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6919 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
6920 }
6921 if (if_interface_state->valid_bitmask &
6922 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6923 if_rrc_state_update(ifp, if_interface_state->rrc_state);
6924 }
6925 if (if_interface_state->valid_bitmask &
6926 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6927 ifp->if_interface_state.valid_bitmask |=
6928 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6929 ifp->if_interface_state.interface_availability =
6930 if_interface_state->interface_availability;
6931
6932 if (ifp->if_interface_state.interface_availability ==
6933 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
6934 if_index_available = ifp->if_index;
6935 }
6936 }
6937 ifnet_lock_done(ifp);
6938
6939 /*
6940 * Check if the TCP connections going on this interface should be
6941 * forced to send probe packets instead of waiting for TCP timers
6942 * to fire. This will be done when there is an explicit
6943 * notification that the interface became available.
6944 */
6945 if (if_index_available > 0)
6946 tcp_interface_send_probe(if_index_available);
6947
6948 return (0);
6949 }
6950
6951 void
6952 if_get_state(struct ifnet *ifp,
6953 struct if_interface_state *if_interface_state)
6954 {
6955 ifnet_lock_shared(ifp);
6956
6957 if_interface_state->valid_bitmask = 0;
6958
6959 if (ifp->if_interface_state.valid_bitmask &
6960 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6961 if_interface_state->valid_bitmask |=
6962 IF_INTERFACE_STATE_RRC_STATE_VALID;
6963 if_interface_state->rrc_state =
6964 ifp->if_interface_state.rrc_state;
6965 }
6966 if (ifp->if_interface_state.valid_bitmask &
6967 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6968 if_interface_state->valid_bitmask |=
6969 IF_INTERFACE_STATE_LQM_STATE_VALID;
6970 if_interface_state->lqm_state =
6971 ifp->if_interface_state.lqm_state;
6972 }
6973 if (ifp->if_interface_state.valid_bitmask &
6974 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6975 if_interface_state->valid_bitmask |=
6976 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6977 if_interface_state->interface_availability =
6978 ifp->if_interface_state.interface_availability;
6979 }
6980
6981 ifnet_lock_done(ifp);
6982 }
6983
6984 errno_t
6985 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6986 {
6987 ifnet_lock_exclusive(ifp);
6988 if (conn_probe > 1) {
6989 ifnet_lock_done(ifp);
6990 return (EINVAL);
6991 }
6992 if (conn_probe == 0)
6993 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
6994 else
6995 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
6996 ifnet_lock_done(ifp);
6997
6998 tcp_probe_connectivity(ifp, conn_probe);
6999 return (0);
7000 }
7001
7002 /* for uuid.c */
7003 int
7004 uuid_get_ethernet(u_int8_t *node)
7005 {
7006 struct ifnet *ifp;
7007 struct sockaddr_dl *sdl;
7008
7009 ifnet_head_lock_shared();
7010 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
7011 ifnet_lock_shared(ifp);
7012 IFA_LOCK_SPIN(ifp->if_lladdr);
7013 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
7014 if (sdl->sdl_type == IFT_ETHER) {
7015 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
7016 IFA_UNLOCK(ifp->if_lladdr);
7017 ifnet_lock_done(ifp);
7018 ifnet_head_done();
7019 return (0);
7020 }
7021 IFA_UNLOCK(ifp->if_lladdr);
7022 ifnet_lock_done(ifp);
7023 }
7024 ifnet_head_done();
7025
7026 return (-1);
7027 }
7028
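/*
 * The rx-poll and queue-length sysctl handlers below all follow the same
 * pattern: snapshot the current value, let sysctl_handle_int()/_quad()
 * apply any value written from userland, then validate or clamp it
 * before publishing it back to the global.
 */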
7029 static int
7030 sysctl_rxpoll SYSCTL_HANDLER_ARGS
7031 {
7032 #pragma unused(arg1, arg2)
7033 uint32_t i;
7034 int err;
7035
7036 i = if_rxpoll;
7037
7038 err = sysctl_handle_int(oidp, &i, 0, req);
7039 if (err != 0 || req->newptr == USER_ADDR_NULL)
7040 return (err);
7041
7042 if (net_rxpoll == 0)
7043 return (ENXIO);
7044
7045 if_rxpoll = i;
7046 return (err);
7047 }
7048
7049 static int
7050 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
7051 {
7052 #pragma unused(arg1, arg2)
7053 uint64_t q;
7054 int err;
7055
7056 q = if_rxpoll_mode_holdtime;
7057
7058 err = sysctl_handle_quad(oidp, &q, 0, req);
7059 if (err != 0 || req->newptr == USER_ADDR_NULL)
7060 return (err);
7061
7062 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
7063 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
7064
7065 if_rxpoll_mode_holdtime = q;
7066
7067 return (err);
7068 }
7069
7070 static int
7071 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
7072 {
7073 #pragma unused(arg1, arg2)
7074 uint64_t q;
7075 int err;
7076
7077 q = if_rxpoll_sample_holdtime;
7078
7079 err = sysctl_handle_quad(oidp, &q, 0, req);
7080 if (err != 0 || req->newptr == USER_ADDR_NULL)
7081 return (err);
7082
7083 if (q < IF_RXPOLL_SAMPLETIME_MIN)
7084 q = IF_RXPOLL_SAMPLETIME_MIN;
7085
7086 if_rxpoll_sample_holdtime = q;
7087
7088 return (err);
7089 }
7090
7091 static int
7092 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
7093 {
7094 #pragma unused(arg1, arg2)
7095 uint64_t q;
7096 int err;
7097
7098 q = if_rxpoll_interval_time;
7099
7100 err = sysctl_handle_quad(oidp, &q, 0, req);
7101 if (err != 0 || req->newptr == USER_ADDR_NULL)
7102 return (err);
7103
7104 if (q < IF_RXPOLL_INTERVALTIME_MIN)
7105 q = IF_RXPOLL_INTERVALTIME_MIN;
7106
7107 if_rxpoll_interval_time = q;
7108
7109 return (err);
7110 }
7111
7112 static int
7113 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
7114 {
7115 #pragma unused(arg1, arg2)
7116 uint32_t i;
7117 int err;
7118
7119 i = if_rxpoll_wlowat;
7120
7121 err = sysctl_handle_int(oidp, &i, 0, req);
7122 if (err != 0 || req->newptr == USER_ADDR_NULL)
7123 return (err);
7124
7125 if (i == 0 || i >= if_rxpoll_whiwat)
7126 return (EINVAL);
7127
7128 if_rxpoll_wlowat = i;
7129 return (err);
7130 }
7131
7132 static int
7133 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
7134 {
7135 #pragma unused(arg1, arg2)
7136 uint32_t i;
7137 int err;
7138
7139 i = if_rxpoll_whiwat;
7140
7141 err = sysctl_handle_int(oidp, &i, 0, req);
7142 if (err != 0 || req->newptr == USER_ADDR_NULL)
7143 return (err);
7144
7145 if (i <= if_rxpoll_wlowat)
7146 return (EINVAL);
7147
7148 if_rxpoll_whiwat = i;
7149 return (err);
7150 }
7151
7152 static int
7153 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
7154 {
7155 #pragma unused(arg1, arg2)
7156 int i, err;
7157
7158 i = if_sndq_maxlen;
7159
7160 err = sysctl_handle_int(oidp, &i, 0, req);
7161 if (err != 0 || req->newptr == USER_ADDR_NULL)
7162 return (err);
7163
7164 if (i < IF_SNDQ_MINLEN)
7165 i = IF_SNDQ_MINLEN;
7166
7167 if_sndq_maxlen = i;
7168 return (err);
7169 }
7170
7171 static int
7172 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
7173 {
7174 #pragma unused(arg1, arg2)
7175 int i, err;
7176
7177 i = if_rcvq_maxlen;
7178
7179 err = sysctl_handle_int(oidp, &i, 0, req);
7180 if (err != 0 || req->newptr == USER_ADDR_NULL)
7181 return (err);
7182
7183 if (i < IF_RCVQ_MINLEN)
7184 i = IF_RCVQ_MINLEN;
7185
7186 if_rcvq_maxlen = i;
7187 return (err);
7188 }
7189
7190 void
7191 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
7192 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
7193 {
7194 struct kev_dl_node_presence kev;
7195 struct sockaddr_dl *sdl;
7196 struct sockaddr_in6 *sin6;
7197
7198 VERIFY(ifp);
7199 VERIFY(sa);
7200 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7201
7202 bzero(&kev, sizeof (kev));
7203 sin6 = &kev.sin6_node_address;
7204 sdl = &kev.sdl_node_address;
7205 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7206 kev.rssi = rssi;
7207 kev.link_quality_metric = lqm;
7208 kev.node_proximity_metric = npm;
7209 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
7210
7211 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
7212 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
7213 &kev.link_data, sizeof (kev));
7214 }
7215
7216 void
7217 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
7218 {
7219 struct kev_dl_node_absence kev;
7220 struct sockaddr_in6 *sin6;
7221 struct sockaddr_dl *sdl;
7222
7223 VERIFY(ifp);
7224 VERIFY(sa);
7225 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7226
7227 bzero(&kev, sizeof (kev));
7228 sin6 = &kev.sin6_node_address;
7229 sdl = &kev.sdl_node_address;
7230 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7231
7232 nd6_alt_node_absent(ifp, sin6);
7233 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
7234 &kev.link_data, sizeof (kev));
7235 }
7236
7237 const void *
7238 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
7239 kauth_cred_t *credp)
7240 {
7241 const u_int8_t *bytes;
7242 size_t size;
7243
7244 bytes = CONST_LLADDR(sdl);
7245 size = sdl->sdl_alen;
7246
7247 #if CONFIG_MACF
7248 if (dlil_lladdr_ckreq) {
7249 switch (sdl->sdl_type) {
7250 case IFT_ETHER:
7251 case IFT_IEEE1394:
7252 break;
7253 default:
7254 credp = NULL;
7255 break;
7256 };
7257
7258 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
7259 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
7260 [0] = 2
7261 };
7262
7263 switch (sdl->sdl_type) {
7264 case IFT_ETHER:
7265 VERIFY(size == ETHER_ADDR_LEN);
7266 bytes = unspec;
7267 break;
7268 case IFT_IEEE1394:
7269 VERIFY(size == FIREWIRE_EUI64_LEN);
7270 bytes = unspec;
7271 break;
7272 default:
7273 VERIFY(FALSE);
7274 break;
7275 };
7276 }
7277 }
7278 #else
7279 #pragma unused(credp)
7280 #endif
7281
7282 if (sizep != NULL) *sizep = size;
7283 return (bytes);
7284 }
7285
7286 void
7287 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
7288 u_int8_t info[DLIL_MODARGLEN])
7289 {
7290 struct kev_dl_issues kev;
7291 struct timeval tv;
7292
7293 VERIFY(ifp != NULL);
7294 VERIFY(modid != NULL);
7295 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
7296 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
7297
7298 bzero(&kev, sizeof (kev));
7299
7300 microtime(&tv);
7301 kev.timestamp = tv.tv_sec;
7302 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
7303 if (info != NULL)
7304 bcopy(info, &kev.info, DLIL_MODARGLEN);
7305
7306 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
7307 &kev.link_data, sizeof (kev));
7308 }
7309
7310 errno_t
7311 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7312 struct proc *p)
7313 {
7314 u_int32_t level = IFNET_THROTTLE_OFF;
7315 errno_t result = 0;
7316
7317 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
7318
7319 if (cmd == SIOCSIFOPPORTUNISTIC) {
7320 /*
7321 * XXX: Use priv_check_cred() instead of root check?
7322 */
7323 if ((result = proc_suser(p)) != 0)
7324 return (result);
7325
7326 if (ifr->ifr_opportunistic.ifo_flags ==
7327 IFRIFOF_BLOCK_OPPORTUNISTIC)
7328 level = IFNET_THROTTLE_OPPORTUNISTIC;
7329 else if (ifr->ifr_opportunistic.ifo_flags == 0)
7330 level = IFNET_THROTTLE_OFF;
7331 else
7332 result = EINVAL;
7333
7334 if (result == 0)
7335 result = ifnet_set_throttle(ifp, level);
7336 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
7337 ifr->ifr_opportunistic.ifo_flags = 0;
7338 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
7339 ifr->ifr_opportunistic.ifo_flags |=
7340 IFRIFOF_BLOCK_OPPORTUNISTIC;
7341 }
7342 }
7343
7344 /*
7345 * Return the count of current opportunistic connections
7346 * over the interface.
7347 */
7348 if (result == 0) {
7349 uint32_t flags = 0;
7350 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
7351 INPCB_OPPORTUNISTIC_SETCMD : 0;
7352 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
7353 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
7354 ifr->ifr_opportunistic.ifo_inuse =
7355 udp_count_opportunistic(ifp->if_index, flags) +
7356 tcp_count_opportunistic(ifp->if_index, flags);
7357 }
7358
7359 if (result == EALREADY)
7360 result = 0;
7361
7362 return (result);
7363 }
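/*
 * A userspace sketch of exercising this handler through the
 * opportunistic throttling ioctls.  It assumes the private
 * SIOCSIFOPPORTUNISTIC/SIOCGIFOPPORTUNISTIC requests and the
 * ifr_opportunistic member of struct ifreq are visible to the caller,
 * and uses a hypothetical interface name.
 */
#if 0	/* illustrative userspace sketch; not part of this file */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int
main(void)
{
	struct ifreq ifr;
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	if (s < 0)
		return (1);
	memset(&ifr, 0, sizeof (ifr));
	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name)); /* hypothetical */

	/* Block opportunistic traffic (root only, per proc_suser() above) */
	ifr.ifr_opportunistic.ifo_flags = IFRIFOF_BLOCK_OPPORTUNISTIC;
	if (ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr) < 0)
		perror("SIOCSIFOPPORTUNISTIC");

	/* Read back the setting and the count of opportunistic connections */
	memset(&ifr.ifr_opportunistic, 0, sizeof (ifr.ifr_opportunistic));
	if (ioctl(s, SIOCGIFOPPORTUNISTIC, &ifr) == 0)
		printf("flags=0x%x inuse=%u\n",
		    ifr.ifr_opportunistic.ifo_flags,
		    ifr.ifr_opportunistic.ifo_inuse);
	close(s);
	return (0);
}
#endif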
7364
7365 int
7366 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
7367 {
7368 struct ifclassq *ifq;
7369 int err = 0;
7370
7371 if (!(ifp->if_eflags & IFEF_TXSTART))
7372 return (ENXIO);
7373
7374 *level = IFNET_THROTTLE_OFF;
7375
7376 ifq = &ifp->if_snd;
7377 IFCQ_LOCK(ifq);
7378 /* Throttling works only for IFCQ, not ALTQ instances */
7379 if (IFCQ_IS_ENABLED(ifq))
7380 IFCQ_GET_THROTTLE(ifq, *level, err);
7381 IFCQ_UNLOCK(ifq);
7382
7383 return (err);
7384 }
7385
7386 int
7387 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7388 {
7389 struct ifclassq *ifq;
7390 int err = 0;
7391
7392 if (!(ifp->if_eflags & IFEF_TXSTART))
7393 return (ENXIO);
7394
7395 ifq = &ifp->if_snd;
7396
7397 switch (level) {
7398 case IFNET_THROTTLE_OFF:
7399 case IFNET_THROTTLE_OPPORTUNISTIC:
7400 #if PF_ALTQ
7401 /* Throttling works only for IFCQ, not ALTQ instances */
7402 if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
7403 return (ENXIO);
7404 #endif /* PF_ALTQ */
7405 break;
7406 default:
7407 return (EINVAL);
7408 }
7409
7410 IFCQ_LOCK(ifq);
7411 if (IFCQ_IS_ENABLED(ifq))
7412 IFCQ_SET_THROTTLE(ifq, level, err);
7413 IFCQ_UNLOCK(ifq);
7414
7415 if (err == 0) {
7416 printf("%s: throttling level set to %d\n", if_name(ifp),
7417 level);
7418 if (level == IFNET_THROTTLE_OFF)
7419 ifnet_start(ifp);
7420 }
7421
7422 return (err);
7423 }
7424
7425 errno_t
7426 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7427 struct proc *p)
7428 {
7429 #pragma unused(p)
7430 errno_t result = 0;
7431 uint32_t flags;
7432 int level, category, subcategory;
7433
7434 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7435
7436 if (cmd == SIOCSIFLOG) {
7437 if ((result = priv_check_cred(kauth_cred_get(),
7438 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
7439 return (result);
7440
7441 level = ifr->ifr_log.ifl_level;
7442 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
7443 result = EINVAL;
7444
7445 flags = ifr->ifr_log.ifl_flags;
7446 if ((flags &= IFNET_LOGF_MASK) == 0)
7447 result = EINVAL;
7448
7449 category = ifr->ifr_log.ifl_category;
7450 subcategory = ifr->ifr_log.ifl_subcategory;
7451
7452 if (result == 0)
7453 result = ifnet_set_log(ifp, level, flags,
7454 category, subcategory);
7455 } else {
7456 result = ifnet_get_log(ifp, &level, &flags, &category,
7457 &subcategory);
7458 if (result == 0) {
7459 ifr->ifr_log.ifl_level = level;
7460 ifr->ifr_log.ifl_flags = flags;
7461 ifr->ifr_log.ifl_category = category;
7462 ifr->ifr_log.ifl_subcategory = subcategory;
7463 }
7464 }
7465
7466 return (result);
7467 }
7468
7469 int
7470 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
7471 int32_t category, int32_t subcategory)
7472 {
7473 int err = 0;
7474
7475 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
7476 VERIFY(flags & IFNET_LOGF_MASK);
7477
7478 /*
7479 * The logging level applies to all facilities; make sure to
7480 * update them all with the most current level.
7481 */
7482 flags |= ifp->if_log.flags;
7483
7484 if (ifp->if_output_ctl != NULL) {
7485 struct ifnet_log_params l;
7486
7487 bzero(&l, sizeof (l));
7488 l.level = level;
7489 l.flags = flags;
7490 l.flags &= ~IFNET_LOGF_DLIL;
7491 l.category = category;
7492 l.subcategory = subcategory;
7493
7494 /* Send this request to lower layers */
7495 if (l.flags != 0) {
7496 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7497 sizeof (l), &l);
7498 }
7499 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7500 /*
7501 * If targeted to the lower layers without an output
7502 * control callback registered on the interface, just
7503 * silently ignore facilities other than ours.
7504 */
7505 flags &= IFNET_LOGF_DLIL;
7506 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
7507 level = 0;
7508 }
7509
7510 if (err == 0) {
7511 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
7512 ifp->if_log.flags = 0;
7513 else
7514 ifp->if_log.flags |= flags;
7515
7516 log(LOG_INFO, "%s: logging level set to %d flags=%b "
7517 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
7518 ifp->if_log.level, ifp->if_log.flags,
7519 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
7520 category, subcategory);
7521 }
7522
7523 return (err);
7524 }
7525
7526 int
7527 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7528 int32_t *category, int32_t *subcategory)
7529 {
7530 if (level != NULL)
7531 *level = ifp->if_log.level;
7532 if (flags != NULL)
7533 *flags = ifp->if_log.flags;
7534 if (category != NULL)
7535 *category = ifp->if_log.category;
7536 if (subcategory != NULL)
7537 *subcategory = ifp->if_log.subcategory;
7538
7539 return (0);
7540 }
7541
7542 int
7543 ifnet_notify_address(struct ifnet *ifp, int af)
7544 {
7545 struct ifnet_notify_address_params na;
7546
7547 #if PF
7548 (void) pf_ifaddr_hook(ifp);
7549 #endif /* PF */
7550
7551 if (ifp->if_output_ctl == NULL)
7552 return (EOPNOTSUPP);
7553
7554 bzero(&na, sizeof (na));
7555 na.address_family = af;
7556
7557 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7558 sizeof (na), &na));
7559 }
7560
7561 errno_t
7562 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7563 {
7564 if (ifp == NULL || flowid == NULL) {
7565 return (EINVAL);
7566 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7567 !(ifp->if_refflags & IFRF_ATTACHED)) {
7568 return (ENXIO);
7569 }
7570
7571 *flowid = ifp->if_flowhash;
7572
7573 return (0);
7574 }
7575
7576 errno_t
7577 ifnet_disable_output(struct ifnet *ifp)
7578 {
7579 int err;
7580
7581 if (ifp == NULL) {
7582 return (EINVAL);
7583 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7584 !(ifp->if_refflags & IFRF_ATTACHED)) {
7585 return (ENXIO);
7586 }
7587
7588 if ((err = ifnet_fc_add(ifp)) == 0) {
7589 lck_mtx_lock_spin(&ifp->if_start_lock);
7590 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7591 lck_mtx_unlock(&ifp->if_start_lock);
7592 }
7593 return (err);
7594 }
7595
7596 errno_t
7597 ifnet_enable_output(struct ifnet *ifp)
7598 {
7599 if (ifp == NULL) {
7600 return (EINVAL);
7601 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7602 !(ifp->if_refflags & IFRF_ATTACHED)) {
7603 return (ENXIO);
7604 }
7605
7606 ifnet_start_common(ifp, 1);
7607 return (0);
7608 }
7609
7610 void
7611 ifnet_flowadv(uint32_t flowhash)
7612 {
7613 struct ifnet_fc_entry *ifce;
7614 struct ifnet *ifp;
7615
7616 ifce = ifnet_fc_get(flowhash);
7617 if (ifce == NULL)
7618 return;
7619
7620 VERIFY(ifce->ifce_ifp != NULL);
7621 ifp = ifce->ifce_ifp;
7622
7623 /* flow hash gets recalculated per attach, so check */
7624 if (ifnet_is_attached(ifp, 1)) {
7625 if (ifp->if_flowhash == flowhash)
7626 (void) ifnet_enable_output(ifp);
7627 ifnet_decr_iorefcnt(ifp);
7628 }
7629 ifnet_fc_entry_free(ifce);
7630 }
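/*
 * Putting the flow-control pieces together: an interface provider that
 * opts into this mechanism typically caches the interface flow hash at
 * attach time, pauses its output with ifnet_disable_output() when its
 * transmit resources fill up, and later calls ifnet_flowadv() with the
 * cached hash so that output is re-enabled once resources drain.  A
 * minimal sketch follows; the driver state structure and callbacks are
 * hypothetical.
 */
#if 0	/* illustrative sketch; not part of this file */
struct mydrv_softc {			/* hypothetical driver state */
	struct ifnet	*sc_ifp;
	uint32_t	 sc_flowid;
};

static void
mydrv_attach_done(struct mydrv_softc *sc)
{
	/* Cache the flow hash; it is recomputed on each attach */
	(void) ifnet_flowid(sc->sc_ifp, &sc->sc_flowid);
}

static void
mydrv_tx_full(struct mydrv_softc *sc)
{
	/* Tell DLIL to stop invoking our start routine for now */
	(void) ifnet_disable_output(sc->sc_ifp);
}

static void
mydrv_tx_drained(struct mydrv_softc *sc)
{
	/* Flow advisory: re-enables output if the hash still matches */
	ifnet_flowadv(sc->sc_flowid);
}
#endif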
7631
7632 /*
7633 * Function to compare ifnet_fc_entries in ifnet flow control tree
7634 */
7635 static inline int
7636 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7637 {
7638 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
7639 }
7640
7641 static int
7642 ifnet_fc_add(struct ifnet *ifp)
7643 {
7644 struct ifnet_fc_entry keyfc, *ifce;
7645 uint32_t flowhash;
7646
7647 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
7648 VERIFY(ifp->if_flowhash != 0);
7649 flowhash = ifp->if_flowhash;
7650
7651 bzero(&keyfc, sizeof (keyfc));
7652 keyfc.ifce_flowhash = flowhash;
7653
7654 lck_mtx_lock_spin(&ifnet_fc_lock);
7655 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7656 if (ifce != NULL && ifce->ifce_ifp == ifp) {
7657 /* Entry is already in ifnet_fc_tree, return */
7658 lck_mtx_unlock(&ifnet_fc_lock);
7659 return (0);
7660 }
7661
7662 if (ifce != NULL) {
7663 /*
7664 * There is a different fc entry with the same flow hash
7665 * but different ifp pointer. There can be a collision
7666 * on flow hash but the probability is low. Let's just
7667 * avoid adding a second one when there is a collision.
7668 */
7669 lck_mtx_unlock(&ifnet_fc_lock);
7670 return (EAGAIN);
7671 }
7672
7673 /* become regular mutex */
7674 lck_mtx_convert_spin(&ifnet_fc_lock);
7675
7676 ifce = zalloc_noblock(ifnet_fc_zone);
7677 if (ifce == NULL) {
7678 /* memory allocation failed */
7679 lck_mtx_unlock(&ifnet_fc_lock);
7680 return (ENOMEM);
7681 }
7682 bzero(ifce, ifnet_fc_zone_size);
7683
7684 ifce->ifce_flowhash = flowhash;
7685 ifce->ifce_ifp = ifp;
7686
7687 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7688 lck_mtx_unlock(&ifnet_fc_lock);
7689 return (0);
7690 }
7691
7692 static struct ifnet_fc_entry *
7693 ifnet_fc_get(uint32_t flowhash)
7694 {
7695 struct ifnet_fc_entry keyfc, *ifce;
7696 struct ifnet *ifp;
7697
7698 bzero(&keyfc, sizeof (keyfc));
7699 keyfc.ifce_flowhash = flowhash;
7700
7701 lck_mtx_lock_spin(&ifnet_fc_lock);
7702 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7703 if (ifce == NULL) {
7704 /* Entry is not present in ifnet_fc_tree, return */
7705 lck_mtx_unlock(&ifnet_fc_lock);
7706 return (NULL);
7707 }
7708
7709 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7710
7711 VERIFY(ifce->ifce_ifp != NULL);
7712 ifp = ifce->ifce_ifp;
7713
7714 /* become regular mutex */
7715 lck_mtx_convert_spin(&ifnet_fc_lock);
7716
7717 if (!ifnet_is_attached(ifp, 0)) {
7718 /*
7719 * This ifp is not attached or in the process of being
7720 * detached; just don't process it.
7721 */
7722 ifnet_fc_entry_free(ifce);
7723 ifce = NULL;
7724 }
7725 lck_mtx_unlock(&ifnet_fc_lock);
7726
7727 return (ifce);
7728 }
7729
7730 static void
7731 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
7732 {
7733 zfree(ifnet_fc_zone, ifce);
7734 }
7735
7736 static uint32_t
7737 ifnet_calc_flowhash(struct ifnet *ifp)
7738 {
7739 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7740 uint32_t flowhash = 0;
7741
7742 if (ifnet_flowhash_seed == 0)
7743 ifnet_flowhash_seed = RandomULong();
7744
7745 bzero(&fh, sizeof (fh));
7746
7747 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
7748 fh.ifk_unit = ifp->if_unit;
7749 fh.ifk_flags = ifp->if_flags;
7750 fh.ifk_eflags = ifp->if_eflags;
7751 fh.ifk_capabilities = ifp->if_capabilities;
7752 fh.ifk_capenable = ifp->if_capenable;
7753 fh.ifk_output_sched_model = ifp->if_output_sched_model;
7754 fh.ifk_rand1 = RandomULong();
7755 fh.ifk_rand2 = RandomULong();
7756
7757 try_again:
7758 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
7759 if (flowhash == 0) {
7760 /* try to get a non-zero flowhash */
7761 ifnet_flowhash_seed = RandomULong();
7762 goto try_again;
7763 }
7764
7765 return (flowhash);
7766 }
7767
7768 int
7769 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
7770 uint16_t flags, uint8_t *data)
7771 {
7772 #pragma unused(flags)
7773 int error = 0;
7774
7775 switch (family) {
7776 case AF_INET:
7777 if_inetdata_lock_exclusive(ifp);
7778 if (IN_IFEXTRA(ifp) != NULL) {
7779 if (len == 0) {
7780 /* Allow clearing the signature */
7781 IN_IFEXTRA(ifp)->netsig_len = 0;
7782 bzero(IN_IFEXTRA(ifp)->netsig,
7783 sizeof (IN_IFEXTRA(ifp)->netsig));
7784 if_inetdata_lock_done(ifp);
7785 break;
7786 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
7787 error = EINVAL;
7788 if_inetdata_lock_done(ifp);
7789 break;
7790 }
7791 IN_IFEXTRA(ifp)->netsig_len = len;
7792 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
7793 } else {
7794 error = ENOMEM;
7795 }
7796 if_inetdata_lock_done(ifp);
7797 break;
7798
7799 case AF_INET6:
7800 if_inet6data_lock_exclusive(ifp);
7801 if (IN6_IFEXTRA(ifp) != NULL) {
7802 if (len == 0) {
7803 /* Allow clearing the signature */
7804 IN6_IFEXTRA(ifp)->netsig_len = 0;
7805 bzero(IN6_IFEXTRA(ifp)->netsig,
7806 sizeof (IN6_IFEXTRA(ifp)->netsig));
7807 if_inet6data_lock_done(ifp);
7808 break;
7809 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
7810 error = EINVAL;
7811 if_inet6data_lock_done(ifp);
7812 break;
7813 }
7814 IN6_IFEXTRA(ifp)->netsig_len = len;
7815 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
7816 } else {
7817 error = ENOMEM;
7818 }
7819 if_inet6data_lock_done(ifp);
7820 break;
7821
7822 default:
7823 error = EINVAL;
7824 break;
7825 }
7826
7827 return (error);
7828 }
7829
7830 int
7831 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
7832 uint16_t *flags, uint8_t *data)
7833 {
7834 int error = 0;
7835
7836 if (ifp == NULL || len == NULL || flags == NULL || data == NULL)
7837 return (EINVAL);
7838
7839 switch (family) {
7840 case AF_INET:
7841 if_inetdata_lock_shared(ifp);
7842 if (IN_IFEXTRA(ifp) != NULL) {
7843 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
7844 error = EINVAL;
7845 if_inetdata_lock_done(ifp);
7846 break;
7847 }
7848 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
7849 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
7850 else
7851 error = ENOENT;
7852 } else {
7853 error = ENOMEM;
7854 }
7855 if_inetdata_lock_done(ifp);
7856 break;
7857
7858 case AF_INET6:
7859 if_inet6data_lock_shared(ifp);
7860 if (IN6_IFEXTRA(ifp) != NULL) {
7861 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
7862 error = EINVAL;
7863 if_inet6data_lock_done(ifp);
7864 break;
7865 }
7866 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
7867 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
7868 else
7869 error = ENOENT;
7870 } else {
7871 error = ENOMEM;
7872 }
7873 if_inet6data_lock_done(ifp);
7874 break;
7875
7876 default:
7877 error = EINVAL;
7878 break;
7879 }
7880
7881 if (error == 0)
7882 *flags = 0;
7883
7884 return (error);
7885 }
7886
7887 static void
7888 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
7889 protocol_family_t pf)
7890 {
7891 #pragma unused(ifp)
7892 uint32_t did_sw;
7893
7894 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
7895 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
7896 return;
7897
7898 switch (pf) {
7899 case PF_INET:
7900 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
7901 if (did_sw & CSUM_DELAY_IP)
7902 hwcksum_dbg_finalized_hdr++;
7903 if (did_sw & CSUM_DELAY_DATA)
7904 hwcksum_dbg_finalized_data++;
7905 break;
7906 #if INET6
7907 case PF_INET6:
7908 /*
7909 * Checksum offload should not have been enabled when
7910 * extension headers exist; that also means that we
7911 * cannot force-finalize packets with extension headers.
7912 * Indicate to the callee that it should skip such cases by
7913 * setting optlen to -1.
7914 */
7915 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
7916 m->m_pkthdr.csum_flags);
7917 if (did_sw & CSUM_DELAY_IPV6_DATA)
7918 hwcksum_dbg_finalized_data++;
7919 break;
7920 #endif /* INET6 */
7921 default:
7922 return;
7923 }
7924 }
7925
7926 static void
7927 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
7928 protocol_family_t pf)
7929 {
7930 uint16_t sum;
7931 uint32_t hlen;
7932
7933 if (frame_header == NULL ||
7934 frame_header < (char *)mbuf_datastart(m) ||
7935 frame_header > (char *)m->m_data) {
7936 printf("%s: frame header pointer 0x%llx out of range "
7937 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
7938 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
7939 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
7940 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
7941 (uint64_t)VM_KERNEL_ADDRPERM(m));
7942 return;
7943 }
7944 hlen = (m->m_data - frame_header);
7945
7946 switch (pf) {
7947 case PF_INET:
7948 #if INET6
7949 case PF_INET6:
7950 #endif /* INET6 */
7951 break;
7952 default:
7953 return;
7954 }
7955
7956 /*
7957 * Force partial checksum offload; useful to simulate cases
7958 * where the hardware does not support partial checksum offload,
7959 * in order to validate correctness throughout the layers above.
7960 */
7961 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
7962 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
7963
7964 if (foff > (uint32_t)m->m_pkthdr.len)
7965 return;
7966
7967 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
7968
7969 /* Compute 16-bit 1's complement sum from forced offset */
7970 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
7971
7972 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
7973 m->m_pkthdr.csum_rx_val = sum;
7974 m->m_pkthdr.csum_rx_start = (foff + hlen);
7975
7976 hwcksum_dbg_partial_forced++;
7977 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
7978 }
7979
7980 /*
7981 * Partial checksum offload verification (and adjustment);
7982 * useful to validate and test cases where the hardware
7983 * supports partial checksum offload.
7984 */
7985 if ((m->m_pkthdr.csum_flags &
7986 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
7987 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
7988 uint32_t rxoff;
7989
7990 /* Start offset must begin after frame header */
7991 rxoff = m->m_pkthdr.csum_rx_start;
7992 if (hlen > rxoff) {
7993 hwcksum_dbg_bad_rxoff++;
7994 if (dlil_verbose) {
7995 printf("%s: partial cksum start offset %d "
7996 "is less than frame header length %d for "
7997 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
7998 (uint64_t)VM_KERNEL_ADDRPERM(m));
7999 }
8000 return;
8001 }
8002 rxoff -= hlen;
8003
8004 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
8005 /*
8006 * Compute the expected 16-bit 1's complement sum;
8007 * skip this if we've already computed it above
8008 * when partial checksum offload is forced.
8009 */
8010 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
8011
8012 /* Hardware or driver is buggy */
8013 if (sum != m->m_pkthdr.csum_rx_val) {
8014 hwcksum_dbg_bad_cksum++;
8015 if (dlil_verbose) {
8016 printf("%s: bad partial cksum value "
8017 "0x%x (expected 0x%x) for mbuf "
8018 "0x%llx [rx_start %d]\n",
8019 if_name(ifp),
8020 m->m_pkthdr.csum_rx_val, sum,
8021 (uint64_t)VM_KERNEL_ADDRPERM(m),
8022 m->m_pkthdr.csum_rx_start);
8023 }
8024 return;
8025 }
8026 }
8027 hwcksum_dbg_verified++;
8028
8029 /*
8030 * This code allows us to emulate various kinds of hardware that
8031 * compute the 16-bit 1's complement sum beginning at various
8032 * start offset values.
8033 */
8034 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
8035 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
8036
8037 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
8038 return;
8039
8040 sum = m_adj_sum16(m, rxoff, aoff, sum);
8041
8042 m->m_pkthdr.csum_rx_val = sum;
8043 m->m_pkthdr.csum_rx_start = (aoff + hlen);
8044
8045 hwcksum_dbg_adjusted++;
8046 }
8047 }
8048 }
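/*
 * For reference, the "partial" checksum validated above is the plain
 * 16-bit 1's complement sum of the packet bytes starting at
 * csum_rx_start.  A standalone sketch of that sum over a contiguous
 * buffer follows; it is conceptually what m_sum16()/b_sum16() compute,
 * though the byte-order and folding details of the real routines may
 * differ.
 */
#if 0	/* illustrative sketch; not part of this file */
static uint16_t
example_sum16(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;

	/* Add 16-bit words in network byte order */
	while (len > 1) {
		sum += (uint32_t)((buf[0] << 8) | buf[1]);
		buf += 2;
		len -= 2;
	}
	/* A trailing odd byte is padded with zero */
	if (len == 1)
		sum += (uint32_t)(buf[0] << 8);
	/* Fold the carries back in (end-around carry) */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	return ((uint16_t)sum);
}
#endif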
8049
8050 static int
8051 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
8052 {
8053 #pragma unused(arg1, arg2)
8054 u_int32_t i;
8055 int err;
8056
8057 i = hwcksum_dbg_mode;
8058
8059 err = sysctl_handle_int(oidp, &i, 0, req);
8060 if (err != 0 || req->newptr == USER_ADDR_NULL)
8061 return (err);
8062
8063 if (hwcksum_dbg == 0)
8064 return (ENODEV);
8065
8066 if ((i & ~HWCKSUM_DBG_MASK) != 0)
8067 return (EINVAL);
8068
8069 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
8070
8071 return (err);
8072 }
8073
8074 static int
8075 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
8076 {
8077 #pragma unused(arg1, arg2)
8078 u_int32_t i;
8079 int err;
8080
8081 i = hwcksum_dbg_partial_rxoff_forced;
8082
8083 err = sysctl_handle_int(oidp, &i, 0, req);
8084 if (err != 0 || req->newptr == USER_ADDR_NULL)
8085 return (err);
8086
8087 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
8088 return (ENODEV);
8089
8090 hwcksum_dbg_partial_rxoff_forced = i;
8091
8092 return (err);
8093 }
8094
8095 static int
8096 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
8097 {
8098 #pragma unused(arg1, arg2)
8099 u_int32_t i;
8100 int err;
8101
8102 i = hwcksum_dbg_partial_rxoff_adj;
8103
8104 err = sysctl_handle_int(oidp, &i, 0, req);
8105 if (err != 0 || req->newptr == USER_ADDR_NULL)
8106 return (err);
8107
8108 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
8109 return (ENODEV);
8110
8111 hwcksum_dbg_partial_rxoff_adj = i;
8112
8113 return (err);
8114 }
8115
8116 static int
8117 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
8118 {
8119 #pragma unused(oidp, arg1, arg2)
8120 int err;
8121
8122 if (req->oldptr == USER_ADDR_NULL) {
8123 /* size-only probe; SYSCTL_OUT below reports the required length */
8124 }
8125 if (req->newptr != USER_ADDR_NULL) {
8126 return (EPERM);
8127 }
8128 err = SYSCTL_OUT(req, &tx_chain_len_stats,
8129 sizeof(struct chain_len_stats));
8130
8131 return (err);
8132 }
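/*
 * Reading the handler above from userspace is an ordinary sysctl
 * fetch.  A sketch follows; the MIB name
 * "net.link.generic.system.tx_chain_len_stats" is an assumption (the
 * registration is outside this excerpt) and the result is treated as
 * an opaque blob of sizeof (struct chain_len_stats) bytes.
 */
#if 0	/* illustrative userspace sketch; not part of this file */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	size_t len = 0;
	void *buf;

	/* First call sizes the buffer, second call fills it */
	if (sysctlbyname("net.link.generic.system.tx_chain_len_stats",
	    NULL, &len, NULL, 0) < 0 || (buf = malloc(len)) == NULL)
		return (1);
	if (sysctlbyname("net.link.generic.system.tx_chain_len_stats",
	    buf, &len, NULL, 0) < 0)
		return (1);
	printf("tx_chain_len_stats: %zu bytes\n", len);
	free(buf);
	return (0);
}
#endif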
8133
8134
8135 #if DEBUG
8136 /* Blob for sum16 verification */
8137 static uint8_t sumdata[] = {
8138 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
8139 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
8140 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
8141 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
8142 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
8143 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
8144 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
8145 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
8146 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
8147 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
8148 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
8149 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
8150 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
8151 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
8152 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
8153 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
8154 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
8155 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
8156 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
8157 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
8158 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
8159 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
8160 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
8161 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
8162 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
8163 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
8164 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
8165 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
8166 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
8167 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
8168 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
8169 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
8170 0xc8, 0x28, 0x02, 0x00, 0x00
8171 };
8172
8173 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
8174 static struct {
8175 int len;
8176 uint16_t sum;
8177 } sumtbl[] = {
8178 { 11, 0xcb6d },
8179 { 20, 0x20dd },
8180 { 27, 0xbabd },
8181 { 32, 0xf3e8 },
8182 { 37, 0x197d },
8183 { 43, 0x9eae },
8184 { 64, 0x4678 },
8185 { 127, 0x9399 },
8186 { 256, 0xd147 },
8187 { 325, 0x0358 }
8188 };
8189 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
8190
8191 static void
8192 dlil_verify_sum16(void)
8193 {
8194 struct mbuf *m;
8195 uint8_t *buf;
8196 int n;
8197
8198 /* Make sure test data plus extra room for alignment fits in cluster */
8199 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
8200
8201 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
8202 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
8203 buf = mtod(m, uint8_t *); /* base address */
8204
8205 for (n = 0; n < SUMTBL_MAX; n++) {
8206 uint16_t len = sumtbl[n].len;
8207 int i;
8208
8209 /* Verify for all possible alignments */
8210 for (i = 0; i < (int)sizeof (uint64_t); i++) {
8211 uint16_t sum;
8212 uint8_t *c;
8213
8214 /* Copy over test data to mbuf */
8215 VERIFY(len <= sizeof (sumdata));
8216 c = buf + i;
8217 bcopy(sumdata, c, len);
8218
8219 /* Zero-offset test (align by data pointer) */
8220 m->m_data = (caddr_t)c;
8221 m->m_len = len;
8222 sum = m_sum16(m, 0, len);
8223
8224 /* Something is horribly broken; stop now */
8225 if (sum != sumtbl[n].sum) {
8226 panic("%s: broken m_sum16 for len=%d align=%d "
8227 "sum=0x%04x [expected=0x%04x]\n", __func__,
8228 len, i, sum, sumtbl[n].sum);
8229 /* NOTREACHED */
8230 }
8231
8232 /* Alignment test by offset (fixed data pointer) */
8233 m->m_data = (caddr_t)buf;
8234 m->m_len = i + len;
8235 sum = m_sum16(m, i, len);
8236
8237 /* Something is horribly broken; stop now */
8238 if (sum != sumtbl[n].sum) {
8239 panic("%s: broken m_sum16 for len=%d offset=%d "
8240 "sum=0x%04x [expected=0x%04x]\n", __func__,
8241 len, i, sum, sumtbl[n].sum);
8242 /* NOTREACHED */
8243 }
8244 #if INET
8245 /* Simple sum16 contiguous buffer test by alignment */
8246 sum = b_sum16(c, len);
8247
8248 /* Something is horribly broken; stop now */
8249 if (sum != sumtbl[n].sum) {
8250 panic("%s: broken b_sum16 for len=%d align=%d "
8251 "sum=0x%04x [expected=0x%04x]\n", __func__,
8252 len, i, sum, sumtbl[n].sum);
8253 /* NOTREACHED */
8254 }
8255 #endif /* INET */
8256 }
8257 }
8258 m_freem(m);
8259
8260 printf("DLIL: SUM16 self-tests PASSED\n");
8261 }
8262 #endif /* DEBUG */
8263
8264 #define CASE_STRINGIFY(x) case x: return #x
8265
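/*
 * dlil_kev_dl_code_str() maps a KEV_DL_* event code to its symbolic
 * name for logging; unrecognized codes yield an empty string.
 */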
8266 __private_extern__ const char *
8267 dlil_kev_dl_code_str(u_int32_t event_code)
8268 {
8269 switch (event_code) {
8270 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
8271 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
8272 CASE_STRINGIFY(KEV_DL_SIFMTU);
8273 CASE_STRINGIFY(KEV_DL_SIFPHYS);
8274 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
8275 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
8276 CASE_STRINGIFY(KEV_DL_ADDMULTI);
8277 CASE_STRINGIFY(KEV_DL_DELMULTI);
8278 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
8279 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
8280 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
8281 CASE_STRINGIFY(KEV_DL_LINK_OFF);
8282 CASE_STRINGIFY(KEV_DL_LINK_ON);
8283 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
8284 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
8285 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
8286 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
8287 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
8288 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
8289 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
8290 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
8291 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
8292 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
8293 CASE_STRINGIFY(KEV_DL_ISSUES);
8294 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
8295 default:
8296 break;
8297 }
8298 return ("");
8299 }
8300
8301 /*
8302 * The three sysctl name components mirror the arguments of
8303 * ifnet_get_local_ports_extended():
8304 * ifindex, protocol, flags
8305 * The result is a bitmap with one bit per local port in use.
8306 */
8307 static int
8308 sysctl_get_ports_used SYSCTL_HANDLER_ARGS
8309 {
8310 #pragma unused(oidp)
8311 int *name = (int *)arg1;
8312 int namelen = arg2;
8313 int error = 0;
8314 int idx;
8315 protocol_family_t protocol;
8316 u_int32_t flags;
8317 ifnet_t ifp = NULL;
8318 u_int8_t *bitfield = NULL;
8319
8320 if (req->newptr != USER_ADDR_NULL) {
8321 error = EPERM;
8322 goto done;
8323 }
8324 if (namelen != 3) {
8325 error = ENOENT;
8326 goto done;
8327 }
8328
8329 if (req->oldptr == USER_ADDR_NULL) {
8330 req->oldidx = bitstr_size(65536);
8331 goto done;
8332 }
8333 if (req->oldlen < bitstr_size(65536)) {
8334 error = ENOMEM;
8335 goto done;
8336 }
8337
8338 idx = name[0];
8339 protocol = name[1];
8340 flags = name[2];
8341
8342 ifnet_head_lock_shared();
8343 if (idx > if_index) {
8344 ifnet_head_done();
8345 error = ENOENT;
8346 goto done;
8347 }
8348 ifp = ifindex2ifnet[idx];
8349 ifnet_head_done();
8350
8351 bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK);
8352 if (bitfield == NULL) {
8353 error = ENOMEM;
8354 goto done;
8355 }
8356 error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
8357 if (error != 0) {
8358 printf("%s: ifnet_get_local_ports_extended() error %d\n",
8359 __func__, error);
8360 goto done;
8361 }
8362 error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
8363 done:
8364 if (bitfield != NULL)
8365 _FREE(bitfield, M_TEMP);
8366 return (error);
8367 }
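/*
 * A userspace sketch of querying the handler above.  The MIB node name
 * "net.link.generic.system.get_ports_used" and the example interface
 * index are assumptions; per the handler, three integers (ifindex,
 * protocol family, flags) follow the node, and the result is a bitmap
 * with one bit per local port in use over the interface.
 */
#if 0	/* illustrative userspace sketch; not part of this file */
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	int mib[CTL_MAXNAME];
	size_t miblen = CTL_MAXNAME;
	unsigned char bitmap[65536 / 8];
	size_t len = sizeof (bitmap);

	/* Resolve the node, then append ifindex, protocol and flags */
	if (sysctlnametomib("net.link.generic.system.get_ports_used",
	    mib, &miblen) < 0)
		return (1);
	mib[miblen + 0] = 1;	/* ifindex (hypothetical) */
	mib[miblen + 1] = 0;	/* protocol: 0 means all */
	mib[miblen + 2] = 0;	/* flags */

	if (sysctl(mib, (u_int)(miblen + 3), bitmap, &len, NULL, 0) < 0)
		return (1);

	/* Each set bit marks a local port in use on the interface */
	for (int port = 0; port < 65536; port++) {
		if (bitmap[port / 8] & (1 << (port % 8)))
			printf("%d\n", port);
	}
	return (0);
}
#endif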
8368
8369 #if (DEVELOPMENT || DEBUG)
8370 /*
8371 * The sysctl variable name contains the input parameters of
8372 * ifnet_get_keepalive_offload_frames()
8373 * ifp (interface index): name[0]
8374 * frames_array_count: name[1]
8375 * frame_data_offset: name[2]
8376 * The return length gives used_frames_count
8377 */
8378 static int
8379 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
8380 {
8381 #pragma unused(oidp)
8382 int *name = (int *)arg1;
8383 u_int namelen = arg2;
8384 int idx;
8385 ifnet_t ifp = NULL;
8386 u_int32_t frames_array_count;
8387 size_t frame_data_offset;
8388 u_int32_t used_frames_count;
8389 struct ifnet_keepalive_offload_frame *frames_array = NULL;
8390 int error = 0;
8391 u_int32_t i;
8392
8393 /*
8394 * Only root can look at other people's TCP frames
8395 */
8396 error = proc_suser(current_proc());
8397 if (error != 0)
8398 goto done;
8399 /*
8400 * Validate the input parameters
8401 */
8402 if (req->newptr != USER_ADDR_NULL) {
8403 error = EPERM;
8404 goto done;
8405 }
8406 if (namelen != 3) {
8407 error = EINVAL;
8408 goto done;
8409 }
8410 if (req->oldptr == USER_ADDR_NULL) {
8411 error = EINVAL;
8412 goto done;
8413 }
8414 if (req->oldlen == 0) {
8415 error = EINVAL;
8416 goto done;
8417 }
8418 idx = name[0];
8419 frames_array_count = name[1];
8420 frame_data_offset = name[2];
8421
8422 /* Make sure the passed buffer is large enough */
8423 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
8424 req->oldlen) {
8425 error = ENOMEM;
8426 goto done;
8427 }
8428
8429 ifnet_head_lock_shared();
8430 if (idx > if_index) {
8431 ifnet_head_done();
8432 error = ENOENT;
8433 goto done;
8434 }
8435 ifp = ifindex2ifnet[idx];
8436 ifnet_head_done();
8437
8438 frames_array = _MALLOC(frames_array_count *
8439 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
8440 if (frames_array == NULL) {
8441 error = ENOMEM;
8442 goto done;
8443 }
8444
8445 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
8446 frames_array_count, frame_data_offset, &used_frames_count);
8447 if (error != 0) {
8448 printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
8449 __func__, error);
8450 goto done;
8451 }
8452
8453 for (i = 0; i < used_frames_count; i++) {
8454 error = SYSCTL_OUT(req, frames_array + i,
8455 sizeof(struct ifnet_keepalive_offload_frame));
8456 if (error != 0) {
8457 goto done;
8458 }
8459 }
8460 done:
8461 if (frames_array != NULL)
8462 _FREE(frames_array, M_TEMP);
8463 return (error);
8464 }
8465 #endif /* DEVELOPMENT || DEBUG */