[apple/xnu.git] bsd/net/dlil.c (xnu-3789.41.3)
1 /*
2 * Copyright (c) 1999-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_llreach.h>
71 #include <net/kpi_interfacefilter.h>
72 #include <net/classq/classq.h>
73 #include <net/classq/classq_sfb.h>
74 #include <net/flowhash.h>
75 #include <net/ntstat.h>
76
77 #if INET
78 #include <netinet/in_var.h>
79 #include <netinet/igmp_var.h>
80 #include <netinet/ip_var.h>
81 #include <netinet/tcp.h>
82 #include <netinet/tcp_var.h>
83 #include <netinet/udp.h>
84 #include <netinet/udp_var.h>
85 #include <netinet/if_ether.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_tclass.h>
88 #endif /* INET */
89
90 #if INET6
91 #include <netinet6/in6_var.h>
92 #include <netinet6/nd6.h>
93 #include <netinet6/mld6_var.h>
94 #include <netinet6/scope6_var.h>
95 #endif /* INET6 */
96
97 #include <libkern/OSAtomic.h>
98 #include <libkern/tree.h>
99
100 #include <dev/random/randomdev.h>
101 #include <machine/machine_routines.h>
102
103 #include <mach/thread_act.h>
104 #include <mach/sdt.h>
105
106 #if CONFIG_MACF
107 #include <sys/kauth.h>
108 #include <security/mac_framework.h>
109 #include <net/ethernet.h>
110 #include <net/firewire.h>
111 #endif
112
113 #if PF
114 #include <net/pfvar.h>
115 #endif /* PF */
116 #if PF_ALTQ
117 #include <net/altq/altq.h>
118 #endif /* PF_ALTQ */
119 #include <net/pktsched/pktsched.h>
120
121 #if NECP
122 #include <net/necp.h>
123 #endif /* NECP */
124
125 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
126 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
127 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
128 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
129 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
130
131 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
132 #define MAX_LINKADDR 4 /* LONGWORDS */
133 #define M_NKE M_IFADDR
134
135 #if 1
136 #define DLIL_PRINTF printf
137 #else
138 #define DLIL_PRINTF kprintf
139 #endif
140
141 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
142 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
143
144 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
145 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
146
147 enum {
148 kProtoKPI_v1 = 1,
149 kProtoKPI_v2 = 2
150 };
151
152 /*
153 * List of if_proto structures in if_proto_hash[] is protected by
154 * the ifnet lock. The rest of the fields are initialized at protocol
155 * attach time and never change, so no lock is required as long as
156 * a valid reference to it is held via if_proto_ref().
157 */
158 struct if_proto {
159 SLIST_ENTRY(if_proto) next_hash;
160 u_int32_t refcount;
161 u_int32_t detached;
162 struct ifnet *ifp;
163 protocol_family_t protocol_family;
164 int proto_kpi;
165 union {
166 struct {
167 proto_media_input input;
168 proto_media_preout pre_output;
169 proto_media_event event;
170 proto_media_ioctl ioctl;
171 proto_media_detached detached;
172 proto_media_resolve_multi resolve_multi;
173 proto_media_send_arp send_arp;
174 } v1;
175 struct {
176 proto_media_input_v2 input;
177 proto_media_preout pre_output;
178 proto_media_event event;
179 proto_media_ioctl ioctl;
180 proto_media_detached detached;
181 proto_media_resolve_multi resolve_multi;
182 proto_media_send_arp send_arp;
183 } v2;
184 } kpi;
185 };
186
187 SLIST_HEAD(proto_hash_entry, if_proto);
188
189 #define DLIL_SDLMAXLEN 64
190 #define DLIL_SDLDATALEN \
191 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
192
193 struct dlil_ifnet {
194 struct ifnet dl_if; /* public ifnet */
195 /*
196 * DLIL private fields, protected by dl_if_lock
197 */
198 decl_lck_mtx_data(, dl_if_lock);
199 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
200 u_int32_t dl_if_flags; /* flags (below) */
201 u_int32_t dl_if_refcnt; /* refcnt */
202 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
203 void *dl_if_uniqueid; /* unique interface id */
204 size_t dl_if_uniqueid_len; /* length of the unique id */
205 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
206 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
207 struct {
208 struct ifaddr ifa; /* lladdr ifa */
209 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
210 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
211 } dl_if_lladdr;
212 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
213 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
214 ctrace_t dl_if_attach; /* attach PC stacktrace */
215 ctrace_t dl_if_detach; /* detach PC stacktrace */
216 };
217
218 /* Values for dl_if_flags (private to DLIL) */
219 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
220 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
221 #define DLIF_DEBUG 0x4 /* has debugging info */
222
223 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
224
225 /* For gdb */
226 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
227
228 struct dlil_ifnet_dbg {
229 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
230 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
231 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
232 /*
233 * Circular lists of ifnet_{reference,release} callers.
234 */
235 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
236 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
237 };
238
239 #define DLIL_TO_IFP(s) (&s->dl_if)
240 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
241
242 struct ifnet_filter {
243 TAILQ_ENTRY(ifnet_filter) filt_next;
244 u_int32_t filt_skip;
245 u_int32_t filt_flags;
246 ifnet_t filt_ifp;
247 const char *filt_name;
248 void *filt_cookie;
249 protocol_family_t filt_protocol;
250 iff_input_func filt_input;
251 iff_output_func filt_output;
252 iff_event_func filt_event;
253 iff_ioctl_func filt_ioctl;
254 iff_detached_func filt_detached;
255 };
256
257 struct proto_input_entry;
258
259 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
260 static lck_grp_t *dlil_lock_group;
261 lck_grp_t *ifnet_lock_group;
262 static lck_grp_t *ifnet_head_lock_group;
263 static lck_grp_t *ifnet_snd_lock_group;
264 static lck_grp_t *ifnet_rcv_lock_group;
265 lck_attr_t *ifnet_lock_attr;
266 decl_lck_rw_data(static, ifnet_head_lock);
267 decl_lck_mtx_data(static, dlil_ifnet_lock);
268 u_int32_t dlil_filter_disable_tso_count = 0;
269
270 #if DEBUG
271 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
272 #else
273 static unsigned int ifnet_debug; /* debugging (disabled) */
274 #endif /* !DEBUG */
275 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
276 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
277 static struct zone *dlif_zone; /* zone for dlil_ifnet */
278
279 #define DLIF_ZONE_MAX 64 /* maximum elements in zone */
280 #define DLIF_ZONE_NAME "ifnet" /* zone name */
281
282 static unsigned int dlif_filt_size; /* size of ifnet_filter */
283 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
284
285 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
286 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
287
288 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
289 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
290
291 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
292 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
293
294 static unsigned int dlif_proto_size; /* size of if_proto */
295 static struct zone *dlif_proto_zone; /* zone for if_proto */
296
297 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
298 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
299
300 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
301 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
302 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
303
304 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
305 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
306
307 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
308 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
309 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
310
311 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
312 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
313
314 static u_int32_t net_rtref;
315
316 static struct dlil_main_threading_info dlil_main_input_thread_info;
317 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
318 (struct dlil_threading_info *)&dlil_main_input_thread_info;
319
320 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
321 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
322 static void dlil_if_trace(struct dlil_ifnet *, int);
323 static void if_proto_ref(struct if_proto *);
324 static void if_proto_free(struct if_proto *);
325 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
326 static int dlil_ifp_proto_count(struct ifnet *);
327 static void if_flt_monitor_busy(struct ifnet *);
328 static void if_flt_monitor_unbusy(struct ifnet *);
329 static void if_flt_monitor_enter(struct ifnet *);
330 static void if_flt_monitor_leave(struct ifnet *);
331 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
332 char **, protocol_family_t);
333 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
334 protocol_family_t);
335 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
336 const struct sockaddr_dl *);
337 static int ifnet_lookup(struct ifnet *);
338 static void if_purgeaddrs(struct ifnet *);
339
340 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
341 struct mbuf *, char *);
342 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
343 struct mbuf *);
344 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
345 mbuf_t *, const struct sockaddr *, void *, char *, char *);
346 static void ifproto_media_event(struct ifnet *, protocol_family_t,
347 const struct kev_msg *);
348 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
349 unsigned long, void *);
350 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
351 struct sockaddr_dl *, size_t);
352 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
353 const struct sockaddr_dl *, const struct sockaddr *,
354 const struct sockaddr_dl *, const struct sockaddr *);
355
356 static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
357 static void ifp_if_start(struct ifnet *);
358 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
359 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
360 boolean_t poll, struct thread *tp);
361 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
362 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
363 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
364 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
365 protocol_family_t *);
366 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
367 const struct ifnet_demux_desc *, u_int32_t);
368 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
369 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
370 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
371 const struct sockaddr *, const char *, const char *);
372 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
373 const struct sockaddr *, const char *, const char *,
374 u_int32_t *, u_int32_t *);
375 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
376 static void ifp_if_free(struct ifnet *);
377 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
378 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
379 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
380
381 static void dlil_main_input_thread_func(void *, wait_result_t);
382 static void dlil_input_thread_func(void *, wait_result_t);
383 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
384 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
385 static void dlil_terminate_input_thread(struct dlil_threading_info *);
386 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
387 struct dlil_threading_info *, boolean_t);
388 static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
389 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
390 u_int32_t, ifnet_model_t, boolean_t);
391 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
392 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
393
394 #if DEBUG
395 static void dlil_verify_sum16(void);
396 #endif /* DEBUG */
397 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
398 protocol_family_t);
399 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
400 protocol_family_t);
401
402 static void ifnet_detacher_thread_func(void *, wait_result_t);
403 static int ifnet_detacher_thread_cont(int);
404 static void ifnet_detach_final(struct ifnet *);
405 static void ifnet_detaching_enqueue(struct ifnet *);
406 static struct ifnet *ifnet_detaching_dequeue(void);
407
408 static void ifnet_start_thread_fn(void *, wait_result_t);
409 static void ifnet_poll_thread_fn(void *, wait_result_t);
410 static void ifnet_poll(struct ifnet *);
411
412 static void ifp_src_route_copyout(struct ifnet *, struct route *);
413 static void ifp_src_route_copyin(struct ifnet *, struct route *);
414 #if INET6
415 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
416 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
417 #endif /* INET6 */
418
419 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
420 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
421 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
422 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
423 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
424 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
425 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
426 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
427 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
428 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
429 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
430 static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;
431
432 struct chain_len_stats tx_chain_len_stats;
433 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
434
435 /* The following are protected by dlil_ifnet_lock */
436 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
437 static u_int32_t ifnet_detaching_cnt;
438 static void *ifnet_delayed_run; /* wait channel for detaching thread */
439
440 decl_lck_mtx_data(static, ifnet_fc_lock);
441
442 static uint32_t ifnet_flowhash_seed;
443
444 struct ifnet_flowhash_key {
445 char ifk_name[IFNAMSIZ];
446 uint32_t ifk_unit;
447 uint32_t ifk_flags;
448 uint32_t ifk_eflags;
449 uint32_t ifk_capabilities;
450 uint32_t ifk_capenable;
451 uint32_t ifk_output_sched_model;
452 uint32_t ifk_rand1;
453 uint32_t ifk_rand2;
454 };
455
456 /* Flow control entry per interface */
457 struct ifnet_fc_entry {
458 RB_ENTRY(ifnet_fc_entry) ifce_entry;
459 u_int32_t ifce_flowhash;
460 struct ifnet *ifce_ifp;
461 };
462
463 static uint32_t ifnet_calc_flowhash(struct ifnet *);
464 static int ifce_cmp(const struct ifnet_fc_entry *,
465 const struct ifnet_fc_entry *);
466 static int ifnet_fc_add(struct ifnet *);
467 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
468 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
469
470 /* protected by ifnet_fc_lock */
471 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
472 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
473 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
474
475 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
476 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
477
478 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
479 #define IFNET_FC_ZONE_MAX 32
480
481 extern void bpfdetach(struct ifnet *);
482 extern void proto_input_run(void);
483
484 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
485 u_int32_t flags);
486 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
487 u_int32_t flags);
488
489 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
490
491 #if CONFIG_MACF
492 int dlil_lladdr_ckreq = 0;
493 #endif
494
495 #if DEBUG
496 int dlil_verbose = 1;
497 #else
498 int dlil_verbose = 0;
499 #endif /* DEBUG */
500 #if IFNET_INPUT_SANITY_CHK
501 /* sanity checking of input packet lists received */
502 static u_int32_t dlil_input_sanity_check = 0;
503 #endif /* IFNET_INPUT_SANITY_CHK */
504 /* rate limit debug messages */
505 struct timespec dlil_dbgrate = { 1, 0 };
506
507 SYSCTL_DECL(_net_link_generic_system);
508
509 #if CONFIG_MACF
510 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
511 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
512 "Require MACF system info check to expose link-layer address");
513 #endif
514
515 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
516 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
517
518 #define IF_SNDQ_MINLEN 32
519 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
520 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
521 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
522 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
523
524 #define IF_RCVQ_MINLEN 32
525 #define IF_RCVQ_MAXLEN 256
526 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
527 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
528 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
529 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
530
531 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
532 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
533 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
534 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
535 "ilog2 of EWMA decay rate of avg inbound packets");
536
537 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
538 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
539 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
540 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
541 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
542 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
543 "Q", "input poll mode freeze time");
544
545 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
546 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
547 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
548 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
549 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
550 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
551 "Q", "input poll sampling time");
552
553 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
554 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
555 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
556 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
557 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
558 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
559 "Q", "input poll interval (time)");
560
561 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
562 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
563 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
564 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
565 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
566
567 #define IF_RXPOLL_WLOWAT 10
568 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
569 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
570 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
571 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
572 "I", "input poll wakeup low watermark");
573
574 #define IF_RXPOLL_WHIWAT 100
575 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
576 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
577 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
578 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
579 "I", "input poll wakeup high watermark");
580
581 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
582 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
583 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
584 "max packets per poll call");
585
586 static u_int32_t if_rxpoll = 1;
587 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
588 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
589 sysctl_rxpoll, "I", "enable opportunistic input polling");
590
591 u_int32_t if_bw_smoothing_val = 3;
592 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
593 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");
594
595 u_int32_t if_bw_measure_size = 10;
596 SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
597 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
598
599 static u_int32_t cur_dlil_input_threads = 0;
600 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
601 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
602 "Current number of DLIL input threads");
603
604 #if IFNET_INPUT_SANITY_CHK
605 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
606 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
607 "Turn on sanity checking in DLIL input");
608 #endif /* IFNET_INPUT_SANITY_CHK */
609
610 static u_int32_t if_flowadv = 1;
611 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
612 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
613 "enable flow-advisory mechanism");
614
615 static u_int32_t if_delaybased_queue = 1;
616 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
617 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
618 "enable delay based dynamic queue sizing");
619
620 static uint64_t hwcksum_in_invalidated = 0;
621 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
622 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
623 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
624
625 uint32_t hwcksum_dbg = 0;
626 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
627 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
628 "enable hardware cksum debugging");
629
630 u_int32_t ifnet_start_delayed = 0;
631 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
632 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
633 "number of times start was delayed");
634
635 u_int32_t ifnet_delay_start_disabled = 0;
636 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
637 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
638 "number of times start was delayed");
639
640 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
641 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
642 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
643 #define HWCKSUM_DBG_MASK \
644 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
645 HWCKSUM_DBG_FINALIZE_FORCED)
646
647 static uint32_t hwcksum_dbg_mode = 0;
648 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
649 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
650 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
651
652 static uint64_t hwcksum_dbg_partial_forced = 0;
653 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
654 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
655 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
656
657 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
658 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
659 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
660 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
661
662 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
663 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
664 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
665 &hwcksum_dbg_partial_rxoff_forced, 0,
666 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
667 "forced partial cksum rx offset");
668
669 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
670 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
671 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
672 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
673 "adjusted partial cksum rx offset");
674
675 static uint64_t hwcksum_dbg_verified = 0;
676 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
677 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
678 &hwcksum_dbg_verified, "packets verified for having good checksum");
679
680 static uint64_t hwcksum_dbg_bad_cksum = 0;
681 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
682 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
683 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
684
685 static uint64_t hwcksum_dbg_bad_rxoff = 0;
686 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
687 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
688 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
689
690 static uint64_t hwcksum_dbg_adjusted = 0;
691 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
692 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
693 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
694
695 static uint64_t hwcksum_dbg_finalized_hdr = 0;
696 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
697 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
698 &hwcksum_dbg_finalized_hdr, "finalized headers");
699
700 static uint64_t hwcksum_dbg_finalized_data = 0;
701 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
702 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
703 &hwcksum_dbg_finalized_data, "finalized payloads");
704
705 uint32_t hwcksum_tx = 1;
706 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
707 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
708 "enable transmit hardware checksum offload");
709
710 uint32_t hwcksum_rx = 1;
711 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
712 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
713 "enable receive hardware checksum offload");
714
715 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
716 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
717 sysctl_tx_chain_len_stats, "S", "");
718
719 uint32_t tx_chain_len_count = 0;
720 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
721 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
722
723 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
724 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
725
726 #if (DEVELOPMENT || DEBUG)
727 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
728 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
729 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
730 #endif /* DEVELOPMENT || DEBUG */
731
732 unsigned int net_rxpoll = 1;
733 unsigned int net_affinity = 1;
734 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
735
736 extern u_int32_t inject_buckets;
737
738 static lck_grp_attr_t *dlil_grp_attributes = NULL;
739 static lck_attr_t *dlil_lck_attributes = NULL;
740
741
742 #define DLIL_INPUT_CHECK(m, ifp) { \
743 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
744 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
745 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
746 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
747 /* NOTREACHED */ \
748 } \
749 }
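/*
 * Added note (not part of the original source): DLIL_INPUT_CHECK panics
 * unless the mbuf carries a packet header and its recorded receive
 * interface matches the ifnet being processed; the loopback interface is
 * exempt from the rcvif comparison, presumably because packets queued to
 * its input path keep the rcvif of the interface they originated on.
 */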
750
751 #define DLIL_EWMA(old, new, decay) do { \
752 u_int32_t _avg; \
753 if ((_avg = (old)) > 0) \
754 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
755 else \
756 _avg = (new); \
757 (old) = _avg; \
758 } while (0)
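/*
 * Worked example (added note, not part of the original source): with the
 * default if_rxpoll_decay of 2, DLIL_EWMA(avg, new, 2) evaluates to
 *
 *	avg = ((avg << 2) - avg + new) >> 2  ==  (3 * avg + new) / 4
 *
 * i.e. an exponentially weighted moving average giving the newest sample
 * a weight of 1 / 2^decay.
 */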
759
760 #define MBPS (1ULL * 1000 * 1000)
761 #define GBPS (MBPS * 1000)
762
763 struct rxpoll_time_tbl {
764 u_int64_t speed; /* downlink speed */
765 u_int32_t plowat; /* packets low watermark */
766 u_int32_t phiwat; /* packets high watermark */
767 u_int32_t blowat; /* bytes low watermark */
768 u_int32_t bhiwat; /* bytes high watermark */
769 };
770
771 static struct rxpoll_time_tbl rxpoll_tbl[] = {
772 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
773 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
774 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
775 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
776 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
777 { 0, 0, 0, 0, 0 }
778 };
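/*
 * Added note (not part of the original source): rxpoll_tbl maps a link's
 * downlink speed to the packet/byte low and high watermarks used by the
 * opportunistic polling logic; e.g. a link rated above 10 Mbps and up to
 * 100 Mbps falls in the second row (10/40 packets, 4 KB/64 KB). The table
 * is presumably consulted when per-interface polling parameters are
 * (re)computed, cf. dlil_rxpoll_set_params() referenced below.
 */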
779
780 int
781 proto_hash_value(u_int32_t protocol_family)
782 {
783 /*
784 * dlil_proto_unplumb_all() depends on the mapping between
785 * the hash bucket index and the protocol family defined
786 * here; future changes must be applied there as well.
787 */
788 switch (protocol_family) {
789 case PF_INET:
790 return (0);
791 case PF_INET6:
792 return (1);
793 case PF_VLAN:
794 return (2);
795 case PF_UNSPEC:
796 default:
797 return (3);
798 }
799 }
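/*
 * Added note (not part of the original source): e.g.
 * proto_hash_value(PF_INET6) == 1, while every family other than
 * PF_INET, PF_INET6 and PF_VLAN shares bucket 3; this is why
 * find_attached_proto() below still compares protocol_family while
 * walking a bucket's list.
 */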
800
801 /*
802 * Caller must already be holding ifnet lock.
803 */
804 static struct if_proto *
805 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
806 {
807 struct if_proto *proto = NULL;
808 u_int32_t i = proto_hash_value(protocol_family);
809
810 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
811
812 if (ifp->if_proto_hash != NULL)
813 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
814
815 while (proto != NULL && proto->protocol_family != protocol_family)
816 proto = SLIST_NEXT(proto, next_hash);
817
818 if (proto != NULL)
819 if_proto_ref(proto);
820
821 return (proto);
822 }
823
824 static void
825 if_proto_ref(struct if_proto *proto)
826 {
827 atomic_add_32(&proto->refcount, 1);
828 }
829
830 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
831
832 static void
833 if_proto_free(struct if_proto *proto)
834 {
835 u_int32_t oldval;
836 struct ifnet *ifp = proto->ifp;
837 u_int32_t proto_family = proto->protocol_family;
838 struct kev_dl_proto_data ev_pr_data;
839
840 oldval = atomic_add_32_ov(&proto->refcount, -1);
841 if (oldval > 1)
842 return;
843
844 /* No more reference on this, protocol must have been detached */
845 VERIFY(proto->detached);
846
847 if (proto->proto_kpi == kProtoKPI_v1) {
848 if (proto->kpi.v1.detached)
849 proto->kpi.v1.detached(ifp, proto->protocol_family);
850 }
851 if (proto->proto_kpi == kProtoKPI_v2) {
852 if (proto->kpi.v2.detached)
853 proto->kpi.v2.detached(ifp, proto->protocol_family);
854 }
855
856 /*
857 * Cleanup routes that may still be in the routing table for that
858 * interface/protocol pair.
859 */
860 if_rtproto_del(ifp, proto_family);
861
862 /*
863 * The reserved field carries the number of protocols still attached
864 * (subject to change)
865 */
866 ifnet_lock_shared(ifp);
867 ev_pr_data.proto_family = proto_family;
868 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
869 ifnet_lock_done(ifp);
870
871 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
872 (struct net_event_data *)&ev_pr_data,
873 sizeof (struct kev_dl_proto_data));
874
875 zfree(dlif_proto_zone, proto);
876 }
877
878 __private_extern__ void
879 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
880 {
881 unsigned int type = 0;
882 int ass = 1;
883
884 switch (what) {
885 case IFNET_LCK_ASSERT_EXCLUSIVE:
886 type = LCK_RW_ASSERT_EXCLUSIVE;
887 break;
888
889 case IFNET_LCK_ASSERT_SHARED:
890 type = LCK_RW_ASSERT_SHARED;
891 break;
892
893 case IFNET_LCK_ASSERT_OWNED:
894 type = LCK_RW_ASSERT_HELD;
895 break;
896
897 case IFNET_LCK_ASSERT_NOTOWNED:
898 /* nothing to do here for RW lock; bypass assert */
899 ass = 0;
900 break;
901
902 default:
903 panic("bad ifnet assert type: %d", what);
904 /* NOTREACHED */
905 }
906 if (ass)
907 lck_rw_assert(&ifp->if_lock, type);
908 }
909
910 __private_extern__ void
911 ifnet_lock_shared(struct ifnet *ifp)
912 {
913 lck_rw_lock_shared(&ifp->if_lock);
914 }
915
916 __private_extern__ void
917 ifnet_lock_exclusive(struct ifnet *ifp)
918 {
919 lck_rw_lock_exclusive(&ifp->if_lock);
920 }
921
922 __private_extern__ void
923 ifnet_lock_done(struct ifnet *ifp)
924 {
925 lck_rw_done(&ifp->if_lock);
926 }
927
928 #if INET
929 __private_extern__ void
930 if_inetdata_lock_shared(struct ifnet *ifp)
931 {
932 lck_rw_lock_shared(&ifp->if_inetdata_lock);
933 }
934
935 __private_extern__ void
936 if_inetdata_lock_exclusive(struct ifnet *ifp)
937 {
938 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
939 }
940
941 __private_extern__ void
942 if_inetdata_lock_done(struct ifnet *ifp)
943 {
944 lck_rw_done(&ifp->if_inetdata_lock);
945 }
946 #endif
947
948 #if INET6
949 __private_extern__ void
950 if_inet6data_lock_shared(struct ifnet *ifp)
951 {
952 lck_rw_lock_shared(&ifp->if_inet6data_lock);
953 }
954
955 __private_extern__ void
956 if_inet6data_lock_exclusive(struct ifnet *ifp)
957 {
958 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
959 }
960
961 __private_extern__ void
962 if_inet6data_lock_done(struct ifnet *ifp)
963 {
964 lck_rw_done(&ifp->if_inet6data_lock);
965 }
966 #endif
967
968 __private_extern__ void
969 ifnet_head_lock_shared(void)
970 {
971 lck_rw_lock_shared(&ifnet_head_lock);
972 }
973
974 __private_extern__ void
975 ifnet_head_lock_exclusive(void)
976 {
977 lck_rw_lock_exclusive(&ifnet_head_lock);
978 }
979
980 __private_extern__ void
981 ifnet_head_done(void)
982 {
983 lck_rw_done(&ifnet_head_lock);
984 }
985
986 __private_extern__ void
987 ifnet_head_assert_exclusive(void)
988 {
989 lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
990 }
991
992 /*
993 * Caller must already be holding ifnet lock.
994 */
995 static int
996 dlil_ifp_proto_count(struct ifnet *ifp)
997 {
998 int i, count = 0;
999
1000 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1001
1002 if (ifp->if_proto_hash == NULL)
1003 goto done;
1004
1005 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1006 struct if_proto *proto;
1007 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1008 count++;
1009 }
1010 }
1011 done:
1012 return (count);
1013 }
1014
1015 __private_extern__ void
1016 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1017 u_int32_t event_code, struct net_event_data *event_data,
1018 u_int32_t event_data_len)
1019 {
1020 struct net_event_data ev_data;
1021 struct kev_msg ev_msg;
1022
1023 bzero(&ev_msg, sizeof (ev_msg));
1024 bzero(&ev_data, sizeof (ev_data));
1025 /*
1026 * a net event always starts with a net_event_data structure
1027 * but the caller can generate a simple net event or
1028 * provide a longer event structure to post
1029 */
1030 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1031 ev_msg.kev_class = KEV_NETWORK_CLASS;
1032 ev_msg.kev_subclass = event_subclass;
1033 ev_msg.event_code = event_code;
1034
1035 if (event_data == NULL) {
1036 event_data = &ev_data;
1037 event_data_len = sizeof (struct net_event_data);
1038 }
1039
1040 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1041 event_data->if_family = ifp->if_family;
1042 event_data->if_unit = (u_int32_t)ifp->if_unit;
1043
1044 ev_msg.dv[0].data_length = event_data_len;
1045 ev_msg.dv[0].data_ptr = event_data;
1046 ev_msg.dv[1].data_length = 0;
1047
1048 /* Don't update interface generation for quality and RRC state changes */
1049 bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
1050 (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
1051 event_code != KEV_DL_RRC_STATE_CHANGED));
1052
1053 dlil_event_internal(ifp, &ev_msg, update_generation);
1054 }
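/*
 * Illustrative usage sketch (added note, not part of the original
 * source): a caller that only needs the implicit net_event_data header
 * can post a simple link event as
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * in which case the local ev_data above is used and filled in with the
 * interface name, family and unit before the event is delivered.
 */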
1055
1056 __private_extern__ int
1057 dlil_alloc_local_stats(struct ifnet *ifp)
1058 {
1059 int ret = EINVAL;
1060 void *buf, *base, **pbuf;
1061
1062 if (ifp == NULL)
1063 goto end;
1064
1065 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1066 /* allocate tcpstat_local structure */
1067 buf = zalloc(dlif_tcpstat_zone);
1068 if (buf == NULL) {
1069 ret = ENOMEM;
1070 goto end;
1071 }
1072 bzero(buf, dlif_tcpstat_bufsize);
1073
1074 /* Get the 64-bit aligned base address for this object */
1075 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1076 sizeof (u_int64_t));
1077 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1078 ((intptr_t)buf + dlif_tcpstat_bufsize));
1079
1080 /*
1081 * Wind back a pointer size from the aligned base and
1082 * save the original address so we can free it later.
1083 */
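/*
 * Illustrative layout (added note, not in the original source);
 * dlif_tcpstat_bufsize is sized in dlil_init() so that the saved
 * pointer and alignment slack always fit in front of the object:
 *
 *	buf           pbuf (= base - sizeof (void *))
 *	|---slack----|---saved buf ptr---|---tcpstat_local---|
 *	                                 ^ base, 64-bit aligned
 */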
1084 pbuf = (void **)((intptr_t)base - sizeof (void *));
1085 *pbuf = buf;
1086 ifp->if_tcp_stat = base;
1087
1088 /* allocate udpstat_local structure */
1089 buf = zalloc(dlif_udpstat_zone);
1090 if (buf == NULL) {
1091 ret = ENOMEM;
1092 goto end;
1093 }
1094 bzero(buf, dlif_udpstat_bufsize);
1095
1096 /* Get the 64-bit aligned base address for this object */
1097 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1098 sizeof (u_int64_t));
1099 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1100 ((intptr_t)buf + dlif_udpstat_bufsize));
1101
1102 /*
1103 * Wind back a pointer size from the aligned base and
1104 * save the original address so we can free it later.
1105 */
1106 pbuf = (void **)((intptr_t)base - sizeof (void *));
1107 *pbuf = buf;
1108 ifp->if_udp_stat = base;
1109
1110 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
1111 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
1112
1113 ret = 0;
1114 }
1115
1116 if (ifp->if_ipv4_stat == NULL) {
1117 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1118 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1119 if (ifp->if_ipv4_stat == NULL) {
1120 ret = ENOMEM;
1121 goto end;
1122 }
1123 }
1124
1125 if (ifp->if_ipv6_stat == NULL) {
1126 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1127 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1128 if (ifp->if_ipv6_stat == NULL) {
1129 ret = ENOMEM;
1130 goto end;
1131 }
1132 }
1133 end:
1134 if (ret != 0) {
1135 if (ifp->if_tcp_stat != NULL) {
1136 pbuf = (void **)
1137 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
1138 zfree(dlif_tcpstat_zone, *pbuf);
1139 ifp->if_tcp_stat = NULL;
1140 }
1141 if (ifp->if_udp_stat != NULL) {
1142 pbuf = (void **)
1143 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
1144 zfree(dlif_udpstat_zone, *pbuf);
1145 ifp->if_udp_stat = NULL;
1146 }
1147 if (ifp->if_ipv4_stat != NULL) {
1148 FREE(ifp->if_ipv4_stat, M_TEMP);
1149 ifp->if_ipv4_stat = NULL;
1150 }
1151 if (ifp->if_ipv6_stat != NULL) {
1152 FREE(ifp->if_ipv6_stat, M_TEMP);
1153 ifp->if_ipv6_stat = NULL;
1154 }
1155 }
1156
1157 return (ret);
1158 }
1159
1160 static int
1161 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1162 {
1163 thread_continue_t func;
1164 u_int32_t limit;
1165 int error;
1166
1167 /* NULL ifp indicates the main input thread, called at dlil_init time */
1168 if (ifp == NULL) {
1169 func = dlil_main_input_thread_func;
1170 VERIFY(inp == dlil_main_input_thread);
1171 (void) strlcat(inp->input_name,
1172 "main_input", DLIL_THREADNAME_LEN);
1173 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1174 func = dlil_rxpoll_input_thread_func;
1175 VERIFY(inp != dlil_main_input_thread);
1176 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1177 "%s_input_poll", if_name(ifp));
1178 } else {
1179 func = dlil_input_thread_func;
1180 VERIFY(inp != dlil_main_input_thread);
1181 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1182 "%s_input", if_name(ifp));
1183 }
1184 VERIFY(inp->input_thr == THREAD_NULL);
1185
1186 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1187 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1188
1189 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1190 inp->ifp = ifp; /* NULL for main input thread */
1191
1192 net_timerclear(&inp->mode_holdtime);
1193 net_timerclear(&inp->mode_lasttime);
1194 net_timerclear(&inp->sample_holdtime);
1195 net_timerclear(&inp->sample_lasttime);
1196 net_timerclear(&inp->dbg_lasttime);
1197
1198 /*
1199 * For interfaces that support opportunistic polling, set the
1200 * low and high watermarks for outstanding inbound packets/bytes.
1201 * Also define freeze times for transitioning between modes
1202 * and updating the average.
1203 */
1204 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1205 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1206 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1207 } else {
1208 limit = (u_int32_t)-1;
1209 }
1210
1211 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
1212 if (inp == dlil_main_input_thread) {
1213 struct dlil_main_threading_info *inpm =
1214 (struct dlil_main_threading_info *)inp;
1215 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
1216 }
1217
1218 error = kernel_thread_start(func, inp, &inp->input_thr);
1219 if (error == KERN_SUCCESS) {
1220 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1221 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
1222 /*
1223 * We create an affinity set so that the matching workloop
1224 * thread or the starter thread (for loopback) can be
1225 * scheduled on the same processor set as the input thread.
1226 */
1227 if (net_affinity) {
1228 struct thread *tp = inp->input_thr;
1229 u_int32_t tag;
1230 /*
1231 * Randomize to reduce the probability
1232 * of affinity tag namespace collision.
1233 */
1234 read_random(&tag, sizeof (tag));
1235 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1236 thread_reference(tp);
1237 inp->tag = tag;
1238 inp->net_affinity = TRUE;
1239 }
1240 }
1241 } else if (inp == dlil_main_input_thread) {
1242 panic_plain("%s: couldn't create main input thread", __func__);
1243 /* NOTREACHED */
1244 } else {
1245 panic_plain("%s: couldn't create %s input thread", __func__,
1246 if_name(ifp));
1247 /* NOTREACHED */
1248 }
1249 OSAddAtomic(1, &cur_dlil_input_threads);
1250
1251 return (error);
1252 }
1253
1254 static void
1255 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1256 {
1257 struct ifnet *ifp;
1258
1259 VERIFY(current_thread() == inp->input_thr);
1260 VERIFY(inp != dlil_main_input_thread);
1261
1262 OSAddAtomic(-1, &cur_dlil_input_threads);
1263
1264 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1265 lck_grp_free(inp->lck_grp);
1266
1267 inp->input_waiting = 0;
1268 inp->wtot = 0;
1269 bzero(inp->input_name, sizeof (inp->input_name));
1270 ifp = inp->ifp;
1271 inp->ifp = NULL;
1272 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1273 qlimit(&inp->rcvq_pkts) = 0;
1274 bzero(&inp->stats, sizeof (inp->stats));
1275
1276 VERIFY(!inp->net_affinity);
1277 inp->input_thr = THREAD_NULL;
1278 VERIFY(inp->wloop_thr == THREAD_NULL);
1279 VERIFY(inp->poll_thr == THREAD_NULL);
1280 VERIFY(inp->tag == 0);
1281
1282 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1283 bzero(&inp->tstats, sizeof (inp->tstats));
1284 bzero(&inp->pstats, sizeof (inp->pstats));
1285 bzero(&inp->sstats, sizeof (inp->sstats));
1286
1287 net_timerclear(&inp->mode_holdtime);
1288 net_timerclear(&inp->mode_lasttime);
1289 net_timerclear(&inp->sample_holdtime);
1290 net_timerclear(&inp->sample_lasttime);
1291 net_timerclear(&inp->dbg_lasttime);
1292
1293 #if IFNET_INPUT_SANITY_CHK
1294 inp->input_mbuf_cnt = 0;
1295 #endif /* IFNET_INPUT_SANITY_CHK */
1296
1297 if (dlil_verbose) {
1298 printf("%s: input thread terminated\n",
1299 if_name(ifp));
1300 }
1301
1302 /* for the extra refcnt from kernel_thread_start() */
1303 thread_deallocate(current_thread());
1304
1305 /* this is the end */
1306 thread_terminate(current_thread());
1307 /* NOTREACHED */
1308 }
1309
1310 static kern_return_t
1311 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1312 {
1313 thread_affinity_policy_data_t policy;
1314
1315 bzero(&policy, sizeof (policy));
1316 policy.affinity_tag = tag;
1317 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1318 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1319 }
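/*
 * Added note (assumption from the surrounding code, not original text):
 * the randomly chosen tag stored in inp->tag by dlil_create_input_thread()
 * is expected to be applied to the matching workloop or starter thread
 * via dlil_affinity_set() as well, so that THREAD_AFFINITY_POLICY groups
 * those threads with the input thread on the same processor set.
 */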
1320
1321 void
1322 dlil_init(void)
1323 {
1324 thread_t thread = THREAD_NULL;
1325
1326 /*
1327 * The following fields must be 64-bit aligned for atomic operations.
1328 */
1329 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1330 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1331 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1332 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1333 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1334 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1335 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1336 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1337 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1338 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1339 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1340 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1341 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1342 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1343 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1344
1345 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1346 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1347 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1348 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1349 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1350 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1351 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1352 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1353 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1354 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1355 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1356 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1357 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1358 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1359 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1360
1361 /*
1362 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1363 */
1364 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1365 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1366 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1367 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1368 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1369 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1370 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1371 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1372 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1373 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1374 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1375 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1376 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1377
1378 /*
1379 * ... as well as the mbuf checksum flags counterparts.
1380 */
1381 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1382 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1383 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1384 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1385 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1386 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1387 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1388 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1389 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1390 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1391
1392 /*
1393 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1394 */
1395 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1396 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1397
1398 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1399 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1400 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1401 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1402
1403 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1404 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1405 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1406
1407 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1408 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1409 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1410 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1411 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1412 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1413 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1414 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1415 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1416 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1417 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1418 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1419 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1420 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1421 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1422 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1423
1424 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1425 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1426 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1427 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1428 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1429 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1430 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1431
1432 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1433 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1434
1435 PE_parse_boot_argn("net_affinity", &net_affinity,
1436 sizeof (net_affinity));
1437
1438 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1439
1440 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
1441
1442 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1443
1444 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1445 sizeof (struct dlil_ifnet_dbg);
1446 /* Enforce 64-bit alignment for dlil_ifnet structure */
1447 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1448 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1449 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1450 0, DLIF_ZONE_NAME);
1451 if (dlif_zone == NULL) {
1452 panic_plain("%s: failed allocating %s", __func__,
1453 DLIF_ZONE_NAME);
1454 /* NOTREACHED */
1455 }
1456 zone_change(dlif_zone, Z_EXPAND, TRUE);
1457 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1458
1459 dlif_filt_size = sizeof (struct ifnet_filter);
1460 dlif_filt_zone = zinit(dlif_filt_size,
1461 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1462 if (dlif_filt_zone == NULL) {
1463 panic_plain("%s: failed allocating %s", __func__,
1464 DLIF_FILT_ZONE_NAME);
1465 /* NOTREACHED */
1466 }
1467 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1468 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1469
1470 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1471 dlif_phash_zone = zinit(dlif_phash_size,
1472 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1473 if (dlif_phash_zone == NULL) {
1474 panic_plain("%s: failed allocating %s", __func__,
1475 DLIF_PHASH_ZONE_NAME);
1476 /* NOTREACHED */
1477 }
1478 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1479 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1480
1481 dlif_proto_size = sizeof (struct if_proto);
1482 dlif_proto_zone = zinit(dlif_proto_size,
1483 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1484 if (dlif_proto_zone == NULL) {
1485 panic_plain("%s: failed allocating %s", __func__,
1486 DLIF_PROTO_ZONE_NAME);
1487 /* NOTREACHED */
1488 }
1489 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1490 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1491
1492 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1493 /* Enforce 64-bit alignment for tcpstat_local structure */
1494 dlif_tcpstat_bufsize =
1495 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1496 dlif_tcpstat_bufsize =
1497 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1498 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1499 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1500 DLIF_TCPSTAT_ZONE_NAME);
1501 if (dlif_tcpstat_zone == NULL) {
1502 panic_plain("%s: failed allocating %s", __func__,
1503 DLIF_TCPSTAT_ZONE_NAME);
1504 /* NOTREACHED */
1505 }
1506 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1507 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1508
1509 dlif_udpstat_size = sizeof (struct udpstat_local);
1510 /* Enforce 64-bit alignment for udpstat_local structure */
1511 dlif_udpstat_bufsize =
1512 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1513 dlif_udpstat_bufsize =
1514 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1515 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1516 DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1517 DLIF_UDPSTAT_ZONE_NAME);
1518 if (dlif_udpstat_zone == NULL) {
1519 panic_plain("%s: failed allocating %s", __func__,
1520 DLIF_UDPSTAT_ZONE_NAME);
1521 /* NOTREACHED */
1522 }
1523 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1524 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1525
1526 ifnet_llreach_init();
1527
1528 TAILQ_INIT(&dlil_ifnet_head);
1529 TAILQ_INIT(&ifnet_head);
1530 TAILQ_INIT(&ifnet_detaching_head);
1531 TAILQ_INIT(&ifnet_ordered_head);
1532
1533 /* Setup the lock groups we will use */
1534 dlil_grp_attributes = lck_grp_attr_alloc_init();
1535
1536 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1537 dlil_grp_attributes);
1538 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1539 dlil_grp_attributes);
1540 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1541 dlil_grp_attributes);
1542 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1543 dlil_grp_attributes);
1544 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1545 dlil_grp_attributes);
1546
1547 /* Setup the lock attributes we will use */
1548 dlil_lck_attributes = lck_attr_alloc_init();
1549
1550 ifnet_lock_attr = lck_attr_alloc_init();
1551
1552 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1553 dlil_lck_attributes);
1554 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1555
1556 /* Setup interface flow control related items */
1557 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1558
1559 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1560 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1561 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1562 if (ifnet_fc_zone == NULL) {
1563 panic_plain("%s: failed allocating %s", __func__,
1564 IFNET_FC_ZONE_NAME);
1565 /* NOTREACHED */
1566 }
1567 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1568 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1569
1570 /* Initialize interface address subsystem */
1571 ifa_init();
1572
1573 #if PF
1574 /* Initialize the packet filter */
1575 pfinit();
1576 #endif /* PF */
1577
1578 /* Initialize queue algorithms */
1579 classq_init();
1580
1581 /* Initialize packet schedulers */
1582 pktsched_init();
1583
1584 /* Initialize flow advisory subsystem */
1585 flowadv_init();
1586
1587 /* Initialize the pktap virtual interface */
1588 pktap_init();
1589
1590 /* Initialize the service class to dscp map */
1591 net_qos_map_init();
1592
1593 #if DEBUG
1594 /* Run self-tests */
1595 dlil_verify_sum16();
1596 #endif /* DEBUG */
1597
1598 /*
1599 * Create and start up the main DLIL input thread and the interface
1600 * detacher thread once everything is initialized.
1601 */
1602 dlil_create_input_thread(NULL, dlil_main_input_thread);
1603
1604 if (kernel_thread_start(ifnet_detacher_thread_func,
1605 NULL, &thread) != KERN_SUCCESS) {
1606 panic_plain("%s: couldn't create detacher thread", __func__);
1607 /* NOTREACHED */
1608 }
1609 thread_deallocate(thread);
1610 }
1611
1612 static void
1613 if_flt_monitor_busy(struct ifnet *ifp)
1614 {
1615 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1616
1617 ++ifp->if_flt_busy;
1618 VERIFY(ifp->if_flt_busy != 0);
1619 }
1620
1621 static void
1622 if_flt_monitor_unbusy(struct ifnet *ifp)
1623 {
1624 if_flt_monitor_leave(ifp);
1625 }
1626
1627 static void
1628 if_flt_monitor_enter(struct ifnet *ifp)
1629 {
1630 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1631
1632 while (ifp->if_flt_busy) {
1633 ++ifp->if_flt_waiters;
1634 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1635 (PZERO - 1), "if_flt_monitor", NULL);
1636 }
1637 if_flt_monitor_busy(ifp);
1638 }
1639
1640 static void
1641 if_flt_monitor_leave(struct ifnet *ifp)
1642 {
1643 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1644
1645 VERIFY(ifp->if_flt_busy != 0);
1646 --ifp->if_flt_busy;
1647
1648 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1649 ifp->if_flt_waiters = 0;
1650 wakeup(&ifp->if_flt_head);
1651 }
1652 }
1653
1654 __private_extern__ int
1655 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1656 interface_filter_t *filter_ref, u_int32_t flags)
1657 {
1658 int retval = 0;
1659 struct ifnet_filter *filter = NULL;
1660
1661 ifnet_head_lock_shared();
1662 /* Check that the interface is in the global list */
1663 if (!ifnet_lookup(ifp)) {
1664 retval = ENXIO;
1665 goto done;
1666 }
1667
1668 filter = zalloc(dlif_filt_zone);
1669 if (filter == NULL) {
1670 retval = ENOMEM;
1671 goto done;
1672 }
1673 bzero(filter, dlif_filt_size);
1674
1675 /* refcnt held above during lookup */
1676 filter->filt_flags = flags;
1677 filter->filt_ifp = ifp;
1678 filter->filt_cookie = if_filter->iff_cookie;
1679 filter->filt_name = if_filter->iff_name;
1680 filter->filt_protocol = if_filter->iff_protocol;
1681 /*
1682 * Do not install filter callbacks for internal coproc interface
1683 */
1684 if (!IFNET_IS_INTCOPROC(ifp)) {
1685 filter->filt_input = if_filter->iff_input;
1686 filter->filt_output = if_filter->iff_output;
1687 filter->filt_event = if_filter->iff_event;
1688 filter->filt_ioctl = if_filter->iff_ioctl;
1689 }
1690 filter->filt_detached = if_filter->iff_detached;
1691
1692 lck_mtx_lock(&ifp->if_flt_lock);
1693 if_flt_monitor_enter(ifp);
1694
1695 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1696 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1697
1698 if_flt_monitor_leave(ifp);
1699 lck_mtx_unlock(&ifp->if_flt_lock);
1700
1701 *filter_ref = filter;
1702
1703 /*
1704 * Bump filter count and route_generation ID to let TCP
1705 * know it shouldn't do TSO on this connection
1706 */
1707 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1708 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1709 routegenid_update();
1710 }
1711 if (dlil_verbose) {
1712 printf("%s: %s filter attached\n", if_name(ifp),
1713 if_filter->iff_name);
1714 }
1715 done:
1716 ifnet_head_done();
1717 if (retval != 0 && ifp != NULL) {
1718 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1719 if_name(ifp), if_filter->iff_name, retval);
1720 }
1721 if (retval != 0 && filter != NULL)
1722 zfree(dlif_filt_zone, filter);
1723
1724 return (retval);
1725 }
1726
1727 static int
1728 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1729 {
1730 int retval = 0;
1731
1732 if (detached == 0) {
1733 ifnet_t ifp = NULL;
1734
1735 ifnet_head_lock_shared();
1736 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1737 interface_filter_t entry = NULL;
1738
1739 lck_mtx_lock(&ifp->if_flt_lock);
1740 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1741 if (entry != filter || entry->filt_skip)
1742 continue;
1743 /*
1744 * We've found a match; since it's possible
1745 * that the thread gets blocked in the monitor,
1746 * we do the lock dance. Interface should
1747 * not be detached since we still have a use
1748 * count held during filter attach.
1749 */
1750 entry->filt_skip = 1; /* skip input/output */
1751 lck_mtx_unlock(&ifp->if_flt_lock);
1752 ifnet_head_done();
1753
1754 lck_mtx_lock(&ifp->if_flt_lock);
1755 if_flt_monitor_enter(ifp);
1756 lck_mtx_assert(&ifp->if_flt_lock,
1757 LCK_MTX_ASSERT_OWNED);
1758
1759 /* Remove the filter from the list */
1760 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1761 filt_next);
1762
1763 if_flt_monitor_leave(ifp);
1764 lck_mtx_unlock(&ifp->if_flt_lock);
1765 if (dlil_verbose) {
1766 printf("%s: %s filter detached\n",
1767 if_name(ifp), filter->filt_name);
1768 }
1769 goto destroy;
1770 }
1771 lck_mtx_unlock(&ifp->if_flt_lock);
1772 }
1773 ifnet_head_done();
1774
1775 /* filter parameter is not a valid filter ref */
1776 retval = EINVAL;
1777 goto done;
1778 }
1779
1780 if (dlil_verbose)
1781 printf("%s filter detached\n", filter->filt_name);
1782
1783 destroy:
1784
1785 /* Call the detached function if there is one */
1786 if (filter->filt_detached)
1787 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1788
1789 /*
1790 * Decrease filter count and update route_generation ID to let TCP
1791 * know it should reevaluate doing TSO or not
1792 */
1793 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1794 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1795 routegenid_update();
1796 }
1797
1798 /* Free the filter */
1799 zfree(dlif_filt_zone, filter);
1800 filter = NULL;
1801 done:
1802 if (retval != 0 && filter != NULL) {
1803 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1804 filter->filt_name, retval);
1805 }
1806
1807 return (retval);
1808 }
1809
1810 __private_extern__ void
1811 dlil_detach_filter(interface_filter_t filter)
1812 {
1813 if (filter == NULL)
1814 return;
1815 dlil_detach_filter_internal(filter, 0);
1816 }
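
As a usage sketch (not part of this file), a network extension would normally reach dlil_attach_filter() and dlil_detach_filter() through the public iflt_attach()/iflt_detach() KPI declared in net/kpi_interfacefilter.h; the filter name, callback names, and fields chosen below are illustrative only.

#include <net/kpi_interfacefilter.h>	/* iff_filter, iflt_attach/iflt_detach */

static interface_filter_t my_filter_ref;

/* Inspect inbound packets; returning 0 passes them up unchanged. */
static errno_t
my_filt_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, data, frame_ptr)
	return (0);
}

/* Invoked once the filter is off the list and safe to tear down. */
static void
my_filt_detached(void *cookie, ifnet_t ifp)
{
#pragma unused(cookie, ifp)
}

static errno_t
my_attach_filter(ifnet_t ifp)
{
	struct iff_filter flt = {
		.iff_name	= "com.example.myfilter",	/* hypothetical */
		.iff_protocol	= 0,		/* 0 means all protocols */
		.iff_input	= my_filt_input,
		.iff_detached	= my_filt_detached,
	};

	return (iflt_attach(ifp, &flt, &my_filter_ref));
}

static void
my_detach_filter(void)
{
	if (my_filter_ref != NULL) {
		iflt_detach(my_filter_ref);
		my_filter_ref = NULL;
	}
}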
1817
1818 /*
1819 * Main input thread:
1820 *
1821 * a) handles all inbound packets for lo0
1822 * b) handles all inbound packets for interfaces with no dedicated
1823 * input thread (e.g. anything but Ethernet/PDP or those that support
1824 * opportunistic polling.)
1825 * c) protocol registrations
1826 * d) packet injections
1827 */
1828 __attribute__((noreturn))
1829 static void
1830 dlil_main_input_thread_func(void *v, wait_result_t w)
1831 {
1832 #pragma unused(w)
1833 struct dlil_main_threading_info *inpm = v;
1834 struct dlil_threading_info *inp = v;
1835
1836 VERIFY(inp == dlil_main_input_thread);
1837 VERIFY(inp->ifp == NULL);
1838 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1839
1840 while (1) {
1841 struct mbuf *m = NULL, *m_loop = NULL;
1842 u_int32_t m_cnt, m_cnt_loop;
1843 boolean_t proto_req;
1844
1845 lck_mtx_lock_spin(&inp->input_lck);
1846
1847 /* Wait until there is work to be done */
1848 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1849 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1850 (void) msleep(&inp->input_waiting, &inp->input_lck,
1851 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1852 }
1853
1854 inp->input_waiting |= DLIL_INPUT_RUNNING;
1855 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1856
1857 /* Main input thread cannot be terminated */
1858 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
1859
1860 proto_req = (inp->input_waiting &
1861 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
1862
1863 /* Packets for non-dedicated interfaces other than lo0 */
1864 m_cnt = qlen(&inp->rcvq_pkts);
1865 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
1866
1867 /* Packets exclusive to lo0 */
1868 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
1869 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
1870
1871 inp->wtot = 0;
1872
1873 lck_mtx_unlock(&inp->input_lck);
1874
1875 /*
1876 * NOTE warning %%% attention !!!!
1877 * We should think about putting some thread starvation
1878 * safeguards if we deal with long chains of packets.
1879 */
1880 if (m_loop != NULL)
1881 dlil_input_packet_list_extended(lo_ifp, m_loop,
1882 m_cnt_loop, inp->mode);
1883
1884 if (m != NULL)
1885 dlil_input_packet_list_extended(NULL, m,
1886 m_cnt, inp->mode);
1887
1888 if (proto_req)
1889 proto_input_run();
1890 }
1891
1892 /* NOTREACHED */
1893 VERIFY(0); /* we should never get here */
1894 }
1895
1896 /*
1897 * Input thread for interfaces with legacy input model.
1898 */
1899 static void
1900 dlil_input_thread_func(void *v, wait_result_t w)
1901 {
1902 #pragma unused(w)
1903 char thread_name[MAXTHREADNAMESIZE];
1904 struct dlil_threading_info *inp = v;
1905 struct ifnet *ifp = inp->ifp;
1906
1907 /* Construct the name for this thread, and then apply it. */
1908 bzero(thread_name, sizeof(thread_name));
1909 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
1910 thread_set_thread_name(inp->input_thr, thread_name);
1911
1912 VERIFY(inp != dlil_main_input_thread);
1913 VERIFY(ifp != NULL);
1914 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
1915 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1916
1917 while (1) {
1918 struct mbuf *m = NULL;
1919 u_int32_t m_cnt;
1920
1921 lck_mtx_lock_spin(&inp->input_lck);
1922
1923 /* Wait until there is work to be done */
1924 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1925 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1926 (void) msleep(&inp->input_waiting, &inp->input_lck,
1927 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1928 }
1929
1930 inp->input_waiting |= DLIL_INPUT_RUNNING;
1931 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1932
1933 /*
1934 * Protocol registration and injection must always use
1935 * the main input thread; in theory the latter can utilize
1936 * the corresponding input thread where the packet arrived
1937 * on, but that requires our knowing the interface in advance
1938 * (and the benefits might not be worth the trouble.)
1939 */
1940 VERIFY(!(inp->input_waiting &
1941 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
1942
1943 /* Packets for this interface */
1944 m_cnt = qlen(&inp->rcvq_pkts);
1945 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
1946
1947 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1948 lck_mtx_unlock(&inp->input_lck);
1949
1950 /* Free up pending packets */
1951 if (m != NULL)
1952 mbuf_freem_list(m);
1953
1954 dlil_terminate_input_thread(inp);
1955 /* NOTREACHED */
1956 return;
1957 }
1958
1959 inp->wtot = 0;
1960
1961 dlil_input_stats_sync(ifp, inp);
1962
1963 lck_mtx_unlock(&inp->input_lck);
1964
1965 /*
1966 * NOTE warning %%% attention !!!!
1967 * We should think about putting some thread starvation
1968 * safeguards if we deal with long chains of packets.
1969 */
1970 if (m != NULL)
1971 dlil_input_packet_list_extended(NULL, m,
1972 m_cnt, inp->mode);
1973 }
1974
1975 /* NOTREACHED */
1976 VERIFY(0); /* we should never get here */
1977 }
1978
1979 /*
1980 * Input thread for interfaces with opportunistic polling input model.
1981 */
1982 static void
1983 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
1984 {
1985 #pragma unused(w)
1986 struct dlil_threading_info *inp = v;
1987 struct ifnet *ifp = inp->ifp;
1988 struct timespec ts;
1989
1990 VERIFY(inp != dlil_main_input_thread);
1991 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
1992
1993 while (1) {
1994 struct mbuf *m = NULL;
1995 u_int32_t m_cnt, m_size, poll_req = 0;
1996 ifnet_model_t mode;
1997 struct timespec now, delta;
1998 u_int64_t ival;
1999
2000 lck_mtx_lock_spin(&inp->input_lck);
2001
2002 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
2003 ival = IF_RXPOLL_INTERVALTIME_MIN;
2004
2005 /* Link parameters changed? */
2006 if (ifp->if_poll_update != 0) {
2007 ifp->if_poll_update = 0;
2008 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2009 }
2010
2011 /* Current operating mode */
2012 mode = inp->mode;
2013
2014 /* Wait until there is work to be done */
2015 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2016 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2017 (void) msleep(&inp->input_waiting, &inp->input_lck,
2018 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2019 }
2020
2021 inp->input_waiting |= DLIL_INPUT_RUNNING;
2022 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2023
2024 /*
2025 * Protocol registration and injection must always use
2026 * the main input thread; in theory the latter can utilize
2027 * the corresponding input thread where the packet arrived
2028 * on, but that requires our knowing the interface in advance
2029 * (and the benefits might not be worth the trouble.)
2030 */
2031 VERIFY(!(inp->input_waiting &
2032 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2033
2034 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2035 /* Free up pending packets */
2036 _flushq(&inp->rcvq_pkts);
2037 lck_mtx_unlock(&inp->input_lck);
2038
2039 dlil_terminate_input_thread(inp);
2040 /* NOTREACHED */
2041 return;
2042 }
2043
2044 /* Total count of all packets */
2045 m_cnt = qlen(&inp->rcvq_pkts);
2046
2047 /* Total bytes of all packets */
2048 m_size = qsize(&inp->rcvq_pkts);
2049
2050 /* Packets for this interface */
2051 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2052 VERIFY(m != NULL || m_cnt == 0);
2053
2054 nanouptime(&now);
2055 if (!net_timerisset(&inp->sample_lasttime))
2056 *(&inp->sample_lasttime) = *(&now);
2057
2058 net_timersub(&now, &inp->sample_lasttime, &delta);
2059 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2060 u_int32_t ptot, btot;
2061
2062 /* Accumulate statistics for current sampling */
2063 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2064
2065 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2066 goto skip;
2067
2068 *(&inp->sample_lasttime) = *(&now);
2069
2070 /* Calculate min/max of inbound bytes */
2071 btot = (u_int32_t)inp->sstats.bytes;
2072 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2073 inp->rxpoll_bmin = btot;
2074 if (btot > inp->rxpoll_bmax)
2075 inp->rxpoll_bmax = btot;
2076
2077 /* Calculate EWMA of inbound bytes */
2078 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2079
2080 /* Calculate min/max of inbound packets */
2081 ptot = (u_int32_t)inp->sstats.packets;
2082 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2083 inp->rxpoll_pmin = ptot;
2084 if (ptot > inp->rxpoll_pmax)
2085 inp->rxpoll_pmax = ptot;
2086
2087 /* Calculate EWMA of inbound packets */
2088 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2089
2090 /* Reset sampling statistics */
2091 PKTCNTR_CLEAR(&inp->sstats);
2092
2093 /* Calculate EWMA of wakeup requests */
2094 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2095 inp->wtot = 0;
2096
2097 if (dlil_verbose) {
2098 if (!net_timerisset(&inp->dbg_lasttime))
2099 *(&inp->dbg_lasttime) = *(&now);
2100 net_timersub(&now, &inp->dbg_lasttime, &delta);
2101 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2102 *(&inp->dbg_lasttime) = *(&now);
2103 printf("%s: [%s] pkts avg %d max %d "
2104 "limits [%d/%d], wreq avg %d "
2105 "limits [%d/%d], bytes avg %d "
2106 "limits [%d/%d]\n", if_name(ifp),
2107 (inp->mode ==
2108 IFNET_MODEL_INPUT_POLL_ON) ?
2109 "ON" : "OFF", inp->rxpoll_pavg,
2110 inp->rxpoll_pmax,
2111 inp->rxpoll_plowat,
2112 inp->rxpoll_phiwat,
2113 inp->rxpoll_wavg,
2114 inp->rxpoll_wlowat,
2115 inp->rxpoll_whiwat,
2116 inp->rxpoll_bavg,
2117 inp->rxpoll_blowat,
2118 inp->rxpoll_bhiwat);
2119 }
2120 }
2121
2122 /* Perform mode transition, if necessary */
2123 if (!net_timerisset(&inp->mode_lasttime))
2124 *(&inp->mode_lasttime) = *(&now);
2125
2126 net_timersub(&now, &inp->mode_lasttime, &delta);
2127 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2128 goto skip;
2129
2130 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2131 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2132 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2133 mode = IFNET_MODEL_INPUT_POLL_OFF;
2134 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2135 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2136 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2137 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2138 mode = IFNET_MODEL_INPUT_POLL_ON;
2139 }
2140
2141 if (mode != inp->mode) {
2142 inp->mode = mode;
2143 *(&inp->mode_lasttime) = *(&now);
2144 poll_req++;
2145 }
2146 }
2147 skip:
2148 dlil_input_stats_sync(ifp, inp);
2149
2150 lck_mtx_unlock(&inp->input_lck);
2151
2152 /*
2153 * If there's a mode change and interface is still attached,
2154 * perform a downcall to the driver for the new mode. Also
2155 * hold an IO refcnt on the interface to prevent it from
2156 * being detached (will be released below.)
2157 */
2158 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2159 struct ifnet_model_params p = { mode, { 0 } };
2160 errno_t err;
2161
2162 if (dlil_verbose) {
2163 printf("%s: polling is now %s, "
2164 "pkts avg %d max %d limits [%d/%d], "
2165 "wreq avg %d limits [%d/%d], "
2166 "bytes avg %d limits [%d/%d]\n",
2167 if_name(ifp),
2168 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2169 "ON" : "OFF", inp->rxpoll_pavg,
2170 inp->rxpoll_pmax, inp->rxpoll_plowat,
2171 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2172 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2173 inp->rxpoll_bavg, inp->rxpoll_blowat,
2174 inp->rxpoll_bhiwat);
2175 }
2176
2177 if ((err = ((*ifp->if_input_ctl)(ifp,
2178 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
2179 printf("%s: error setting polling mode "
2180 "to %s (%d)\n", if_name(ifp),
2181 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2182 "ON" : "OFF", err);
2183 }
2184
2185 switch (mode) {
2186 case IFNET_MODEL_INPUT_POLL_OFF:
2187 ifnet_set_poll_cycle(ifp, NULL);
2188 inp->rxpoll_offreq++;
2189 if (err != 0)
2190 inp->rxpoll_offerr++;
2191 break;
2192
2193 case IFNET_MODEL_INPUT_POLL_ON:
2194 net_nsectimer(&ival, &ts);
2195 ifnet_set_poll_cycle(ifp, &ts);
2196 ifnet_poll(ifp);
2197 inp->rxpoll_onreq++;
2198 if (err != 0)
2199 inp->rxpoll_onerr++;
2200 break;
2201
2202 default:
2203 VERIFY(0);
2204 /* NOTREACHED */
2205 }
2206
2207 /* Release the IO refcnt */
2208 ifnet_decr_iorefcnt(ifp);
2209 }
2210
2211 /*
2212 * NOTE warning %%% attention !!!!
2213 * We should think about putting some thread starvation
2214 * safeguards if we deal with long chains of packets.
2215 */
2216 if (m != NULL)
2217 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2218 }
2219
2220 /* NOTREACHED */
2221 VERIFY(0); /* we should never get here */
2222 }
2223
2224 /*
2225 * Must be called on an attached ifnet (caller is expected to check.)
2226 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2227 */
2228 errno_t
2229 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2230 boolean_t locked)
2231 {
2232 struct dlil_threading_info *inp;
2233 u_int64_t sample_holdtime, inbw;
2234
2235 VERIFY(ifp != NULL);
2236 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2237 return (ENXIO);
2238
2239 if (p != NULL) {
2240 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2241 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2242 return (EINVAL);
2243 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2244 p->packets_lowat >= p->packets_hiwat)
2245 return (EINVAL);
2246 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2247 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2248 return (EINVAL);
2249 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2250 p->bytes_lowat >= p->bytes_hiwat)
2251 return (EINVAL);
2252 if (p->interval_time != 0 &&
2253 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2254 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2255 }
2256
2257 if (!locked)
2258 lck_mtx_lock(&inp->input_lck);
2259
2260 lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2261
2262 /*
2263 * Normally, we'd reset the parameters to the auto-tuned values
2264 * if the input thread detects a change in link rate. If the
2265 * driver provides its own parameters right after a link rate
2266 * changes, but before the input thread gets to run, we want to
2267 * make sure to keep the driver's values. Clearing if_poll_update
2268 * will achieve that.
2269 */
2270 if (p != NULL && !locked && ifp->if_poll_update != 0)
2271 ifp->if_poll_update = 0;
2272
2273 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2274 sample_holdtime = 0; /* polling is disabled */
2275 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2276 inp->rxpoll_blowat = 0;
2277 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2278 inp->rxpoll_bhiwat = (u_int32_t)-1;
2279 inp->rxpoll_plim = 0;
2280 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2281 } else {
2282 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2283 u_int64_t ival;
2284 unsigned int n, i;
2285
2286 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2287 if (inbw < rxpoll_tbl[i].speed)
2288 break;
2289 n = i;
2290 }
2291 /* auto-tune if caller didn't specify a value */
2292 plowat = ((p == NULL || p->packets_lowat == 0) ?
2293 rxpoll_tbl[n].plowat : p->packets_lowat);
2294 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2295 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2296 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2297 rxpoll_tbl[n].blowat : p->bytes_lowat);
2298 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2299 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2300 plim = ((p == NULL || p->packets_limit == 0) ?
2301 if_rxpoll_max : p->packets_limit);
2302 ival = ((p == NULL || p->interval_time == 0) ?
2303 if_rxpoll_interval_time : p->interval_time);
2304
2305 VERIFY(plowat != 0 && phiwat != 0);
2306 VERIFY(blowat != 0 && bhiwat != 0);
2307 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2308
2309 sample_holdtime = if_rxpoll_sample_holdtime;
2310 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2311 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2312 inp->rxpoll_plowat = plowat;
2313 inp->rxpoll_phiwat = phiwat;
2314 inp->rxpoll_blowat = blowat;
2315 inp->rxpoll_bhiwat = bhiwat;
2316 inp->rxpoll_plim = plim;
2317 inp->rxpoll_ival = ival;
2318 }
2319
2320 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2321 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2322
2323 if (dlil_verbose) {
2324 printf("%s: speed %llu bps, sample per %llu nsec, "
2325 "poll interval %llu nsec, pkts per poll %u, "
2326 "pkt limits [%u/%u], wreq limits [%u/%u], "
2327 "bytes limits [%u/%u]\n", if_name(ifp),
2328 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2329 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2330 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2331 }
2332
2333 if (!locked)
2334 lck_mtx_unlock(&inp->input_lck);
2335
2336 return (0);
2337 }
2338
2339 /*
2340 * Must be called on an attached ifnet (caller is expected to check.)
2341 */
2342 errno_t
2343 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2344 {
2345 struct dlil_threading_info *inp;
2346
2347 VERIFY(ifp != NULL && p != NULL);
2348 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2349 return (ENXIO);
2350
2351 bzero(p, sizeof (*p));
2352
2353 lck_mtx_lock(&inp->input_lck);
2354 p->packets_limit = inp->rxpoll_plim;
2355 p->packets_lowat = inp->rxpoll_plowat;
2356 p->packets_hiwat = inp->rxpoll_phiwat;
2357 p->bytes_lowat = inp->rxpoll_blowat;
2358 p->bytes_hiwat = inp->rxpoll_bhiwat;
2359 p->interval_time = inp->rxpoll_ival;
2360 lck_mtx_unlock(&inp->input_lck);
2361
2362 return (0);
2363 }
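
A minimal sketch of supplying explicit polling thresholds to the routine above, assuming the interface was created with IFEF_RXPOLL; fields left at zero are auto-tuned from rxpoll_tbl as dlil_rxpoll_set_params() describes, and the numbers used here are illustrative only.

static errno_t
my_set_poll_thresholds(struct ifnet *ifp)
{
	struct ifnet_poll_params p;

	bzero(&p, sizeof (p));
	p.packets_lowat = 8;		/* lowat/hiwat must both be set ... */
	p.packets_hiwat = 64;		/* ... and lowat must stay below hiwat */
	p.bytes_lowat = 2 * 1024;
	p.bytes_hiwat = 64 * 1024;
	p.interval_time = 0;		/* 0: keep if_rxpoll_interval_time */

	/* FALSE: the input thread lock is not already held */
	return (dlil_rxpoll_set_params(ifp, &p, FALSE));
}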
2364
2365 errno_t
2366 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2367 const struct ifnet_stat_increment_param *s)
2368 {
2369 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2370 }
2371
2372 errno_t
2373 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2374 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2375 {
2376 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2377 }
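
A hypothetical driver receive path built on the two entry points above: the extended variant requires the stats, and packets_in must equal the chain length, as ifnet_input_common() below asserts.

static errno_t
my_driver_rx_deliver(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t pkt_count, u_int32_t byte_count)
{
	struct ifnet_stat_increment_param stats;

	/* each mbuf's rcvif is expected to already point at ifp */
	bzero(&stats, sizeof (stats));
	stats.packets_in = pkt_count;	/* must match the chain length */
	stats.bytes_in = byte_count;

	/* DLIL takes ownership of the chain, even on error */
	return (ifnet_input_extended(ifp, head, tail, &stats));
}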
2378
2379 static errno_t
2380 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2381 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2382 {
2383 ifnet_input_handler_func handler_func;
2384 struct ifnet_stat_increment_param _s;
2385 u_int32_t m_cnt = 0, m_size = 0;
2386 struct mbuf *last;
2387 errno_t err = 0;
2388
2389 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2390 if (m_head != NULL)
2391 mbuf_freem_list(m_head);
2392 return (EINVAL);
2393 }
2394
2395 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2396 VERIFY(m_tail == NULL || ext);
2397 VERIFY(s != NULL || !ext);
2398
2399 /*
2400 * Drop the packet(s) if the parameters are invalid, or if the
2401 * interface is no longer attached; else hold an IO refcnt to
2402 * prevent it from being detached (will be released below.)
2403 */
2404 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2405 if (m_head != NULL)
2406 mbuf_freem_list(m_head);
2407 return (EINVAL);
2408 }
2409
2410 handler_func = ifp->if_input_handler;
2411 VERIFY(handler_func != NULL);
2412
2413 if (m_tail == NULL) {
2414 last = m_head;
2415 while (m_head != NULL) {
2416 #if IFNET_INPUT_SANITY_CHK
2417 if (dlil_input_sanity_check != 0)
2418 DLIL_INPUT_CHECK(last, ifp);
2419 #endif /* IFNET_INPUT_SANITY_CHK */
2420 m_cnt++;
2421 m_size += m_length(last);
2422 if (mbuf_nextpkt(last) == NULL)
2423 break;
2424 last = mbuf_nextpkt(last);
2425 }
2426 m_tail = last;
2427 } else {
2428 #if IFNET_INPUT_SANITY_CHK
2429 if (dlil_input_sanity_check != 0) {
2430 last = m_head;
2431 while (1) {
2432 DLIL_INPUT_CHECK(last, ifp);
2433 m_cnt++;
2434 m_size += m_length(last);
2435 if (mbuf_nextpkt(last) == NULL)
2436 break;
2437 last = mbuf_nextpkt(last);
2438 }
2439 } else {
2440 m_cnt = s->packets_in;
2441 m_size = s->bytes_in;
2442 last = m_tail;
2443 }
2444 #else
2445 m_cnt = s->packets_in;
2446 m_size = s->bytes_in;
2447 last = m_tail;
2448 #endif /* IFNET_INPUT_SANITY_CHK */
2449 }
2450
2451 if (last != m_tail) {
2452 panic_plain("%s: invalid input packet chain for %s, "
2453 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2454 m_tail, last);
2455 }
2456
2457 /*
2458 * Assert packet count only for the extended variant, for backwards
2459 * compatibility, since this came directly from the device driver.
2460 * Relax this assertion for input bytes, as the driver may have
2461 * included the link-layer headers in the computation; hence
2462 * m_size is just an approximation.
2463 */
2464 if (ext && s->packets_in != m_cnt) {
2465 panic_plain("%s: input packet count mismatch for %s, "
2466 "%d instead of %d\n", __func__, if_name(ifp),
2467 s->packets_in, m_cnt);
2468 }
2469
2470 if (s == NULL) {
2471 bzero(&_s, sizeof (_s));
2472 s = &_s;
2473 } else {
2474 _s = *s;
2475 }
2476 _s.packets_in = m_cnt;
2477 _s.bytes_in = m_size;
2478
2479 err = (*handler_func)(ifp, m_head, m_tail, &_s, poll, current_thread());
2480
2481 if (ifp != lo_ifp) {
2482 /* Release the IO refcnt */
2483 ifnet_decr_iorefcnt(ifp);
2484 }
2485
2486 return (err);
2487 }
2488
2489 errno_t
2490 ifnet_set_input_handler(struct ifnet *ifp, ifnet_input_handler_func fn)
2491 {
2492 return (atomic_test_set_ptr(&ifp->if_input_handler,
2493 dlil_input_handler, fn) ? 0 : EBUSY);
2494 }
2495
2496 void
2497 ifnet_reset_input_handler(struct ifnet *ifp)
2498 {
2499 atomic_set_ptr(&ifp->if_input_handler, dlil_input_handler);
2500 }
2501
2502 errno_t
2503 ifnet_set_output_handler(struct ifnet *ifp, ifnet_output_handler_func fn)
2504 {
2505 return (atomic_test_set_ptr(&ifp->if_output_handler,
2506 dlil_output_handler, fn) ? 0 : EBUSY);
2507 }
2508
2509 void
2510 ifnet_reset_output_handler(struct ifnet *ifp)
2511 {
2512 atomic_set_ptr(&ifp->if_output_handler, dlil_output_handler);
2513 }
2514
2515 errno_t
2516 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2517 {
2518 return (ifp->if_output(ifp, m));
2519 }
2520
2521 errno_t
2522 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2523 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2524 boolean_t poll, struct thread *tp)
2525 {
2526 struct dlil_threading_info *inp;
2527 u_int32_t m_cnt = s->packets_in;
2528 u_int32_t m_size = s->bytes_in;
2529
2530 if ((inp = ifp->if_inp) == NULL)
2531 inp = dlil_main_input_thread;
2532
2533 /*
2534 * If there is a matching DLIL input thread associated with an
2535 * affinity set, associate this thread with the same set. We
2536 * will only do this once.
2537 */
2538 lck_mtx_lock_spin(&inp->input_lck);
2539 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2540 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2541 (poll && inp->poll_thr == THREAD_NULL))) {
2542 u_int32_t tag = inp->tag;
2543
2544 if (poll) {
2545 VERIFY(inp->poll_thr == THREAD_NULL);
2546 inp->poll_thr = tp;
2547 } else {
2548 VERIFY(inp->wloop_thr == THREAD_NULL);
2549 inp->wloop_thr = tp;
2550 }
2551 lck_mtx_unlock(&inp->input_lck);
2552
2553 /* Associate the current thread with the new affinity tag */
2554 (void) dlil_affinity_set(tp, tag);
2555
2556 /*
2557 * Take a reference on the current thread; during detach,
2558 * we will need to refer to it in order to tear down its
2559 * affinity.
2560 */
2561 thread_reference(tp);
2562 lck_mtx_lock_spin(&inp->input_lck);
2563 }
2564
2565 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2566
2567 /*
2568 * Because of loopbacked multicast we cannot stuff the ifp in
2569 * the rcvif of the packet header: loopback (lo0) packets use a
2570 * dedicated list so that we can later associate them with lo_ifp
2571 * on their way up the stack. Packets for other interfaces without
2572 * dedicated input threads go to the regular list.
2573 */
2574 if (m_head != NULL) {
2575 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2576 struct dlil_main_threading_info *inpm =
2577 (struct dlil_main_threading_info *)inp;
2578 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2579 m_cnt, m_size);
2580 } else {
2581 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2582 m_cnt, m_size);
2583 }
2584 }
2585
2586 #if IFNET_INPUT_SANITY_CHK
2587 if (dlil_input_sanity_check != 0) {
2588 u_int32_t count;
2589 struct mbuf *m0;
2590
2591 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2592 count++;
2593
2594 if (count != m_cnt) {
2595 panic_plain("%s: invalid packet count %d "
2596 "(expected %d)\n", if_name(ifp),
2597 count, m_cnt);
2598 /* NOTREACHED */
2599 }
2600
2601 inp->input_mbuf_cnt += m_cnt;
2602 }
2603 #endif /* IFNET_INPUT_SANITY_CHK */
2604
2605 dlil_input_stats_add(s, inp, poll);
2606 /*
2607 * If we're using the main input thread, synchronize the
2608 * stats now since we have the interface context. All
2609 * other cases involving dedicated input threads will
2610 * have their stats synchronized there.
2611 */
2612 if (inp == dlil_main_input_thread)
2613 dlil_input_stats_sync(ifp, inp);
2614
2615 inp->input_waiting |= DLIL_INPUT_WAITING;
2616 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2617 inp->wtot++;
2618 wakeup_one((caddr_t)&inp->input_waiting);
2619 }
2620 lck_mtx_unlock(&inp->input_lck);
2621
2622 return (0);
2623 }
2624
2625 static void
2626 ifnet_start_common(struct ifnet *ifp, int resetfc)
2627 {
2628 if (!(ifp->if_eflags & IFEF_TXSTART))
2629 return;
2630 /*
2631 * If the starter thread is inactive, signal it to do work,
2632 * unless the interface is being flow controlled from below,
2633 * e.g. a virtual interface being flow controlled by a real
2634 * network interface beneath it.
2635 */
2636 lck_mtx_lock_spin(&ifp->if_start_lock);
2637 if (resetfc) {
2638 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2639 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2640 lck_mtx_unlock(&ifp->if_start_lock);
2641 return;
2642 }
2643 ifp->if_start_req++;
2644 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2645 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2646 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2647 ifp->if_start_delayed == 0)) {
2648 wakeup_one((caddr_t)&ifp->if_start_thread);
2649 }
2650 lck_mtx_unlock(&ifp->if_start_lock);
2651 }
2652
2653 void
2654 ifnet_start(struct ifnet *ifp)
2655 {
2656 ifnet_start_common(ifp, 0);
2657 }
2658
2659 static void
2660 ifnet_start_thread_fn(void *v, wait_result_t w)
2661 {
2662 #pragma unused(w)
2663 struct ifnet *ifp = v;
2664 char ifname[IFNAMSIZ + 1];
2665 char thread_name[MAXTHREADNAMESIZE];
2666 struct timespec *ts = NULL;
2667 struct ifclassq *ifq = &ifp->if_snd;
2668 struct timespec delay_start_ts;
2669
2670 /* Construct the name for this thread, and then apply it. */
2671 bzero(thread_name, sizeof(thread_name));
2672 snprintf(thread_name, sizeof(thread_name), "ifnet_start_%s", ifp->if_xname);
2673 thread_set_thread_name(ifp->if_start_thread, thread_name);
2674
2675 /*
2676 * Treat the dedicated starter thread for lo0 as equivalent to
2677 * the driver workloop thread; if net_affinity is enabled for
2678 * the main input thread, associate this starter thread to it
2679 * by binding them with the same affinity tag. This is done
2680 * only once (as we only have one lo_ifp which never goes away.)
2681 */
2682 if (ifp == lo_ifp) {
2683 struct dlil_threading_info *inp = dlil_main_input_thread;
2684 struct thread *tp = current_thread();
2685
2686 lck_mtx_lock(&inp->input_lck);
2687 if (inp->net_affinity) {
2688 u_int32_t tag = inp->tag;
2689
2690 VERIFY(inp->wloop_thr == THREAD_NULL);
2691 VERIFY(inp->poll_thr == THREAD_NULL);
2692 inp->wloop_thr = tp;
2693 lck_mtx_unlock(&inp->input_lck);
2694
2695 /* Associate this thread with the affinity tag */
2696 (void) dlil_affinity_set(tp, tag);
2697 } else {
2698 lck_mtx_unlock(&inp->input_lck);
2699 }
2700 }
2701
2702 snprintf(ifname, sizeof (ifname), "%s_starter",
2703 if_name(ifp));
2704
2705 lck_mtx_lock_spin(&ifp->if_start_lock);
2706
2707 for (;;) {
2708 if (ifp->if_start_thread != NULL)
2709 (void) msleep(&ifp->if_start_thread,
2710 &ifp->if_start_lock,
2711 (PZERO - 1) | PSPIN, ifname, ts);
2712
2713 /* interface is detached? */
2714 if (ifp->if_start_thread == THREAD_NULL) {
2715 ifnet_set_start_cycle(ifp, NULL);
2716 lck_mtx_unlock(&ifp->if_start_lock);
2717 ifnet_purge(ifp);
2718
2719 if (dlil_verbose) {
2720 printf("%s: starter thread terminated\n",
2721 if_name(ifp));
2722 }
2723
2724 /* for the extra refcnt from kernel_thread_start() */
2725 thread_deallocate(current_thread());
2726 /* this is the end */
2727 thread_terminate(current_thread());
2728 /* NOTREACHED */
2729 return;
2730 }
2731
2732 ifp->if_start_active = 1;
2733
2734 for (;;) {
2735 u_int32_t req = ifp->if_start_req;
2736 if (!IFCQ_IS_EMPTY(ifq) &&
2737 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2738 ifp->if_start_delayed == 0 &&
2739 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2740 (ifp->if_eflags & IFEF_DELAY_START)) {
2741 ifp->if_start_delayed = 1;
2742 ifnet_start_delayed++;
2743 break;
2744 } else {
2745 ifp->if_start_delayed = 0;
2746 }
2747 lck_mtx_unlock(&ifp->if_start_lock);
2748
2749 /*
2750 * If no longer attached, don't call start because ifp
2751 * is being destroyed; else hold an IO refcnt to
2752 * prevent the interface from being detached (will be
2753 * released below.)
2754 */
2755 if (!ifnet_is_attached(ifp, 1)) {
2756 lck_mtx_lock_spin(&ifp->if_start_lock);
2757 break;
2758 }
2759
2760 /* invoke the driver's start routine */
2761 ((*ifp->if_start)(ifp));
2762
2763 /*
2764 * Release the io ref count taken by ifnet_is_attached.
2765 */
2766 ifnet_decr_iorefcnt(ifp);
2767
2768 lck_mtx_lock_spin(&ifp->if_start_lock);
2769
2770 /* if there's no pending request, we're done */
2771 if (req == ifp->if_start_req)
2772 break;
2773 }
2774
2775 ifp->if_start_req = 0;
2776 ifp->if_start_active = 0;
2777
2778 /*
2779 * Wakeup N ns from now if rate-controlled by TBR, and if
2780 * there are still packets in the send queue which haven't
2781 * been dequeued so far; else sleep indefinitely (ts = NULL)
2782 * until ifnet_start() is called again.
2783 */
2784 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2785 &ifp->if_start_cycle : NULL);
2786
2787 if (ts == NULL && ifp->if_start_delayed == 1) {
2788 delay_start_ts.tv_sec = 0;
2789 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2790 ts = &delay_start_ts;
2791 }
2792
2793 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2794 ts = NULL;
2795 }
2796
2797 /* NOTREACHED */
2798 }
2799
2800 void
2801 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2802 {
2803 if (ts == NULL)
2804 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2805 else
2806 *(&ifp->if_start_cycle) = *ts;
2807
2808 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2809 printf("%s: restart interval set to %lu nsec\n",
2810 if_name(ifp), ts->tv_nsec);
2811 }
2812
2813 static void
2814 ifnet_poll(struct ifnet *ifp)
2815 {
2816 /*
2817 * If the poller thread is inactive, signal it to do work.
2818 */
2819 lck_mtx_lock_spin(&ifp->if_poll_lock);
2820 ifp->if_poll_req++;
2821 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2822 wakeup_one((caddr_t)&ifp->if_poll_thread);
2823 }
2824 lck_mtx_unlock(&ifp->if_poll_lock);
2825 }
2826
2827 static void
2828 ifnet_poll_thread_fn(void *v, wait_result_t w)
2829 {
2830 #pragma unused(w)
2831 struct dlil_threading_info *inp;
2832 struct ifnet *ifp = v;
2833 char ifname[IFNAMSIZ + 1];
2834 struct timespec *ts = NULL;
2835 struct ifnet_stat_increment_param s;
2836
2837 snprintf(ifname, sizeof (ifname), "%s_poller",
2838 if_name(ifp));
2839 bzero(&s, sizeof (s));
2840
2841 lck_mtx_lock_spin(&ifp->if_poll_lock);
2842
2843 inp = ifp->if_inp;
2844 VERIFY(inp != NULL);
2845
2846 for (;;) {
2847 if (ifp->if_poll_thread != THREAD_NULL) {
2848 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2849 (PZERO - 1) | PSPIN, ifname, ts);
2850 }
2851
2852 /* interface is detached (maybe while asleep)? */
2853 if (ifp->if_poll_thread == THREAD_NULL) {
2854 ifnet_set_poll_cycle(ifp, NULL);
2855 lck_mtx_unlock(&ifp->if_poll_lock);
2856
2857 if (dlil_verbose) {
2858 printf("%s: poller thread terminated\n",
2859 if_name(ifp));
2860 }
2861
2862 /* for the extra refcnt from kernel_thread_start() */
2863 thread_deallocate(current_thread());
2864 /* this is the end */
2865 thread_terminate(current_thread());
2866 /* NOTREACHED */
2867 return;
2868 }
2869
2870 ifp->if_poll_active = 1;
2871 for (;;) {
2872 struct mbuf *m_head, *m_tail;
2873 u_int32_t m_lim, m_cnt, m_totlen;
2874 u_int16_t req = ifp->if_poll_req;
2875
2876 lck_mtx_unlock(&ifp->if_poll_lock);
2877
2878 /*
2879 * If no longer attached, there's nothing to do;
2880 * else hold an IO refcnt to prevent the interface
2881 * from being detached (will be released below.)
2882 */
2883 if (!ifnet_is_attached(ifp, 1)) {
2884 lck_mtx_lock_spin(&ifp->if_poll_lock);
2885 break;
2886 }
2887
2888 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
2889 MAX((qlimit(&inp->rcvq_pkts)),
2890 (inp->rxpoll_phiwat << 2));
2891
2892 if (dlil_verbose > 1) {
2893 printf("%s: polling up to %d pkts, "
2894 "pkts avg %d max %d, wreq avg %d, "
2895 "bytes avg %d\n",
2896 if_name(ifp), m_lim,
2897 inp->rxpoll_pavg, inp->rxpoll_pmax,
2898 inp->rxpoll_wavg, inp->rxpoll_bavg);
2899 }
2900
2901 /* invoke the driver's input poll routine */
2902 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
2903 &m_cnt, &m_totlen));
2904
2905 if (m_head != NULL) {
2906 VERIFY(m_tail != NULL && m_cnt > 0);
2907
2908 if (dlil_verbose > 1) {
2909 printf("%s: polled %d pkts, "
2910 "pkts avg %d max %d, wreq avg %d, "
2911 "bytes avg %d\n",
2912 if_name(ifp), m_cnt,
2913 inp->rxpoll_pavg, inp->rxpoll_pmax,
2914 inp->rxpoll_wavg, inp->rxpoll_bavg);
2915 }
2916
2917 /* stats are required for extended variant */
2918 s.packets_in = m_cnt;
2919 s.bytes_in = m_totlen;
2920
2921 (void) ifnet_input_common(ifp, m_head, m_tail,
2922 &s, TRUE, TRUE);
2923 } else {
2924 if (dlil_verbose > 1) {
2925 printf("%s: no packets, "
2926 "pkts avg %d max %d, wreq avg %d, "
2927 "bytes avg %d\n",
2928 if_name(ifp), inp->rxpoll_pavg,
2929 inp->rxpoll_pmax, inp->rxpoll_wavg,
2930 inp->rxpoll_bavg);
2931 }
2932
2933 (void) ifnet_input_common(ifp, NULL, NULL,
2934 NULL, FALSE, TRUE);
2935 }
2936
2937 /* Release the io ref count */
2938 ifnet_decr_iorefcnt(ifp);
2939
2940 lck_mtx_lock_spin(&ifp->if_poll_lock);
2941
2942 /* if there's no pending request, we're done */
2943 if (req == ifp->if_poll_req)
2944 break;
2945 }
2946 ifp->if_poll_req = 0;
2947 ifp->if_poll_active = 0;
2948
2949 /*
2950 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
2951 * until ifnet_poll() is called again.
2952 */
2953 ts = &ifp->if_poll_cycle;
2954 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
2955 ts = NULL;
2956 }
2957
2958 /* NOTREACHED */
2959 }
2960
2961 void
2962 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2963 {
2964 if (ts == NULL)
2965 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
2966 else
2967 *(&ifp->if_poll_cycle) = *ts;
2968
2969 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2970 printf("%s: poll interval set to %lu nsec\n",
2971 if_name(ifp), ts->tv_nsec);
2972 }
2973
2974 void
2975 ifnet_purge(struct ifnet *ifp)
2976 {
2977 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
2978 if_qflush(ifp, 0);
2979 }
2980
2981 void
2982 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2983 {
2984 IFCQ_LOCK_ASSERT_HELD(ifq);
2985
2986 if (!(IFCQ_IS_READY(ifq)))
2987 return;
2988
2989 if (IFCQ_TBR_IS_ENABLED(ifq)) {
2990 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
2991 ifq->ifcq_tbr.tbr_percent, 0 };
2992 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
2993 }
2994
2995 ifclassq_update(ifq, ev);
2996 }
2997
2998 void
2999 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3000 {
3001 switch (ev) {
3002 case CLASSQ_EV_LINK_BANDWIDTH:
3003 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
3004 ifp->if_poll_update++;
3005 break;
3006
3007 default:
3008 break;
3009 }
3010 }
3011
3012 errno_t
3013 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3014 {
3015 struct ifclassq *ifq;
3016 u_int32_t omodel;
3017 errno_t err;
3018
3019 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
3020 return (EINVAL);
3021 else if (!(ifp->if_eflags & IFEF_TXSTART))
3022 return (ENXIO);
3023
3024 ifq = &ifp->if_snd;
3025 IFCQ_LOCK(ifq);
3026 omodel = ifp->if_output_sched_model;
3027 ifp->if_output_sched_model = model;
3028 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
3029 ifp->if_output_sched_model = omodel;
3030 IFCQ_UNLOCK(ifq);
3031
3032 return (err);
3033 }
3034
3035 errno_t
3036 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3037 {
3038 if (ifp == NULL)
3039 return (EINVAL);
3040 else if (!(ifp->if_eflags & IFEF_TXSTART))
3041 return (ENXIO);
3042
3043 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3044
3045 return (0);
3046 }
3047
3048 errno_t
3049 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3050 {
3051 if (ifp == NULL || maxqlen == NULL)
3052 return (EINVAL);
3053 else if (!(ifp->if_eflags & IFEF_TXSTART))
3054 return (ENXIO);
3055
3056 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3057
3058 return (0);
3059 }
3060
3061 errno_t
3062 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3063 {
3064 errno_t err;
3065
3066 if (ifp == NULL || pkts == NULL)
3067 err = EINVAL;
3068 else if (!(ifp->if_eflags & IFEF_TXSTART))
3069 err = ENXIO;
3070 else
3071 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3072 pkts, NULL);
3073
3074 return (err);
3075 }
3076
3077 errno_t
3078 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3079 u_int32_t *pkts, u_int32_t *bytes)
3080 {
3081 errno_t err;
3082
3083 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3084 (pkts == NULL && bytes == NULL))
3085 err = EINVAL;
3086 else if (!(ifp->if_eflags & IFEF_TXSTART))
3087 err = ENXIO;
3088 else
3089 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3090
3091 return (err);
3092 }
3093
3094 errno_t
3095 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3096 {
3097 struct dlil_threading_info *inp;
3098
3099 if (ifp == NULL)
3100 return (EINVAL);
3101 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3102 return (ENXIO);
3103
3104 if (maxqlen == 0)
3105 maxqlen = if_rcvq_maxlen;
3106 else if (maxqlen < IF_RCVQ_MINLEN)
3107 maxqlen = IF_RCVQ_MINLEN;
3108
3109 inp = ifp->if_inp;
3110 lck_mtx_lock(&inp->input_lck);
3111 qlimit(&inp->rcvq_pkts) = maxqlen;
3112 lck_mtx_unlock(&inp->input_lck);
3113
3114 return (0);
3115 }
3116
3117 errno_t
3118 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3119 {
3120 struct dlil_threading_info *inp;
3121
3122 if (ifp == NULL || maxqlen == NULL)
3123 return (EINVAL);
3124 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3125 return (ENXIO);
3126
3127 inp = ifp->if_inp;
3128 lck_mtx_lock(&inp->input_lck);
3129 *maxqlen = qlimit(&inp->rcvq_pkts);
3130 lck_mtx_unlock(&inp->input_lck);
3131 return (0);
3132 }
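
An illustrative attach-time tuning sketch for the two queue-length setters above; both calls require the corresponding eflags (IFEF_TXSTART, IFEF_RXPOLL) or they return ENXIO, and the lengths shown are arbitrary.

static void
my_tune_queues(struct ifnet *ifp)
{
	/* transmit classq depth; needs IFEF_TXSTART */
	(void) ifnet_set_sndq_maxlen(ifp, 512);

	/*
	 * receive queue depth; needs IFEF_RXPOLL and an input thread.
	 * Passing 0 would fall back to if_rcvq_maxlen (see above).
	 */
	(void) ifnet_set_rcvq_maxlen(ifp, 256);
}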
3133
3134 errno_t
3135 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3136 {
3137 int error;
3138 struct timespec now;
3139 u_int64_t now_nsec;
3140
3141 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3142 m->m_nextpkt != NULL) {
3143 if (m != NULL)
3144 m_freem_list(m);
3145 return (EINVAL);
3146 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3147 !(ifp->if_refflags & IFRF_ATTACHED)) {
3148 /* flag tested without lock for performance */
3149 m_freem(m);
3150 return (ENXIO);
3151 } else if (!(ifp->if_flags & IFF_UP)) {
3152 m_freem(m);
3153 return (ENETDOWN);
3154 }
3155
3156 nanouptime(&now);
3157 net_timernsec(&now, &now_nsec);
3158 m->m_pkthdr.pkt_timestamp = now_nsec;
3159 m->m_pkthdr.pkt_flags &= ~PKTF_DRV_TS_VALID;
3160
3161 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3162 /*
3163 * If the driver chose to delay start callback for
3164 * coalescing multiple packets, then use the following
3165 * heuristics to make sure that start callback will
3166 * be delayed only when bulk data transfer is detected.
3167 * 1. number of packets enqueued in (delay_win * 2) is
3168 * greater than or equal to the delay qlen.
3169 * 2. If delay_start is enabled it will stay enabled for
3170 * another 10 idle windows. This is to take into account
3171 * variable RTT and burst traffic.
3172 * 3. If the time elapsed since last enqueue is more
3173 * than 200ms, we disable delaying the start callback. This
3174 * is to take idle time into account.
3175 */
3176 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3177 if (ifp->if_start_delay_swin > 0) {
3178 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3179 ifp->if_start_delay_cnt++;
3180 } else if ((now_nsec - ifp->if_start_delay_swin)
3181 >= (200 * 1000 * 1000)) {
3182 ifp->if_start_delay_swin = now_nsec;
3183 ifp->if_start_delay_cnt = 1;
3184 ifp->if_start_delay_idle = 0;
3185 if (ifp->if_eflags & IFEF_DELAY_START) {
3186 ifp->if_eflags &=
3187 ~(IFEF_DELAY_START);
3188 ifnet_delay_start_disabled++;
3189 }
3190 } else {
3191 if (ifp->if_start_delay_cnt >=
3192 ifp->if_start_delay_qlen) {
3193 ifp->if_eflags |= IFEF_DELAY_START;
3194 ifp->if_start_delay_idle = 0;
3195 } else {
3196 if (ifp->if_start_delay_idle >= 10) {
3197 ifp->if_eflags &= ~(IFEF_DELAY_START);
3198 ifnet_delay_start_disabled++;
3199 } else {
3200 ifp->if_start_delay_idle++;
3201 }
3202 }
3203 ifp->if_start_delay_swin = now_nsec;
3204 ifp->if_start_delay_cnt = 1;
3205 }
3206 } else {
3207 ifp->if_start_delay_swin = now_nsec;
3208 ifp->if_start_delay_cnt = 1;
3209 ifp->if_start_delay_idle = 0;
3210 ifp->if_eflags &= ~(IFEF_DELAY_START);
3211 }
3212 } else {
3213 ifp->if_eflags &= ~(IFEF_DELAY_START);
3214 }
3215
3216 /* enqueue the packet */
3217 error = ifclassq_enqueue(&ifp->if_snd, m);
3218
3219 /*
3220 * Tell the driver to start dequeueing; do this even when the queue
3221 * for the packet is suspended (EQSUSPENDED), as the driver could still
3222 * be dequeueing from other unsuspended queues.
3223 */
3224 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3225 (error == 0 || error == EQFULL || error == EQSUSPENDED))
3226 ifnet_start(ifp);
3227
3228 return (error);
3229 }
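
To illustrate the output model this enqueue path serves: packets are enqueued onto if_snd and the starter thread is poked via ifnet_start(); the driver's start callback, invoked from ifnet_start_thread_fn() above, then drains the queue with ifnet_dequeue() (defined just below). The callback here is a hypothetical stand-in for a real driver.

static void
my_driver_start(ifnet_t ifp)
{
	mbuf_t m;

	/* drain the classq until it reports empty */
	while (ifnet_dequeue(ifp, &m) == 0) {
		/* ... hand the packet to the hardware here ... */
		mbuf_freem(m);		/* stand-in for transmission */
	}
}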
3230
3231 errno_t
3232 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3233 {
3234 errno_t rc;
3235 if (ifp == NULL || mp == NULL)
3236 return (EINVAL);
3237 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3238 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3239 return (ENXIO);
3240 if (!ifnet_is_attached(ifp, 1))
3241 return (ENXIO);
3242 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3243 mp, NULL, NULL, NULL);
3244 ifnet_decr_iorefcnt(ifp);
3245
3246 return (rc);
3247 }
3248
3249 errno_t
3250 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3251 struct mbuf **mp)
3252 {
3253 errno_t rc;
3254 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3255 return (EINVAL);
3256 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3257 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3258 return (ENXIO);
3259 if (!ifnet_is_attached(ifp, 1))
3260 return (ENXIO);
3261
3262 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
3263 ifnet_decr_iorefcnt(ifp);
3264 return (rc);
3265 }
3266
3267 errno_t
3268 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3269 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3270 {
3271 errno_t rc;
3272 if (ifp == NULL || head == NULL || pkt_limit < 1)
3273 return (EINVAL);
3274 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3275 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3276 return (ENXIO);
3277 if (!ifnet_is_attached(ifp, 1))
3278 return (ENXIO);
3279
3280 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3281 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, head, tail, cnt, len);
3282 ifnet_decr_iorefcnt(ifp);
3283 return (rc);
3284 }
3285
3286 errno_t
3287 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3288 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3289 {
3290 errno_t rc;
3291 if (ifp == NULL || head == NULL || byte_limit < 1)
3292 return (EINVAL);
3293 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3294 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3295 return (ENXIO);
3296 if (!ifnet_is_attached(ifp, 1))
3297 return (ENXIO);
3298
3299 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3300 byte_limit, head, tail, cnt, len);
3301 ifnet_decr_iorefcnt(ifp);
3302 return (rc);
3303 }
3304
3305 errno_t
3306 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3307 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3308 u_int32_t *len)
3309 {
3310 errno_t rc;
3311 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3312 !MBUF_VALID_SC(sc))
3313 return (EINVAL);
3314 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3315 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3316 return (ENXIO);
3317 if (!ifnet_is_attached(ifp, 1))
3318 return (ENXIO);
3319 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit, head,
3320 tail, cnt, len);
3321 ifnet_decr_iorefcnt(ifp);
3322 return (rc);
3323 }
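
The batched variants above suit drivers that fill a whole transmit ring per wakeup; a hypothetical sketch, with ring_space standing in for whatever descriptor count the hardware currently has free:

static void
my_driver_start_batched(ifnet_t ifp, u_int32_t ring_space)
{
	mbuf_t head = NULL, tail = NULL;
	u_int32_t cnt = 0, len = 0;

	if (ifnet_dequeue_multi(ifp, ring_space, &head, &tail,
	    &cnt, &len) != 0)
		return;		/* queue empty or interface not ready */

	/* ... map all cnt packets (len bytes total) onto the TX ring ... */
	mbuf_freem_list(head);	/* stand-in for transmission */
}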
3324
3325 errno_t
3326 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3327 const struct sockaddr *dest, const char *dest_linkaddr,
3328 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3329 {
3330 if (pre != NULL)
3331 *pre = 0;
3332 if (post != NULL)
3333 *post = 0;
3334
3335 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3336 }
3337
3338 static int
3339 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3340 char **frame_header_p, protocol_family_t protocol_family)
3341 {
3342 struct ifnet_filter *filter;
3343
3344 /*
3345 * Pass the inbound packet to the interface filters
3346 */
3347 lck_mtx_lock_spin(&ifp->if_flt_lock);
3348 /* prevent filter list from changing in case we drop the lock */
3349 if_flt_monitor_busy(ifp);
3350 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3351 int result;
3352
3353 if (!filter->filt_skip && filter->filt_input != NULL &&
3354 (filter->filt_protocol == 0 ||
3355 filter->filt_protocol == protocol_family)) {
3356 lck_mtx_unlock(&ifp->if_flt_lock);
3357
3358 result = (*filter->filt_input)(filter->filt_cookie,
3359 ifp, protocol_family, m_p, frame_header_p);
3360
3361 lck_mtx_lock_spin(&ifp->if_flt_lock);
3362 if (result != 0) {
3363 /* we're done with the filter list */
3364 if_flt_monitor_unbusy(ifp);
3365 lck_mtx_unlock(&ifp->if_flt_lock);
3366 return (result);
3367 }
3368 }
3369 }
3370 /* we're done with the filter list */
3371 if_flt_monitor_unbusy(ifp);
3372 lck_mtx_unlock(&ifp->if_flt_lock);
3373
3374 /*
3375 * Strip away M_PROTO1 bit prior to sending packet up the stack as
3376 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
3377 */
3378 if (*m_p != NULL)
3379 (*m_p)->m_flags &= ~M_PROTO1;
3380
3381 return (0);
3382 }
3383
3384 static int
3385 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3386 protocol_family_t protocol_family)
3387 {
3388 struct ifnet_filter *filter;
3389
3390 /*
3391 * Pass the outbound packet to the interface filters
3392 */
3393 lck_mtx_lock_spin(&ifp->if_flt_lock);
3394 /* prevent filter list from changing in case we drop the lock */
3395 if_flt_monitor_busy(ifp);
3396 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3397 int result;
3398
3399 if (!filter->filt_skip && filter->filt_output != NULL &&
3400 (filter->filt_protocol == 0 ||
3401 filter->filt_protocol == protocol_family)) {
3402 lck_mtx_unlock(&ifp->if_flt_lock);
3403
3404 result = filter->filt_output(filter->filt_cookie, ifp,
3405 protocol_family, m_p);
3406
3407 lck_mtx_lock_spin(&ifp->if_flt_lock);
3408 if (result != 0) {
3409 /* we're done with the filter list */
3410 if_flt_monitor_unbusy(ifp);
3411 lck_mtx_unlock(&ifp->if_flt_lock);
3412 return (result);
3413 }
3414 }
3415 }
3416 /* we're done with the filter list */
3417 if_flt_monitor_unbusy(ifp);
3418 lck_mtx_unlock(&ifp->if_flt_lock);
3419
3420 return (0);
3421 }
3422
3423 static void
3424 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3425 {
3426 int error;
3427
3428 if (ifproto->proto_kpi == kProtoKPI_v1) {
3429 /* Version 1 protocols get one packet at a time */
3430 while (m != NULL) {
3431 char * frame_header;
3432 mbuf_t next_packet;
3433
3434 next_packet = m->m_nextpkt;
3435 m->m_nextpkt = NULL;
3436 frame_header = m->m_pkthdr.pkt_hdr;
3437 m->m_pkthdr.pkt_hdr = NULL;
3438 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3439 ifproto->protocol_family, m, frame_header);
3440 if (error != 0 && error != EJUSTRETURN)
3441 m_freem(m);
3442 m = next_packet;
3443 }
3444 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3445 /* Version 2 protocols support packet lists */
3446 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3447 ifproto->protocol_family, m);
3448 if (error != 0 && error != EJUSTRETURN)
3449 m_freem_list(m);
3450 }
3451 }
3452
3453 static void
3454 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3455 struct dlil_threading_info *inp, boolean_t poll)
3456 {
3457 struct ifnet_stat_increment_param *d = &inp->stats;
3458
3459 if (s->packets_in != 0)
3460 d->packets_in += s->packets_in;
3461 if (s->bytes_in != 0)
3462 d->bytes_in += s->bytes_in;
3463 if (s->errors_in != 0)
3464 d->errors_in += s->errors_in;
3465
3466 if (s->packets_out != 0)
3467 d->packets_out += s->packets_out;
3468 if (s->bytes_out != 0)
3469 d->bytes_out += s->bytes_out;
3470 if (s->errors_out != 0)
3471 d->errors_out += s->errors_out;
3472
3473 if (s->collisions != 0)
3474 d->collisions += s->collisions;
3475 if (s->dropped != 0)
3476 d->dropped += s->dropped;
3477
3478 if (poll)
3479 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3480 }
3481
3482 static void
3483 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3484 {
3485 struct ifnet_stat_increment_param *s = &inp->stats;
3486
3487 /*
3488 * Use of atomic operations is unavoidable here because
3489 * these stats may also be incremented elsewhere via KPIs.
3490 */
3491 if (s->packets_in != 0) {
3492 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3493 s->packets_in = 0;
3494 }
3495 if (s->bytes_in != 0) {
3496 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3497 s->bytes_in = 0;
3498 }
3499 if (s->errors_in != 0) {
3500 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3501 s->errors_in = 0;
3502 }
3503
3504 if (s->packets_out != 0) {
3505 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3506 s->packets_out = 0;
3507 }
3508 if (s->bytes_out != 0) {
3509 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3510 s->bytes_out = 0;
3511 }
3512 if (s->errors_out != 0) {
3513 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3514 s->errors_out = 0;
3515 }
3516
3517 if (s->collisions != 0) {
3518 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3519 s->collisions = 0;
3520 }
3521 if (s->dropped != 0) {
3522 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3523 s->dropped = 0;
3524 }
3525 /*
3526 * If we went over the threshold, notify NetworkStatistics.
3527 */
3528 if (ifp->if_data_threshold &&
3529 (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
3530 ifp->if_data_threshold) {
3531 ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
3532
3533 lck_mtx_convert_spin(&inp->input_lck);
3534 nstat_ifnet_threshold_reached(ifp->if_index);
3535 }
3536 /*
3537 * No need for atomic operations as they are modified here
3538 * only from within the DLIL input thread context.
3539 */
3540 if (inp->tstats.packets != 0) {
3541 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3542 inp->tstats.packets = 0;
3543 }
3544 if (inp->tstats.bytes != 0) {
3545 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3546 inp->tstats.bytes = 0;
3547 }
3548 }
3549
3550 __private_extern__ void
3551 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3552 {
3553 return (dlil_input_packet_list_common(ifp, m, 0,
3554 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3555 }
3556
3557 __private_extern__ void
3558 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3559 u_int32_t cnt, ifnet_model_t mode)
3560 {
3561 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3562 }
3563
3564 static void
3565 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3566 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3567 {
3568 int error = 0;
3569 protocol_family_t protocol_family;
3570 mbuf_t next_packet;
3571 ifnet_t ifp = ifp_param;
3572 char * frame_header;
3573 struct if_proto * last_ifproto = NULL;
3574 mbuf_t pkt_first = NULL;
3575 mbuf_t * pkt_next = NULL;
3576 u_int32_t poll_thresh = 0, poll_ival = 0;
3577
3578 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3579
3580 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3581 (poll_ival = if_rxpoll_interval_pkts) > 0)
3582 poll_thresh = cnt;
3583
3584 while (m != NULL) {
3585 struct if_proto *ifproto = NULL;
3586 int iorefcnt = 0;
3587 uint32_t pktf_mask; /* pkt flags to preserve */
3588
3589 if (ifp_param == NULL)
3590 ifp = m->m_pkthdr.rcvif;
3591
3592 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3593 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3594 ifnet_poll(ifp);
3595
3596 /* Check if this mbuf looks valid */
3597 MBUF_INPUT_CHECK(m, ifp);
3598
3599 next_packet = m->m_nextpkt;
3600 m->m_nextpkt = NULL;
3601 frame_header = m->m_pkthdr.pkt_hdr;
3602 m->m_pkthdr.pkt_hdr = NULL;
3603
3604 /*
3605 * Get an IO reference count if the interface is not
3606 * loopback (lo0) and it is attached; lo0 never goes
3607 * away, so optimize for that.
3608 */
3609 if (ifp != lo_ifp) {
3610 if (!ifnet_is_attached(ifp, 1)) {
3611 m_freem(m);
3612 goto next;
3613 }
3614 iorefcnt = 1;
3615 pktf_mask = 0;
3616 } else {
3617 /*
3618 * If this arrived on lo0, preserve interface addr
3619 * info to allow for connectivity between loopback
3620 * and local interface addresses.
3621 */
3622 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3623 }
3624
3625 /* make sure packet comes in clean */
3626 m_classifier_init(m, pktf_mask);
3627
3628 ifp_inc_traffic_class_in(ifp, m);
3629
3630 /* find which protocol family this packet is for */
3631 ifnet_lock_shared(ifp);
3632 error = (*ifp->if_demux)(ifp, m, frame_header,
3633 &protocol_family);
3634 ifnet_lock_done(ifp);
3635 if (error != 0) {
3636 if (error == EJUSTRETURN)
3637 goto next;
3638 protocol_family = 0;
3639 }
3640
3641 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3642 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3643 dlil_input_cksum_dbg(ifp, m, frame_header,
3644 protocol_family);
3645
3646 /*
3647 * For partial checksum offload, we expect the driver to
3648 * set the start offset indicating the start of the span
3649 * that is covered by the hardware-computed checksum;
3650 * adjust this start offset accordingly because the data
3651 * pointer has been advanced beyond the link-layer header.
3652 *
3653 * Don't adjust if the interface is a bridge member, as
3654 * the adjustment will occur from the context of the
3655 * bridge interface during input.
3656 */
3657 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3658 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3659 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3660 int adj;
3661
3662 if (frame_header == NULL ||
3663 frame_header < (char *)mbuf_datastart(m) ||
3664 frame_header > (char *)m->m_data ||
3665 (adj = (m->m_data - frame_header)) >
3666 m->m_pkthdr.csum_rx_start) {
3667 m->m_pkthdr.csum_data = 0;
3668 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3669 hwcksum_in_invalidated++;
3670 } else {
3671 m->m_pkthdr.csum_rx_start -= adj;
3672 }
3673 }
3674
3675 pktap_input(ifp, protocol_family, m, frame_header);
3676
3677 if (m->m_flags & (M_BCAST|M_MCAST))
3678 atomic_add_64(&ifp->if_imcasts, 1);
3679
3680 /* run interface filters, exclude VLAN packets PR-3586856 */
3681 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
3682 error = dlil_interface_filters_input(ifp, &m,
3683 &frame_header, protocol_family);
3684 if (error != 0) {
3685 if (error != EJUSTRETURN)
3686 m_freem(m);
3687 goto next;
3688 }
3689 }
3690 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
3691 m_freem(m);
3692 goto next;
3693 }
3694
3695 /* Lookup the protocol attachment to this interface */
3696 if (protocol_family == 0) {
3697 ifproto = NULL;
3698 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3699 (last_ifproto->protocol_family == protocol_family)) {
3700 VERIFY(ifproto == NULL);
3701 ifproto = last_ifproto;
3702 if_proto_ref(last_ifproto);
3703 } else {
3704 VERIFY(ifproto == NULL);
3705 ifnet_lock_shared(ifp);
3706 /* callee holds a proto refcnt upon success */
3707 ifproto = find_attached_proto(ifp, protocol_family);
3708 ifnet_lock_done(ifp);
3709 }
3710 if (ifproto == NULL) {
3711 /* no protocol for this packet, discard */
3712 m_freem(m);
3713 goto next;
3714 }
3715 if (ifproto != last_ifproto) {
3716 if (last_ifproto != NULL) {
3717 /* pass up the list for the previous protocol */
3718 dlil_ifproto_input(last_ifproto, pkt_first);
3719 pkt_first = NULL;
3720 if_proto_free(last_ifproto);
3721 }
3722 last_ifproto = ifproto;
3723 if_proto_ref(ifproto);
3724 }
3725 /* extend the list */
3726 m->m_pkthdr.pkt_hdr = frame_header;
3727 if (pkt_first == NULL) {
3728 pkt_first = m;
3729 } else {
3730 *pkt_next = m;
3731 }
3732 pkt_next = &m->m_nextpkt;
3733
3734 next:
3735 if (next_packet == NULL && last_ifproto != NULL) {
3736 /* pass up the last list of packets */
3737 dlil_ifproto_input(last_ifproto, pkt_first);
3738 if_proto_free(last_ifproto);
3739 last_ifproto = NULL;
3740 }
3741 if (ifproto != NULL) {
3742 if_proto_free(ifproto);
3743 ifproto = NULL;
3744 }
3745
3746 m = next_packet;
3747
3748 /* update the driver's multicast filter, if needed */
3749 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3750 ifp->if_updatemcasts = 0;
3751 if (iorefcnt == 1)
3752 ifnet_decr_iorefcnt(ifp);
3753 }
3754
3755 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
3756 }
3757
3758 errno_t
3759 if_mcasts_update(struct ifnet *ifp)
3760 {
3761 errno_t err;
3762
3763 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
3764 if (err == EAFNOSUPPORT)
3765 err = 0;
3766 printf("%s: %s %d suspended link-layer multicast membership(s) "
3767 "(err=%d)\n", if_name(ifp),
3768 (err == 0 ? "successfully restored" : "failed to restore"),
3769 ifp->if_updatemcasts, err);
3770
3771 /* just return success */
3772 return (0);
3773 }
3774
3775 /* If ifp is set, we will increment the generation for the interface */
3776 int
3777 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
3778 {
3779 if (ifp != NULL) {
3780 ifnet_increment_generation(ifp);
3781 }
3782
3783 #if NECP
3784 necp_update_all_clients();
3785 #endif /* NECP */
3786
3787 return (kev_post_msg(event));
3788 }
3789
3790 #define TMP_IF_PROTO_ARR_SIZE 10
3791 static int
3792 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
3793 {
3794 struct ifnet_filter *filter = NULL;
3795 struct if_proto *proto = NULL;
3796 int if_proto_count = 0;
3797 struct if_proto **tmp_ifproto_arr = NULL;
3798 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
3799 int tmp_ifproto_arr_idx = 0;
3800 bool tmp_malloc = false;
3801
3802 /*
3803 * Pass the event to the interface filters
3804 */
3805 lck_mtx_lock_spin(&ifp->if_flt_lock);
3806 /* prevent filter list from changing in case we drop the lock */
3807 if_flt_monitor_busy(ifp);
3808 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3809 if (filter->filt_event != NULL) {
3810 lck_mtx_unlock(&ifp->if_flt_lock);
3811
3812 filter->filt_event(filter->filt_cookie, ifp,
3813 filter->filt_protocol, event);
3814
3815 lck_mtx_lock_spin(&ifp->if_flt_lock);
3816 }
3817 }
3818 /* we're done with the filter list */
3819 if_flt_monitor_unbusy(ifp);
3820 lck_mtx_unlock(&ifp->if_flt_lock);
3821
3822 /* Get an io ref count if the interface is attached */
3823 if (!ifnet_is_attached(ifp, 1))
3824 goto done;
3825
3826 /*
3827 * An embedded tmp_list_entry in if_proto may still get
3828 * overwritten by another thread after the ifnet lock is given up,
3829 * therefore we avoid embedded pointers here.
3830 */
3831 ifnet_lock_shared(ifp);
3832 if_proto_count = dlil_ifp_proto_count(ifp);
3833 if (if_proto_count) {
3834 int i;
3835 VERIFY(ifp->if_proto_hash != NULL);
3836 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
3837 tmp_ifproto_arr = tmp_ifproto_stack_arr;
3838 } else {
3839 MALLOC(tmp_ifproto_arr, struct if_proto **,
3840 sizeof (*tmp_ifproto_arr) * if_proto_count,
3841 M_TEMP, M_ZERO);
3842 if (tmp_ifproto_arr == NULL) {
3843 ifnet_lock_done(ifp);
3844 goto cleanup;
3845 }
3846 tmp_malloc = true;
3847 }
3848
3849 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
3850 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
3851 next_hash) {
3852 if_proto_ref(proto);
3853 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
3854 tmp_ifproto_arr_idx++;
3855 }
3856 }
3857 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
3858 }
3859 ifnet_lock_done(ifp);
3860
3861 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
3862 tmp_ifproto_arr_idx++) {
3863 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
3864 VERIFY(proto != NULL);
3865 proto_media_event eventp =
3866 (proto->proto_kpi == kProtoKPI_v1 ?
3867 proto->kpi.v1.event :
3868 proto->kpi.v2.event);
3869
3870 if (eventp != NULL) {
3871 eventp(ifp, proto->protocol_family,
3872 event);
3873 }
3874 if_proto_free(proto);
3875 }
3876
3877 cleanup:
3878 if (tmp_malloc) {
3879 FREE(tmp_ifproto_arr, M_TEMP);
3880 }
3881
3882 /* Pass the event to the interface */
3883 if (ifp->if_event != NULL)
3884 ifp->if_event(ifp, event);
3885
3886 /* Release the io ref count */
3887 ifnet_decr_iorefcnt(ifp);
3888 done:
3889 return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
3890 }
3891
3892 errno_t
3893 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3894 {
3895 struct kev_msg kev_msg;
3896 int result = 0;
3897
3898 if (ifp == NULL || event == NULL)
3899 return (EINVAL);
3900
3901 bzero(&kev_msg, sizeof (kev_msg));
3902 kev_msg.vendor_code = event->vendor_code;
3903 kev_msg.kev_class = event->kev_class;
3904 kev_msg.kev_subclass = event->kev_subclass;
3905 kev_msg.event_code = event->event_code;
3906 kev_msg.dv[0].data_ptr = &event->event_data[0];
3907 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3908 kev_msg.dv[1].data_length = 0;
3909
3910 result = dlil_event_internal(ifp, &kev_msg, TRUE);
3911
3912 return (result);
3913 }
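
/*
 * ifnet_event() is the path drivers and pseudo-interfaces use to feed
 * link events into dlil_event_internal() above.  A sketch of posting a
 * link-up notification, following the layout used by the bond/vlan
 * pseudo-interfaces; the helper name is illustrative only.
 */
static void
example_post_link_up(ifnet_t ifp)
{
	struct {
		struct kern_event_msg	header;
		u_int32_t		unit;
		char			if_name[IFNAMSIZ];
	} event;

	bzero(&event, sizeof (event));
	event.header.total_size = sizeof (event);
	event.header.vendor_code = KEV_VENDOR_APPLE;
	event.header.kev_class = KEV_NETWORK_CLASS;
	event.header.kev_subclass = KEV_DL_SUBCLASS;
	event.header.event_code = KEV_DL_LINK_ON;
	event.header.event_data[0] = ifnet_family(ifp);
	event.unit = (u_int32_t)ifnet_unit(ifp);
	strlcpy(event.if_name, ifnet_name(ifp), IFNAMSIZ);

	(void) ifnet_event(ifp, &event.header);
}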
3914
3915 #if CONFIG_MACF_NET
3916 #include <netinet/ip6.h>
3917 #include <netinet/ip.h>
3918 static int
3919 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
3920 {
3921 struct mbuf *m;
3922 struct ip *ip;
3923 struct ip6_hdr *ip6;
3924 int type = SOCK_RAW;
3925
3926 if (!raw) {
3927 switch (family) {
3928 case PF_INET:
3929 m = m_pullup(*mp, sizeof(struct ip));
3930 if (m == NULL)
3931 break;
3932 *mp = m;
3933 ip = mtod(m, struct ip *);
3934 if (ip->ip_p == IPPROTO_TCP)
3935 type = SOCK_STREAM;
3936 else if (ip->ip_p == IPPROTO_UDP)
3937 type = SOCK_DGRAM;
3938 break;
3939 case PF_INET6:
3940 m = m_pullup(*mp, sizeof(struct ip6_hdr));
3941 if (m == NULL)
3942 break;
3943 *mp = m;
3944 ip6 = mtod(m, struct ip6_hdr *);
3945 if (ip6->ip6_nxt == IPPROTO_TCP)
3946 type = SOCK_STREAM;
3947 else if (ip6->ip6_nxt == IPPROTO_UDP)
3948 type = SOCK_DGRAM;
3949 break;
3950 }
3951 }
3952
3953 return (type);
3954 }
3955 #endif
3956
3957 /*
3958 * This is mostly called from the context of the DLIL input thread;
3959 * because of that there is no need for atomic operations.
3960 */
3961 static __inline void
3962 ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
3963 {
3964 if (!(m->m_flags & M_PKTHDR))
3965 return;
3966
3967 switch (m_get_traffic_class(m)) {
3968 case MBUF_TC_BE:
3969 ifp->if_tc.ifi_ibepackets++;
3970 ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
3971 break;
3972 case MBUF_TC_BK:
3973 ifp->if_tc.ifi_ibkpackets++;
3974 ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
3975 break;
3976 case MBUF_TC_VI:
3977 ifp->if_tc.ifi_ivipackets++;
3978 ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
3979 break;
3980 case MBUF_TC_VO:
3981 ifp->if_tc.ifi_ivopackets++;
3982 ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
3983 break;
3984 default:
3985 break;
3986 }
3987
3988 if (mbuf_is_traffic_class_privileged(m)) {
3989 ifp->if_tc.ifi_ipvpackets++;
3990 ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
3991 }
3992 }
3993
3994 /*
3995 * This is called from DLIL output, hence multiple threads could end
3996 * up modifying the statistics. We trade off accuracy for performance
3997 * by not using atomic operations here.
3998 */
3999 static __inline void
4000 ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
4001 {
4002 if (!(m->m_flags & M_PKTHDR))
4003 return;
4004
4005 switch (m_get_traffic_class(m)) {
4006 case MBUF_TC_BE:
4007 ifp->if_tc.ifi_obepackets++;
4008 ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
4009 break;
4010 case MBUF_TC_BK:
4011 ifp->if_tc.ifi_obkpackets++;
4012 ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
4013 break;
4014 case MBUF_TC_VI:
4015 ifp->if_tc.ifi_ovipackets++;
4016 ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
4017 break;
4018 case MBUF_TC_VO:
4019 ifp->if_tc.ifi_ovopackets++;
4020 ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
4021 break;
4022 default:
4023 break;
4024 }
4025
4026 if (mbuf_is_traffic_class_privileged(m)) {
4027 ifp->if_tc.ifi_opvpackets++;
4028 ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
4029 }
4030 }
4031
4032 static void
4033 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4034 {
4035 mbuf_t n = m;
4036 int chainlen = 0;
4037
4038 while (n != NULL) {
4039 chainlen++;
4040 n = n->m_next;
4041 }
4042 switch (chainlen) {
4043 case 0:
4044 break;
4045 case 1:
4046 atomic_add_64(&cls->cls_one, 1);
4047 break;
4048 case 2:
4049 atomic_add_64(&cls->cls_two, 1);
4050 break;
4051 case 3:
4052 atomic_add_64(&cls->cls_three, 1);
4053 break;
4054 case 4:
4055 atomic_add_64(&cls->cls_four, 1);
4056 break;
4057 case 5:
4058 default:
4059 atomic_add_64(&cls->cls_five_or_more, 1);
4060 break;
4061 }
4062 }
4063
4064 /*
4065 * dlil_output
4066 *
4067 * Caller should have a lock on the protocol domain if the protocol
4068 * doesn't support finer grained locking. In most cases, the lock
4069 * will be held from the socket layer and won't be released until
4070 * we return back to the socket layer.
4071 *
4072 * This does mean that we must take a protocol lock before we take
4073 * an interface lock if we're going to take both. This makes sense
4074 * because a protocol is likely to interact with an ifp while it
4075 * is under the protocol lock.
4076 *
4077 * An advisory code will be returned if adv is not null. This
4078 * can be used to provide feedback about interface queues to the
4079 * application.
4080 */
4081 errno_t
4082 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4083 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4084 {
4085 ifnet_output_handler_func handler_func;
4086 char *frame_type = NULL;
4087 char *dst_linkaddr = NULL;
4088 int retval = 0;
4089 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4090 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4091 struct if_proto *proto = NULL;
4092 mbuf_t m;
4093 mbuf_t send_head = NULL;
4094 mbuf_t *send_tail = &send_head;
4095 int iorefcnt = 0;
4096 u_int32_t pre = 0, post = 0;
4097 u_int32_t fpkts = 0, fbytes = 0;
4098 int32_t flen = 0;
4099
4100 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4101
4102 /*
4103 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4104 * from happening while this operation is in progress
4105 */
4106 if (!ifnet_is_attached(ifp, 1)) {
4107 retval = ENXIO;
4108 goto cleanup;
4109 }
4110 iorefcnt = 1;
4111
4112 handler_func = ifp->if_output_handler;
4113 VERIFY(handler_func != NULL);
4114
4115 /* update the driver's multicast filter, if needed */
4116 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4117 ifp->if_updatemcasts = 0;
4118
4119 frame_type = frame_type_buffer;
4120 dst_linkaddr = dst_linkaddr_buffer;
4121
4122 if (raw == 0) {
4123 ifnet_lock_shared(ifp);
4124 /* callee holds a proto refcnt upon success */
4125 proto = find_attached_proto(ifp, proto_family);
4126 if (proto == NULL) {
4127 ifnet_lock_done(ifp);
4128 retval = ENXIO;
4129 goto cleanup;
4130 }
4131 ifnet_lock_done(ifp);
4132 }
4133
4134 preout_again:
4135 if (packetlist == NULL)
4136 goto cleanup;
4137
4138 m = packetlist;
4139 packetlist = packetlist->m_nextpkt;
4140 m->m_nextpkt = NULL;
4141
4142 if (raw == 0) {
4143 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4144 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4145 retval = 0;
4146 if (preoutp != NULL) {
4147 retval = preoutp(ifp, proto_family, &m, dest, route,
4148 frame_type, dst_linkaddr);
4149
4150 if (retval != 0) {
4151 if (retval == EJUSTRETURN)
4152 goto preout_again;
4153 m_freem(m);
4154 goto cleanup;
4155 }
4156 }
4157 }
4158
4159 #if CONFIG_MACF_NET
4160 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4161 dlil_get_socket_type(&m, proto_family, raw));
4162 if (retval != 0) {
4163 m_freem(m);
4164 goto cleanup;
4165 }
4166 #endif
4167
4168 do {
4169 #if CONFIG_DTRACE
4170 if (!raw && proto_family == PF_INET) {
4171 struct ip *ip = mtod(m, struct ip *);
4172 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4173 struct ip *, ip, struct ifnet *, ifp,
4174 struct ip *, ip, struct ip6_hdr *, NULL);
4175
4176 } else if (!raw && proto_family == PF_INET6) {
4177 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4178 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4179 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4180 struct ip *, NULL, struct ip6_hdr *, ip6);
4181 }
4182 #endif /* CONFIG_DTRACE */
4183
4184 if (raw == 0 && ifp->if_framer != NULL) {
4185 int rcvif_set = 0;
4186
4187 /*
4188 * If this is a broadcast packet that needs to be
4189 * looped back into the system, set the inbound ifp
4190 * to that of the outbound ifp. This will allow
4191 * us to determine that it is a legitimate packet
4192 * for the system. Only set the ifp if it's not
4193 * already set, just to be safe.
4194 */
4195 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4196 m->m_pkthdr.rcvif == NULL) {
4197 m->m_pkthdr.rcvif = ifp;
4198 rcvif_set = 1;
4199 }
4200
4201 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4202 frame_type, &pre, &post);
4203 if (retval != 0) {
4204 if (retval != EJUSTRETURN)
4205 m_freem(m);
4206 goto next;
4207 }
4208
4209 /*
4210 * For partial checksum offload, adjust the start
4211 * and stuff offsets based on the prepended header.
4212 */
4213 if ((m->m_pkthdr.csum_flags &
4214 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4215 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4216 m->m_pkthdr.csum_tx_stuff += pre;
4217 m->m_pkthdr.csum_tx_start += pre;
4218 }
4219
4220 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4221 dlil_output_cksum_dbg(ifp, m, pre,
4222 proto_family);
4223
4224 /*
4225 * Clear the ifp if it was set above, and to be
4226 * safe, only if it is still the same as the
4227 * outbound ifp we have in context. If it was
4228 * looped back, then a copy of it was sent to the
4229 * loopback interface with the rcvif set, and we
4230 * are clearing the one that will go down to the
4231 * layer below.
4232 */
4233 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4234 m->m_pkthdr.rcvif = NULL;
4235 }
4236
4237 /*
4238 * Let interface filters (if any) do their thing ...
4239 */
4240 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4241 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4242 retval = dlil_interface_filters_output(ifp,
4243 &m, proto_family);
4244 if (retval != 0) {
4245 if (retval != EJUSTRETURN)
4246 m_freem(m);
4247 goto next;
4248 }
4249 }
4250 /*
4251 * Strip away the M_PROTO1 bit prior to sending the packet
4252 * to the driver, as this flag may be used by the driver.
4253 */
4254 m->m_flags &= ~M_PROTO1;
4255
4256 /*
4257 * If the underlying interface is not capable of handling a
4258 * packet whose data portion spans across physically disjoint
4259 * pages, we need to "normalize" the packet so that we pass
4260 * down a chain of mbufs where each mbuf points to a span that
4261 * resides within a single system page. If the packet does
4262 * not cross page(s), the following is a no-op.
4263 */
4264 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4265 if ((m = m_normalize(m)) == NULL)
4266 goto next;
4267 }
4268
4269 /*
4270 * If this is a TSO packet, make sure the interface still
4271 * advertises TSO capability.
4272 */
4273 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4274 retval = EMSGSIZE;
4275 m_freem(m);
4276 goto cleanup;
4277 }
4278
4279 /*
4280 * If the packet service class is not background,
4281 * update the timestamp to indicate recent activity
4282 * on a foreground socket.
4283 */
4284 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
4285 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
4286 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
4287 ifp->if_fg_sendts = net_uptime();
4288
4289 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
4290 ifp->if_rt_sendts = net_uptime();
4291 }
4292
4293 ifp_inc_traffic_class_out(ifp, m);
4294 pktap_output(ifp, proto_family, m, pre, post);
4295
4296 /*
4297 * Count the number of elements in the mbuf chain
4298 */
4299 if (tx_chain_len_count) {
4300 dlil_count_chain_len(m, &tx_chain_len_stats);
4301 }
4302
4303 /*
4304 * Finally, call the driver.
4305 */
4306 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4307 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4308 flen += (m_pktlen(m) - (pre + post));
4309 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4310 }
4311 *send_tail = m;
4312 send_tail = &m->m_nextpkt;
4313 } else {
4314 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4315 flen = (m_pktlen(m) - (pre + post));
4316 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4317 } else {
4318 flen = 0;
4319 }
4320 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4321 0, 0, 0, 0, 0);
4322 retval = (*handler_func)(ifp, m);
4323 if (retval == EQFULL || retval == EQSUSPENDED) {
4324 if (adv != NULL && adv->code == FADV_SUCCESS) {
4325 adv->code = (retval == EQFULL ?
4326 FADV_FLOW_CONTROLLED :
4327 FADV_SUSPENDED);
4328 }
4329 retval = 0;
4330 }
4331 if (retval == 0 && flen > 0) {
4332 fbytes += flen;
4333 fpkts++;
4334 }
4335 if (retval != 0 && dlil_verbose) {
4336 printf("%s: output error on %s retval = %d\n",
4337 __func__, if_name(ifp),
4338 retval);
4339 }
4340 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4341 0, 0, 0, 0, 0);
4342 }
4343 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4344
4345 next:
4346 m = packetlist;
4347 if (m != NULL) {
4348 packetlist = packetlist->m_nextpkt;
4349 m->m_nextpkt = NULL;
4350 }
4351 } while (m != NULL);
4352
4353 if (send_head != NULL) {
4354 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4355 0, 0, 0, 0, 0);
4356 if (ifp->if_eflags & IFEF_SENDLIST) {
4357 retval = (*handler_func)(ifp, send_head);
4358 if (retval == EQFULL || retval == EQSUSPENDED) {
4359 if (adv != NULL) {
4360 adv->code = (retval == EQFULL ?
4361 FADV_FLOW_CONTROLLED :
4362 FADV_SUSPENDED);
4363 }
4364 retval = 0;
4365 }
4366 if (retval == 0 && flen > 0) {
4367 fbytes += flen;
4368 fpkts++;
4369 }
4370 if (retval != 0 && dlil_verbose) {
4371 printf("%s: output error on %s retval = %d\n",
4372 __func__, if_name(ifp), retval);
4373 }
4374 } else {
4375 struct mbuf *send_m;
4376 int enq_cnt = 0;
4377 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4378 while (send_head != NULL) {
4379 send_m = send_head;
4380 send_head = send_m->m_nextpkt;
4381 send_m->m_nextpkt = NULL;
4382 retval = (*handler_func)(ifp, send_m);
4383 if (retval == EQFULL || retval == EQSUSPENDED) {
4384 if (adv != NULL) {
4385 adv->code = (retval == EQFULL ?
4386 FADV_FLOW_CONTROLLED :
4387 FADV_SUSPENDED);
4388 }
4389 retval = 0;
4390 }
4391 if (retval == 0) {
4392 enq_cnt++;
4393 if (flen > 0)
4394 fpkts++;
4395 }
4396 if (retval != 0 && dlil_verbose) {
4397 printf("%s: output error on %s "
4398 "retval = %d\n",
4399 __func__, if_name(ifp), retval);
4400 }
4401 }
4402 if (enq_cnt > 0) {
4403 fbytes += flen;
4404 ifnet_start(ifp);
4405 }
4406 }
4407 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4408 }
4409
4410 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4411
4412 cleanup:
4413 if (fbytes > 0)
4414 ifp->if_fbytes += fbytes;
4415 if (fpkts > 0)
4416 ifp->if_fpackets += fpkts;
4417 if (proto != NULL)
4418 if_proto_free(proto);
4419 if (packetlist) /* if any packets are left, clean up */
4420 mbuf_freem_list(packetlist);
4421 if (retval == EJUSTRETURN)
4422 retval = 0;
4423 if (iorefcnt == 1)
4424 ifnet_decr_iorefcnt(ifp);
4425
4426 return (retval);
4427 }
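
/*
 * A sketch of how a caller above dlil_output() can consume the
 * advisory code described in the block comment preceding the function.
 * The advisory must be preloaded with FADV_SUCCESS, since dlil_output()
 * only overwrites it when the driver reports EQFULL or EQSUSPENDED.
 * The helper name is illustrative only.
 */
static errno_t
example_output_with_advisory(ifnet_t ifp, protocol_family_t proto,
    mbuf_t m, void *route, const struct sockaddr *dest)
{
	struct flowadv adv = { .code = FADV_SUCCESS };
	errno_t err;

	err = dlil_output(ifp, proto, m, route, dest, 0, &adv);
	if (err == 0 && adv.code != FADV_SUCCESS) {
		/*
		 * FADV_FLOW_CONTROLLED or FADV_SUSPENDED: the interface
		 * queue is filling up or suspended; a real caller would
		 * propagate this back to the socket to throttle the flow.
		 */
	}
	return (err);
}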
4428
4429 errno_t
4430 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4431 void *ioctl_arg)
4432 {
4433 struct ifnet_filter *filter;
4434 int retval = EOPNOTSUPP;
4435 int result = 0;
4436
4437 if (ifp == NULL || ioctl_code == 0)
4438 return (EINVAL);
4439
4440 /* Get an io ref count if the interface is attached */
4441 if (!ifnet_is_attached(ifp, 1))
4442 return (EOPNOTSUPP);
4443
4444 /*
4445 * Run the interface filters first.
4446 * We want to run all filters before calling the protocol,
4447 * interface family, or interface.
4448 */
4449 lck_mtx_lock_spin(&ifp->if_flt_lock);
4450 /* prevent filter list from changing in case we drop the lock */
4451 if_flt_monitor_busy(ifp);
4452 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4453 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4454 filter->filt_protocol == proto_fam)) {
4455 lck_mtx_unlock(&ifp->if_flt_lock);
4456
4457 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4458 proto_fam, ioctl_code, ioctl_arg);
4459
4460 lck_mtx_lock_spin(&ifp->if_flt_lock);
4461
4462 /* Only update retval if no one has handled the ioctl */
4463 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4464 if (result == ENOTSUP)
4465 result = EOPNOTSUPP;
4466 retval = result;
4467 if (retval != 0 && retval != EOPNOTSUPP) {
4468 /* we're done with the filter list */
4469 if_flt_monitor_unbusy(ifp);
4470 lck_mtx_unlock(&ifp->if_flt_lock);
4471 goto cleanup;
4472 }
4473 }
4474 }
4475 }
4476 /* we're done with the filter list */
4477 if_flt_monitor_unbusy(ifp);
4478 lck_mtx_unlock(&ifp->if_flt_lock);
4479
4480 /* Allow the protocol to handle the ioctl */
4481 if (proto_fam != 0) {
4482 struct if_proto *proto;
4483
4484 /* callee holds a proto refcnt upon success */
4485 ifnet_lock_shared(ifp);
4486 proto = find_attached_proto(ifp, proto_fam);
4487 ifnet_lock_done(ifp);
4488 if (proto != NULL) {
4489 proto_media_ioctl ioctlp =
4490 (proto->proto_kpi == kProtoKPI_v1 ?
4491 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
4492 result = EOPNOTSUPP;
4493 if (ioctlp != NULL)
4494 result = ioctlp(ifp, proto_fam, ioctl_code,
4495 ioctl_arg);
4496 if_proto_free(proto);
4497
4498 /* Only update retval if no one has handled the ioctl */
4499 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4500 if (result == ENOTSUP)
4501 result = EOPNOTSUPP;
4502 retval = result;
4503 if (retval && retval != EOPNOTSUPP)
4504 goto cleanup;
4505 }
4506 }
4507 }
4508
4509 /* retval is either 0 or EOPNOTSUPP */
4510
4511 /*
4512 * Let the interface handle this ioctl.
4513 * If it returns EOPNOTSUPP, ignore it; we may have
4514 * already handled this in the protocol or family.
4515 */
4516 if (ifp->if_ioctl)
4517 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
4518
4519 /* Only update retval if no one has handled the ioctl */
4520 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4521 if (result == ENOTSUP)
4522 result = EOPNOTSUPP;
4523 retval = result;
4524 if (retval && retval != EOPNOTSUPP) {
4525 goto cleanup;
4526 }
4527 }
4528
4529 cleanup:
4530 if (retval == EJUSTRETURN)
4531 retval = 0;
4532
4533 ifnet_decr_iorefcnt(ifp);
4534
4535 return (retval);
4536 }
4537
4538 __private_extern__ errno_t
4539 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4540 {
4541 errno_t error = 0;
4542
4543
4544 if (ifp->if_set_bpf_tap) {
4545 /* Get an io reference on the interface if it is attached */
4546 if (!ifnet_is_attached(ifp, 1))
4547 return (ENXIO);
4548 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4549 ifnet_decr_iorefcnt(ifp);
4550 }
4551 return (error);
4552 }
4553
4554 errno_t
4555 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4556 struct sockaddr *ll_addr, size_t ll_len)
4557 {
4558 errno_t result = EOPNOTSUPP;
4559 struct if_proto *proto;
4560 const struct sockaddr *verify;
4561 proto_media_resolve_multi resolvep;
4562
4563 if (!ifnet_is_attached(ifp, 1))
4564 return (result);
4565
4566 bzero(ll_addr, ll_len);
4567
4568 /* Call the protocol first; callee holds a proto refcnt upon success */
4569 ifnet_lock_shared(ifp);
4570 proto = find_attached_proto(ifp, proto_addr->sa_family);
4571 ifnet_lock_done(ifp);
4572 if (proto != NULL) {
4573 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4574 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
4575 if (resolvep != NULL)
4576 result = resolvep(ifp, proto_addr,
4577 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
4578 if_proto_free(proto);
4579 }
4580
4581 /* Let the interface verify the multicast address */
4582 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4583 if (result == 0)
4584 verify = ll_addr;
4585 else
4586 verify = proto_addr;
4587 result = ifp->if_check_multi(ifp, verify);
4588 }
4589
4590 ifnet_decr_iorefcnt(ifp);
4591 return (result);
4592 }
4593
4594 __private_extern__ errno_t
4595 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4596 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4597 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4598 {
4599 struct if_proto *proto;
4600 errno_t result = 0;
4601
4602 /* callee holds a proto refcnt upon success */
4603 ifnet_lock_shared(ifp);
4604 proto = find_attached_proto(ifp, target_proto->sa_family);
4605 ifnet_lock_done(ifp);
4606 if (proto == NULL) {
4607 result = ENOTSUP;
4608 } else {
4609 proto_media_send_arp arpp;
4610 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4611 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4612 if (arpp == NULL) {
4613 result = ENOTSUP;
4614 } else {
4615 switch (arpop) {
4616 case ARPOP_REQUEST:
4617 arpstat.txrequests++;
4618 if (target_hw != NULL)
4619 arpstat.txurequests++;
4620 break;
4621 case ARPOP_REPLY:
4622 arpstat.txreplies++;
4623 break;
4624 }
4625 result = arpp(ifp, arpop, sender_hw, sender_proto,
4626 target_hw, target_proto);
4627 }
4628 if_proto_free(proto);
4629 }
4630
4631 return (result);
4632 }
4633
4634 struct net_thread_marks { };
4635 static const struct net_thread_marks net_thread_marks_base = { };
4636
4637 __private_extern__ const net_thread_marks_t net_thread_marks_none =
4638 &net_thread_marks_base;
4639
4640 __private_extern__ net_thread_marks_t
4641 net_thread_marks_push(u_int32_t push)
4642 {
4643 static const char *const base = (const void*)&net_thread_marks_base;
4644 u_int32_t pop = 0;
4645
4646 if (push != 0) {
4647 struct uthread *uth = get_bsdthread_info(current_thread());
4648
4649 pop = push & ~uth->uu_network_marks;
4650 if (pop != 0)
4651 uth->uu_network_marks |= pop;
4652 }
4653
4654 return ((net_thread_marks_t)&base[pop]);
4655 }
4656
4657 __private_extern__ net_thread_marks_t
4658 net_thread_unmarks_push(u_int32_t unpush)
4659 {
4660 static const char *const base = (const void*)&net_thread_marks_base;
4661 u_int32_t unpop = 0;
4662
4663 if (unpush != 0) {
4664 struct uthread *uth = get_bsdthread_info(current_thread());
4665
4666 unpop = unpush & uth->uu_network_marks;
4667 if (unpop != 0)
4668 uth->uu_network_marks &= ~unpop;
4669 }
4670
4671 return ((net_thread_marks_t)&base[unpop]);
4672 }
4673
4674 __private_extern__ void
4675 net_thread_marks_pop(net_thread_marks_t popx)
4676 {
4677 static const char *const base = (const void*)&net_thread_marks_base;
4678 const ptrdiff_t pop = (const char *)popx - (const char *)base;
4679
4680 if (pop != 0) {
4681 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4682 struct uthread *uth = get_bsdthread_info(current_thread());
4683
4684 VERIFY((pop & ones) == pop);
4685 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4686 uth->uu_network_marks &= ~pop;
4687 }
4688 }
4689
4690 __private_extern__ void
4691 net_thread_unmarks_pop(net_thread_marks_t unpopx)
4692 {
4693 static const char *const base = (const void*)&net_thread_marks_base;
4694 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
4695
4696 if (unpop != 0) {
4697 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4698 struct uthread *uth = get_bsdthread_info(current_thread());
4699
4700 VERIFY((unpop & ones) == unpop);
4701 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4702 uth->uu_network_marks |= unpop;
4703 }
4704 }
4705
4706 __private_extern__ u_int32_t
4707 net_thread_is_marked(u_int32_t check)
4708 {
4709 if (check != 0) {
4710 struct uthread *uth = get_bsdthread_info(current_thread());
4711 return (uth->uu_network_marks & check);
4712 }
4713 else
4714 return (0);
4715 }
4716
4717 __private_extern__ u_int32_t
4718 net_thread_is_unmarked(u_int32_t check)
4719 {
4720 if (check != 0) {
4721 struct uthread *uth = get_bsdthread_info(current_thread());
4722 return (~uth->uu_network_marks & check);
4723 }
4724 else
4725 return (0);
4726 }
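
/*
 * Balanced use of the mark primitives above: a caller pushes a mark
 * bit around a re-entrant code path, tests it with net_thread_is_marked()
 * to detect recursion on the same thread, and pops exactly what it
 * pushed.  EXAMPLE_MARK is a hypothetical bit used only for this
 * sketch; real callers pass the NET_THREAD_* mark bits.
 */
#define	EXAMPLE_MARK	0x1	/* hypothetical mark bit */

static void
example_marked_section(void)
{
	net_thread_marks_t marks;

	if (net_thread_is_marked(EXAMPLE_MARK) != 0) {
		/* already inside the marked section on this thread */
		return;
	}
	marks = net_thread_marks_push(EXAMPLE_MARK);
	/* ... work that must not re-enter itself ... */
	net_thread_marks_pop(marks);
}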
4727
4728 static __inline__ int
4729 _is_announcement(const struct sockaddr_in * sender_sin,
4730 const struct sockaddr_in * target_sin)
4731 {
4732 if (sender_sin == NULL || target_sin == NULL) {
4733 return (FALSE);
4734 }
4735 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4736 }
4737
4738 __private_extern__ errno_t
4739 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
4740 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
4741 const struct sockaddr *target_proto0, u_int32_t rtflags)
4742 {
4743 errno_t result = 0;
4744 const struct sockaddr_in * sender_sin;
4745 const struct sockaddr_in * target_sin;
4746 struct sockaddr_inarp target_proto_sinarp;
4747 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
4748
4749 if (target_proto == NULL || (sender_proto != NULL &&
4750 sender_proto->sa_family != target_proto->sa_family))
4751 return (EINVAL);
4752
4753 /*
4754 * If the target is a (default) router, provide that
4755 * information to the send_arp callback routine.
4756 */
4757 if (rtflags & RTF_ROUTER) {
4758 bcopy(target_proto, &target_proto_sinarp,
4759 sizeof (struct sockaddr_in));
4760 target_proto_sinarp.sin_other |= SIN_ROUTER;
4761 target_proto = (struct sockaddr *)&target_proto_sinarp;
4762 }
4763
4764 /*
4765 * If this is an ARP request and the target IP is IPv4LL,
4766 * send the request on all interfaces. The exception is
4767 * an announcement, which must only appear on the specific
4768 * interface.
4769 */
4770 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4771 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
4772 if (target_proto->sa_family == AF_INET &&
4773 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4774 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4775 !_is_announcement(target_sin, sender_sin)) {
4776 ifnet_t *ifp_list;
4777 u_int32_t count;
4778 u_int32_t ifp_on;
4779
4780 result = ENOTSUP;
4781
4782 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4783 for (ifp_on = 0; ifp_on < count; ifp_on++) {
4784 errno_t new_result;
4785 ifaddr_t source_hw = NULL;
4786 ifaddr_t source_ip = NULL;
4787 struct sockaddr_in source_ip_copy;
4788 struct ifnet *cur_ifp = ifp_list[ifp_on];
4789
4790 /*
4791 * Only ARP on interfaces marked for IPv4LL
4792 * ARPing. This may mean that we don't ARP on
4793 * the interface the subnet route points to.
4794 */
4795 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
4796 continue;
4797
4798 /* Find the source IP address */
4799 ifnet_lock_shared(cur_ifp);
4800 source_hw = cur_ifp->if_lladdr;
4801 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
4802 ifa_link) {
4803 IFA_LOCK(source_ip);
4804 if (source_ip->ifa_addr != NULL &&
4805 source_ip->ifa_addr->sa_family ==
4806 AF_INET) {
4807 /* Copy the source IP address */
4808 source_ip_copy =
4809 *(struct sockaddr_in *)
4810 (void *)source_ip->ifa_addr;
4811 IFA_UNLOCK(source_ip);
4812 break;
4813 }
4814 IFA_UNLOCK(source_ip);
4815 }
4816
4817 /* No IP Source, don't arp */
4818 if (source_ip == NULL) {
4819 ifnet_lock_done(cur_ifp);
4820 continue;
4821 }
4822
4823 IFA_ADDREF(source_hw);
4824 ifnet_lock_done(cur_ifp);
4825
4826 /* Send the ARP */
4827 new_result = dlil_send_arp_internal(cur_ifp,
4828 arpop, (struct sockaddr_dl *)(void *)
4829 source_hw->ifa_addr,
4830 (struct sockaddr *)&source_ip_copy, NULL,
4831 target_proto);
4832
4833 IFA_REMREF(source_hw);
4834 if (result == ENOTSUP) {
4835 result = new_result;
4836 }
4837 }
4838 ifnet_list_free(ifp_list);
4839 }
4840 } else {
4841 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
4842 sender_proto, target_hw, target_proto);
4843 }
4844
4845 return (result);
4846 }
4847
4848 /*
4849 * Caller must hold ifnet head lock.
4850 */
4851 static int
4852 ifnet_lookup(struct ifnet *ifp)
4853 {
4854 struct ifnet *_ifp;
4855
4856 lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
4857 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4858 if (_ifp == ifp)
4859 break;
4860 }
4861 return (_ifp != NULL);
4862 }
4863
4864 /*
4865 * Caller has to pass a non-zero refio argument to get an
4866 * IO reference count. This will prevent ifnet_detach from
4867 * being called when there are outstanding io reference counts.
4868 */
4869 int
4870 ifnet_is_attached(struct ifnet *ifp, int refio)
4871 {
4872 int ret;
4873
4874 lck_mtx_lock_spin(&ifp->if_ref_lock);
4875 if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
4876 IFRF_ATTACHED))) {
4877 if (refio > 0)
4878 ifp->if_refio++;
4879 }
4880 lck_mtx_unlock(&ifp->if_ref_lock);
4881
4882 return (ret);
4883 }
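
/*
 * The canonical use of the refio argument, repeated throughout this
 * file: take an IO reference before touching the interface and drop it
 * with ifnet_decr_iorefcnt() when done, so that ifnet_detach() cannot
 * complete underneath the caller.  The helper name is illustrative.
 */
static errno_t
example_with_io_ref(struct ifnet *ifp)
{
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	/* ... the interface may be used safely here ... */

	ifnet_decr_iorefcnt(ifp);
	return (0);
}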
4884
4885 /*
4886 * Caller must ensure the interface is attached; the assumption is that
4887 * there is at least an outstanding IO reference count held already.
4888 * Most callers would call ifnet_is_attached() instead.
4889 */
4890 void
4891 ifnet_incr_iorefcnt(struct ifnet *ifp)
4892 {
4893 lck_mtx_lock_spin(&ifp->if_ref_lock);
4894 VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
4895 IFRF_ATTACHED);
4896 VERIFY(ifp->if_refio > 0);
4897 ifp->if_refio++;
4898 lck_mtx_unlock(&ifp->if_ref_lock);
4899 }
4900
4901 void
4902 ifnet_decr_iorefcnt(struct ifnet *ifp)
4903 {
4904 lck_mtx_lock_spin(&ifp->if_ref_lock);
4905 VERIFY(ifp->if_refio > 0);
4906 VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
4907 ifp->if_refio--;
4908
4909 /*
4910 * If there are no more outstanding IO references, wake up the
4911 * ifnet_detach thread if the detaching flag is set.
4912 */
4913 if (ifp->if_refio == 0 &&
4914 (ifp->if_refflags & IFRF_DETACHING) != 0) {
4915 wakeup(&(ifp->if_refio));
4916 }
4917 lck_mtx_unlock(&ifp->if_ref_lock);
4918 }
4919
4920 static void
4921 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
4922 {
4923 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
4924 ctrace_t *tr;
4925 u_int32_t idx;
4926 u_int16_t *cnt;
4927
4928 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
4929 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
4930 /* NOTREACHED */
4931 }
4932
4933 if (refhold) {
4934 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
4935 tr = dl_if_dbg->dldbg_if_refhold;
4936 } else {
4937 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
4938 tr = dl_if_dbg->dldbg_if_refrele;
4939 }
4940
4941 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
4942 ctrace_record(&tr[idx]);
4943 }
4944
4945 errno_t
4946 dlil_if_ref(struct ifnet *ifp)
4947 {
4948 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4949
4950 if (dl_if == NULL)
4951 return (EINVAL);
4952
4953 lck_mtx_lock_spin(&dl_if->dl_if_lock);
4954 ++dl_if->dl_if_refcnt;
4955 if (dl_if->dl_if_refcnt == 0) {
4956 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
4957 /* NOTREACHED */
4958 }
4959 if (dl_if->dl_if_trace != NULL)
4960 (*dl_if->dl_if_trace)(dl_if, TRUE);
4961 lck_mtx_unlock(&dl_if->dl_if_lock);
4962
4963 return (0);
4964 }
4965
4966 errno_t
4967 dlil_if_free(struct ifnet *ifp)
4968 {
4969 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4970
4971 if (dl_if == NULL)
4972 return (EINVAL);
4973
4974 lck_mtx_lock_spin(&dl_if->dl_if_lock);
4975 if (dl_if->dl_if_refcnt == 0) {
4976 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
4977 /* NOTREACHED */
4978 }
4979 --dl_if->dl_if_refcnt;
4980 if (dl_if->dl_if_trace != NULL)
4981 (*dl_if->dl_if_trace)(dl_if, FALSE);
4982 lck_mtx_unlock(&dl_if->dl_if_lock);
4983
4984 return (0);
4985 }
4986
4987 static errno_t
4988 dlil_attach_protocol_internal(struct if_proto *proto,
4989 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
4990 {
4991 struct kev_dl_proto_data ev_pr_data;
4992 struct ifnet *ifp = proto->ifp;
4993 int retval = 0;
4994 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
4995 struct if_proto *prev_proto;
4996 struct if_proto *_proto;
4997
4998 /* callee holds a proto refcnt upon success */
4999 ifnet_lock_exclusive(ifp);
5000 _proto = find_attached_proto(ifp, proto->protocol_family);
5001 if (_proto != NULL) {
5002 ifnet_lock_done(ifp);
5003 if_proto_free(_proto);
5004 return (EEXIST);
5005 }
5006
5007 /*
5008 * Call the family module's add_proto routine so it can refine the
5009 * demux descriptors as it wishes.
5010 */
5011 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5012 demux_count);
5013 if (retval) {
5014 ifnet_lock_done(ifp);
5015 return (retval);
5016 }
5017
5018 /*
5019 * Insert the protocol in the hash
5020 */
5021 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
5022 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
5023 prev_proto = SLIST_NEXT(prev_proto, next_hash);
5024 if (prev_proto)
5025 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
5026 else
5027 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5028 proto, next_hash);
5029
5030 /* hold a proto refcnt for attach */
5031 if_proto_ref(proto);
5032
5033 /*
5034 * The reserved field carries the number of protocols still attached
5035 * (subject to change)
5036 */
5037 ev_pr_data.proto_family = proto->protocol_family;
5038 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
5039 ifnet_lock_done(ifp);
5040
5041 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5042 (struct net_event_data *)&ev_pr_data,
5043 sizeof (struct kev_dl_proto_data));
5044 return (retval);
5045 }
5046
5047 errno_t
5048 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
5049 const struct ifnet_attach_proto_param *proto_details)
5050 {
5051 int retval = 0;
5052 struct if_proto *ifproto = NULL;
5053
5054 ifnet_head_lock_shared();
5055 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5056 retval = EINVAL;
5057 goto end;
5058 }
5059 /* Check that the interface is in the global list */
5060 if (!ifnet_lookup(ifp)) {
5061 retval = ENXIO;
5062 goto end;
5063 }
5064
5065 ifproto = zalloc(dlif_proto_zone);
5066 if (ifproto == NULL) {
5067 retval = ENOMEM;
5068 goto end;
5069 }
5070 bzero(ifproto, dlif_proto_size);
5071
5072 /* refcnt held above during lookup */
5073 ifproto->ifp = ifp;
5074 ifproto->protocol_family = protocol;
5075 ifproto->proto_kpi = kProtoKPI_v1;
5076 ifproto->kpi.v1.input = proto_details->input;
5077 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5078 ifproto->kpi.v1.event = proto_details->event;
5079 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5080 ifproto->kpi.v1.detached = proto_details->detached;
5081 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5082 ifproto->kpi.v1.send_arp = proto_details->send_arp;
5083
5084 retval = dlil_attach_protocol_internal(ifproto,
5085 proto_details->demux_list, proto_details->demux_count);
5086
5087 if (dlil_verbose) {
5088 printf("%s: attached v1 protocol %d\n", if_name(ifp),
5089 protocol);
5090 }
5091
5092 end:
5093 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5094 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5095 if_name(ifp), protocol, retval);
5096 }
5097 ifnet_head_done();
5098 if (retval != 0 && ifproto != NULL)
5099 zfree(dlif_proto_zone, ifproto);
5100 return (retval);
5101 }
5102
5103 errno_t
5104 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
5105 const struct ifnet_attach_proto_param_v2 *proto_details)
5106 {
5107 int retval = 0;
5108 struct if_proto *ifproto = NULL;
5109
5110 ifnet_head_lock_shared();
5111 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5112 retval = EINVAL;
5113 goto end;
5114 }
5115 /* Check that the interface is in the global list */
5116 if (!ifnet_lookup(ifp)) {
5117 retval = ENXIO;
5118 goto end;
5119 }
5120
5121 ifproto = zalloc(dlif_proto_zone);
5122 if (ifproto == NULL) {
5123 retval = ENOMEM;
5124 goto end;
5125 }
5126 bzero(ifproto, sizeof(*ifproto));
5127
5128 /* refcnt held above during lookup */
5129 ifproto->ifp = ifp;
5130 ifproto->protocol_family = protocol;
5131 ifproto->proto_kpi = kProtoKPI_v2;
5132 ifproto->kpi.v2.input = proto_details->input;
5133 ifproto->kpi.v2.pre_output = proto_details->pre_output;
5134 ifproto->kpi.v2.event = proto_details->event;
5135 ifproto->kpi.v2.ioctl = proto_details->ioctl;
5136 ifproto->kpi.v2.detached = proto_details->detached;
5137 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
5138 ifproto->kpi.v2.send_arp = proto_details->send_arp;
5139
5140 retval = dlil_attach_protocol_internal(ifproto,
5141 proto_details->demux_list, proto_details->demux_count);
5142
5143 if (dlil_verbose) {
5144 printf("%s: attached v2 protocol %d\n", if_name(ifp),
5145 protocol);
5146 }
5147
5148 end:
5149 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5150 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
5151 if_name(ifp), protocol, retval);
5152 }
5153 ifnet_head_done();
5154 if (retval != 0 && ifproto != NULL)
5155 zfree(dlif_proto_zone, ifproto);
5156 return (retval);
5157 }
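
/*
 * A minimal sketch of attaching a v2 protocol, assuming an Ethernet-style
 * interface that demuxes on DLIL_DESC_ETYPE2.  The ethertype value and
 * the helper name are illustrative; example_proto_input_v2 is the input
 * sketch shown after dlil_ifproto_input() above, and the remaining
 * callbacks may be left NULL.
 */
static errno_t
example_attach_proto_v2(ifnet_t ifp, protocol_family_t family)
{
	struct ifnet_attach_proto_param_v2 proto;
	struct ifnet_demux_desc desc;
	u_int16_t etype = htons(0x88B5);	/* illustrative ethertype */

	bzero(&proto, sizeof (proto));
	bzero(&desc, sizeof (desc));

	desc.type = DLIL_DESC_ETYPE2;
	desc.data = &etype;
	desc.datalen = sizeof (etype);

	proto.demux_list = &desc;
	proto.demux_count = 1;
	proto.input = example_proto_input_v2;

	return (ifnet_attach_protocol_v2(ifp, family, &proto));
}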
5158
5159 errno_t
5160 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
5161 {
5162 struct if_proto *proto = NULL;
5163 int retval = 0;
5164
5165 if (ifp == NULL || proto_family == 0) {
5166 retval = EINVAL;
5167 goto end;
5168 }
5169
5170 ifnet_lock_exclusive(ifp);
5171 /* callee holds a proto refcnt upon success */
5172 proto = find_attached_proto(ifp, proto_family);
5173 if (proto == NULL) {
5174 retval = ENXIO;
5175 ifnet_lock_done(ifp);
5176 goto end;
5177 }
5178
5179 /* call family module del_proto */
5180 if (ifp->if_del_proto)
5181 ifp->if_del_proto(ifp, proto->protocol_family);
5182
5183 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
5184 proto, if_proto, next_hash);
5185
5186 if (proto->proto_kpi == kProtoKPI_v1) {
5187 proto->kpi.v1.input = ifproto_media_input_v1;
5188 proto->kpi.v1.pre_output = ifproto_media_preout;
5189 proto->kpi.v1.event = ifproto_media_event;
5190 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5191 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5192 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5193 } else {
5194 proto->kpi.v2.input = ifproto_media_input_v2;
5195 proto->kpi.v2.pre_output = ifproto_media_preout;
5196 proto->kpi.v2.event = ifproto_media_event;
5197 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5198 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5199 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5200 }
5201 proto->detached = 1;
5202 ifnet_lock_done(ifp);
5203
5204 if (dlil_verbose) {
5205 printf("%s: detached %s protocol %d\n", if_name(ifp),
5206 (proto->proto_kpi == kProtoKPI_v1) ?
5207 "v1" : "v2", proto_family);
5208 }
5209
5210 /* release proto refcnt held during protocol attach */
5211 if_proto_free(proto);
5212
5213 /*
5214 * Release proto refcnt held during lookup; the rest of
5215 * protocol detach steps will happen when the last proto
5216 * reference is released.
5217 */
5218 if_proto_free(proto);
5219
5220 end:
5221 return (retval);
5222 }
5223
5224
5225 static errno_t
5226 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
5227 struct mbuf *packet, char *header)
5228 {
5229 #pragma unused(ifp, protocol, packet, header)
5230 return (ENXIO);
5231 }
5232
5233 static errno_t
5234 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
5235 struct mbuf *packet)
5236 {
5237 #pragma unused(ifp, protocol, packet)
5238 return (ENXIO);
5239
5240 }
5241
5242 static errno_t
5243 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
5244 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
5245 char *link_layer_dest)
5246 {
5247 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
5248 return (ENXIO);
5249
5250 }
5251
5252 static void
5253 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
5254 const struct kev_msg *event)
5255 {
5256 #pragma unused(ifp, protocol, event)
5257 }
5258
5259 static errno_t
5260 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
5261 unsigned long command, void *argument)
5262 {
5263 #pragma unused(ifp, protocol, command, argument)
5264 return (ENXIO);
5265 }
5266
5267 static errno_t
5268 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
5269 struct sockaddr_dl *out_ll, size_t ll_len)
5270 {
5271 #pragma unused(ifp, proto_addr, out_ll, ll_len)
5272 return (ENXIO);
5273 }
5274
5275 static errno_t
5276 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
5277 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5278 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5279 {
5280 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
5281 return (ENXIO);
5282 }
5283
5284 extern int if_next_index(void);
5285 extern int tcp_ecn_outbound;
5286
5287 errno_t
5288 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
5289 {
5290 struct ifnet *tmp_if;
5291 struct ifaddr *ifa;
5292 struct if_data_internal if_data_saved;
5293 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5294 struct dlil_threading_info *dl_inp;
5295 u_int32_t sflags = 0;
5296 int err;
5297
5298 if (ifp == NULL)
5299 return (EINVAL);
5300
5301 /*
5302 * Serialize ifnet attach using dlil_ifnet_lock, in order to
5303 * prevent the interface from being configured while it is
5304 * embryonic, as ifnet_head_lock is dropped and reacquired
5305 * below prior to marking the ifnet with IFRF_ATTACHED.
5306 */
5307 dlil_if_lock();
5308 ifnet_head_lock_exclusive();
5309 /* Verify we aren't already on the list */
5310 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
5311 if (tmp_if == ifp) {
5312 ifnet_head_done();
5313 dlil_if_unlock();
5314 return (EEXIST);
5315 }
5316 }
5317
5318 lck_mtx_lock_spin(&ifp->if_ref_lock);
5319 if (ifp->if_refflags & IFRF_ATTACHED) {
5320 panic_plain("%s: flags mismatch (attached set) ifp=%p",
5321 __func__, ifp);
5322 /* NOTREACHED */
5323 }
5324 lck_mtx_unlock(&ifp->if_ref_lock);
5325
5326 ifnet_lock_exclusive(ifp);
5327
5328 /* Sanity check */
5329 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5330 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5331
5332 if (ll_addr != NULL) {
5333 if (ifp->if_addrlen == 0) {
5334 ifp->if_addrlen = ll_addr->sdl_alen;
5335 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
5336 ifnet_lock_done(ifp);
5337 ifnet_head_done();
5338 dlil_if_unlock();
5339 return (EINVAL);
5340 }
5341 }
5342
5343 /*
5344 * Allow interfaces without protocol families to attach
5345 * only if they have the necessary fields filled out.
5346 */
5347 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
5348 DLIL_PRINTF("%s: Attempt to attach interface without "
5349 "family module - %d\n", __func__, ifp->if_family);
5350 ifnet_lock_done(ifp);
5351 ifnet_head_done();
5352 dlil_if_unlock();
5353 return (ENODEV);
5354 }
5355
5356 /* Allocate protocol hash table */
5357 VERIFY(ifp->if_proto_hash == NULL);
5358 ifp->if_proto_hash = zalloc(dlif_phash_zone);
5359 if (ifp->if_proto_hash == NULL) {
5360 ifnet_lock_done(ifp);
5361 ifnet_head_done();
5362 dlil_if_unlock();
5363 return (ENOBUFS);
5364 }
5365 bzero(ifp->if_proto_hash, dlif_phash_size);
5366
5367 lck_mtx_lock_spin(&ifp->if_flt_lock);
5368 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5369 TAILQ_INIT(&ifp->if_flt_head);
5370 VERIFY(ifp->if_flt_busy == 0);
5371 VERIFY(ifp->if_flt_waiters == 0);
5372 lck_mtx_unlock(&ifp->if_flt_lock);
5373
5374 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
5375 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
5376 LIST_INIT(&ifp->if_multiaddrs);
5377 }
5378
5379 VERIFY(ifp->if_allhostsinm == NULL);
5380 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5381 TAILQ_INIT(&ifp->if_addrhead);
5382
5383 if (ifp->if_index == 0) {
5384 int idx = if_next_index();
5385
5386 if (idx == -1) {
5387 ifp->if_index = 0;
5388 ifnet_lock_done(ifp);
5389 ifnet_head_done();
5390 dlil_if_unlock();
5391 return (ENOBUFS);
5392 }
5393 ifp->if_index = idx;
5394 }
5395 /* There should not be anything occupying this slot */
5396 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5397
5398 /* allocate (if needed) and initialize a link address */
5399 VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
5400 ifa = dlil_alloc_lladdr(ifp, ll_addr);
5401 if (ifa == NULL) {
5402 ifnet_lock_done(ifp);
5403 ifnet_head_done();
5404 dlil_if_unlock();
5405 return (ENOBUFS);
5406 }
5407
5408 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
5409 ifnet_addrs[ifp->if_index - 1] = ifa;
5410
5411 /* make this address the first on the list */
5412 IFA_LOCK(ifa);
5413 /* hold a reference for ifnet_addrs[] */
5414 IFA_ADDREF_LOCKED(ifa);
5415 /* if_attach_link_ifa() holds a reference for ifa_link */
5416 if_attach_link_ifa(ifp, ifa);
5417 IFA_UNLOCK(ifa);
5418
5419 #if CONFIG_MACF_NET
5420 mac_ifnet_label_associate(ifp);
5421 #endif
5422
5423 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
5424 ifindex2ifnet[ifp->if_index] = ifp;
5425
5426 /* Hold a reference to the underlying dlil_ifnet */
5427 ifnet_reference(ifp);
5428
5429 /* Clear stats (save and restore other fields that we care about) */
5430 if_data_saved = ifp->if_data;
5431 bzero(&ifp->if_data, sizeof (ifp->if_data));
5432 ifp->if_data.ifi_type = if_data_saved.ifi_type;
5433 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
5434 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
5435 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
5436 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
5437 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
5438 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
5439 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
5440 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
5441 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
5442 ifnet_touch_lastchange(ifp);
5443
5444 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
5445 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
5446 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
5447
5448 /* By default, use SFB and enable flow advisory */
5449 sflags = PKTSCHEDF_QALG_SFB;
5450 if (if_flowadv)
5451 sflags |= PKTSCHEDF_QALG_FLOWCTL;
5452
5453 if (if_delaybased_queue)
5454 sflags |= PKTSCHEDF_QALG_DELAYBASED;
5455
5456 /* Initialize transmit queue(s) */
5457 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
5458 if (err != 0) {
5459 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
5460 "err=%d", __func__, ifp, err);
5461 /* NOTREACHED */
5462 }
5463
5464 /* Sanity checks on the input thread storage */
5465 dl_inp = &dl_if->dl_if_inpstorage;
5466 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
5467 VERIFY(dl_inp->input_waiting == 0);
5468 VERIFY(dl_inp->wtot == 0);
5469 VERIFY(dl_inp->ifp == NULL);
5470 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
5471 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
5472 VERIFY(!dl_inp->net_affinity);
5473 VERIFY(ifp->if_inp == NULL);
5474 VERIFY(dl_inp->input_thr == THREAD_NULL);
5475 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
5476 VERIFY(dl_inp->poll_thr == THREAD_NULL);
5477 VERIFY(dl_inp->tag == 0);
5478 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
5479 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
5480 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
5481 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
5482 #if IFNET_INPUT_SANITY_CHK
5483 VERIFY(dl_inp->input_mbuf_cnt == 0);
5484 #endif /* IFNET_INPUT_SANITY_CHK */
5485
5486 /*
5487 * A specific DLIL input thread is created per Ethernet/cellular
5488 * interface or for an interface which supports opportunistic
5489 * input polling. Pseudo interfaces or other types of interfaces
5490 * use the main input thread instead.
5491 */
5492 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
5493 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
5494 ifp->if_inp = dl_inp;
5495 err = dlil_create_input_thread(ifp, ifp->if_inp);
5496 if (err != 0) {
5497 panic_plain("%s: ifp=%p couldn't get an input thread; "
5498 "err=%d", __func__, ifp, err);
5499 /* NOTREACHED */
5500 }
5501 }
5502
5503 /*
5504 * If the driver supports the new transmit model, calculate flow hash
5505 * and create a workloop starter thread to invoke the if_start callback
5506 * where the packets may be dequeued and transmitted.
5507 */
5508 if (ifp->if_eflags & IFEF_TXSTART) {
5509 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
5510 VERIFY(ifp->if_flowhash != 0);
5511
5512 VERIFY(ifp->if_start != NULL);
5513 VERIFY(ifp->if_start_thread == THREAD_NULL);
5514
5515 ifnet_set_start_cycle(ifp, NULL);
5516 ifp->if_start_active = 0;
5517 ifp->if_start_req = 0;
5518 ifp->if_start_flags = 0;
5519 if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
5520 &ifp->if_start_thread)) != KERN_SUCCESS) {
5521 panic_plain("%s: ifp=%p couldn't get a start thread; "
5522 "err=%d", __func__, ifp, err);
5523 /* NOTREACHED */
5524 }
5525 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
5526 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5527 } else {
5528 ifp->if_flowhash = 0;
5529 }
5530
5531 /*
5532 * If the driver supports the new receive model, create a poller
5533 * thread to invoke if_input_poll callback where the packets may
5534 * be dequeued from the driver and processed for reception.
5535 */
5536 if (ifp->if_eflags & IFEF_RXPOLL) {
5537 VERIFY(ifp->if_input_poll != NULL);
5538 VERIFY(ifp->if_input_ctl != NULL);
5539 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5540
5541 ifnet_set_poll_cycle(ifp, NULL);
5542 ifp->if_poll_update = 0;
5543 ifp->if_poll_active = 0;
5544 ifp->if_poll_req = 0;
5545 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
5546 &ifp->if_poll_thread)) != KERN_SUCCESS) {
5547 panic_plain("%s: ifp=%p couldn't get a poll thread; "
5548 "err=%d", __func__, ifp, err);
5549 /* NOTREACHED */
5550 }
5551 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
5552 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5553 }
5554
5555 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5556 VERIFY(ifp->if_desc.ifd_len == 0);
5557 VERIFY(ifp->if_desc.ifd_desc != NULL);
5558
5559 /* Record attach PC stacktrace */
5560 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
5561
5562 ifp->if_updatemcasts = 0;
5563 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
5564 struct ifmultiaddr *ifma;
5565 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
5566 IFMA_LOCK(ifma);
5567 if (ifma->ifma_addr->sa_family == AF_LINK ||
5568 ifma->ifma_addr->sa_family == AF_UNSPEC)
5569 ifp->if_updatemcasts++;
5570 IFMA_UNLOCK(ifma);
5571 }
5572
5573 printf("%s: attached with %d suspended link-layer multicast "
5574 "membership(s)\n", if_name(ifp),
5575 ifp->if_updatemcasts);
5576 }
5577
5578 /* Clear logging parameters */
5579 bzero(&ifp->if_log, sizeof (ifp->if_log));
5580 ifp->if_fg_sendts = 0;
5581
5582 VERIFY(ifp->if_delegated.ifp == NULL);
5583 VERIFY(ifp->if_delegated.type == 0);
5584 VERIFY(ifp->if_delegated.family == 0);
5585 VERIFY(ifp->if_delegated.subfamily == 0);
5586 VERIFY(ifp->if_delegated.expensive == 0);
5587
5588 VERIFY(ifp->if_agentids == NULL);
5589 VERIFY(ifp->if_agentcount == 0);
5590
5591 /* Reset interface state */
5592 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5593 ifp->if_interface_state.valid_bitmask |=
5594 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
5595 ifp->if_interface_state.interface_availability =
5596 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
5597
5598 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
5599 if (ifp == lo_ifp) {
5600 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
5601 ifp->if_interface_state.valid_bitmask |=
5602 IF_INTERFACE_STATE_LQM_STATE_VALID;
5603 } else {
5604 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
5605 }
5606
5607 /*
5608 * Enable ECN capability on this interface depending on the
5609 * value of ECN global setting
5610 */
5611 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
5612 ifp->if_eflags |= IFEF_ECN_ENABLE;
5613 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5614 }
5615
5616 /*
5617 * Built-in Cyclops always on policy for WiFi infra
5618 */
5619 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
5620 errno_t error;
5621
5622 error = if_set_qosmarking_mode(ifp,
5623 IFRTYPE_QOSMARKING_FASTLANE);
5624 if (error != 0) {
5625 printf("%s if_set_qosmarking_mode(%s) error %d\n",
5626 __func__, ifp->if_xname, error);
5627 } else {
5628 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
5629 #if (DEVELOPMENT || DEBUG)
5630 printf("%s fastlane enabled on %s\n",
5631 __func__, ifp->if_xname);
5632 #endif /* (DEVELOPMENT || DEBUG) */
5633 }
5634 }
5635
5636 ifnet_lock_done(ifp);
5637 ifnet_head_done();
5638
5639 lck_mtx_lock(&ifp->if_cached_route_lock);
5640 /* Enable forwarding cached route */
5641 ifp->if_fwd_cacheok = 1;
5642 /* Clean up any existing cached routes */
5643 ROUTE_RELEASE(&ifp->if_fwd_route);
5644 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
5645 ROUTE_RELEASE(&ifp->if_src_route);
5646 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
5647 ROUTE_RELEASE(&ifp->if_src_route6);
5648 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5649 lck_mtx_unlock(&ifp->if_cached_route_lock);
5650
5651 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
5652
5653 /*
5654 * Allocate and attach IGMPv3/MLDv2 interface specific variables
5655 * and trees; do this before the ifnet is marked as attached.
5656 * The ifnet keeps the reference to the info structures even after
5657 * the ifnet is detached, since the network-layer records still
5658 * refer to the info structures even after that. This also
5659 * makes it possible for them to still function after the ifnet
5660 * is recycled or reattached.
5661 */
5662 #if INET
5663 if (IGMP_IFINFO(ifp) == NULL) {
5664 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
5665 VERIFY(IGMP_IFINFO(ifp) != NULL);
5666 } else {
5667 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
5668 igmp_domifreattach(IGMP_IFINFO(ifp));
5669 }
5670 #endif /* INET */
5671 #if INET6
5672 if (MLD_IFINFO(ifp) == NULL) {
5673 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
5674 VERIFY(MLD_IFINFO(ifp) != NULL);
5675 } else {
5676 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
5677 mld_domifreattach(MLD_IFINFO(ifp));
5678 }
5679 #endif /* INET6 */
5680
5681 VERIFY(ifp->if_data_threshold == 0);
5682
5683 /*
5684 * Finally, mark this ifnet as attached.
5685 */
5686 lck_mtx_lock(rnh_lock);
5687 ifnet_lock_exclusive(ifp);
5688 lck_mtx_lock_spin(&ifp->if_ref_lock);
5689 ifp->if_refflags = IFRF_ATTACHED;
5690 lck_mtx_unlock(&ifp->if_ref_lock);
5691 if (net_rtref) {
5692 /* boot-args override; enable idle notification */
5693 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
5694 IFRF_IDLE_NOTIFY);
5695 } else {
5696 /* apply previous request(s) to set the idle flags, if any */
5697 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
5698 ifp->if_idle_new_flags_mask);
5699
5700 }
5701 ifnet_lock_done(ifp);
5702 lck_mtx_unlock(rnh_lock);
5703 dlil_if_unlock();
5704
5705 #if PF
5706 /*
5707 * Attach packet filter to this interface, if enabled.
5708 */
5709 pf_ifnet_hook(ifp, 1);
5710 #endif /* PF */
5711
5712 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
5713
5714 if (dlil_verbose) {
5715 printf("%s: attached%s\n", if_name(ifp),
5716 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
5717 }
5718
5719 return (0);
5720 }
5721
5722 /*
5723 * Prepare the storage for the first/permanent link address, which must
5724 * have the same lifetime as the ifnet itself. Although the link
5725 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
5726 * its location in memory must never change as it may still be referred
5727 * to by some parts of the system afterwards (unfortunate implementation
5728 * artifacts inherited from BSD.)
5729 *
5730 * Caller must hold ifnet lock as writer.
5731 */
5732 static struct ifaddr *
5733 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
5734 {
5735 struct ifaddr *ifa, *oifa;
5736 struct sockaddr_dl *asdl, *msdl;
5737 char workbuf[IFNAMSIZ*2];
5738 int namelen, masklen, socksize;
5739 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5740
5741 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
5742 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
5743
5744 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
5745 if_name(ifp));
5746 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
5747 + ((namelen > 0) ? namelen : 0);
5748 socksize = masklen + ifp->if_addrlen;
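/*
 * Round the sockaddr_dl size up to the next multiple of
 * sizeof (u_int32_t); the temporary ROUNDUP() macro below is
 * defined and undefined around this one calculation.
 */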
5749 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
5750 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
5751 socksize = sizeof(struct sockaddr_dl);
5752 socksize = ROUNDUP(socksize);
5753 #undef ROUNDUP
5754
5755 ifa = ifp->if_lladdr;
5756 if (socksize > DLIL_SDLMAXLEN ||
5757 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
5758 /*
5759 * Rare, but in the event that the link address requires
5760 * more storage space than DLIL_SDLMAXLEN, allocate the
5761 * largest possible storage for the address and mask, such
5762 * that we can reuse the same space when if_addrlen grows.
5763 * This same space will be used when if_addrlen shrinks.
5764 */
5765 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
5766 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
5767 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
5768 if (ifa == NULL)
5769 return (NULL);
5770 ifa_lock_init(ifa);
5771 /* Don't set IFD_ALLOC, as this is permanent */
5772 ifa->ifa_debug = IFD_LINK;
5773 }
5774 IFA_LOCK(ifa);
5775 /* address and mask sockaddr_dl locations */
5776 asdl = (struct sockaddr_dl *)(ifa + 1);
5777 bzero(asdl, SOCK_MAXADDRLEN);
5778 msdl = (struct sockaddr_dl *)(void *)
5779 ((char *)asdl + SOCK_MAXADDRLEN);
5780 bzero(msdl, SOCK_MAXADDRLEN);
5781 } else {
5782 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
5783 /*
5784 * Use the storage areas for address and mask within the
5785 * dlil_ifnet structure. This is the most common case.
5786 */
5787 if (ifa == NULL) {
5788 ifa = &dl_if->dl_if_lladdr.ifa;
5789 ifa_lock_init(ifa);
5790 /* Don't set IFD_ALLOC, as this is permanent */
5791 ifa->ifa_debug = IFD_LINK;
5792 }
5793 IFA_LOCK(ifa);
5794 /* address and mask sockaddr_dl locations */
5795 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
5796 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
5797 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
5798 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
5799 }
5800
5801 /* hold a permanent reference for the ifnet itself */
5802 IFA_ADDREF_LOCKED(ifa);
5803 oifa = ifp->if_lladdr;
5804 ifp->if_lladdr = ifa;
5805
5806 VERIFY(ifa->ifa_debug == IFD_LINK);
5807 ifa->ifa_ifp = ifp;
5808 ifa->ifa_rtrequest = link_rtrequest;
5809 ifa->ifa_addr = (struct sockaddr *)asdl;
5810 asdl->sdl_len = socksize;
5811 asdl->sdl_family = AF_LINK;
5812 if (namelen > 0) {
5813 bcopy(workbuf, asdl->sdl_data, min(namelen,
5814 sizeof (asdl->sdl_data)));
5815 asdl->sdl_nlen = namelen;
5816 } else {
5817 asdl->sdl_nlen = 0;
5818 }
5819 asdl->sdl_index = ifp->if_index;
5820 asdl->sdl_type = ifp->if_type;
5821 if (ll_addr != NULL) {
5822 asdl->sdl_alen = ll_addr->sdl_alen;
5823 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
5824 } else {
5825 asdl->sdl_alen = 0;
5826 }
5827 ifa->ifa_netmask = (struct sockaddr *)msdl;
5828 msdl->sdl_len = masklen;
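/*
 * Fill the mask with all-ones over the embedded interface name,
 * the conventional BSD way of marking the name bytes as the
 * significant portion of the link-level address.
 */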
5829 while (namelen > 0)
5830 msdl->sdl_data[--namelen] = 0xff;
5831 IFA_UNLOCK(ifa);
5832
5833 if (oifa != NULL)
5834 IFA_REMREF(oifa);
5835
5836 return (ifa);
5837 }
5838
5839 static void
5840 if_purgeaddrs(struct ifnet *ifp)
5841 {
5842 #if INET
5843 in_purgeaddrs(ifp);
5844 #endif /* INET */
5845 #if INET6
5846 in6_purgeaddrs(ifp);
5847 #endif /* INET6 */
5848 }
5849
5850 errno_t
5851 ifnet_detach(ifnet_t ifp)
5852 {
5853 struct ifnet *delegated_ifp;
5854 struct nd_ifinfo *ndi = NULL;
5855
5856 if (ifp == NULL)
5857 return (EINVAL);
5858
5859 ndi = ND_IFINFO(ifp);
5860 if (NULL != ndi)
5861 ndi->cga_initialized = FALSE;
5862
5863 lck_mtx_lock(rnh_lock);
5864 ifnet_head_lock_exclusive();
5865 ifnet_lock_exclusive(ifp);
5866
5867 /*
5868 * Check to see if this interface has previously triggered
5869 * aggressive protocol draining; if so, decrement the global
5870 * refcnt and clear PR_AGGDRAIN on the route domain if
5871 * there are no more of such an interface around.
5872 */
5873 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
5874
5875 lck_mtx_lock_spin(&ifp->if_ref_lock);
5876 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
5877 lck_mtx_unlock(&ifp->if_ref_lock);
5878 ifnet_lock_done(ifp);
5879 ifnet_head_done();
5880 lck_mtx_unlock(rnh_lock);
5881 return (EINVAL);
5882 } else if (ifp->if_refflags & IFRF_DETACHING) {
5883 /* Interface has already been detached */
5884 lck_mtx_unlock(&ifp->if_ref_lock);
5885 ifnet_lock_done(ifp);
5886 ifnet_head_done();
5887 lck_mtx_unlock(rnh_lock);
5888 return (ENXIO);
5889 }
5890 /* Indicate this interface is being detached */
5891 ifp->if_refflags &= ~IFRF_ATTACHED;
5892 ifp->if_refflags |= IFRF_DETACHING;
5893 lck_mtx_unlock(&ifp->if_ref_lock);
5894
5895 if (dlil_verbose)
5896 printf("%s: detaching\n", if_name(ifp));
5897
5898 /* Reset ECN enable/disable flags */
5899 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5900 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
5901
5902 /*
5903 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
5904 * no longer be visible during lookups from this point.
5905 */
5906 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
5907 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
5908 ifp->if_link.tqe_next = NULL;
5909 ifp->if_link.tqe_prev = NULL;
5910 if (ifp->if_ordered_link.tqe_next != NULL ||
5911 ifp->if_ordered_link.tqe_prev != NULL) {
5912 ifnet_remove_from_ordered_list(ifp);
5913 }
5914 ifindex2ifnet[ifp->if_index] = NULL;
5915
5916 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
5917 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
5918
5919 /* Record detach PC stacktrace */
5920 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
5921
5922 /* Clear logging parameters */
5923 bzero(&ifp->if_log, sizeof (ifp->if_log));
5924
5925 /* Clear delegated interface info (reference released below) */
5926 delegated_ifp = ifp->if_delegated.ifp;
5927 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
5928
5929 /* Reset interface state */
5930 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5931
5932 ifnet_lock_done(ifp);
5933 ifnet_head_done();
5934 lck_mtx_unlock(rnh_lock);
5935
5936 /* Release reference held on the delegated interface */
5937 if (delegated_ifp != NULL)
5938 ifnet_release(delegated_ifp);
5939
5940 /* Reset Link Quality Metric (unless loopback [lo0]) */
5941 if (ifp != lo_ifp)
5942 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
5943
5944 /* Reset TCP local statistics */
5945 if (ifp->if_tcp_stat != NULL)
5946 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
5947
5948 /* Reset UDP local statistics */
5949 if (ifp->if_udp_stat != NULL)
5950 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
5951
5952 /* Reset ifnet IPv4 stats */
5953 if (ifp->if_ipv4_stat != NULL)
5954 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
5955
5956 /* Reset ifnet IPv6 stats */
5957 if (ifp->if_ipv6_stat != NULL)
5958 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
5959
5960 /* Release memory held for interface link status report */
5961 if (ifp->if_link_status != NULL) {
5962 FREE(ifp->if_link_status, M_TEMP);
5963 ifp->if_link_status = NULL;
5964 }
5965
5966 /* Clear agent IDs */
5967 if (ifp->if_agentids != NULL) {
5968 FREE(ifp->if_agentids, M_NETAGENT);
5969 ifp->if_agentids = NULL;
5970 }
5971 ifp->if_agentcount = 0;
5972
5973
5974 /* Let BPF know we're detaching */
5975 bpfdetach(ifp);
5976
5977 /* Mark the interface as DOWN */
5978 if_down(ifp);
5979
5980 /* Disable forwarding cached route */
5981 lck_mtx_lock(&ifp->if_cached_route_lock);
5982 ifp->if_fwd_cacheok = 0;
5983 lck_mtx_unlock(&ifp->if_cached_route_lock);
5984
5985 ifp->if_data_threshold = 0;
5986 /*
5987 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
5988 * references to the info structures and leave them attached to
5989 * this ifnet.
5990 */
5991 #if INET
5992 igmp_domifdetach(ifp);
5993 #endif /* INET */
5994 #if INET6
5995 mld_domifdetach(ifp);
5996 #endif /* INET6 */
5997
5998 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
5999
6000 /* Let worker thread take care of the rest, to avoid reentrancy */
6001 dlil_if_lock();
6002 ifnet_detaching_enqueue(ifp);
6003 dlil_if_unlock();
6004
6005 return (0);
6006 }
6007
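/*
 * Enqueue/dequeue helpers for the detaching list; both expect the
 * caller to hold dlil_if_lock, as the assertions below verify.
 */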
6008 static void
6009 ifnet_detaching_enqueue(struct ifnet *ifp)
6010 {
6011 dlil_if_lock_assert();
6012
6013 ++ifnet_detaching_cnt;
6014 VERIFY(ifnet_detaching_cnt != 0);
6015 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6016 wakeup((caddr_t)&ifnet_delayed_run);
6017 }
6018
6019 static struct ifnet *
6020 ifnet_detaching_dequeue(void)
6021 {
6022 struct ifnet *ifp;
6023
6024 dlil_if_lock_assert();
6025
6026 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6027 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6028 if (ifp != NULL) {
6029 VERIFY(ifnet_detaching_cnt != 0);
6030 --ifnet_detaching_cnt;
6031 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6032 ifp->if_detaching_link.tqe_next = NULL;
6033 ifp->if_detaching_link.tqe_prev = NULL;
6034 }
6035 return (ifp);
6036 }
6037
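/*
 * Detacher worker body, written continuation-style: msleep0() is
 * given this same function as its continuation, so when the wait is
 * satisfied the thread resumes here (on a fresh stack) rather than
 * returning, and the loop drains ifnet_detaching_head by calling
 * ifnet_detach_final() on each dequeued ifnet.
 */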
6038 static int
6039 ifnet_detacher_thread_cont(int err)
6040 {
6041 #pragma unused(err)
6042 struct ifnet *ifp;
6043
6044 for (;;) {
6045 dlil_if_lock_assert();
6046 while (ifnet_detaching_cnt == 0) {
6047 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6048 (PZERO - 1), "ifnet_detacher_cont", 0,
6049 ifnet_detacher_thread_cont);
6050 /* NOTREACHED */
6051 }
6052
6053 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
6054
6055 /* Take care of detaching ifnet */
6056 ifp = ifnet_detaching_dequeue();
6057 if (ifp != NULL) {
6058 dlil_if_unlock();
6059 ifnet_detach_final(ifp);
6060 dlil_if_lock();
6061 }
6062 }
6063 }
6064
6065 static void
6066 ifnet_detacher_thread_func(void *v, wait_result_t w)
6067 {
6068 #pragma unused(v, w)
6069 dlil_if_lock();
6070 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6071 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
6072 /*
6073 * msleep0() shouldn't have returned as PCATCH was not set;
6074 * therefore assert in this case.
6075 */
6076 dlil_if_unlock();
6077 VERIFY(0);
6078 }
6079
6080 static void
6081 ifnet_detach_final(struct ifnet *ifp)
6082 {
6083 struct ifnet_filter *filter, *filter_next;
6084 struct ifnet_filter_head fhead;
6085 struct dlil_threading_info *inp;
6086 struct ifaddr *ifa;
6087 ifnet_detached_func if_free;
6088 int i;
6089
6090 lck_mtx_lock(&ifp->if_ref_lock);
6091 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6092 panic("%s: flags mismatch (detaching not set) ifp=%p",
6093 __func__, ifp);
6094 /* NOTREACHED */
6095 }
6096
6097 /*
6098 * Wait until the existing IO references get released
6099 * before we proceed with ifnet_detach. This is not a
6100 * common case, so block without using a continuation.
6101 */
6102 while (ifp->if_refio > 0) {
6103 printf("%s: Waiting for IO references on %s interface "
6104 "to be released\n", __func__, if_name(ifp));
6105 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
6106 (PZERO - 1), "ifnet_ioref_wait", NULL);
6107 }
6108 lck_mtx_unlock(&ifp->if_ref_lock);
6109
6110 /* Drain and destroy send queue */
6111 ifclassq_teardown(ifp);
6112
6113 /* Detach interface filters */
6114 lck_mtx_lock(&ifp->if_flt_lock);
6115 if_flt_monitor_enter(ifp);
6116
6117 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6118 fhead = ifp->if_flt_head;
6119 TAILQ_INIT(&ifp->if_flt_head);
6120
6121 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
6122 filter_next = TAILQ_NEXT(filter, filt_next);
6123 lck_mtx_unlock(&ifp->if_flt_lock);
6124
6125 dlil_detach_filter_internal(filter, 1);
6126 lck_mtx_lock(&ifp->if_flt_lock);
6127 }
6128 if_flt_monitor_leave(ifp);
6129 lck_mtx_unlock(&ifp->if_flt_lock);
6130
6131 /* Tell upper layers to drop their network addresses */
6132 if_purgeaddrs(ifp);
6133
6134 ifnet_lock_exclusive(ifp);
6135
6136 /* Unplumb all protocols */
6137 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6138 struct if_proto *proto;
6139
6140 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6141 while (proto != NULL) {
6142 protocol_family_t family = proto->protocol_family;
6143 ifnet_lock_done(ifp);
6144 proto_unplumb(family, ifp);
6145 ifnet_lock_exclusive(ifp);
6146 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6147 }
6148 /* There should not be any protocols left */
6149 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
6150 }
6151 zfree(dlif_phash_zone, ifp->if_proto_hash);
6152 ifp->if_proto_hash = NULL;
6153
6154 /* Detach (permanent) link address from if_addrhead */
6155 ifa = TAILQ_FIRST(&ifp->if_addrhead);
6156 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
6157 IFA_LOCK(ifa);
6158 if_detach_link_ifa(ifp, ifa);
6159 IFA_UNLOCK(ifa);
6160
6161 /* Remove (permanent) link address from ifnet_addrs[] */
6162 IFA_REMREF(ifa);
6163 ifnet_addrs[ifp->if_index - 1] = NULL;
6164
6165 /* This interface should not be on {ifnet_head,detaching} */
6166 VERIFY(ifp->if_link.tqe_next == NULL);
6167 VERIFY(ifp->if_link.tqe_prev == NULL);
6168 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6169 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6170 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
6171 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6172
6173 /* The slot should have been emptied */
6174 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6175
6176 /* There should not be any addresses left */
6177 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6178
6179 /*
6180 * Signal the starter thread to terminate itself.
6181 */
6182 if (ifp->if_start_thread != THREAD_NULL) {
6183 lck_mtx_lock_spin(&ifp->if_start_lock);
6184 ifp->if_start_flags = 0;
6185 ifp->if_start_thread = THREAD_NULL;
6186 wakeup_one((caddr_t)&ifp->if_start_thread);
6187 lck_mtx_unlock(&ifp->if_start_lock);
6188 }
6189
6190 /*
6191 * Signal the poller thread to terminate itself.
6192 */
6193 if (ifp->if_poll_thread != THREAD_NULL) {
6194 lck_mtx_lock_spin(&ifp->if_poll_lock);
6195 ifp->if_poll_thread = THREAD_NULL;
6196 wakeup_one((caddr_t)&ifp->if_poll_thread);
6197 lck_mtx_unlock(&ifp->if_poll_lock);
6198 }
6199
6200 /*
6201 * If thread affinity was set for the workloop thread, we will need
6202 * to tear down the affinity and release the extra reference count
6203 * taken at attach time. Does not apply to lo0 or other interfaces
6204 * without dedicated input threads.
6205 */
6206 if ((inp = ifp->if_inp) != NULL) {
6207 VERIFY(inp != dlil_main_input_thread);
6208
6209 if (inp->net_affinity) {
6210 struct thread *tp, *wtp, *ptp;
6211
6212 lck_mtx_lock_spin(&inp->input_lck);
6213 wtp = inp->wloop_thr;
6214 inp->wloop_thr = THREAD_NULL;
6215 ptp = inp->poll_thr;
6216 inp->poll_thr = THREAD_NULL;
6217 tp = inp->input_thr; /* don't nullify now */
6218 inp->tag = 0;
6219 inp->net_affinity = FALSE;
6220 lck_mtx_unlock(&inp->input_lck);
6221
6222 /* Tear down poll thread affinity */
6223 if (ptp != NULL) {
6224 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
6225 (void) dlil_affinity_set(ptp,
6226 THREAD_AFFINITY_TAG_NULL);
6227 thread_deallocate(ptp);
6228 }
6229
6230 /* Tear down workloop thread affinity */
6231 if (wtp != NULL) {
6232 (void) dlil_affinity_set(wtp,
6233 THREAD_AFFINITY_TAG_NULL);
6234 thread_deallocate(wtp);
6235 }
6236
6237 /* Tear down DLIL input thread affinity */
6238 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
6239 thread_deallocate(tp);
6240 }
6241
6242 /* disassociate ifp DLIL input thread */
6243 ifp->if_inp = NULL;
6244
6245 lck_mtx_lock_spin(&inp->input_lck);
6246 inp->input_waiting |= DLIL_INPUT_TERMINATE;
6247 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
6248 wakeup_one((caddr_t)&inp->input_waiting);
6249 }
6250 lck_mtx_unlock(&inp->input_lck);
6251 }
6252
6253 /* The driver might unload, so point these to ourselves */
6254 if_free = ifp->if_free;
6255 ifp->if_output_handler = ifp_if_output;
6256 ifp->if_output = ifp_if_output;
6257 ifp->if_pre_enqueue = ifp_if_output;
6258 ifp->if_start = ifp_if_start;
6259 ifp->if_output_ctl = ifp_if_ctl;
6260 ifp->if_input_handler = ifp_if_input;
6261 ifp->if_input_poll = ifp_if_input_poll;
6262 ifp->if_input_ctl = ifp_if_ctl;
6263 ifp->if_ioctl = ifp_if_ioctl;
6264 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
6265 ifp->if_free = ifp_if_free;
6266 ifp->if_demux = ifp_if_demux;
6267 ifp->if_event = ifp_if_event;
6268 ifp->if_framer_legacy = ifp_if_framer;
6269 ifp->if_framer = ifp_if_framer_extended;
6270 ifp->if_add_proto = ifp_if_add_proto;
6271 ifp->if_del_proto = ifp_if_del_proto;
6272 ifp->if_check_multi = ifp_if_check_multi;
6273
6274 /* wipe out interface description */
6275 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6276 ifp->if_desc.ifd_len = 0;
6277 VERIFY(ifp->if_desc.ifd_desc != NULL);
6278 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
6279
6280 /* there shouldn't be any delegation by now */
6281 VERIFY(ifp->if_delegated.ifp == NULL);
6282 VERIFY(ifp->if_delegated.type == 0);
6283 VERIFY(ifp->if_delegated.family == 0);
6284 VERIFY(ifp->if_delegated.subfamily == 0);
6285 VERIFY(ifp->if_delegated.expensive == 0);
6286
6287 /* QoS marking gets cleared */
6288 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
6289 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
6290
6291 ifnet_lock_done(ifp);
6292
6293 #if PF
6294 /*
6295 * Detach this interface from packet filter, if enabled.
6296 */
6297 pf_ifnet_hook(ifp, 0);
6298 #endif /* PF */
6299
6300 /* Filter list should be empty */
6301 lck_mtx_lock_spin(&ifp->if_flt_lock);
6302 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6303 VERIFY(ifp->if_flt_busy == 0);
6304 VERIFY(ifp->if_flt_waiters == 0);
6305 lck_mtx_unlock(&ifp->if_flt_lock);
6306
6307 /* Last chance to drain send queue */
6308 if_qflush(ifp, 0);
6309
6310 /* Last chance to cleanup any cached route */
6311 lck_mtx_lock(&ifp->if_cached_route_lock);
6312 VERIFY(!ifp->if_fwd_cacheok);
6313 ROUTE_RELEASE(&ifp->if_fwd_route);
6314 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
6315 ROUTE_RELEASE(&ifp->if_src_route);
6316 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
6317 ROUTE_RELEASE(&ifp->if_src_route6);
6318 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
6319 lck_mtx_unlock(&ifp->if_cached_route_lock);
6320
6321 VERIFY(ifp->if_data_threshold == 0);
6322
6323 ifnet_llreach_ifdetach(ifp);
6324
6325 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
6326
6327 /*
6328 * Finally, mark this ifnet as detached.
6329 */
6330 lck_mtx_lock_spin(&ifp->if_ref_lock);
6331 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6332 panic("%s: flags mismatch (detaching not set) ifp=%p",
6333 __func__, ifp);
6334 /* NOTREACHED */
6335 }
6336 ifp->if_refflags &= ~IFRF_DETACHING;
6337 lck_mtx_unlock(&ifp->if_ref_lock);
6338 if (if_free != NULL)
6339 if_free(ifp);
6340
6341 if (dlil_verbose)
6342 printf("%s: detached\n", if_name(ifp));
6343
6344 /* Release reference held during ifnet attach */
6345 ifnet_release(ifp);
6346 }
6347
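/*
 * No-op/teardown stubs installed by ifnet_detach_final() above.
 * Once the driver callbacks are replaced with these, any straggling
 * callers simply free their packets or get a benign error instead
 * of calling into a module that may already have unloaded.
 */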
6348 static errno_t
6349 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6350 {
6351 #pragma unused(ifp)
6352 m_freem_list(m);
6353 return (0);
6354 }
6355
6356 static void
6357 ifp_if_start(struct ifnet *ifp)
6358 {
6359 ifnet_purge(ifp);
6360 }
6361
6362 static errno_t
6363 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6364 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6365 boolean_t poll, struct thread *tp)
6366 {
6367 #pragma unused(ifp, m_tail, s, poll, tp)
6368 m_freem_list(m_head);
6369 return (ENXIO);
6370 }
6371
6372 static void
6373 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6374 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6375 {
6376 #pragma unused(ifp, flags, max_cnt)
6377 if (m_head != NULL)
6378 *m_head = NULL;
6379 if (m_tail != NULL)
6380 *m_tail = NULL;
6381 if (cnt != NULL)
6382 *cnt = 0;
6383 if (len != NULL)
6384 *len = 0;
6385 }
6386
6387 static errno_t
6388 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6389 {
6390 #pragma unused(ifp, cmd, arglen, arg)
6391 return (EOPNOTSUPP);
6392 }
6393
6394 static errno_t
6395 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6396 {
6397 #pragma unused(ifp, fh, pf)
6398 m_freem(m);
6399 return (EJUSTRETURN);
6400 }
6401
6402 static errno_t
6403 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6404 const struct ifnet_demux_desc *da, u_int32_t dc)
6405 {
6406 #pragma unused(ifp, pf, da, dc)
6407 return (EINVAL);
6408 }
6409
6410 static errno_t
6411 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6412 {
6413 #pragma unused(ifp, pf)
6414 return (EINVAL);
6415 }
6416
6417 static errno_t
6418 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6419 {
6420 #pragma unused(ifp, sa)
6421 return (EOPNOTSUPP);
6422 }
6423
6424 static errno_t
6425 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6426 const struct sockaddr *sa, const char *ll, const char *t)
6427 {
6428 #pragma unused(ifp, m, sa, ll, t)
6429 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
6430 }
6431
6432 static errno_t
6433 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6434 const struct sockaddr *sa, const char *ll, const char *t,
6435 u_int32_t *pre, u_int32_t *post)
6436 {
6437 #pragma unused(ifp, sa, ll, t)
6438 m_freem(*m);
6439 *m = NULL;
6440
6441 if (pre != NULL)
6442 *pre = 0;
6443 if (post != NULL)
6444 *post = 0;
6445
6446 return (EJUSTRETURN);
6447 }
6448
6449 errno_t
6450 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6451 {
6452 #pragma unused(ifp, cmd, arg)
6453 return (EOPNOTSUPP);
6454 }
6455
6456 static errno_t
6457 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6458 {
6459 #pragma unused(ifp, tm, f)
6460 /* XXX not sure what to do here */
6461 return (0);
6462 }
6463
6464 static void
6465 ifp_if_free(struct ifnet *ifp)
6466 {
6467 #pragma unused(ifp)
6468 }
6469
6470 static void
6471 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
6472 {
6473 #pragma unused(ifp, e)
6474 }
6475
6476 __private_extern__
6477 int dlil_if_acquire(u_int32_t family, const void *uniqueid,
6478 size_t uniqueid_len, struct ifnet **ifp)
6479 {
6480 struct ifnet *ifp1 = NULL;
6481 struct dlil_ifnet *dlifp1 = NULL;
6482 void *buf, *base, **pbuf;
6483 int ret = 0;
6484
6485 dlil_if_lock();
6486 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
6487 ifp1 = (struct ifnet *)dlifp1;
6488
6489 if (ifp1->if_family != family)
6490 continue;
6491
6492 lck_mtx_lock(&dlifp1->dl_if_lock);
6493 /* same uniqueid and same len or no unique id specified */
6494 if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
6495 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
6496 /* check for matching interface in use */
6497 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6498 if (uniqueid_len) {
6499 ret = EBUSY;
6500 lck_mtx_unlock(&dlifp1->dl_if_lock);
6501 goto end;
6502 }
6503 } else {
6504 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
6505 lck_mtx_unlock(&dlifp1->dl_if_lock);
6506 *ifp = ifp1;
6507 goto end;
6508 }
6509 }
6510 lck_mtx_unlock(&dlifp1->dl_if_lock);
6511 }
6512
6513 /* no interface found, allocate a new one */
6514 buf = zalloc(dlif_zone);
6515 if (buf == NULL) {
6516 ret = ENOMEM;
6517 goto end;
6518 }
6519 bzero(buf, dlif_bufsize);
6520
6521 /* Get the 64-bit aligned base address for this object */
6522 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
6523 sizeof (u_int64_t));
6524 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
6525
6526 /*
6527 * Wind back a pointer size from the aligned base and
6528 * save the original address so we can free it later.
6529 */
6530 pbuf = (void **)((intptr_t)base - sizeof (void *));
6531 *pbuf = buf;
6532 dlifp1 = base;
6533
6534 if (uniqueid_len) {
6535 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
6536 M_NKE, M_WAITOK);
6537 if (dlifp1->dl_if_uniqueid == NULL) {
6538 zfree(dlif_zone, dlifp1);
6539 ret = ENOMEM;
6540 goto end;
6541 }
6542 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
6543 dlifp1->dl_if_uniqueid_len = uniqueid_len;
6544 }
6545
6546 ifp1 = (struct ifnet *)dlifp1;
6547 dlifp1->dl_if_flags = DLIF_INUSE;
6548 if (ifnet_debug) {
6549 dlifp1->dl_if_flags |= DLIF_DEBUG;
6550 dlifp1->dl_if_trace = dlil_if_trace;
6551 }
6552 ifp1->if_name = dlifp1->dl_if_namestorage;
6553 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
6554
6555 /* initialize interface description */
6556 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
6557 ifp1->if_desc.ifd_len = 0;
6558 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
6559
6560 #if CONFIG_MACF_NET
6561 mac_ifnet_label_init(ifp1);
6562 #endif
6563
6564 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
6565 DLIL_PRINTF("%s: failed to allocate if local stats, "
6566 "error: %d\n", __func__, ret);
6567 /* This probably shouldn't be fatal */
6568 ret = 0;
6569 }
6570
6571 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
6572 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
6573 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
6574 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6575 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
6576 ifnet_lock_attr);
6577 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
6578 #if INET
6579 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
6580 ifnet_lock_attr);
6581 ifp1->if_inetdata = NULL;
6582 #endif
6583 #if INET6
6584 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
6585 ifnet_lock_attr);
6586 ifp1->if_inet6data = NULL;
6587 #endif
6588 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
6589 ifnet_lock_attr);
6590 ifp1->if_link_status = NULL;
6591
6592 /* for send data paths */
6593 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
6594 ifnet_lock_attr);
6595 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
6596 ifnet_lock_attr);
6597 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
6598 ifnet_lock_attr);
6599
6600 /* for receive data paths */
6601 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
6602 ifnet_lock_attr);
6603
6604 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
6605
6606 *ifp = ifp1;
6607
6608 end:
6609 dlil_if_unlock();
6610
6611 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
6612 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
6613
6614 return (ret);
6615 }
6616
6617 __private_extern__ void
6618 dlil_if_release(ifnet_t ifp)
6619 {
6620 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
6621
6622 ifnet_lock_exclusive(ifp);
6623 lck_mtx_lock(&dlifp->dl_if_lock);
6624 dlifp->dl_if_flags &= ~DLIF_INUSE;
6625 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6626 ifp->if_name = dlifp->dl_if_namestorage;
6627 /* Reset external name (name + unit) */
6628 ifp->if_xname = dlifp->dl_if_xnamestorage;
6629 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
6630 "%s?", ifp->if_name);
6631 lck_mtx_unlock(&dlifp->dl_if_lock);
6632 #if CONFIG_MACF_NET
6633 /*
6634 * We can either recycle the MAC label here or in dlil_if_acquire().
6635 * It seems logical to do it here but this means that anything that
6636 * still has a handle on ifp will now see it as unlabeled.
6637 * Since the interface is "dead" that may be OK. Revisit later.
6638 */
6639 mac_ifnet_label_recycle(ifp);
6640 #endif
6641 ifnet_lock_done(ifp);
6642 }
6643
6644 __private_extern__ void
6645 dlil_if_lock(void)
6646 {
6647 lck_mtx_lock(&dlil_ifnet_lock);
6648 }
6649
6650 __private_extern__ void
6651 dlil_if_unlock(void)
6652 {
6653 lck_mtx_unlock(&dlil_ifnet_lock);
6654 }
6655
6656 __private_extern__ void
6657 dlil_if_lock_assert(void)
6658 {
6659 lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
6660 }
6661
6662 __private_extern__ void
6663 dlil_proto_unplumb_all(struct ifnet *ifp)
6664 {
6665 /*
6666 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6667 * each bucket contains exactly one entry; PF_VLAN does not need an
6668 * explicit unplumb.
6669 *
6670 * if_proto_hash[3] is for other protocols; we expect anything
6671 * in this bucket to respond to the DETACHING event (which would
6672 * have happened by now) and do the unplumb then.
6673 */
6674 (void) proto_unplumb(PF_INET, ifp);
6675 #if INET6
6676 (void) proto_unplumb(PF_INET6, ifp);
6677 #endif /* INET6 */
6678 }
6679
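/*
 * Cached source route helpers: copy the per-ifnet cached route in or
 * out under if_cached_route_lock.  The copyin variants drop the route
 * instead of caching it once forwarding-cache use has been disabled
 * (if_fwd_cacheok cleared during detach).
 */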
6680 static void
6681 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
6682 {
6683 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6684 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6685
6686 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
6687
6688 lck_mtx_unlock(&ifp->if_cached_route_lock);
6689 }
6690
6691 static void
6692 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
6693 {
6694 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6695 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6696
6697 if (ifp->if_fwd_cacheok) {
6698 route_copyin(src, &ifp->if_src_route, sizeof (*src));
6699 } else {
6700 ROUTE_RELEASE(src);
6701 }
6702 lck_mtx_unlock(&ifp->if_cached_route_lock);
6703 }
6704
6705 #if INET6
6706 static void
6707 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
6708 {
6709 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6710 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6711
6712 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
6713 sizeof (*dst));
6714
6715 lck_mtx_unlock(&ifp->if_cached_route_lock);
6716 }
6717
6718 static void
6719 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
6720 {
6721 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6722 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6723
6724 if (ifp->if_fwd_cacheok) {
6725 route_copyin((struct route *)src,
6726 (struct route *)&ifp->if_src_route6, sizeof (*src));
6727 } else {
6728 ROUTE_RELEASE(src);
6729 }
6730 lck_mtx_unlock(&ifp->if_cached_route_lock);
6731 }
6732 #endif /* INET6 */
6733
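/*
 * Return a referenced rtentry for src_ip, reusing the interface's
 * cached source route when it is still usable and matches the
 * address; otherwise perform a scoped lookup and store the result
 * back into the cache (the extra RT_ADDREF keeps the reference that
 * is handed to the caller, since the copyin consumes one).
 */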
6734 struct rtentry *
6735 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6736 {
6737 struct route src_rt;
6738 struct sockaddr_in *dst;
6739
6740 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6741
6742 ifp_src_route_copyout(ifp, &src_rt);
6743
6744 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
6745 ROUTE_RELEASE(&src_rt);
6746 if (dst->sin_family != AF_INET) {
6747 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6748 dst->sin_len = sizeof (src_rt.ro_dst);
6749 dst->sin_family = AF_INET;
6750 }
6751 dst->sin_addr = src_ip;
6752
6753 if (src_rt.ro_rt == NULL) {
6754 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
6755 0, 0, ifp->if_index);
6756
6757 if (src_rt.ro_rt != NULL) {
6758 /* retain a ref, copyin consumes one */
6759 struct rtentry *rte = src_rt.ro_rt;
6760 RT_ADDREF(rte);
6761 ifp_src_route_copyin(ifp, &src_rt);
6762 src_rt.ro_rt = rte;
6763 }
6764 }
6765 }
6766
6767 return (src_rt.ro_rt);
6768 }
6769
6770 #if INET6
6771 struct rtentry *
6772 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
6773 {
6774 struct route_in6 src_rt;
6775
6776 ifp_src_route6_copyout(ifp, &src_rt);
6777
6778 if (ROUTE_UNUSABLE(&src_rt) ||
6779 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
6780 ROUTE_RELEASE(&src_rt);
6781 if (src_rt.ro_dst.sin6_family != AF_INET6) {
6782 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6783 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
6784 src_rt.ro_dst.sin6_family = AF_INET6;
6785 }
6786 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
6787 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
6788 sizeof (src_rt.ro_dst.sin6_addr));
6789
6790 if (src_rt.ro_rt == NULL) {
6791 src_rt.ro_rt = rtalloc1_scoped(
6792 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
6793 ifp->if_index);
6794
6795 if (src_rt.ro_rt != NULL) {
6796 /* retain a ref, copyin consumes one */
6797 struct rtentry *rte = src_rt.ro_rt;
6798 RT_ADDREF(rte);
6799 ifp_src_route6_copyin(ifp, &src_rt);
6800 src_rt.ro_rt = rte;
6801 }
6802 }
6803 }
6804
6805 return (src_rt.ro_rt);
6806 }
6807 #endif /* INET6 */
6808
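/*
 * Update the interface's cached link quality metric and, when the
 * value actually changes, post KEV_DL_LINK_QUALITY_METRIC_CHANGED.
 * 'locked' says whether the caller already holds the ifnet lock
 * exclusively; the lock is dropped around the event post and
 * reacquired for a locked caller before returning.
 */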
6809 void
6810 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
6811 {
6812 struct kev_dl_link_quality_metric_data ev_lqm_data;
6813
6814 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
6815
6816 /* Normalize to edge */
6817 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD)
6818 lqm = IFNET_LQM_THRESH_BAD;
6819 else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
6820 lqm = IFNET_LQM_THRESH_POOR;
6821 else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
6822 lqm = IFNET_LQM_THRESH_GOOD;
6823
6824 /*
6825 * Take the lock if needed
6826 */
6827 if (!locked)
6828 ifnet_lock_exclusive(ifp);
6829
6830 if (lqm == ifp->if_interface_state.lqm_state &&
6831 (ifp->if_interface_state.valid_bitmask &
6832 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
6833 /*
6834 * Release the lock if it was not held by the caller
6835 */
6836 if (!locked)
6837 ifnet_lock_done(ifp);
6838 return; /* nothing to update */
6839 }
6840 ifp->if_interface_state.valid_bitmask |=
6841 IF_INTERFACE_STATE_LQM_STATE_VALID;
6842 ifp->if_interface_state.lqm_state = lqm;
6843
6844 /*
6845 * Don't want to hold the lock when issuing kernel events
6846 */
6847 ifnet_lock_done(ifp);
6848
6849 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
6850 ev_lqm_data.link_quality_metric = lqm;
6851
6852 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
6853 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
6854
6855 /*
6856 * Reacquire the lock for the caller
6857 */
6858 if (locked)
6859 ifnet_lock_exclusive(ifp);
6860 }
6861
6862 static void
6863 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
6864 {
6865 struct kev_dl_rrc_state kev;
6866
6867 if (rrc_state == ifp->if_interface_state.rrc_state &&
6868 (ifp->if_interface_state.valid_bitmask &
6869 IF_INTERFACE_STATE_RRC_STATE_VALID))
6870 return;
6871
6872 ifp->if_interface_state.valid_bitmask |=
6873 IF_INTERFACE_STATE_RRC_STATE_VALID;
6874
6875 ifp->if_interface_state.rrc_state = rrc_state;
6876
6877 /*
6878 * Don't want to hold the lock when issuing kernel events
6879 */
6880 ifnet_lock_done(ifp);
6881
6882 bzero(&kev, sizeof(struct kev_dl_rrc_state));
6883 kev.rrc_state = rrc_state;
6884
6885 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
6886 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
6887
6888 ifnet_lock_exclusive(ifp);
6889 }
6890
6891 errno_t
6892 if_state_update(struct ifnet *ifp,
6893 struct if_interface_state *if_interface_state)
6894 {
6895 u_short if_index_available = 0;
6896
6897 ifnet_lock_exclusive(ifp);
6898
6899 if ((ifp->if_type != IFT_CELLULAR) &&
6900 (if_interface_state->valid_bitmask &
6901 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
6902 ifnet_lock_done(ifp);
6903 return (ENOTSUP);
6904 }
6905 if ((if_interface_state->valid_bitmask &
6906 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
6907 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
6908 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
6909 ifnet_lock_done(ifp);
6910 return (EINVAL);
6911 }
6912 if ((if_interface_state->valid_bitmask &
6913 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
6914 if_interface_state->rrc_state !=
6915 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
6916 if_interface_state->rrc_state !=
6917 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
6918 ifnet_lock_done(ifp);
6919 return (EINVAL);
6920 }
6921
6922 if (if_interface_state->valid_bitmask &
6923 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6924 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
6925 }
6926 if (if_interface_state->valid_bitmask &
6927 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6928 if_rrc_state_update(ifp, if_interface_state->rrc_state);
6929 }
6930 if (if_interface_state->valid_bitmask &
6931 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6932 ifp->if_interface_state.valid_bitmask |=
6933 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6934 ifp->if_interface_state.interface_availability =
6935 if_interface_state->interface_availability;
6936
6937 if (ifp->if_interface_state.interface_availability ==
6938 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
6939 if_index_available = ifp->if_index;
6940 }
6941 }
6942 ifnet_lock_done(ifp);
6943
6944 /*
6945 * Check if the TCP connections going over this interface should be
6946 * forced to send probe packets instead of waiting for TCP timers
6947 * to fire. This will be done when there is an explicit
6948 * notification that the interface became available.
6949 */
6950 if (if_index_available > 0)
6951 tcp_interface_send_probe(if_index_available);
6952
6953 return (0);
6954 }
6955
6956 void
6957 if_get_state(struct ifnet *ifp,
6958 struct if_interface_state *if_interface_state)
6959 {
6960 ifnet_lock_shared(ifp);
6961
6962 if_interface_state->valid_bitmask = 0;
6963
6964 if (ifp->if_interface_state.valid_bitmask &
6965 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6966 if_interface_state->valid_bitmask |=
6967 IF_INTERFACE_STATE_RRC_STATE_VALID;
6968 if_interface_state->rrc_state =
6969 ifp->if_interface_state.rrc_state;
6970 }
6971 if (ifp->if_interface_state.valid_bitmask &
6972 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6973 if_interface_state->valid_bitmask |=
6974 IF_INTERFACE_STATE_LQM_STATE_VALID;
6975 if_interface_state->lqm_state =
6976 ifp->if_interface_state.lqm_state;
6977 }
6978 if (ifp->if_interface_state.valid_bitmask &
6979 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6980 if_interface_state->valid_bitmask |=
6981 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6982 if_interface_state->interface_availability =
6983 ifp->if_interface_state.interface_availability;
6984 }
6985
6986 ifnet_lock_done(ifp);
6987 }
6988
6989 errno_t
6990 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6991 {
6992 ifnet_lock_exclusive(ifp);
6993 if (conn_probe > 1) {
6994 ifnet_lock_done(ifp);
6995 return (EINVAL);
6996 }
6997 if (conn_probe == 0)
6998 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
6999 else
7000 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
7001 ifnet_lock_done(ifp);
7002
7003 tcp_probe_connectivity(ifp, conn_probe);
7004 return (0);
7005 }
7006
7007 /* for uuid.c */
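/*
 * Scan the attached interfaces for the first Ethernet-type link-layer
 * address and copy its bytes into 'node'; returns -1 if none exists.
 */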
7008 int
7009 uuid_get_ethernet(u_int8_t *node)
7010 {
7011 struct ifnet *ifp;
7012 struct sockaddr_dl *sdl;
7013
7014 ifnet_head_lock_shared();
7015 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
7016 ifnet_lock_shared(ifp);
7017 IFA_LOCK_SPIN(ifp->if_lladdr);
7018 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
7019 if (sdl->sdl_type == IFT_ETHER) {
7020 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
7021 IFA_UNLOCK(ifp->if_lladdr);
7022 ifnet_lock_done(ifp);
7023 ifnet_head_done();
7024 return (0);
7025 }
7026 IFA_UNLOCK(ifp->if_lladdr);
7027 ifnet_lock_done(ifp);
7028 }
7029 ifnet_head_done();
7030
7031 return (-1);
7032 }
7033
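/*
 * The rx-poll and queue-length sysctl handlers below share one
 * pattern: read the current value, let sysctl_handle_int()/quad()
 * apply any new value supplied from userland, clamp it against the
 * corresponding minimum (or the opposing watermark), then publish it.
 */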
7034 static int
7035 sysctl_rxpoll SYSCTL_HANDLER_ARGS
7036 {
7037 #pragma unused(arg1, arg2)
7038 uint32_t i;
7039 int err;
7040
7041 i = if_rxpoll;
7042
7043 err = sysctl_handle_int(oidp, &i, 0, req);
7044 if (err != 0 || req->newptr == USER_ADDR_NULL)
7045 return (err);
7046
7047 if (net_rxpoll == 0)
7048 return (ENXIO);
7049
7050 if_rxpoll = i;
7051 return (err);
7052 }
7053
7054 static int
7055 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
7056 {
7057 #pragma unused(arg1, arg2)
7058 uint64_t q;
7059 int err;
7060
7061 q = if_rxpoll_mode_holdtime;
7062
7063 err = sysctl_handle_quad(oidp, &q, 0, req);
7064 if (err != 0 || req->newptr == USER_ADDR_NULL)
7065 return (err);
7066
7067 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
7068 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
7069
7070 if_rxpoll_mode_holdtime = q;
7071
7072 return (err);
7073 }
7074
7075 static int
7076 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
7077 {
7078 #pragma unused(arg1, arg2)
7079 uint64_t q;
7080 int err;
7081
7082 q = if_rxpoll_sample_holdtime;
7083
7084 err = sysctl_handle_quad(oidp, &q, 0, req);
7085 if (err != 0 || req->newptr == USER_ADDR_NULL)
7086 return (err);
7087
7088 if (q < IF_RXPOLL_SAMPLETIME_MIN)
7089 q = IF_RXPOLL_SAMPLETIME_MIN;
7090
7091 if_rxpoll_sample_holdtime = q;
7092
7093 return (err);
7094 }
7095
7096 static int
7097 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
7098 {
7099 #pragma unused(arg1, arg2)
7100 uint64_t q;
7101 int err;
7102
7103 q = if_rxpoll_interval_time;
7104
7105 err = sysctl_handle_quad(oidp, &q, 0, req);
7106 if (err != 0 || req->newptr == USER_ADDR_NULL)
7107 return (err);
7108
7109 if (q < IF_RXPOLL_INTERVALTIME_MIN)
7110 q = IF_RXPOLL_INTERVALTIME_MIN;
7111
7112 if_rxpoll_interval_time = q;
7113
7114 return (err);
7115 }
7116
7117 static int
7118 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
7119 {
7120 #pragma unused(arg1, arg2)
7121 uint32_t i;
7122 int err;
7123
7124 i = if_rxpoll_wlowat;
7125
7126 err = sysctl_handle_int(oidp, &i, 0, req);
7127 if (err != 0 || req->newptr == USER_ADDR_NULL)
7128 return (err);
7129
7130 if (i == 0 || i >= if_rxpoll_whiwat)
7131 return (EINVAL);
7132
7133 if_rxpoll_wlowat = i;
7134 return (err);
7135 }
7136
7137 static int
7138 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
7139 {
7140 #pragma unused(arg1, arg2)
7141 uint32_t i;
7142 int err;
7143
7144 i = if_rxpoll_whiwat;
7145
7146 err = sysctl_handle_int(oidp, &i, 0, req);
7147 if (err != 0 || req->newptr == USER_ADDR_NULL)
7148 return (err);
7149
7150 if (i <= if_rxpoll_wlowat)
7151 return (EINVAL);
7152
7153 if_rxpoll_whiwat = i;
7154 return (err);
7155 }
7156
7157 static int
7158 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
7159 {
7160 #pragma unused(arg1, arg2)
7161 int i, err;
7162
7163 i = if_sndq_maxlen;
7164
7165 err = sysctl_handle_int(oidp, &i, 0, req);
7166 if (err != 0 || req->newptr == USER_ADDR_NULL)
7167 return (err);
7168
7169 if (i < IF_SNDQ_MINLEN)
7170 i = IF_SNDQ_MINLEN;
7171
7172 if_sndq_maxlen = i;
7173 return (err);
7174 }
7175
7176 static int
7177 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
7178 {
7179 #pragma unused(arg1, arg2)
7180 int i, err;
7181
7182 i = if_rcvq_maxlen;
7183
7184 err = sysctl_handle_int(oidp, &i, 0, req);
7185 if (err != 0 || req->newptr == USER_ADDR_NULL)
7186 return (err);
7187
7188 if (i < IF_RCVQ_MINLEN)
7189 i = IF_RCVQ_MINLEN;
7190
7191 if_rcvq_maxlen = i;
7192 return (err);
7193 }
7194
7195 void
7196 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
7197 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
7198 {
7199 struct kev_dl_node_presence kev;
7200 struct sockaddr_dl *sdl;
7201 struct sockaddr_in6 *sin6;
7202
7203 VERIFY(ifp);
7204 VERIFY(sa);
7205 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7206
7207 bzero(&kev, sizeof (kev));
7208 sin6 = &kev.sin6_node_address;
7209 sdl = &kev.sdl_node_address;
7210 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7211 kev.rssi = rssi;
7212 kev.link_quality_metric = lqm;
7213 kev.node_proximity_metric = npm;
7214 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
7215
7216 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
7217 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
7218 &kev.link_data, sizeof (kev));
7219 }
7220
7221 void
7222 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
7223 {
7224 struct kev_dl_node_absence kev;
7225 struct sockaddr_in6 *sin6;
7226 struct sockaddr_dl *sdl;
7227
7228 VERIFY(ifp);
7229 VERIFY(sa);
7230 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7231
7232 bzero(&kev, sizeof (kev));
7233 sin6 = &kev.sin6_node_address;
7234 sdl = &kev.sdl_node_address;
7235 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7236
7237 nd6_alt_node_absent(ifp, sin6);
7238 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
7239 &kev.link_data, sizeof (kev));
7240 }
7241
7242 const void *
7243 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
7244 kauth_cred_t *credp)
7245 {
7246 const u_int8_t *bytes;
7247 size_t size;
7248
7249 bytes = CONST_LLADDR(sdl);
7250 size = sdl->sdl_alen;
7251
7252 #if CONFIG_MACF
7253 if (dlil_lladdr_ckreq) {
7254 switch (sdl->sdl_type) {
7255 case IFT_ETHER:
7256 case IFT_IEEE1394:
7257 break;
7258 default:
7259 credp = NULL;
7260 break;
7261 };
7262
7263 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
7264 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
7265 [0] = 2
7266 };
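/*
 * Anonymized placeholder handed back when the MAC check for
 * "net.link.addr" fails: all zeroes except for 0x02 in the first
 * octet, which reads as a locally administered address.
 */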
7267
7268 switch (sdl->sdl_type) {
7269 case IFT_ETHER:
7270 VERIFY(size == ETHER_ADDR_LEN);
7271 bytes = unspec;
7272 break;
7273 case IFT_IEEE1394:
7274 VERIFY(size == FIREWIRE_EUI64_LEN);
7275 bytes = unspec;
7276 break;
7277 default:
7278 VERIFY(FALSE);
7279 break;
7280 };
7281 }
7282 }
7283 #else
7284 #pragma unused(credp)
7285 #endif
7286
7287 if (sizep != NULL) *sizep = size;
7288 return (bytes);
7289 }
7290
7291 void
7292 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
7293 u_int8_t info[DLIL_MODARGLEN])
7294 {
7295 struct kev_dl_issues kev;
7296 struct timeval tv;
7297
7298 VERIFY(ifp != NULL);
7299 VERIFY(modid != NULL);
7300 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
7301 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
7302
7303 bzero(&kev, sizeof (kev));
7304
7305 microtime(&tv);
7306 kev.timestamp = tv.tv_sec;
7307 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
7308 if (info != NULL)
7309 bcopy(info, &kev.info, DLIL_MODARGLEN);
7310
7311 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
7312 &kev.link_data, sizeof (kev));
7313 }
7314
7315 errno_t
7316 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7317 struct proc *p)
7318 {
7319 u_int32_t level = IFNET_THROTTLE_OFF;
7320 errno_t result = 0;
7321
7322 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
7323
7324 if (cmd == SIOCSIFOPPORTUNISTIC) {
7325 /*
7326 * XXX: Use priv_check_cred() instead of root check?
7327 */
7328 if ((result = proc_suser(p)) != 0)
7329 return (result);
7330
7331 if (ifr->ifr_opportunistic.ifo_flags ==
7332 IFRIFOF_BLOCK_OPPORTUNISTIC)
7333 level = IFNET_THROTTLE_OPPORTUNISTIC;
7334 else if (ifr->ifr_opportunistic.ifo_flags == 0)
7335 level = IFNET_THROTTLE_OFF;
7336 else
7337 result = EINVAL;
7338
7339 if (result == 0)
7340 result = ifnet_set_throttle(ifp, level);
7341 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
7342 ifr->ifr_opportunistic.ifo_flags = 0;
7343 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
7344 ifr->ifr_opportunistic.ifo_flags |=
7345 IFRIFOF_BLOCK_OPPORTUNISTIC;
7346 }
7347 }
7348
7349 /*
7350 * Return the count of current opportunistic connections
7351 * over the interface.
7352 */
7353 if (result == 0) {
7354 uint32_t flags = 0;
7355 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
7356 INPCB_OPPORTUNISTIC_SETCMD : 0;
7357 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
7358 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
7359 ifr->ifr_opportunistic.ifo_inuse =
7360 udp_count_opportunistic(ifp->if_index, flags) +
7361 tcp_count_opportunistic(ifp->if_index, flags);
7362 }
7363
7364 if (result == EALREADY)
7365 result = 0;
7366
7367 return (result);
7368 }
7369
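/*
 * Illustrative sketch (not part of this file): driving the
 * SIOC[SG]IFOPPORTUNISTIC path above from a privileged userspace tool.
 * It assumes the private ioctl codes and the ifr_opportunistic member
 * of struct ifreq (used by the handler above) are visible to the build.
 */
#if 0	/* example only; never compiled into dlil.c */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int
set_opportunistic_block(const char *ifname, int block)
{
	struct ifreq ifr;
	int s, err;

	memset(&ifr, 0, sizeof (ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
	/* 0 clears throttling; IFRIFOF_BLOCK_OPPORTUNISTIC enables it */
	ifr.ifr_opportunistic.ifo_flags =
	    block ? IFRIFOF_BLOCK_OPPORTUNISTIC : 0;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return (-1);
	err = ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr);	/* requires root */
	if (err == 0) {
		/* the handler reports the opportunistic connection count */
		printf("%s: %u opportunistic connections\n", ifname,
		    ifr.ifr_opportunistic.ifo_inuse);
	}
	close(s);
	return (err);
}
#endif
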
7370 int
7371 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
7372 {
7373 struct ifclassq *ifq;
7374 int err = 0;
7375
7376 if (!(ifp->if_eflags & IFEF_TXSTART))
7377 return (ENXIO);
7378
7379 *level = IFNET_THROTTLE_OFF;
7380
7381 ifq = &ifp->if_snd;
7382 IFCQ_LOCK(ifq);
7383 /* Throttling works only for IFCQ, not ALTQ instances */
7384 if (IFCQ_IS_ENABLED(ifq))
7385 IFCQ_GET_THROTTLE(ifq, *level, err);
7386 IFCQ_UNLOCK(ifq);
7387
7388 return (err);
7389 }
7390
7391 int
7392 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7393 {
7394 struct ifclassq *ifq;
7395 int err = 0;
7396
7397 if (!(ifp->if_eflags & IFEF_TXSTART))
7398 return (ENXIO);
7399
7400 ifq = &ifp->if_snd;
7401
7402 switch (level) {
7403 case IFNET_THROTTLE_OFF:
7404 case IFNET_THROTTLE_OPPORTUNISTIC:
7405 #if PF_ALTQ
7406 /* Throttling works only for IFCQ, not ALTQ instances */
7407 if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
7408 return (ENXIO);
7409 #endif /* PF_ALTQ */
7410 break;
7411 default:
7412 return (EINVAL);
7413 }
7414
7415 IFCQ_LOCK(ifq);
7416 if (IFCQ_IS_ENABLED(ifq))
7417 IFCQ_SET_THROTTLE(ifq, level, err);
7418 IFCQ_UNLOCK(ifq);
7419
7420 if (err == 0) {
7421 printf("%s: throttling level set to %d\n", if_name(ifp),
7422 level);
7423 if (level == IFNET_THROTTLE_OFF)
7424 ifnet_start(ifp);
7425 }
7426
7427 return (err);
7428 }
7429
7430 errno_t
7431 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7432 struct proc *p)
7433 {
7434 #pragma unused(p)
7435 errno_t result = 0;
7436 uint32_t flags;
7437 int level, category, subcategory;
7438
7439 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7440
7441 if (cmd == SIOCSIFLOG) {
7442 if ((result = priv_check_cred(kauth_cred_get(),
7443 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
7444 return (result);
7445
7446 level = ifr->ifr_log.ifl_level;
7447 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
7448 result = EINVAL;
7449
7450 flags = ifr->ifr_log.ifl_flags;
7451 if ((flags &= IFNET_LOGF_MASK) == 0)
7452 result = EINVAL;
7453
7454 category = ifr->ifr_log.ifl_category;
7455 subcategory = ifr->ifr_log.ifl_subcategory;
7456
7457 if (result == 0)
7458 result = ifnet_set_log(ifp, level, flags,
7459 category, subcategory);
7460 } else {
7461 result = ifnet_get_log(ifp, &level, &flags, &category,
7462 &subcategory);
7463 if (result == 0) {
7464 ifr->ifr_log.ifl_level = level;
7465 ifr->ifr_log.ifl_flags = flags;
7466 ifr->ifr_log.ifl_category = category;
7467 ifr->ifr_log.ifl_subcategory = subcategory;
7468 }
7469 }
7470
7471 return (result);
7472 }
7473
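/*
 * Illustrative sketch (not part of this file): reading the per-interface
 * logging parameters through the SIOCGIFLOG path handled above.  It
 * assumes the private ifr_log layout and the SIOC[SG]IFLOG ioctls are
 * visible to the build; setting them via SIOCSIFLOG additionally
 * requires the PRIV_NET_INTERFACE_CONTROL privilege checked above.
 */
#if 0	/* example only; never compiled into dlil.c */
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void
dump_if_log(const char *ifname)
{
	struct ifreq ifr;
	int s;

	memset(&ifr, 0, sizeof (ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof (ifr.ifr_name));

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return;
	if (ioctl(s, SIOCGIFLOG, &ifr) == 0) {
		printf("%s: level=%d flags=0x%x category=%d subcategory=%d\n",
		    ifname, ifr.ifr_log.ifl_level, ifr.ifr_log.ifl_flags,
		    ifr.ifr_log.ifl_category, ifr.ifr_log.ifl_subcategory);
	}
	close(s);
}
#endif
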
7474 int
7475 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
7476 int32_t category, int32_t subcategory)
7477 {
7478 int err = 0;
7479
7480 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
7481 VERIFY(flags & IFNET_LOGF_MASK);
7482
7483 /*
7484 * The logging level applies to all facilities; make sure to
7485 * update them all with the most current level.
7486 */
7487 flags |= ifp->if_log.flags;
7488
7489 if (ifp->if_output_ctl != NULL) {
7490 struct ifnet_log_params l;
7491
7492 bzero(&l, sizeof (l));
7493 l.level = level;
7494 l.flags = flags;
7495 l.flags &= ~IFNET_LOGF_DLIL;
7496 l.category = category;
7497 l.subcategory = subcategory;
7498
7499 /* Send this request to lower layers */
7500 if (l.flags != 0) {
7501 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7502 sizeof (l), &l);
7503 }
7504 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7505 /*
7506 * If targeted to the lower layers without an output
7507 * control callback registered on the interface, just
7508 * silently ignore facilities other than ours.
7509 */
7510 flags &= IFNET_LOGF_DLIL;
7511 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
7512 level = 0;
7513 }
7514
7515 if (err == 0) {
7516 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
7517 ifp->if_log.flags = 0;
7518 else
7519 ifp->if_log.flags |= flags;
7520
7521 log(LOG_INFO, "%s: logging level set to %d flags=%b "
7522 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
7523 ifp->if_log.level, ifp->if_log.flags,
7524 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
7525 category, subcategory);
7526 }
7527
7528 return (err);
7529 }
7530
7531 int
7532 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7533 int32_t *category, int32_t *subcategory)
7534 {
7535 if (level != NULL)
7536 *level = ifp->if_log.level;
7537 if (flags != NULL)
7538 *flags = ifp->if_log.flags;
7539 if (category != NULL)
7540 *category = ifp->if_log.category;
7541 if (subcategory != NULL)
7542 *subcategory = ifp->if_log.subcategory;
7543
7544 return (0);
7545 }
7546
7547 int
7548 ifnet_notify_address(struct ifnet *ifp, int af)
7549 {
7550 struct ifnet_notify_address_params na;
7551
7552 #if PF
7553 (void) pf_ifaddr_hook(ifp);
7554 #endif /* PF */
7555
7556 if (ifp->if_output_ctl == NULL)
7557 return (EOPNOTSUPP);
7558
7559 bzero(&na, sizeof (na));
7560 na.address_family = af;
7561
7562 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7563 sizeof (na), &na));
7564 }
7565
7566 errno_t
7567 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7568 {
7569 if (ifp == NULL || flowid == NULL) {
7570 return (EINVAL);
7571 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7572 !(ifp->if_refflags & IFRF_ATTACHED)) {
7573 return (ENXIO);
7574 }
7575
7576 *flowid = ifp->if_flowhash;
7577
7578 return (0);
7579 }
7580
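/*
 * Interface flow control, in brief: a driver that has exhausted its
 * transmit resources calls ifnet_disable_output(), which marks the
 * interface IFSF_FLOW_CONTROLLED and records its flow hash in the
 * ifnet_fc_tree (see ifnet_fc_add below).  Output resumes either
 * explicitly via ifnet_enable_output(), or when a flow advisory for
 * that hash reaches ifnet_flowadv(), which looks up the entry and
 * calls ifnet_enable_output() on the matching interface.
 */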
7581 errno_t
7582 ifnet_disable_output(struct ifnet *ifp)
7583 {
7584 int err;
7585
7586 if (ifp == NULL) {
7587 return (EINVAL);
7588 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7589 !(ifp->if_refflags & IFRF_ATTACHED)) {
7590 return (ENXIO);
7591 }
7592
7593 if ((err = ifnet_fc_add(ifp)) == 0) {
7594 lck_mtx_lock_spin(&ifp->if_start_lock);
7595 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7596 lck_mtx_unlock(&ifp->if_start_lock);
7597 }
7598 return (err);
7599 }
7600
7601 errno_t
7602 ifnet_enable_output(struct ifnet *ifp)
7603 {
7604 if (ifp == NULL) {
7605 return (EINVAL);
7606 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7607 !(ifp->if_refflags & IFRF_ATTACHED)) {
7608 return (ENXIO);
7609 }
7610
7611 ifnet_start_common(ifp, 1);
7612 return (0);
7613 }
7614
7615 void
7616 ifnet_flowadv(uint32_t flowhash)
7617 {
7618 struct ifnet_fc_entry *ifce;
7619 struct ifnet *ifp;
7620
7621 ifce = ifnet_fc_get(flowhash);
7622 if (ifce == NULL)
7623 return;
7624
7625 VERIFY(ifce->ifce_ifp != NULL);
7626 ifp = ifce->ifce_ifp;
7627
7628 /* flow hash is recalculated on each attach; verify it still matches */
7629 if (ifnet_is_attached(ifp, 1)) {
7630 if (ifp->if_flowhash == flowhash)
7631 (void) ifnet_enable_output(ifp);
7632 ifnet_decr_iorefcnt(ifp);
7633 }
7634 ifnet_fc_entry_free(ifce);
7635 }
7636
7637 /*
7638 * Function to compare ifnet_fc_entries in ifnet flow control tree
7639 */
7640 static inline int
7641 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7642 {
7643 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
7644 }
7645
7646 static int
7647 ifnet_fc_add(struct ifnet *ifp)
7648 {
7649 struct ifnet_fc_entry keyfc, *ifce;
7650 uint32_t flowhash;
7651
7652 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
7653 VERIFY(ifp->if_flowhash != 0);
7654 flowhash = ifp->if_flowhash;
7655
7656 bzero(&keyfc, sizeof (keyfc));
7657 keyfc.ifce_flowhash = flowhash;
7658
7659 lck_mtx_lock_spin(&ifnet_fc_lock);
7660 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7661 if (ifce != NULL && ifce->ifce_ifp == ifp) {
7662 /* Entry is already in ifnet_fc_tree, return */
7663 lck_mtx_unlock(&ifnet_fc_lock);
7664 return (0);
7665 }
7666
7667 if (ifce != NULL) {
7668 /*
7669 * There is a different fc entry with the same flow hash
7670 * but different ifp pointer. There can be a collision
7671 * on flow hash but the probability is low. Let's just
7672 * avoid adding a second one when there is a collision.
7673 */
7674 lck_mtx_unlock(&ifnet_fc_lock);
7675 return (EAGAIN);
7676 }
7677
7678 /* become regular mutex */
7679 lck_mtx_convert_spin(&ifnet_fc_lock);
7680
7681 ifce = zalloc_noblock(ifnet_fc_zone);
7682 if (ifce == NULL) {
7683 /* memory allocation failed */
7684 lck_mtx_unlock(&ifnet_fc_lock);
7685 return (ENOMEM);
7686 }
7687 bzero(ifce, ifnet_fc_zone_size);
7688
7689 ifce->ifce_flowhash = flowhash;
7690 ifce->ifce_ifp = ifp;
7691
7692 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7693 lck_mtx_unlock(&ifnet_fc_lock);
7694 return (0);
7695 }
7696
7697 static struct ifnet_fc_entry *
7698 ifnet_fc_get(uint32_t flowhash)
7699 {
7700 struct ifnet_fc_entry keyfc, *ifce;
7701 struct ifnet *ifp;
7702
7703 bzero(&keyfc, sizeof (keyfc));
7704 keyfc.ifce_flowhash = flowhash;
7705
7706 lck_mtx_lock_spin(&ifnet_fc_lock);
7707 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7708 if (ifce == NULL) {
7709 /* Entry is not present in ifnet_fc_tree, return */
7710 lck_mtx_unlock(&ifnet_fc_lock);
7711 return (NULL);
7712 }
7713
7714 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7715
7716 VERIFY(ifce->ifce_ifp != NULL);
7717 ifp = ifce->ifce_ifp;
7718
7719 /* become regular mutex */
7720 lck_mtx_convert_spin(&ifnet_fc_lock);
7721
7722 if (!ifnet_is_attached(ifp, 0)) {
7723 /*
7724 * This ifp is not attached or in the process of being
7725 * detached; just don't process it.
7726 */
7727 ifnet_fc_entry_free(ifce);
7728 ifce = NULL;
7729 }
7730 lck_mtx_unlock(&ifnet_fc_lock);
7731
7732 return (ifce);
7733 }
7734
7735 static void
7736 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
7737 {
7738 zfree(ifnet_fc_zone, ifce);
7739 }
7740
7741 static uint32_t
7742 ifnet_calc_flowhash(struct ifnet *ifp)
7743 {
7744 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7745 uint32_t flowhash = 0;
7746
7747 if (ifnet_flowhash_seed == 0)
7748 ifnet_flowhash_seed = RandomULong();
7749
7750 bzero(&fh, sizeof (fh));
7751
7752 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
7753 fh.ifk_unit = ifp->if_unit;
7754 fh.ifk_flags = ifp->if_flags;
7755 fh.ifk_eflags = ifp->if_eflags;
7756 fh.ifk_capabilities = ifp->if_capabilities;
7757 fh.ifk_capenable = ifp->if_capenable;
7758 fh.ifk_output_sched_model = ifp->if_output_sched_model;
7759 fh.ifk_rand1 = RandomULong();
7760 fh.ifk_rand2 = RandomULong();
7761
7762 try_again:
7763 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
7764 if (flowhash == 0) {
7765 /* try to get a non-zero flowhash */
7766 ifnet_flowhash_seed = RandomULong();
7767 goto try_again;
7768 }
7769
7770 return (flowhash);
7771 }
7772
7773 int
7774 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
7775 uint16_t flags, uint8_t *data)
7776 {
7777 #pragma unused(flags)
7778 int error = 0;
7779
7780 switch (family) {
7781 case AF_INET:
7782 if_inetdata_lock_exclusive(ifp);
7783 if (IN_IFEXTRA(ifp) != NULL) {
7784 if (len == 0) {
7785 /* Allow clearing the signature */
7786 IN_IFEXTRA(ifp)->netsig_len = 0;
7787 bzero(IN_IFEXTRA(ifp)->netsig,
7788 sizeof (IN_IFEXTRA(ifp)->netsig));
7789 if_inetdata_lock_done(ifp);
7790 break;
7791 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
7792 error = EINVAL;
7793 if_inetdata_lock_done(ifp);
7794 break;
7795 }
7796 IN_IFEXTRA(ifp)->netsig_len = len;
7797 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
7798 } else {
7799 error = ENOMEM;
7800 }
7801 if_inetdata_lock_done(ifp);
7802 break;
7803
7804 case AF_INET6:
7805 if_inet6data_lock_exclusive(ifp);
7806 if (IN6_IFEXTRA(ifp) != NULL) {
7807 if (len == 0) {
7808 /* Allow clearing the signature */
7809 IN6_IFEXTRA(ifp)->netsig_len = 0;
7810 bzero(IN6_IFEXTRA(ifp)->netsig,
7811 sizeof (IN6_IFEXTRA(ifp)->netsig));
7812 if_inet6data_lock_done(ifp);
7813 break;
7814 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
7815 error = EINVAL;
7816 if_inet6data_lock_done(ifp);
7817 break;
7818 }
7819 IN6_IFEXTRA(ifp)->netsig_len = len;
7820 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
7821 } else {
7822 error = ENOMEM;
7823 }
7824 if_inet6data_lock_done(ifp);
7825 break;
7826
7827 default:
7828 error = EINVAL;
7829 break;
7830 }
7831
7832 return (error);
7833 }
7834
7835 int
7836 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
7837 uint16_t *flags, uint8_t *data)
7838 {
7839 int error = 0;
7840
7841 if (ifp == NULL || len == NULL || flags == NULL || data == NULL)
7842 return (EINVAL);
7843
7844 switch (family) {
7845 case AF_INET:
7846 if_inetdata_lock_shared(ifp);
7847 if (IN_IFEXTRA(ifp) != NULL) {
7848 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
7849 error = EINVAL;
7850 if_inetdata_lock_done(ifp);
7851 break;
7852 }
7853 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
7854 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
7855 else
7856 error = ENOENT;
7857 } else {
7858 error = ENOMEM;
7859 }
7860 if_inetdata_lock_done(ifp);
7861 break;
7862
7863 case AF_INET6:
7864 if_inet6data_lock_shared(ifp);
7865 if (IN6_IFEXTRA(ifp) != NULL) {
7866 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
7867 error = EINVAL;
7868 if_inet6data_lock_done(ifp);
7869 break;
7870 }
7871 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
7872 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
7873 else
7874 error = ENOENT;
7875 } else {
7876 error = ENOMEM;
7877 }
7878 if_inet6data_lock_done(ifp);
7879 break;
7880
7881 default:
7882 error = EINVAL;
7883 break;
7884 }
7885
7886 if (error == 0)
7887 *flags = 0;
7888
7889 return (error);
7890 }
7891
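/*
 * Illustrative sketch (not part of this file): a kernel-side consumer
 * fetching the IPv6 network signature stored above.  IFNET_SIGNATURELEN
 * is assumed to be at least as large as the netsig[] buffer in the
 * per-address-family ifextra structures.
 */
#if 0	/* example only; never compiled into dlil.c */
static void
log_v6_netsig(struct ifnet *ifp)
{
	uint8_t sig[IFNET_SIGNATURELEN];	/* assumed buffer size */
	uint8_t len = sizeof (sig);
	uint16_t flags;

	if (ifnet_get_netsignature(ifp, AF_INET6, &len, &flags, sig) == 0 &&
	    len > 0) {
		/* on success, len holds the actual signature length */
		printf("%s: IPv6 network signature is %u bytes\n",
		    if_name(ifp), len);
	}
}
#endif
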
7892 static void
7893 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
7894 protocol_family_t pf)
7895 {
7896 #pragma unused(ifp)
7897 uint32_t did_sw;
7898
7899 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
7900 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
7901 return;
7902
7903 switch (pf) {
7904 case PF_INET:
7905 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
7906 if (did_sw & CSUM_DELAY_IP)
7907 hwcksum_dbg_finalized_hdr++;
7908 if (did_sw & CSUM_DELAY_DATA)
7909 hwcksum_dbg_finalized_data++;
7910 break;
7911 #if INET6
7912 case PF_INET6:
7913 /*
7914 * Checksum offload should not have been enabled when
7915 * extension headers exist; that also means that we
7916 * cannot force-finalize packets with extension headers.
7917 * Indicate to the callee that it should skip such cases by
7918 * setting optlen to -1.
7919 */
7920 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
7921 m->m_pkthdr.csum_flags);
7922 if (did_sw & CSUM_DELAY_IPV6_DATA)
7923 hwcksum_dbg_finalized_data++;
7924 break;
7925 #endif /* INET6 */
7926 default:
7927 return;
7928 }
7929 }
7930
7931 static void
7932 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
7933 protocol_family_t pf)
7934 {
7935 uint16_t sum;
7936 uint32_t hlen;
7937
7938 if (frame_header == NULL ||
7939 frame_header < (char *)mbuf_datastart(m) ||
7940 frame_header > (char *)m->m_data) {
7941 printf("%s: frame header pointer 0x%llx out of range "
7942 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
7943 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
7944 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
7945 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
7946 (uint64_t)VM_KERNEL_ADDRPERM(m));
7947 return;
7948 }
7949 hlen = (m->m_data - frame_header);
7950
7951 switch (pf) {
7952 case PF_INET:
7953 #if INET6
7954 case PF_INET6:
7955 #endif /* INET6 */
7956 break;
7957 default:
7958 return;
7959 }
7960
7961 /*
7962 * Force partial checksum offload; useful to simulate cases
7963 * where the hardware does not support partial checksum offload,
7964 * in order to validate correctness throughout the layers above.
7965 */
7966 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
7967 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
7968
7969 if (foff > (uint32_t)m->m_pkthdr.len)
7970 return;
7971
7972 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
7973
7974 /* Compute 16-bit 1's complement sum from forced offset */
7975 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
7976
7977 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
7978 m->m_pkthdr.csum_rx_val = sum;
7979 m->m_pkthdr.csum_rx_start = (foff + hlen);
7980
7981 hwcksum_dbg_partial_forced++;
7982 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
7983 }
7984
7985 /*
7986 * Partial checksum offload verification (and adjustment);
7987 * useful to validate and test cases where the hardware
7988 * supports partial checksum offload.
7989 */
7990 if ((m->m_pkthdr.csum_flags &
7991 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
7992 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
7993 uint32_t rxoff;
7994
7995 /* Start offset must begin after frame header */
7996 rxoff = m->m_pkthdr.csum_rx_start;
7997 if (hlen > rxoff) {
7998 hwcksum_dbg_bad_rxoff++;
7999 if (dlil_verbose) {
8000 printf("%s: partial cksum start offset %d "
8001 "is less than frame header length %d for "
8002 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
8003 (uint64_t)VM_KERNEL_ADDRPERM(m));
8004 }
8005 return;
8006 }
8007 rxoff -= hlen;
8008
8009 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
8010 /*
8011 * Compute the expected 16-bit 1's complement sum;
8012 * skip this if we've already computed it above
8013 * when partial checksum offload is forced.
8014 */
8015 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
8016
8017 /* Hardware or driver is buggy */
8018 if (sum != m->m_pkthdr.csum_rx_val) {
8019 hwcksum_dbg_bad_cksum++;
8020 if (dlil_verbose) {
8021 printf("%s: bad partial cksum value "
8022 "0x%x (expected 0x%x) for mbuf "
8023 "0x%llx [rx_start %d]\n",
8024 if_name(ifp),
8025 m->m_pkthdr.csum_rx_val, sum,
8026 (uint64_t)VM_KERNEL_ADDRPERM(m),
8027 m->m_pkthdr.csum_rx_start);
8028 }
8029 return;
8030 }
8031 }
8032 hwcksum_dbg_verified++;
8033
8034 /*
8035 * This code allows us to emulate various hardware that
8036 * performs the 16-bit 1's complement sum beginning at
8037 * various start offset values.
8038 */
8039 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
8040 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
8041
8042 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
8043 return;
8044
8045 sum = m_adj_sum16(m, rxoff, aoff, sum);
8046
8047 m->m_pkthdr.csum_rx_val = sum;
8048 m->m_pkthdr.csum_rx_start = (aoff + hlen);
8049
8050 hwcksum_dbg_adjusted++;
8051 }
8052 }
8053 }
8054
8055 static int
8056 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
8057 {
8058 #pragma unused(arg1, arg2)
8059 u_int32_t i;
8060 int err;
8061
8062 i = hwcksum_dbg_mode;
8063
8064 err = sysctl_handle_int(oidp, &i, 0, req);
8065 if (err != 0 || req->newptr == USER_ADDR_NULL)
8066 return (err);
8067
8068 if (hwcksum_dbg == 0)
8069 return (ENODEV);
8070
8071 if ((i & ~HWCKSUM_DBG_MASK) != 0)
8072 return (EINVAL);
8073
8074 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
8075
8076 return (err);
8077 }
8078
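/*
 * Illustrative sketch (not part of this file): toggling the checksum
 * debug mode from userland through the handler above.  The OID string
 * is an assumption based on the SYSCTL_PROC registrations earlier in
 * this file; the value is a combination of HWCKSUM_DBG_* flags and is
 * validated against HWCKSUM_DBG_MASK, and hwcksum_dbg must already be
 * enabled for the write to be accepted.
 */
#if 0	/* example only; never compiled into dlil.c */
#include <sys/types.h>
#include <sys/sysctl.h>

static int
set_hwcksum_dbg_mode(u_int32_t mode)
{
	/* name assumed; see the sysctl registration in this file */
	return (sysctlbyname("net.link.generic.system.hwcksum_dbg_mode",
	    NULL, NULL, &mode, sizeof (mode)));
}
#endif
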
8079 static int
8080 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
8081 {
8082 #pragma unused(arg1, arg2)
8083 u_int32_t i;
8084 int err;
8085
8086 i = hwcksum_dbg_partial_rxoff_forced;
8087
8088 err = sysctl_handle_int(oidp, &i, 0, req);
8089 if (err != 0 || req->newptr == USER_ADDR_NULL)
8090 return (err);
8091
8092 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
8093 return (ENODEV);
8094
8095 hwcksum_dbg_partial_rxoff_forced = i;
8096
8097 return (err);
8098 }
8099
8100 static int
8101 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
8102 {
8103 #pragma unused(arg1, arg2)
8104 u_int32_t i;
8105 int err;
8106
8107 i = hwcksum_dbg_partial_rxoff_adj;
8108
8109 err = sysctl_handle_int(oidp, &i, 0, req);
8110 if (err != 0 || req->newptr == USER_ADDR_NULL)
8111 return (err);
8112
8113 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
8114 return (ENODEV);
8115
8116 hwcksum_dbg_partial_rxoff_adj = i;
8117
8118 return (err);
8119 }
8120
8121 static int
8122 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
8123 {
8124 #pragma unused(oidp, arg1, arg2)
8125 int err;
8126
8127 	if (req->oldptr == USER_ADDR_NULL) {
8128 		/* size probe; SYSCTL_OUT() below reports the required length */
8129 	}
8130 if (req->newptr != USER_ADDR_NULL) {
8131 return (EPERM);
8132 }
8133 err = SYSCTL_OUT(req, &tx_chain_len_stats,
8134 sizeof(struct chain_len_stats));
8135
8136 return (err);
8137 }
8138
8139
8140 #if DEBUG
8141 /* Blob for sum16 verification */
8142 static uint8_t sumdata[] = {
8143 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
8144 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
8145 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
8146 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
8147 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
8148 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
8149 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
8150 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
8151 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
8152 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
8153 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
8154 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
8155 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
8156 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
8157 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
8158 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
8159 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
8160 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
8161 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
8162 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
8163 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
8164 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
8165 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
8166 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
8167 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
8168 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
8169 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
8170 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
8171 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
8172 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
8173 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
8174 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
8175 0xc8, 0x28, 0x02, 0x00, 0x00
8176 };
8177
8178 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
8179 static struct {
8180 int len;
8181 uint16_t sum;
8182 } sumtbl[] = {
8183 { 11, 0xcb6d },
8184 { 20, 0x20dd },
8185 { 27, 0xbabd },
8186 { 32, 0xf3e8 },
8187 { 37, 0x197d },
8188 { 43, 0x9eae },
8189 { 64, 0x4678 },
8190 { 127, 0x9399 },
8191 { 256, 0xd147 },
8192 { 325, 0x0358 }
8193 };
8194 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
8195
8196 static void
8197 dlil_verify_sum16(void)
8198 {
8199 struct mbuf *m;
8200 uint8_t *buf;
8201 int n;
8202
8203 /* Make sure test data plus extra room for alignment fits in cluster */
8204 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
8205
8206 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
8207 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
8208 buf = mtod(m, uint8_t *); /* base address */
8209
8210 for (n = 0; n < SUMTBL_MAX; n++) {
8211 uint16_t len = sumtbl[n].len;
8212 int i;
8213
8214 /* Verify for all possible alignments */
8215 for (i = 0; i < (int)sizeof (uint64_t); i++) {
8216 uint16_t sum;
8217 uint8_t *c;
8218
8219 /* Copy over test data to mbuf */
8220 VERIFY(len <= sizeof (sumdata));
8221 c = buf + i;
8222 bcopy(sumdata, c, len);
8223
8224 /* Zero-offset test (align by data pointer) */
8225 m->m_data = (caddr_t)c;
8226 m->m_len = len;
8227 sum = m_sum16(m, 0, len);
8228
8229 /* Something is horribly broken; stop now */
8230 if (sum != sumtbl[n].sum) {
8231 panic("%s: broken m_sum16 for len=%d align=%d "
8232 "sum=0x%04x [expected=0x%04x]\n", __func__,
8233 len, i, sum, sumtbl[n].sum);
8234 /* NOTREACHED */
8235 }
8236
8237 /* Alignment test by offset (fixed data pointer) */
8238 m->m_data = (caddr_t)buf;
8239 m->m_len = i + len;
8240 sum = m_sum16(m, i, len);
8241
8242 /* Something is horribly broken; stop now */
8243 if (sum != sumtbl[n].sum) {
8244 panic("%s: broken m_sum16 for len=%d offset=%d "
8245 "sum=0x%04x [expected=0x%04x]\n", __func__,
8246 len, i, sum, sumtbl[n].sum);
8247 /* NOTREACHED */
8248 }
8249 #if INET
8250 /* Simple sum16 contiguous buffer test by alignment */
8251 sum = b_sum16(c, len);
8252
8253 /* Something is horribly broken; stop now */
8254 if (sum != sumtbl[n].sum) {
8255 panic("%s: broken b_sum16 for len=%d align=%d "
8256 "sum=0x%04x [expected=0x%04x]\n", __func__,
8257 len, i, sum, sumtbl[n].sum);
8258 /* NOTREACHED */
8259 }
8260 #endif /* INET */
8261 }
8262 }
8263 m_freem(m);
8264
8265 printf("DLIL: SUM16 self-tests PASSED\n");
8266 }
8267 #endif /* DEBUG */
8268
8269 #define CASE_STRINGIFY(x) case x: return #x
8270
8271 __private_extern__ const char *
8272 dlil_kev_dl_code_str(u_int32_t event_code)
8273 {
8274 switch (event_code) {
8275 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
8276 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
8277 CASE_STRINGIFY(KEV_DL_SIFMTU);
8278 CASE_STRINGIFY(KEV_DL_SIFPHYS);
8279 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
8280 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
8281 CASE_STRINGIFY(KEV_DL_ADDMULTI);
8282 CASE_STRINGIFY(KEV_DL_DELMULTI);
8283 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
8284 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
8285 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
8286 CASE_STRINGIFY(KEV_DL_LINK_OFF);
8287 CASE_STRINGIFY(KEV_DL_LINK_ON);
8288 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
8289 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
8290 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
8291 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
8292 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
8293 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
8294 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
8295 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
8296 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
8297 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
8298 CASE_STRINGIFY(KEV_DL_ISSUES);
8299 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
8300 default:
8301 break;
8302 }
8303 return ("");
8304 }
8305
8306 /*
8307 * The sysctl name mirrors the arguments of ifnet_get_local_ports_extended():
8308 * ifindex: name[0]
8309 * protocol: name[1]
8310 * flags: name[2]
8311 */
8312 static int
8313 sysctl_get_ports_used SYSCTL_HANDLER_ARGS
8314 {
8315 #pragma unused(oidp)
8316 int *name = (int *)arg1;
8317 int namelen = arg2;
8318 int error = 0;
8319 int idx;
8320 protocol_family_t protocol;
8321 u_int32_t flags;
8322 ifnet_t ifp = NULL;
8323 u_int8_t *bitfield = NULL;
8324
8325 if (req->newptr != USER_ADDR_NULL) {
8326 error = EPERM;
8327 goto done;
8328 }
8329 if (namelen != 3) {
8330 error = ENOENT;
8331 goto done;
8332 }
8333
8334 if (req->oldptr == USER_ADDR_NULL) {
8335 req->oldidx = bitstr_size(65536);
8336 goto done;
8337 }
8338 if (req->oldlen < bitstr_size(65536)) {
8339 error = ENOMEM;
8340 goto done;
8341 }
8342
8343 idx = name[0];
8344 protocol = name[1];
8345 flags = name[2];
8346
8347 ifnet_head_lock_shared();
8348 if (idx > if_index) {
8349 ifnet_head_done();
8350 error = ENOENT;
8351 goto done;
8352 }
8353 ifp = ifindex2ifnet[idx];
8354 ifnet_head_done();
8355
8356 bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK);
8357 if (bitfield == NULL) {
8358 error = ENOMEM;
8359 goto done;
8360 }
8361 error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
8362 if (error != 0) {
8363 printf("%s: ifnet_get_local_ports_extended() error %d\n",
8364 __func__, error);
8365 goto done;
8366 }
8367 error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
8368 done:
8369 if (bitfield != NULL)
8370 _FREE(bitfield, M_TEMP);
8371 return (error);
8372 }
8373
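/*
 * Illustrative sketch (not part of this file): calling the handler above
 * from userland.  The node name is an assumption based on where
 * sysctl_get_ports_used is registered elsewhere in this file; the three
 * trailing integers are the ifindex, protocol and flags parsed from
 * name[] above, and the output buffer must hold bitstr_size(65536)
 * (8192) bytes, one bit per local port in use.
 */
#if 0	/* example only; never compiled into dlil.c */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdlib.h>

static u_int8_t *
get_ports_bitmap(u_int ifindex, int protocol, u_int32_t flags)
{
	int mib[CTL_MAXNAME];
	size_t miblen = CTL_MAXNAME - 3;
	size_t len = 8192;		/* bitstr_size(65536) */
	u_int8_t *bitmap;

	if (sysctlnametomib("net.link.generic.system.get_ports_used",
	    mib, &miblen) != 0)
		return (NULL);
	/* append the three arguments the handler parses from name[] */
	mib[miblen + 0] = (int)ifindex;
	mib[miblen + 1] = protocol;	/* 0 for any, or IPPROTO_TCP/UDP */
	mib[miblen + 2] = (int)flags;	/* IFNET_GET_LOCAL_PORTS_* flags */

	if ((bitmap = calloc(1, len)) == NULL)
		return (NULL);
	if (sysctl(mib, (u_int)(miblen + 3), bitmap, &len, NULL, 0) != 0) {
		free(bitmap);
		return (NULL);
	}
	return (bitmap);
}
#endif
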
8374 #if (DEVELOPMENT || DEBUG)
8375 /*
8376 * The sysctl variable name contains the input parameters of
8377 * ifnet_get_keepalive_offload_frames()
8378 * ifp (interface index): name[0]
8379 * frames_array_count: name[1]
8380 * frame_data_offset: name[2]
8381 * The return length gives used_frames_count
8382 */
8383 static int
8384 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
8385 {
8386 #pragma unused(oidp)
8387 int *name = (int *)arg1;
8388 u_int namelen = arg2;
8389 int idx;
8390 ifnet_t ifp = NULL;
8391 u_int32_t frames_array_count;
8392 size_t frame_data_offset;
8393 u_int32_t used_frames_count;
8394 struct ifnet_keepalive_offload_frame *frames_array = NULL;
8395 int error = 0;
8396 u_int32_t i;
8397
8398 /*
8399 * Only root can look at other processes' TCP frames
8400 */
8401 error = proc_suser(current_proc());
8402 if (error != 0)
8403 goto done;
8404 /*
8405 * Validate the input parameters
8406 */
8407 if (req->newptr != USER_ADDR_NULL) {
8408 error = EPERM;
8409 goto done;
8410 }
8411 if (namelen != 3) {
8412 error = EINVAL;
8413 goto done;
8414 }
8415 if (req->oldptr == USER_ADDR_NULL) {
8416 error = EINVAL;
8417 goto done;
8418 }
8419 if (req->oldlen == 0) {
8420 error = EINVAL;
8421 goto done;
8422 }
8423 idx = name[0];
8424 frames_array_count = name[1];
8425 frame_data_offset = name[2];
8426
8427 /* Make sure the passed buffer is large enough */
8428 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
8429 req->oldlen) {
8430 error = ENOMEM;
8431 goto done;
8432 }
8433
8434 ifnet_head_lock_shared();
8435 if (idx > if_index) {
8436 ifnet_head_done();
8437 error = ENOENT;
8438 goto done;
8439 }
8440 ifp = ifindex2ifnet[idx];
8441 ifnet_head_done();
8442
8443 frames_array = _MALLOC(frames_array_count *
8444 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
8445 if (frames_array == NULL) {
8446 error = ENOMEM;
8447 goto done;
8448 }
8449
8450 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
8451 frames_array_count, frame_data_offset, &used_frames_count);
8452 if (error != 0) {
8453 printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
8454 __func__, error);
8455 goto done;
8456 }
8457
8458 for (i = 0; i < used_frames_count; i++) {
8459 error = SYSCTL_OUT(req, frames_array + i,
8460 sizeof(struct ifnet_keepalive_offload_frame));
8461 if (error != 0) {
8462 goto done;
8463 }
8464 }
8465 done:
8466 if (frames_array != NULL)
8467 _FREE(frames_array, M_TEMP);
8468 return (error);
8469 }
8470 #endif /* DEVELOPMENT || DEBUG */