apple/xnu (xnu-4570.51.1) - bsd/net/dlil.c
1 /*
2 * Copyright (c) 1999-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_llreach.h>
71 #include <net/kpi_interfacefilter.h>
72 #include <net/classq/classq.h>
73 #include <net/classq/classq_sfb.h>
74 #include <net/flowhash.h>
75 #include <net/ntstat.h>
76 #include <net/if_llatbl.h>
77 #include <net/net_api_stats.h>
78 #include <net/if_ports_used.h>
79
80 #if INET
81 #include <netinet/in_var.h>
82 #include <netinet/igmp_var.h>
83 #include <netinet/ip_var.h>
84 #include <netinet/tcp.h>
85 #include <netinet/tcp_var.h>
86 #include <netinet/udp.h>
87 #include <netinet/udp_var.h>
88 #include <netinet/if_ether.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/in_tclass.h>
91 #endif /* INET */
92
93 #if INET6
94 #include <netinet6/in6_var.h>
95 #include <netinet6/nd6.h>
96 #include <netinet6/mld6_var.h>
97 #include <netinet6/scope6_var.h>
98 #endif /* INET6 */
99
100 #include <libkern/OSAtomic.h>
101 #include <libkern/tree.h>
102
103 #include <dev/random/randomdev.h>
104 #include <machine/machine_routines.h>
105
106 #include <mach/thread_act.h>
107 #include <mach/sdt.h>
108
109 #if CONFIG_MACF
110 #include <sys/kauth.h>
111 #include <security/mac_framework.h>
112 #include <net/ethernet.h>
113 #include <net/firewire.h>
114 #endif
115
116 #if PF
117 #include <net/pfvar.h>
118 #endif /* PF */
119 #include <net/pktsched/pktsched.h>
120
121 #if NECP
122 #include <net/necp.h>
123 #endif /* NECP */
124
125
126 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
127 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
128 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
129 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
130 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
131
132 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
133 #define MAX_LINKADDR 4 /* LONGWORDS */
134 #define M_NKE M_IFADDR
135
136 #if 1
137 #define DLIL_PRINTF printf
138 #else
139 #define DLIL_PRINTF kprintf
140 #endif
141
142 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
143 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
144
145 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
146 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
147
148 enum {
149 kProtoKPI_v1 = 1,
150 kProtoKPI_v2 = 2
151 };
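/*
 * kProtoKPI_v1 marks protocols attached through the original protocol
 * KPI, whose input handler is called one packet at a time and is passed
 * a separate frame header pointer; kProtoKPI_v2 marks protocols attached
 * through the v2 KPI, whose input handler receives packet chains and no
 * frame header pointer (compare ifproto_media_input_v1() and
 * ifproto_media_input_v2() below).
 */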
152
153 /*
154 * List of if_proto structures in if_proto_hash[] is protected by
155 * the ifnet lock. The rest of the fields are initialized at protocol
156  * attach time and never change; thus no lock is required as long as
157  * a valid reference is held via if_proto_ref().
158 */
159 struct if_proto {
160 SLIST_ENTRY(if_proto) next_hash;
161 u_int32_t refcount;
162 u_int32_t detached;
163 struct ifnet *ifp;
164 protocol_family_t protocol_family;
165 int proto_kpi;
166 union {
167 struct {
168 proto_media_input input;
169 proto_media_preout pre_output;
170 proto_media_event event;
171 proto_media_ioctl ioctl;
172 proto_media_detached detached;
173 proto_media_resolve_multi resolve_multi;
174 proto_media_send_arp send_arp;
175 } v1;
176 struct {
177 proto_media_input_v2 input;
178 proto_media_preout pre_output;
179 proto_media_event event;
180 proto_media_ioctl ioctl;
181 proto_media_detached detached;
182 proto_media_resolve_multi resolve_multi;
183 proto_media_send_arp send_arp;
184 } v2;
185 } kpi;
186 };
187
188 SLIST_HEAD(proto_hash_entry, if_proto);
189
190 #define DLIL_SDLDATALEN \
191 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
192
193 struct dlil_ifnet {
194 struct ifnet dl_if; /* public ifnet */
195 /*
196 * DLIL private fields, protected by dl_if_lock
197 */
198 decl_lck_mtx_data(, dl_if_lock);
199 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
200 u_int32_t dl_if_flags; /* flags (below) */
201 u_int32_t dl_if_refcnt; /* refcnt */
202 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
203 void *dl_if_uniqueid; /* unique interface id */
204 size_t dl_if_uniqueid_len; /* length of the unique id */
205 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
206 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
207 struct {
208 struct ifaddr ifa; /* lladdr ifa */
209 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
210 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
211 } dl_if_lladdr;
212 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
213 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
214 ctrace_t dl_if_attach; /* attach PC stacktrace */
215 ctrace_t dl_if_detach; /* detach PC stacktrace */
216 };
217
218 /* Values for dl_if_flags (private to DLIL) */
219 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
220 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
221 #define DLIF_DEBUG 0x4 /* has debugging info */
222
223 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
224
225 /* For gdb */
226 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
227
228 struct dlil_ifnet_dbg {
229 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
230 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
231 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
232 /*
233 * Circular lists of ifnet_{reference,release} callers.
234 */
235 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
236 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
237 };
238
239 #define DLIL_TO_IFP(s) (&s->dl_if)
240 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
241
242 struct ifnet_filter {
243 TAILQ_ENTRY(ifnet_filter) filt_next;
244 u_int32_t filt_skip;
245 u_int32_t filt_flags;
246 ifnet_t filt_ifp;
247 const char *filt_name;
248 void *filt_cookie;
249 protocol_family_t filt_protocol;
250 iff_input_func filt_input;
251 iff_output_func filt_output;
252 iff_event_func filt_event;
253 iff_ioctl_func filt_ioctl;
254 iff_detached_func filt_detached;
255 };
256
257 struct proto_input_entry;
258
259 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
260 static lck_grp_t *dlil_lock_group;
261 lck_grp_t *ifnet_lock_group;
262 static lck_grp_t *ifnet_head_lock_group;
263 static lck_grp_t *ifnet_snd_lock_group;
264 static lck_grp_t *ifnet_rcv_lock_group;
265 lck_attr_t *ifnet_lock_attr;
266 decl_lck_rw_data(static, ifnet_head_lock);
267 decl_lck_mtx_data(static, dlil_ifnet_lock);
268 u_int32_t dlil_filter_disable_tso_count = 0;
269
270 #if DEBUG
271 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
272 #else
273 static unsigned int ifnet_debug; /* debugging (disabled) */
274 #endif /* !DEBUG */
275 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
276 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
277 static struct zone *dlif_zone; /* zone for dlil_ifnet */
278
279 #define DLIF_ZONE_MAX 64 /* maximum elements in zone */
280 #define DLIF_ZONE_NAME "ifnet" /* zone name */
281
282 static unsigned int dlif_filt_size; /* size of ifnet_filter */
283 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
284
285 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
286 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
287
288 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
289 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
290
291 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
292 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
293
294 static unsigned int dlif_proto_size; /* size of if_proto */
295 static struct zone *dlif_proto_zone; /* zone for if_proto */
296
297 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
298 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
299
300 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
301 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
302 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
303
304 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
305 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
306
307 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
308 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
309 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
310
311 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
312 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
313
314 static u_int32_t net_rtref;
315
316 static struct dlil_main_threading_info dlil_main_input_thread_info;
317 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
318 (struct dlil_threading_info *)&dlil_main_input_thread_info;
319
320 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
321 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
322 static void dlil_if_trace(struct dlil_ifnet *, int);
323 static void if_proto_ref(struct if_proto *);
324 static void if_proto_free(struct if_proto *);
325 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
326 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
327 u_int32_t list_count);
328 static void if_flt_monitor_busy(struct ifnet *);
329 static void if_flt_monitor_unbusy(struct ifnet *);
330 static void if_flt_monitor_enter(struct ifnet *);
331 static void if_flt_monitor_leave(struct ifnet *);
332 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
333 char **, protocol_family_t);
334 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
335 protocol_family_t);
336 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
337 const struct sockaddr_dl *);
338 static int ifnet_lookup(struct ifnet *);
339 static void if_purgeaddrs(struct ifnet *);
340
341 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
342 struct mbuf *, char *);
343 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
344 struct mbuf *);
345 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
346 mbuf_t *, const struct sockaddr *, void *, char *, char *);
347 static void ifproto_media_event(struct ifnet *, protocol_family_t,
348 const struct kev_msg *);
349 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
350 unsigned long, void *);
351 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
352 struct sockaddr_dl *, size_t);
353 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
354 const struct sockaddr_dl *, const struct sockaddr *,
355 const struct sockaddr_dl *, const struct sockaddr *);
356
357 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
358 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
359 boolean_t poll, struct thread *tp);
360 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
361 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
362 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
363 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
364 protocol_family_t *);
365 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
366 const struct ifnet_demux_desc *, u_int32_t);
367 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
368 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
369 #if CONFIG_EMBEDDED
370 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
371 const struct sockaddr *, const char *, const char *,
372 u_int32_t *, u_int32_t *);
373 #else
374 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
375 const struct sockaddr *, const char *, const char *);
376 #endif /* CONFIG_EMBEDDED */
377 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
378 const struct sockaddr *, const char *, const char *,
379 u_int32_t *, u_int32_t *);
380 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
381 static void ifp_if_free(struct ifnet *);
382 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
383 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
384 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
385
386 static void dlil_main_input_thread_func(void *, wait_result_t);
387 static void dlil_input_thread_func(void *, wait_result_t);
388 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
389 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
390 static void dlil_terminate_input_thread(struct dlil_threading_info *);
391 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
392 struct dlil_threading_info *, boolean_t);
393 static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
394 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
395 u_int32_t, ifnet_model_t, boolean_t);
396 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
397 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
398
399 #if DEBUG || DEVELOPMENT
400 static void dlil_verify_sum16(void);
401 #endif /* DEBUG || DEVELOPMENT */
402 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
403 protocol_family_t);
404 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
405 protocol_family_t);
406
407 static void ifnet_detacher_thread_func(void *, wait_result_t);
408 static int ifnet_detacher_thread_cont(int);
409 static void ifnet_detach_final(struct ifnet *);
410 static void ifnet_detaching_enqueue(struct ifnet *);
411 static struct ifnet *ifnet_detaching_dequeue(void);
412
413 static void ifnet_start_thread_fn(void *, wait_result_t);
414 static void ifnet_poll_thread_fn(void *, wait_result_t);
415 static void ifnet_poll(struct ifnet *);
416 static errno_t ifnet_enqueue_common(struct ifnet *, void *,
417 classq_pkt_type_t, boolean_t, boolean_t *);
418
419 static void ifp_src_route_copyout(struct ifnet *, struct route *);
420 static void ifp_src_route_copyin(struct ifnet *, struct route *);
421 #if INET6
422 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
423 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
424 #endif /* INET6 */
425
426 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
427 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
428 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
429 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
430 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
431 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
432 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
433 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
434 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
435 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
436 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
437
438 struct chain_len_stats tx_chain_len_stats;
439 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
440
441 #if TEST_INPUT_THREAD_TERMINATION
442 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
443 #endif /* TEST_INPUT_THREAD_TERMINATION */
444
445 /* The following are protected by dlil_ifnet_lock */
446 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
447 static u_int32_t ifnet_detaching_cnt;
448 static void *ifnet_delayed_run; /* wait channel for detaching thread */
449
450 decl_lck_mtx_data(static, ifnet_fc_lock);
451
452 static uint32_t ifnet_flowhash_seed;
453
454 struct ifnet_flowhash_key {
455 char ifk_name[IFNAMSIZ];
456 uint32_t ifk_unit;
457 uint32_t ifk_flags;
458 uint32_t ifk_eflags;
459 uint32_t ifk_capabilities;
460 uint32_t ifk_capenable;
461 uint32_t ifk_output_sched_model;
462 uint32_t ifk_rand1;
463 uint32_t ifk_rand2;
464 };
465
466 /* Flow control entry per interface */
467 struct ifnet_fc_entry {
468 RB_ENTRY(ifnet_fc_entry) ifce_entry;
469 u_int32_t ifce_flowhash;
470 struct ifnet *ifce_ifp;
471 };
472
473 static uint32_t ifnet_calc_flowhash(struct ifnet *);
474 static int ifce_cmp(const struct ifnet_fc_entry *,
475 const struct ifnet_fc_entry *);
476 static int ifnet_fc_add(struct ifnet *);
477 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
478 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
479
480 /* protected by ifnet_fc_lock */
481 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
482 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
483 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
484
485 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
486 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
487
488 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
489 #define IFNET_FC_ZONE_MAX 32
490
491 extern void bpfdetach(struct ifnet *);
492 extern void proto_input_run(void);
493
494 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
495 u_int32_t flags);
496 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
497 u_int32_t flags);
498
499 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
500
501 #if CONFIG_MACF
502 #ifdef CONFIG_EMBEDDED
503 int dlil_lladdr_ckreq = 1;
504 #else
505 int dlil_lladdr_ckreq = 0;
506 #endif
507 #endif
508
509 #if DEBUG
510 int dlil_verbose = 1;
511 #else
512 int dlil_verbose = 0;
513 #endif /* DEBUG */
514 #if IFNET_INPUT_SANITY_CHK
515 /* sanity checking of input packet lists received */
516 static u_int32_t dlil_input_sanity_check = 0;
517 #endif /* IFNET_INPUT_SANITY_CHK */
518 /* rate limit debug messages */
519 struct timespec dlil_dbgrate = { 1, 0 };
520
521 SYSCTL_DECL(_net_link_generic_system);
522
523 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
524 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
525
526 #define IF_SNDQ_MINLEN 32
527 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
528 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
529 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
530 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
531
532 #define IF_RCVQ_MINLEN 32
533 #define IF_RCVQ_MAXLEN 256
534 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
535 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
536 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
537 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
538
539 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
540 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
541 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
542 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
543 "ilog2 of EWMA decay rate of avg inbound packets");
544
545 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
546 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
547 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
548 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
549 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
550 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
551 "Q", "input poll mode freeze time");
552
553 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
554 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
555 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
556 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
557 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
558 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
559 "Q", "input poll sampling time");
560
561 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
562 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
563 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
564 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
565 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
566 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
567 "Q", "input poll interval (time)");
568
569 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
570 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
571 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
572 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
573 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
574
575 #define IF_RXPOLL_WLOWAT 10
576 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
577 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
578 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
579 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
580 "I", "input poll wakeup low watermark");
581
582 #define IF_RXPOLL_WHIWAT 100
583 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
584 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
585 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
586 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
587 "I", "input poll wakeup high watermark");
588
589 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
590 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
591 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
592 "max packets per poll call");
593
594 static u_int32_t if_rxpoll = 1;
595 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
596 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
597 sysctl_rxpoll, "I", "enable opportunistic input polling");
598
599 #if TEST_INPUT_THREAD_TERMINATION
600 static u_int32_t if_input_thread_termination_spin = 0;
601 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
602 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
603 &if_input_thread_termination_spin, 0,
604 sysctl_input_thread_termination_spin,
605 "I", "input thread termination spin limit");
606 #endif /* TEST_INPUT_THREAD_TERMINATION */
607
608 static u_int32_t cur_dlil_input_threads = 0;
609 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
610 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
611 "Current number of DLIL input threads");
612
613 #if IFNET_INPUT_SANITY_CHK
614 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
615 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
616 "Turn on sanity checking in DLIL input");
617 #endif /* IFNET_INPUT_SANITY_CHK */
618
619 static u_int32_t if_flowadv = 1;
620 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
621 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
622 "enable flow-advisory mechanism");
623
624 static u_int32_t if_delaybased_queue = 1;
625 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
626 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
627 "enable delay based dynamic queue sizing");
628
629 static uint64_t hwcksum_in_invalidated = 0;
630 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
631 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
632 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
633
634 uint32_t hwcksum_dbg = 0;
635 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
636 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
637 "enable hardware cksum debugging");
638
639 u_int32_t ifnet_start_delayed = 0;
640 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
641 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
642 "number of times start was delayed");
643
644 u_int32_t ifnet_delay_start_disabled = 0;
645 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
646 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
647 "number of times start was delayed");
648
649 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
650 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
651 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
652 #define HWCKSUM_DBG_MASK \
653 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
654 HWCKSUM_DBG_FINALIZE_FORCED)
655
656 static uint32_t hwcksum_dbg_mode = 0;
657 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
658 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
659 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
660
661 static uint64_t hwcksum_dbg_partial_forced = 0;
662 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
663 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
664 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
665
666 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
667 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
668 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
669 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
670
671 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
672 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
673 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
674 &hwcksum_dbg_partial_rxoff_forced, 0,
675 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
676 "forced partial cksum rx offset");
677
678 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
679 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
680 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
681 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
682 "adjusted partial cksum rx offset");
683
684 static uint64_t hwcksum_dbg_verified = 0;
685 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
686 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
687 &hwcksum_dbg_verified, "packets verified for having good checksum");
688
689 static uint64_t hwcksum_dbg_bad_cksum = 0;
690 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
691 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
692 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
693
694 static uint64_t hwcksum_dbg_bad_rxoff = 0;
695 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
696 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
697 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
698
699 static uint64_t hwcksum_dbg_adjusted = 0;
700 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
701 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
702 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
703
704 static uint64_t hwcksum_dbg_finalized_hdr = 0;
705 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
706 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
707 &hwcksum_dbg_finalized_hdr, "finalized headers");
708
709 static uint64_t hwcksum_dbg_finalized_data = 0;
710 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
711 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
712 &hwcksum_dbg_finalized_data, "finalized payloads");
713
714 uint32_t hwcksum_tx = 1;
715 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
716 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
717 "enable transmit hardware checksum offload");
718
719 uint32_t hwcksum_rx = 1;
720 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
721 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
722 "enable receive hardware checksum offload");
723
724 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
725 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
726 sysctl_tx_chain_len_stats, "S", "");
727
728 uint32_t tx_chain_len_count = 0;
729 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
730 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
731
732 static uint32_t threshold_notify = 1; /* enable/disable */
733 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
734 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
735
736 static uint32_t threshold_interval = 2; /* in seconds */
737 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
738 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
739
740 #if (DEVELOPMENT || DEBUG)
741 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
742 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
743 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
744 #endif /* DEVELOPMENT || DEBUG */
745
746 struct net_api_stats net_api_stats;
747 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
748 &net_api_stats, net_api_stats, "");
749
750
751 unsigned int net_rxpoll = 1;
752 unsigned int net_affinity = 1;
753 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
754
755 extern u_int32_t inject_buckets;
756
757 static lck_grp_attr_t *dlil_grp_attributes = NULL;
758 static lck_attr_t *dlil_lck_attributes = NULL;
759
760 /* DLIL data threshold thread call */
761 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
762
763 static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);
764
765 uint32_t dlil_rcv_mit_pkts_min = 5;
766 uint32_t dlil_rcv_mit_pkts_max = 64;
767 uint32_t dlil_rcv_mit_interval = (500 * 1000);
768
769 #if (DEVELOPMENT || DEBUG)
770 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
771 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
772 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
773 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
774 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
775 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
776 #endif /* DEVELOPMENT || DEBUG */
777
778
779 #define DLIL_INPUT_CHECK(m, ifp) { \
780 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
781 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
782 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
783 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
784 /* NOTREACHED */ \
785 } \
786 }
787
788 #define DLIL_EWMA(old, new, decay) do { \
789 u_int32_t _avg; \
790 if ((_avg = (old)) > 0) \
791 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
792 else \
793 _avg = (new); \
794 (old) = _avg; \
795 } while (0)
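/*
 * DLIL_EWMA maintains a fixed-point exponentially weighted moving average
 * without division: the result is old * (1 - 2^-decay) + new * 2^-decay.
 * For example, with decay = 2 (see IF_RXPOLL_DECAY above), old = 100 and
 * new = 20 give ((100 << 2) - 100 + 20) >> 2 = 320 >> 2 = 80.
 */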
796
797 #define MBPS (1ULL * 1000 * 1000)
798 #define GBPS (MBPS * 1000)
799
800 struct rxpoll_time_tbl {
801 u_int64_t speed; /* downlink speed */
802 u_int32_t plowat; /* packets low watermark */
803 u_int32_t phiwat; /* packets high watermark */
804 u_int32_t blowat; /* bytes low watermark */
805 u_int32_t bhiwat; /* bytes high watermark */
806 };
807
808 static struct rxpoll_time_tbl rxpoll_tbl[] = {
809 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
810 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
811 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
812 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
813 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
814 { 0, 0, 0, 0, 0 }
815 };
816
817 int
818 proto_hash_value(u_int32_t protocol_family)
819 {
820 /*
821 * dlil_proto_unplumb_all() depends on the mapping between
822 * the hash bucket index and the protocol family defined
823 * here; future changes must be applied there as well.
824 */
825 switch (protocol_family) {
826 case PF_INET:
827 return (0);
828 case PF_INET6:
829 return (1);
830 case PF_VLAN:
831 return (2);
832 case PF_UNSPEC:
833 default:
834 return (3);
835 }
836 }
837
838 /*
839 * Caller must already be holding ifnet lock.
840 */
841 static struct if_proto *
842 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
843 {
844 struct if_proto *proto = NULL;
845 u_int32_t i = proto_hash_value(protocol_family);
846
847 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
848
849 if (ifp->if_proto_hash != NULL)
850 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
851
852 while (proto != NULL && proto->protocol_family != protocol_family)
853 proto = SLIST_NEXT(proto, next_hash);
854
855 if (proto != NULL)
856 if_proto_ref(proto);
857
858 return (proto);
859 }
860
861 static void
862 if_proto_ref(struct if_proto *proto)
863 {
864 atomic_add_32(&proto->refcount, 1);
865 }
866
867 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
868
869 static void
870 if_proto_free(struct if_proto *proto)
871 {
872 u_int32_t oldval;
873 struct ifnet *ifp = proto->ifp;
874 u_int32_t proto_family = proto->protocol_family;
875 struct kev_dl_proto_data ev_pr_data;
876
877 oldval = atomic_add_32_ov(&proto->refcount, -1);
878 if (oldval > 1)
879 return;
880
881 /* No more reference on this, protocol must have been detached */
882 VERIFY(proto->detached);
883
884 if (proto->proto_kpi == kProtoKPI_v1) {
885 if (proto->kpi.v1.detached)
886 proto->kpi.v1.detached(ifp, proto->protocol_family);
887 }
888 if (proto->proto_kpi == kProtoKPI_v2) {
889 if (proto->kpi.v2.detached)
890 proto->kpi.v2.detached(ifp, proto->protocol_family);
891 }
892
893 /*
894 * Cleanup routes that may still be in the routing table for that
895 * interface/protocol pair.
896 */
897 if_rtproto_del(ifp, proto_family);
898
899 /*
900  * The reserved field carries the number of protocols still attached
901 * (subject to change)
902 */
903 ifnet_lock_shared(ifp);
904 ev_pr_data.proto_family = proto_family;
905 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
906 ifnet_lock_done(ifp);
907
908 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
909 (struct net_event_data *)&ev_pr_data,
910 sizeof (struct kev_dl_proto_data));
911
912 if (ev_pr_data.proto_remaining_count == 0) {
913 /*
914 * The protocol count has gone to zero, mark the interface down.
915 * This used to be done by configd.KernelEventMonitor, but that
916 * is inherently prone to races (rdar://problem/30810208).
917 */
918 (void) ifnet_set_flags(ifp, 0, IFF_UP);
919 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
920 dlil_post_sifflags_msg(ifp);
921 }
922
923 zfree(dlif_proto_zone, proto);
924 }
925
926 __private_extern__ void
927 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
928 {
929 #if !MACH_ASSERT
930 #pragma unused(ifp)
931 #endif
932 unsigned int type = 0;
933 int ass = 1;
934
935 switch (what) {
936 case IFNET_LCK_ASSERT_EXCLUSIVE:
937 type = LCK_RW_ASSERT_EXCLUSIVE;
938 break;
939
940 case IFNET_LCK_ASSERT_SHARED:
941 type = LCK_RW_ASSERT_SHARED;
942 break;
943
944 case IFNET_LCK_ASSERT_OWNED:
945 type = LCK_RW_ASSERT_HELD;
946 break;
947
948 case IFNET_LCK_ASSERT_NOTOWNED:
949 /* nothing to do here for RW lock; bypass assert */
950 ass = 0;
951 break;
952
953 default:
954 panic("bad ifnet assert type: %d", what);
955 /* NOTREACHED */
956 }
957 if (ass)
958 LCK_RW_ASSERT(&ifp->if_lock, type);
959 }
960
961 __private_extern__ void
962 ifnet_lock_shared(struct ifnet *ifp)
963 {
964 lck_rw_lock_shared(&ifp->if_lock);
965 }
966
967 __private_extern__ void
968 ifnet_lock_exclusive(struct ifnet *ifp)
969 {
970 lck_rw_lock_exclusive(&ifp->if_lock);
971 }
972
973 __private_extern__ void
974 ifnet_lock_done(struct ifnet *ifp)
975 {
976 lck_rw_done(&ifp->if_lock);
977 }
978
979 #if INET
980 __private_extern__ void
981 if_inetdata_lock_shared(struct ifnet *ifp)
982 {
983 lck_rw_lock_shared(&ifp->if_inetdata_lock);
984 }
985
986 __private_extern__ void
987 if_inetdata_lock_exclusive(struct ifnet *ifp)
988 {
989 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
990 }
991
992 __private_extern__ void
993 if_inetdata_lock_done(struct ifnet *ifp)
994 {
995 lck_rw_done(&ifp->if_inetdata_lock);
996 }
997 #endif
998
999 #if INET6
1000 __private_extern__ void
1001 if_inet6data_lock_shared(struct ifnet *ifp)
1002 {
1003 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1004 }
1005
1006 __private_extern__ void
1007 if_inet6data_lock_exclusive(struct ifnet *ifp)
1008 {
1009 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1010 }
1011
1012 __private_extern__ void
1013 if_inet6data_lock_done(struct ifnet *ifp)
1014 {
1015 lck_rw_done(&ifp->if_inet6data_lock);
1016 }
1017 #endif
1018
1019 __private_extern__ void
1020 ifnet_head_lock_shared(void)
1021 {
1022 lck_rw_lock_shared(&ifnet_head_lock);
1023 }
1024
1025 __private_extern__ void
1026 ifnet_head_lock_exclusive(void)
1027 {
1028 lck_rw_lock_exclusive(&ifnet_head_lock);
1029 }
1030
1031 __private_extern__ void
1032 ifnet_head_done(void)
1033 {
1034 lck_rw_done(&ifnet_head_lock);
1035 }
1036
1037 __private_extern__ void
1038 ifnet_head_assert_exclusive(void)
1039 {
1040 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1041 }
1042
1043 /*
1044 * dlil_ifp_protolist
1045 * - get the list of protocols attached to the interface, or just the number
1046 * of attached protocols
1047 * - if the number returned is greater than 'list_count', truncation occurred
1048 *
1049 * Note:
1050 * - caller must already be holding ifnet lock.
1051 */
1052 static u_int32_t
1053 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1054 u_int32_t list_count)
1055 {
1056 u_int32_t count = 0;
1057 int i;
1058
1059 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1060
1061 if (ifp->if_proto_hash == NULL)
1062 goto done;
1063
1064 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1065 struct if_proto *proto;
1066 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1067 if (list != NULL && count < list_count) {
1068 list[count] = proto->protocol_family;
1069 }
1070 count++;
1071 }
1072 }
1073 done:
1074 return (count);
1075 }
1076
1077 __private_extern__ u_int32_t
1078 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1079 {
1080 ifnet_lock_shared(ifp);
1081 count = dlil_ifp_protolist(ifp, protolist, count);
1082 ifnet_lock_done(ifp);
1083 return (count);
1084 }
1085
1086 __private_extern__ void
1087 if_free_protolist(u_int32_t *list)
1088 {
1089 _FREE(list, M_TEMP);
1090 }
1091
1092 __private_extern__ void
1093 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1094 u_int32_t event_code, struct net_event_data *event_data,
1095 u_int32_t event_data_len)
1096 {
1097 struct net_event_data ev_data;
1098 struct kev_msg ev_msg;
1099
1100 bzero(&ev_msg, sizeof (ev_msg));
1101 bzero(&ev_data, sizeof (ev_data));
1102 /*
1103 * a net event always starts with a net_event_data structure
1104 * but the caller can generate a simple net event or
1105 * provide a longer event structure to post
1106 */
1107 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1108 ev_msg.kev_class = KEV_NETWORK_CLASS;
1109 ev_msg.kev_subclass = event_subclass;
1110 ev_msg.event_code = event_code;
1111
1112 if (event_data == NULL) {
1113 event_data = &ev_data;
1114 event_data_len = sizeof (struct net_event_data);
1115 }
1116
1117 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1118 event_data->if_family = ifp->if_family;
1119 event_data->if_unit = (u_int32_t)ifp->if_unit;
1120
1121 ev_msg.dv[0].data_length = event_data_len;
1122 ev_msg.dv[0].data_ptr = event_data;
1123 ev_msg.dv[1].data_length = 0;
1124
1125         /* Don't update interface generation for quality and RRC state changes */
1126 bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
1127 (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
1128 event_code != KEV_DL_RRC_STATE_CHANGED));
1129
1130 dlil_event_internal(ifp, &ev_msg, update_generation);
1131 }
1132
1133 __private_extern__ int
1134 dlil_alloc_local_stats(struct ifnet *ifp)
1135 {
1136 int ret = EINVAL;
1137 void *buf, *base, **pbuf;
1138
1139 if (ifp == NULL)
1140 goto end;
1141
1142 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1143 /* allocate tcpstat_local structure */
1144 buf = zalloc(dlif_tcpstat_zone);
1145 if (buf == NULL) {
1146 ret = ENOMEM;
1147 goto end;
1148 }
1149 bzero(buf, dlif_tcpstat_bufsize);
1150
1151 /* Get the 64-bit aligned base address for this object */
1152 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1153 sizeof (u_int64_t));
1154 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1155 ((intptr_t)buf + dlif_tcpstat_bufsize));
1156
1157 /*
1158 * Wind back a pointer size from the aligned base and
1159 * save the original address so we can free it later.
1160 */
1161 pbuf = (void **)((intptr_t)base - sizeof (void *));
1162 *pbuf = buf;
1163 ifp->if_tcp_stat = base;
1164
1165 /* allocate udpstat_local structure */
1166 buf = zalloc(dlif_udpstat_zone);
1167 if (buf == NULL) {
1168 ret = ENOMEM;
1169 goto end;
1170 }
1171 bzero(buf, dlif_udpstat_bufsize);
1172
1173 /* Get the 64-bit aligned base address for this object */
1174 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1175 sizeof (u_int64_t));
1176 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1177 ((intptr_t)buf + dlif_udpstat_bufsize));
1178
1179 /*
1180 * Wind back a pointer size from the aligned base and
1181 * save the original address so we can free it later.
1182 */
1183 pbuf = (void **)((intptr_t)base - sizeof (void *));
1184 *pbuf = buf;
1185 ifp->if_udp_stat = base;
1186
1187 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
1188 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
1189
1190 ret = 0;
1191 }
1192
1193 if (ifp->if_ipv4_stat == NULL) {
1194 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1195 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1196 if (ifp->if_ipv4_stat == NULL) {
1197 ret = ENOMEM;
1198 goto end;
1199 }
1200 }
1201
1202 if (ifp->if_ipv6_stat == NULL) {
1203 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1204 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1205 if (ifp->if_ipv6_stat == NULL) {
1206 ret = ENOMEM;
1207 goto end;
1208 }
1209 }
1210 end:
1211 if (ret != 0) {
1212 if (ifp->if_tcp_stat != NULL) {
1213 pbuf = (void **)
1214 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
1215 zfree(dlif_tcpstat_zone, *pbuf);
1216 ifp->if_tcp_stat = NULL;
1217 }
1218 if (ifp->if_udp_stat != NULL) {
1219 pbuf = (void **)
1220 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
1221 zfree(dlif_udpstat_zone, *pbuf);
1222 ifp->if_udp_stat = NULL;
1223 }
1224 if (ifp->if_ipv4_stat != NULL) {
1225 FREE(ifp->if_ipv4_stat, M_TEMP);
1226 ifp->if_ipv4_stat = NULL;
1227 }
1228 if (ifp->if_ipv6_stat != NULL) {
1229 FREE(ifp->if_ipv6_stat, M_TEMP);
1230 ifp->if_ipv6_stat = NULL;
1231 }
1232 }
1233
1234 return (ret);
1235 }
1236
1237 static int
1238 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1239 {
1240 thread_continue_t func;
1241 u_int32_t limit;
1242 int error;
1243
1244 /* NULL ifp indicates the main input thread, called at dlil_init time */
1245 if (ifp == NULL) {
1246 func = dlil_main_input_thread_func;
1247 VERIFY(inp == dlil_main_input_thread);
1248 (void) strlcat(inp->input_name,
1249 "main_input", DLIL_THREADNAME_LEN);
1250 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1251 func = dlil_rxpoll_input_thread_func;
1252 VERIFY(inp != dlil_main_input_thread);
1253 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1254 "%s_input_poll", if_name(ifp));
1255 } else {
1256 func = dlil_input_thread_func;
1257 VERIFY(inp != dlil_main_input_thread);
1258 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1259 "%s_input", if_name(ifp));
1260 }
1261 VERIFY(inp->input_thr == THREAD_NULL);
1262
1263 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1264 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1265
1266 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1267 inp->ifp = ifp; /* NULL for main input thread */
1268
1269 net_timerclear(&inp->mode_holdtime);
1270 net_timerclear(&inp->mode_lasttime);
1271 net_timerclear(&inp->sample_holdtime);
1272 net_timerclear(&inp->sample_lasttime);
1273 net_timerclear(&inp->dbg_lasttime);
1274
1275 /*
1276 * For interfaces that support opportunistic polling, set the
1277 * low and high watermarks for outstanding inbound packets/bytes.
1278 * Also define freeze times for transitioning between modes
1279 * and updating the average.
1280 */
1281 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1282 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1283 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1284 } else {
1285 limit = (u_int32_t)-1;
1286 }
1287
1288 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1289 if (inp == dlil_main_input_thread) {
1290 struct dlil_main_threading_info *inpm =
1291 (struct dlil_main_threading_info *)inp;
1292 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1293 }
1294
1295 error = kernel_thread_start(func, inp, &inp->input_thr);
1296 if (error == KERN_SUCCESS) {
1297 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1298 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
1299 /*
1300 * We create an affinity set so that the matching workloop
1301 * thread or the starter thread (for loopback) can be
1302 * scheduled on the same processor set as the input thread.
1303 */
1304 if (net_affinity) {
1305 struct thread *tp = inp->input_thr;
1306 u_int32_t tag;
1307 /*
1308 * Randomize to reduce the probability
1309 * of affinity tag namespace collision.
1310 */
1311 read_frandom(&tag, sizeof (tag));
1312 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1313 thread_reference(tp);
1314 inp->tag = tag;
1315 inp->net_affinity = TRUE;
1316 }
1317 }
1318 } else if (inp == dlil_main_input_thread) {
1319 panic_plain("%s: couldn't create main input thread", __func__);
1320 /* NOTREACHED */
1321 } else {
1322 panic_plain("%s: couldn't create %s input thread", __func__,
1323 if_name(ifp));
1324 /* NOTREACHED */
1325 }
1326 OSAddAtomic(1, &cur_dlil_input_threads);
1327
1328 return (error);
1329 }
1330
1331 #if TEST_INPUT_THREAD_TERMINATION
1332 static int
1333 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1334 {
1335 #pragma unused(arg1, arg2)
1336 uint32_t i;
1337 int err;
1338
1339 i = if_input_thread_termination_spin;
1340
1341 err = sysctl_handle_int(oidp, &i, 0, req);
1342 if (err != 0 || req->newptr == USER_ADDR_NULL)
1343 return (err);
1344
1345 if (net_rxpoll == 0)
1346 return (ENXIO);
1347
1348 if_input_thread_termination_spin = i;
1349 return (err);
1350 }
1351 #endif /* TEST_INPUT_THREAD_TERMINATION */
1352
1353 static void
1354 dlil_clean_threading_info(struct dlil_threading_info *inp)
1355 {
1356 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1357 lck_grp_free(inp->lck_grp);
1358
1359 inp->input_waiting = 0;
1360 inp->wtot = 0;
1361 bzero(inp->input_name, sizeof (inp->input_name));
1362 inp->ifp = NULL;
1363 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1364 qlimit(&inp->rcvq_pkts) = 0;
1365 bzero(&inp->stats, sizeof (inp->stats));
1366
1367 VERIFY(!inp->net_affinity);
1368 inp->input_thr = THREAD_NULL;
1369 VERIFY(inp->wloop_thr == THREAD_NULL);
1370 VERIFY(inp->poll_thr == THREAD_NULL);
1371 VERIFY(inp->tag == 0);
1372
1373 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1374 bzero(&inp->tstats, sizeof (inp->tstats));
1375 bzero(&inp->pstats, sizeof (inp->pstats));
1376 bzero(&inp->sstats, sizeof (inp->sstats));
1377
1378 net_timerclear(&inp->mode_holdtime);
1379 net_timerclear(&inp->mode_lasttime);
1380 net_timerclear(&inp->sample_holdtime);
1381 net_timerclear(&inp->sample_lasttime);
1382 net_timerclear(&inp->dbg_lasttime);
1383
1384 #if IFNET_INPUT_SANITY_CHK
1385 inp->input_mbuf_cnt = 0;
1386 #endif /* IFNET_INPUT_SANITY_CHK */
1387 }
1388
1389 static void
1390 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1391 {
1392 struct ifnet *ifp = inp->ifp;
1393
1394 VERIFY(current_thread() == inp->input_thr);
1395 VERIFY(inp != dlil_main_input_thread);
1396
1397 OSAddAtomic(-1, &cur_dlil_input_threads);
1398
1399 #if TEST_INPUT_THREAD_TERMINATION
1400 { /* do something useless that won't get optimized away */
1401 uint32_t v = 1;
1402 for (uint32_t i = 0;
1403 i < if_input_thread_termination_spin;
1404 i++) {
1405 v = (i + 1) * v;
1406 }
1407 printf("the value is %d\n", v);
1408 }
1409 #endif /* TEST_INPUT_THREAD_TERMINATION */
1410
1411 lck_mtx_lock_spin(&inp->input_lck);
1412 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1413 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1414 wakeup_one((caddr_t)&inp->input_waiting);
1415 lck_mtx_unlock(&inp->input_lck);
1416
1417 /* for the extra refcnt from kernel_thread_start() */
1418 thread_deallocate(current_thread());
1419
1420 if (dlil_verbose) {
1421 printf("%s: input thread terminated\n",
1422 if_name(ifp));
1423 }
1424
1425 /* this is the end */
1426 thread_terminate(current_thread());
1427 /* NOTREACHED */
1428 }
1429
1430 static kern_return_t
1431 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1432 {
1433 thread_affinity_policy_data_t policy;
1434
1435 bzero(&policy, sizeof (policy));
1436 policy.affinity_tag = tag;
1437 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1438 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1439 }
1440
1441 void
1442 dlil_init(void)
1443 {
1444 thread_t thread = THREAD_NULL;
1445
1446 /*
1447 * The following fields must be 64-bit aligned for atomic operations.
1448 */
1449 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1450 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1451 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1452 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1453 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1454 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1455 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1456 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1457 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1458 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1459 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1460 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1461 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1462 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1463 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1464
1465 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1466 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1467 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1468 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1469 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1470 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1471 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1472 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1473 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1474 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1475 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1476 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1477 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1478 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1479 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1480
1481 /*
1482 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1483 */
1484 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1485 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1486 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1487 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1488 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1489 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1490 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1491 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1492 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1493 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1494 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1495 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1496 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1497 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1498
1499 /*
1500 * ... as well as the mbuf checksum flags counterparts.
1501 */
1502 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1503 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1504 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1505 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1506 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1507 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1508 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1509 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1510 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1511 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1512 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1513
1514 /*
1515 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1516 */
1517 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1518 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1519
1520 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1521 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1522 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1523 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1524
1525 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1526 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1527 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1528
1529 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1530 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1531 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1532 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1533 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1534 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1535 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1536 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1537 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1538 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1539 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1540 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1541 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1542 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1543 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1544 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1545
1546 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1547 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1548 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1549 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1550 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1551 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1552 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1553
1554 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1555 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1556
1557 PE_parse_boot_argn("net_affinity", &net_affinity,
1558 sizeof (net_affinity));
1559
1560 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1561
1562 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
1563
1564 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1565
1566 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1567 sizeof (struct dlil_ifnet_dbg);
1568 /* Enforce 64-bit alignment for dlil_ifnet structure */
1569 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1570 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1571 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1572 0, DLIF_ZONE_NAME);
1573 if (dlif_zone == NULL) {
1574 panic_plain("%s: failed allocating %s", __func__,
1575 DLIF_ZONE_NAME);
1576 /* NOTREACHED */
1577 }
1578 zone_change(dlif_zone, Z_EXPAND, TRUE);
1579 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1580
1581 dlif_filt_size = sizeof (struct ifnet_filter);
1582 dlif_filt_zone = zinit(dlif_filt_size,
1583 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1584 if (dlif_filt_zone == NULL) {
1585 panic_plain("%s: failed allocating %s", __func__,
1586 DLIF_FILT_ZONE_NAME);
1587 /* NOTREACHED */
1588 }
1589 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1590 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1591
1592 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1593 dlif_phash_zone = zinit(dlif_phash_size,
1594 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1595 if (dlif_phash_zone == NULL) {
1596 panic_plain("%s: failed allocating %s", __func__,
1597 DLIF_PHASH_ZONE_NAME);
1598 /* NOTREACHED */
1599 }
1600 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1601 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1602
1603 dlif_proto_size = sizeof (struct if_proto);
1604 dlif_proto_zone = zinit(dlif_proto_size,
1605 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1606 if (dlif_proto_zone == NULL) {
1607 panic_plain("%s: failed allocating %s", __func__,
1608 DLIF_PROTO_ZONE_NAME);
1609 /* NOTREACHED */
1610 }
1611 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1612 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1613
1614 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1615 /* Enforce 64-bit alignment for tcpstat_local structure */
1616 dlif_tcpstat_bufsize =
1617 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1618 dlif_tcpstat_bufsize =
1619 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1620 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1621 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1622 DLIF_TCPSTAT_ZONE_NAME);
1623 if (dlif_tcpstat_zone == NULL) {
1624 panic_plain("%s: failed allocating %s", __func__,
1625 DLIF_TCPSTAT_ZONE_NAME);
1626 /* NOTREACHED */
1627 }
1628 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1629 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1630
1631 dlif_udpstat_size = sizeof (struct udpstat_local);
1632 /* Enforce 64-bit alignment for udpstat_local structure */
1633 dlif_udpstat_bufsize =
1634 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1635 dlif_udpstat_bufsize =
1636 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1637 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1638 	    DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1639 DLIF_UDPSTAT_ZONE_NAME);
1640 if (dlif_udpstat_zone == NULL) {
1641 panic_plain("%s: failed allocating %s", __func__,
1642 DLIF_UDPSTAT_ZONE_NAME);
1643 /* NOTREACHED */
1644 }
1645 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1646 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1647
1648 ifnet_llreach_init();
1649 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1650
1651 TAILQ_INIT(&dlil_ifnet_head);
1652 TAILQ_INIT(&ifnet_head);
1653 TAILQ_INIT(&ifnet_detaching_head);
1654 TAILQ_INIT(&ifnet_ordered_head);
1655
1656 /* Setup the lock groups we will use */
1657 dlil_grp_attributes = lck_grp_attr_alloc_init();
1658
1659 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1660 dlil_grp_attributes);
1661 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1662 dlil_grp_attributes);
1663 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1664 dlil_grp_attributes);
1665 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1666 dlil_grp_attributes);
1667 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1668 dlil_grp_attributes);
1669
1670 /* Setup the lock attributes we will use */
1671 dlil_lck_attributes = lck_attr_alloc_init();
1672
1673 ifnet_lock_attr = lck_attr_alloc_init();
1674
1675 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1676 dlil_lck_attributes);
1677 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1678
1679 /* Setup interface flow control related items */
1680 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1681
1682 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1683 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1684 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1685 if (ifnet_fc_zone == NULL) {
1686 panic_plain("%s: failed allocating %s", __func__,
1687 IFNET_FC_ZONE_NAME);
1688 /* NOTREACHED */
1689 }
1690 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1691 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1692
1693 /* Initialize interface address subsystem */
1694 ifa_init();
1695
1696 #if PF
1697 /* Initialize the packet filter */
1698 pfinit();
1699 #endif /* PF */
1700
1701 /* Initialize queue algorithms */
1702 classq_init();
1703
1704 /* Initialize packet schedulers */
1705 pktsched_init();
1706
1707 /* Initialize flow advisory subsystem */
1708 flowadv_init();
1709
1710 /* Initialize the pktap virtual interface */
1711 pktap_init();
1712
1713 /* Initialize the service class to dscp map */
1714 net_qos_map_init();
1715
1716 /* Initialize the interface port list */
1717 if_ports_used_init();
1718
1719 #if DEBUG || DEVELOPMENT
1720 /* Run self-tests */
1721 dlil_verify_sum16();
1722 #endif /* DEBUG || DEVELOPMENT */
1723
1724 /* Initialize link layer table */
1725 lltable_glbl_init();
1726
1727 /*
1728 * Create and start up the main DLIL input thread and the interface
1729 * detacher threads once everything is initialized.
1730 */
1731 dlil_create_input_thread(NULL, dlil_main_input_thread);
1732
1733 if (kernel_thread_start(ifnet_detacher_thread_func,
1734 NULL, &thread) != KERN_SUCCESS) {
1735 panic_plain("%s: couldn't create detacher thread", __func__);
1736 /* NOTREACHED */
1737 }
1738 thread_deallocate(thread);
1739
1740 }
1741
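/*
 * The if_flt_monitor_* helpers below implement a simple busy/waiter
 * monitor over if_flt_lock: threads that are about to walk or modify
 * the interface filter chain mark it busy, and threads that need the
 * chain quiesced (e.g. filter detach) sleep on if_flt_head until the
 * busy count drops back to zero.
 */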
1742 static void
1743 if_flt_monitor_busy(struct ifnet *ifp)
1744 {
1745 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1746
1747 ++ifp->if_flt_busy;
1748 VERIFY(ifp->if_flt_busy != 0);
1749 }
1750
1751 static void
1752 if_flt_monitor_unbusy(struct ifnet *ifp)
1753 {
1754 if_flt_monitor_leave(ifp);
1755 }
1756
1757 static void
1758 if_flt_monitor_enter(struct ifnet *ifp)
1759 {
1760 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1761
1762 while (ifp->if_flt_busy) {
1763 ++ifp->if_flt_waiters;
1764 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1765 (PZERO - 1), "if_flt_monitor", NULL);
1766 }
1767 if_flt_monitor_busy(ifp);
1768 }
1769
1770 static void
1771 if_flt_monitor_leave(struct ifnet *ifp)
1772 {
1773 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1774
1775 VERIFY(ifp->if_flt_busy != 0);
1776 --ifp->if_flt_busy;
1777
1778 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1779 ifp->if_flt_waiters = 0;
1780 wakeup(&ifp->if_flt_head);
1781 }
1782 }
1783
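/*
 * Attach an interface filter to ifp and return its opaque reference
 * through filter_ref; this is the back end normally reached via the
 * iflt_attach() KPI.  The filter is allocated from dlif_filt_zone and
 * appended to the tail of the interface's filter chain under the
 * filter monitor.
 */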
1784 __private_extern__ int
1785 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1786 interface_filter_t *filter_ref, u_int32_t flags)
1787 {
1788 int retval = 0;
1789 struct ifnet_filter *filter = NULL;
1790
1791 ifnet_head_lock_shared();
1792 /* Check that the interface is in the global list */
1793 if (!ifnet_lookup(ifp)) {
1794 retval = ENXIO;
1795 goto done;
1796 }
1797
1798 filter = zalloc(dlif_filt_zone);
1799 if (filter == NULL) {
1800 retval = ENOMEM;
1801 goto done;
1802 }
1803 bzero(filter, dlif_filt_size);
1804
1805 /* refcnt held above during lookup */
1806 filter->filt_flags = flags;
1807 filter->filt_ifp = ifp;
1808 filter->filt_cookie = if_filter->iff_cookie;
1809 filter->filt_name = if_filter->iff_name;
1810 filter->filt_protocol = if_filter->iff_protocol;
1811 /*
1812 * Do not install filter callbacks for internal coproc interface
1813 */
1814 if (!IFNET_IS_INTCOPROC(ifp)) {
1815 filter->filt_input = if_filter->iff_input;
1816 filter->filt_output = if_filter->iff_output;
1817 filter->filt_event = if_filter->iff_event;
1818 filter->filt_ioctl = if_filter->iff_ioctl;
1819 }
1820 filter->filt_detached = if_filter->iff_detached;
1821
1822 lck_mtx_lock(&ifp->if_flt_lock);
1823 if_flt_monitor_enter(ifp);
1824
1825 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1826 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1827
1828 if_flt_monitor_leave(ifp);
1829 lck_mtx_unlock(&ifp->if_flt_lock);
1830
1831 *filter_ref = filter;
1832
1833 /*
1834 * Bump filter count and route_generation ID to let TCP
1835 * know it shouldn't do TSO on this connection
1836 */
1837 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1838 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1839 routegenid_update();
1840 }
1841 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1842 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1843 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1844 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1845 }
1846 if (dlil_verbose) {
1847 printf("%s: %s filter attached\n", if_name(ifp),
1848 if_filter->iff_name);
1849 }
1850 done:
1851 ifnet_head_done();
1852 if (retval != 0 && ifp != NULL) {
1853 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1854 if_name(ifp), if_filter->iff_name, retval);
1855 }
1856 if (retval != 0 && filter != NULL)
1857 zfree(dlif_filt_zone, filter);
1858
1859 return (retval);
1860 }
1861
1862 static int
1863 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1864 {
1865 int retval = 0;
1866
1867 if (detached == 0) {
1868 ifnet_t ifp = NULL;
1869
1870 ifnet_head_lock_shared();
1871 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1872 interface_filter_t entry = NULL;
1873
1874 lck_mtx_lock(&ifp->if_flt_lock);
1875 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1876 if (entry != filter || entry->filt_skip)
1877 continue;
1878 /*
1879 * We've found a match; since it's possible
1880 * that the thread gets blocked in the monitor,
1881 * we do the lock dance. Interface should
1882 * not be detached since we still have a use
1883 * count held during filter attach.
1884 */
1885 entry->filt_skip = 1; /* skip input/output */
1886 lck_mtx_unlock(&ifp->if_flt_lock);
1887 ifnet_head_done();
1888
1889 lck_mtx_lock(&ifp->if_flt_lock);
1890 if_flt_monitor_enter(ifp);
1891 LCK_MTX_ASSERT(&ifp->if_flt_lock,
1892 LCK_MTX_ASSERT_OWNED);
1893
1894 /* Remove the filter from the list */
1895 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1896 filt_next);
1897
1898 if_flt_monitor_leave(ifp);
1899 lck_mtx_unlock(&ifp->if_flt_lock);
1900 if (dlil_verbose) {
1901 printf("%s: %s filter detached\n",
1902 if_name(ifp), filter->filt_name);
1903 }
1904 goto destroy;
1905 }
1906 lck_mtx_unlock(&ifp->if_flt_lock);
1907 }
1908 ifnet_head_done();
1909
1910 /* filter parameter is not a valid filter ref */
1911 retval = EINVAL;
1912 goto done;
1913 }
1914
1915 if (dlil_verbose)
1916 printf("%s filter detached\n", filter->filt_name);
1917
1918 destroy:
1919
1920 /* Call the detached function if there is one */
1921 if (filter->filt_detached)
1922 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1923
1924 /*
1925 	 * Decrease filter count and bump route_generation ID to let TCP
1926 	 * know it should reevaluate doing TSO or not
1927 */
1928 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1929 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1930 routegenid_update();
1931 }
1932
1933 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1934
1935 /* Free the filter */
1936 zfree(dlif_filt_zone, filter);
1937 filter = NULL;
1938 done:
1939 if (retval != 0 && filter != NULL) {
1940 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1941 filter->filt_name, retval);
1942 }
1943
1944 return (retval);
1945 }
1946
1947 __private_extern__ void
1948 dlil_detach_filter(interface_filter_t filter)
1949 {
1950 if (filter == NULL)
1951 return;
1952 dlil_detach_filter_internal(filter, 0);
1953 }
1954
1955 /*
1956 * Main input thread:
1957 *
1958 * a) handles all inbound packets for lo0
1959 * b) handles all inbound packets for interfaces with no dedicated
1960 * input thread (e.g. anything but Ethernet/PDP or those that support
1961 * opportunistic polling.)
1962 * c) protocol registrations
1963 * d) packet injections
1964 */
1965 __attribute__((noreturn))
1966 static void
1967 dlil_main_input_thread_func(void *v, wait_result_t w)
1968 {
1969 #pragma unused(w)
1970 struct dlil_main_threading_info *inpm = v;
1971 struct dlil_threading_info *inp = v;
1972
1973 VERIFY(inp == dlil_main_input_thread);
1974 VERIFY(inp->ifp == NULL);
1975 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1976
1977 while (1) {
1978 struct mbuf *m = NULL, *m_loop = NULL;
1979 u_int32_t m_cnt, m_cnt_loop;
1980 boolean_t proto_req;
1981
1982 lck_mtx_lock_spin(&inp->input_lck);
1983
1984 /* Wait until there is work to be done */
1985 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1986 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1987 (void) msleep(&inp->input_waiting, &inp->input_lck,
1988 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1989 }
1990
1991 inp->input_waiting |= DLIL_INPUT_RUNNING;
1992 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1993
1994 /* Main input thread cannot be terminated */
1995 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
1996
1997 proto_req = (inp->input_waiting &
1998 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
1999
2000 /* Packets for non-dedicated interfaces other than lo0 */
2001 m_cnt = qlen(&inp->rcvq_pkts);
2002 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2003
2004 /* Packets exclusive to lo0 */
2005 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2006 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
2007
2008 inp->wtot = 0;
2009
2010 lck_mtx_unlock(&inp->input_lck);
2011
2012 /*
2013 * NOTE warning %%% attention !!!!
2014 		 * We should think about putting in some thread-starvation
2015 		 * safeguards for when we deal with long chains of packets.
2016 */
2017 if (m_loop != NULL)
2018 dlil_input_packet_list_extended(lo_ifp, m_loop,
2019 m_cnt_loop, inp->mode);
2020
2021 if (m != NULL)
2022 dlil_input_packet_list_extended(NULL, m,
2023 m_cnt, inp->mode);
2024
2025 if (proto_req)
2026 proto_input_run();
2027 }
2028
2029 /* NOTREACHED */
2030 VERIFY(0); /* we should never get here */
2031 }
2032
2033 /*
2034 * Input thread for interfaces with legacy input model.
2035 */
2036 static void
2037 dlil_input_thread_func(void *v, wait_result_t w)
2038 {
2039 #pragma unused(w)
2040 char thread_name[MAXTHREADNAMESIZE];
2041 struct dlil_threading_info *inp = v;
2042 struct ifnet *ifp = inp->ifp;
2043
2044 /* Construct the name for this thread, and then apply it. */
2045 bzero(thread_name, sizeof(thread_name));
2046 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2047 thread_set_thread_name(inp->input_thr, thread_name);
2048
2049 VERIFY(inp != dlil_main_input_thread);
2050 VERIFY(ifp != NULL);
2051 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2052 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2053
2054 while (1) {
2055 struct mbuf *m = NULL;
2056 u_int32_t m_cnt;
2057
2058 lck_mtx_lock_spin(&inp->input_lck);
2059
2060 /* Wait until there is work to be done */
2061 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2062 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2063 (void) msleep(&inp->input_waiting, &inp->input_lck,
2064 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2065 }
2066
2067 inp->input_waiting |= DLIL_INPUT_RUNNING;
2068 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2069
2070 /*
2071 * Protocol registration and injection must always use
2072 		 * the main input thread; in theory the latter could use
2073 		 * the input thread of the interface the packet arrived
2074 		 * on, but that requires knowing the interface in advance
2075 		 * (and the benefits might not be worth the trouble.)
2076 */
2077 VERIFY(!(inp->input_waiting &
2078 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2079
2080 /* Packets for this interface */
2081 m_cnt = qlen(&inp->rcvq_pkts);
2082 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2083
2084 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2085 lck_mtx_unlock(&inp->input_lck);
2086
2087 /* Free up pending packets */
2088 if (m != NULL)
2089 mbuf_freem_list(m);
2090
2091 dlil_terminate_input_thread(inp);
2092 /* NOTREACHED */
2093 return;
2094 }
2095
2096 inp->wtot = 0;
2097
2098 dlil_input_stats_sync(ifp, inp);
2099
2100 lck_mtx_unlock(&inp->input_lck);
2101
2102 /*
2103 * NOTE warning %%% attention !!!!
2104 		 * We should think about putting in some thread-starvation
2105 		 * safeguards for when we deal with long chains of packets.
2106 */
2107 if (m != NULL)
2108 dlil_input_packet_list_extended(NULL, m,
2109 m_cnt, inp->mode);
2110 }
2111
2112 /* NOTREACHED */
2113 VERIFY(0); /* we should never get here */
2114 }
2115
2116 /*
2117 * Input thread for interfaces with opportunistic polling input model.
2118 */
2119 static void
2120 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2121 {
2122 #pragma unused(w)
2123 struct dlil_threading_info *inp = v;
2124 struct ifnet *ifp = inp->ifp;
2125 struct timespec ts;
2126
2127 VERIFY(inp != dlil_main_input_thread);
2128 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2129
2130 while (1) {
2131 struct mbuf *m = NULL;
2132 u_int32_t m_cnt, m_size, poll_req = 0;
2133 ifnet_model_t mode;
2134 struct timespec now, delta;
2135 u_int64_t ival;
2136
2137 lck_mtx_lock_spin(&inp->input_lck);
2138
2139 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
2140 ival = IF_RXPOLL_INTERVALTIME_MIN;
2141
2142 /* Link parameters changed? */
2143 if (ifp->if_poll_update != 0) {
2144 ifp->if_poll_update = 0;
2145 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2146 }
2147
2148 /* Current operating mode */
2149 mode = inp->mode;
2150
2151 /* Wait until there is work to be done */
2152 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2153 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2154 (void) msleep(&inp->input_waiting, &inp->input_lck,
2155 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2156 }
2157
2158 inp->input_waiting |= DLIL_INPUT_RUNNING;
2159 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2160
2161 /*
2162 * Protocol registration and injection must always use
2163 		 * the main input thread; in theory the latter could use
2164 		 * the input thread of the interface the packet arrived
2165 		 * on, but that requires knowing the interface in advance
2166 		 * (and the benefits might not be worth the trouble.)
2167 */
2168 VERIFY(!(inp->input_waiting &
2169 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2170
2171 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2172 /* Free up pending packets */
2173 lck_mtx_convert_spin(&inp->input_lck);
2174 _flushq(&inp->rcvq_pkts);
2175 if (inp->input_mit_tcall != NULL) {
2176 if (thread_call_isactive(inp->input_mit_tcall))
2177 thread_call_cancel(inp->input_mit_tcall);
2178 }
2179 lck_mtx_unlock(&inp->input_lck);
2180
2181 dlil_terminate_input_thread(inp);
2182 /* NOTREACHED */
2183 return;
2184 }
2185
2186 /* Total count of all packets */
2187 m_cnt = qlen(&inp->rcvq_pkts);
2188
2189 /* Total bytes of all packets */
2190 m_size = qsize(&inp->rcvq_pkts);
2191
2192 /* Packets for this interface */
2193 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2194 VERIFY(m != NULL || m_cnt == 0);
2195
2196 nanouptime(&now);
2197 if (!net_timerisset(&inp->sample_lasttime))
2198 *(&inp->sample_lasttime) = *(&now);
2199
2200 net_timersub(&now, &inp->sample_lasttime, &delta);
2201 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2202 u_int32_t ptot, btot;
2203
2204 /* Accumulate statistics for current sampling */
2205 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2206
2207 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2208 goto skip;
2209
2210 *(&inp->sample_lasttime) = *(&now);
2211
2212 /* Calculate min/max of inbound bytes */
2213 btot = (u_int32_t)inp->sstats.bytes;
2214 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2215 inp->rxpoll_bmin = btot;
2216 if (btot > inp->rxpoll_bmax)
2217 inp->rxpoll_bmax = btot;
2218
2219 /* Calculate EWMA of inbound bytes */
2220 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2221
2222 /* Calculate min/max of inbound packets */
2223 ptot = (u_int32_t)inp->sstats.packets;
2224 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2225 inp->rxpoll_pmin = ptot;
2226 if (ptot > inp->rxpoll_pmax)
2227 inp->rxpoll_pmax = ptot;
2228
2229 /* Calculate EWMA of inbound packets */
2230 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2231
2232 /* Reset sampling statistics */
2233 PKTCNTR_CLEAR(&inp->sstats);
2234
2235 /* Calculate EWMA of wakeup requests */
2236 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2237 inp->wtot = 0;
2238
2239 if (dlil_verbose) {
2240 if (!net_timerisset(&inp->dbg_lasttime))
2241 *(&inp->dbg_lasttime) = *(&now);
2242 net_timersub(&now, &inp->dbg_lasttime, &delta);
2243 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2244 *(&inp->dbg_lasttime) = *(&now);
2245 printf("%s: [%s] pkts avg %d max %d "
2246 "limits [%d/%d], wreq avg %d "
2247 "limits [%d/%d], bytes avg %d "
2248 "limits [%d/%d]\n", if_name(ifp),
2249 (inp->mode ==
2250 IFNET_MODEL_INPUT_POLL_ON) ?
2251 "ON" : "OFF", inp->rxpoll_pavg,
2252 inp->rxpoll_pmax,
2253 inp->rxpoll_plowat,
2254 inp->rxpoll_phiwat,
2255 inp->rxpoll_wavg,
2256 inp->rxpoll_wlowat,
2257 inp->rxpoll_whiwat,
2258 inp->rxpoll_bavg,
2259 inp->rxpoll_blowat,
2260 inp->rxpoll_bhiwat);
2261 }
2262 }
2263
2264 /* Perform mode transition, if necessary */
2265 if (!net_timerisset(&inp->mode_lasttime))
2266 *(&inp->mode_lasttime) = *(&now);
2267
2268 net_timersub(&now, &inp->mode_lasttime, &delta);
2269 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2270 goto skip;
2271
2272 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2273 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2274 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2275 mode = IFNET_MODEL_INPUT_POLL_OFF;
2276 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2277 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2278 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2279 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2280 mode = IFNET_MODEL_INPUT_POLL_ON;
2281 }
2282
2283 if (mode != inp->mode) {
2284 inp->mode = mode;
2285 *(&inp->mode_lasttime) = *(&now);
2286 poll_req++;
2287 }
2288 }
2289 skip:
2290 dlil_input_stats_sync(ifp, inp);
2291
2292 lck_mtx_unlock(&inp->input_lck);
2293
2294 /*
2295 		 * If there's a mode change and the interface is still attached,
2296 		 * perform a downcall to the driver for the new mode. Also
2297 		 * hold an IO refcnt on the interface to prevent it from
2298 		 * being detached (will be released below.)
2299 */
2300 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2301 struct ifnet_model_params p = { mode, { 0 } };
2302 errno_t err;
2303
2304 if (dlil_verbose) {
2305 printf("%s: polling is now %s, "
2306 "pkts avg %d max %d limits [%d/%d], "
2307 "wreq avg %d limits [%d/%d], "
2308 "bytes avg %d limits [%d/%d]\n",
2309 if_name(ifp),
2310 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2311 "ON" : "OFF", inp->rxpoll_pavg,
2312 inp->rxpoll_pmax, inp->rxpoll_plowat,
2313 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2314 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2315 inp->rxpoll_bavg, inp->rxpoll_blowat,
2316 inp->rxpoll_bhiwat);
2317 }
2318
2319 if ((err = ((*ifp->if_input_ctl)(ifp,
2320 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
2321 printf("%s: error setting polling mode "
2322 "to %s (%d)\n", if_name(ifp),
2323 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2324 "ON" : "OFF", err);
2325 }
2326
2327 switch (mode) {
2328 case IFNET_MODEL_INPUT_POLL_OFF:
2329 ifnet_set_poll_cycle(ifp, NULL);
2330 inp->rxpoll_offreq++;
2331 if (err != 0)
2332 inp->rxpoll_offerr++;
2333 break;
2334
2335 case IFNET_MODEL_INPUT_POLL_ON:
2336 net_nsectimer(&ival, &ts);
2337 ifnet_set_poll_cycle(ifp, &ts);
2338 ifnet_poll(ifp);
2339 inp->rxpoll_onreq++;
2340 if (err != 0)
2341 inp->rxpoll_onerr++;
2342 break;
2343
2344 default:
2345 VERIFY(0);
2346 /* NOTREACHED */
2347 }
2348
2349 /* Release the IO refcnt */
2350 ifnet_decr_iorefcnt(ifp);
2351 }
2352
2353 /*
2354 * NOTE warning %%% attention !!!!
2355 		 * We should think about putting in some thread-starvation
2356 		 * safeguards for when we deal with long chains of packets.
2357 */
2358 if (m != NULL)
2359 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2360 }
2361
2362 /* NOTREACHED */
2363 VERIFY(0); /* we should never get here */
2364 }
2365
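/*
 * A hypothetical sketch of a driver supplying its own polling
 * parameters instead of relying purely on auto-tuning (field values
 * are made up for illustration; any field left at zero falls back to
 * the auto-tuned value derived from rxpoll_tbl and the if_rxpoll_*
 * knobs):
 *
 *	struct ifnet_poll_params p = {
 *		.packets_lowat = 8,
 *		.packets_hiwat = 64,
 *		.interval_time = IF_RXPOLL_INTERVALTIME_MIN,
 *	};
 *	(void) dlil_rxpoll_set_params(ifp, &p, FALSE);
 */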
2366 /*
2367 * Must be called on an attached ifnet (caller is expected to check.)
2368 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2369 */
2370 errno_t
2371 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2372 boolean_t locked)
2373 {
2374 struct dlil_threading_info *inp;
2375 u_int64_t sample_holdtime, inbw;
2376
2377 VERIFY(ifp != NULL);
2378 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2379 return (ENXIO);
2380
2381 if (p != NULL) {
2382 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2383 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2384 return (EINVAL);
2385 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2386 p->packets_lowat >= p->packets_hiwat)
2387 return (EINVAL);
2388 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2389 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2390 return (EINVAL);
2391 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2392 p->bytes_lowat >= p->bytes_hiwat)
2393 return (EINVAL);
2394 if (p->interval_time != 0 &&
2395 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2396 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2397 }
2398
2399 if (!locked)
2400 lck_mtx_lock(&inp->input_lck);
2401
2402 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2403
2404 /*
2405 * Normally, we'd reset the parameters to the auto-tuned values
2406 	 * if the input thread detects a change in link rate. If the
2407 	 * driver provides its own parameters right after the link rate
2408 * changes, but before the input thread gets to run, we want to
2409 * make sure to keep the driver's values. Clearing if_poll_update
2410 * will achieve that.
2411 */
2412 if (p != NULL && !locked && ifp->if_poll_update != 0)
2413 ifp->if_poll_update = 0;
2414
2415 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2416 sample_holdtime = 0; /* polling is disabled */
2417 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2418 inp->rxpoll_blowat = 0;
2419 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2420 inp->rxpoll_bhiwat = (u_int32_t)-1;
2421 inp->rxpoll_plim = 0;
2422 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2423 } else {
2424 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2425 u_int64_t ival;
2426 unsigned int n, i;
2427
2428 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2429 if (inbw < rxpoll_tbl[i].speed)
2430 break;
2431 n = i;
2432 }
2433 /* auto-tune if caller didn't specify a value */
2434 plowat = ((p == NULL || p->packets_lowat == 0) ?
2435 rxpoll_tbl[n].plowat : p->packets_lowat);
2436 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2437 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2438 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2439 rxpoll_tbl[n].blowat : p->bytes_lowat);
2440 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2441 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2442 plim = ((p == NULL || p->packets_limit == 0) ?
2443 if_rxpoll_max : p->packets_limit);
2444 ival = ((p == NULL || p->interval_time == 0) ?
2445 if_rxpoll_interval_time : p->interval_time);
2446
2447 VERIFY(plowat != 0 && phiwat != 0);
2448 VERIFY(blowat != 0 && bhiwat != 0);
2449 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2450
2451 sample_holdtime = if_rxpoll_sample_holdtime;
2452 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2453 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2454 inp->rxpoll_plowat = plowat;
2455 inp->rxpoll_phiwat = phiwat;
2456 inp->rxpoll_blowat = blowat;
2457 inp->rxpoll_bhiwat = bhiwat;
2458 inp->rxpoll_plim = plim;
2459 inp->rxpoll_ival = ival;
2460 }
2461
2462 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2463 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2464
2465 if (dlil_verbose) {
2466 printf("%s: speed %llu bps, sample per %llu nsec, "
2467 "poll interval %llu nsec, pkts per poll %u, "
2468 "pkt limits [%u/%u], wreq limits [%u/%u], "
2469 "bytes limits [%u/%u]\n", if_name(ifp),
2470 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2471 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2472 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2473 }
2474
2475 if (!locked)
2476 lck_mtx_unlock(&inp->input_lck);
2477
2478 return (0);
2479 }
2480
2481 /*
2482 * Must be called on an attached ifnet (caller is expected to check.)
2483 */
2484 errno_t
2485 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2486 {
2487 struct dlil_threading_info *inp;
2488
2489 VERIFY(ifp != NULL && p != NULL);
2490 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2491 return (ENXIO);
2492
2493 bzero(p, sizeof (*p));
2494
2495 lck_mtx_lock(&inp->input_lck);
2496 p->packets_limit = inp->rxpoll_plim;
2497 p->packets_lowat = inp->rxpoll_plowat;
2498 p->packets_hiwat = inp->rxpoll_phiwat;
2499 p->bytes_lowat = inp->rxpoll_blowat;
2500 p->bytes_hiwat = inp->rxpoll_bhiwat;
2501 p->interval_time = inp->rxpoll_ival;
2502 lck_mtx_unlock(&inp->input_lck);
2503
2504 return (0);
2505 }
2506
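/*
 * ifnet_input() and ifnet_input_extended() are the driver-facing entry
 * points into the DLIL input path.  The extended variant lets the
 * driver pass the tail of the mbuf chain along with accumulated
 * statistics so the chain need not be re-walked here; packets_in must
 * then match the actual number of packets in the chain (asserted
 * below).  A hypothetical driver receive path might look like:
 *
 *	struct ifnet_stat_increment_param s;
 *
 *	bzero(&s, sizeof (s));
 *	s.packets_in = rx_cnt;		// packets in the chain (made up)
 *	s.bytes_in = rx_bytes;		// bytes in the chain (made up)
 *	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
 */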
2507 errno_t
2508 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2509 const struct ifnet_stat_increment_param *s)
2510 {
2511 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2512 }
2513
2514 errno_t
2515 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2516 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2517 {
2518 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2519 }
2520
2521 static errno_t
2522 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2523 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2524 {
2525 dlil_input_func input_func;
2526 struct ifnet_stat_increment_param _s;
2527 u_int32_t m_cnt = 0, m_size = 0;
2528 struct mbuf *last;
2529 errno_t err = 0;
2530
2531 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2532 if (m_head != NULL)
2533 mbuf_freem_list(m_head);
2534 return (EINVAL);
2535 }
2536
2537 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2538 VERIFY(m_tail == NULL || ext);
2539 VERIFY(s != NULL || !ext);
2540
2541 /*
2542 * Drop the packet(s) if the parameters are invalid, or if the
2543 * interface is no longer attached; else hold an IO refcnt to
2544 * prevent it from being detached (will be released below.)
2545 */
2546 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2547 if (m_head != NULL)
2548 mbuf_freem_list(m_head);
2549 return (EINVAL);
2550 }
2551
2552 input_func = ifp->if_input_dlil;
2553 VERIFY(input_func != NULL);
2554
2555 if (m_tail == NULL) {
2556 last = m_head;
2557 while (m_head != NULL) {
2558 #if IFNET_INPUT_SANITY_CHK
2559 if (dlil_input_sanity_check != 0)
2560 DLIL_INPUT_CHECK(last, ifp);
2561 #endif /* IFNET_INPUT_SANITY_CHK */
2562 m_cnt++;
2563 m_size += m_length(last);
2564 if (mbuf_nextpkt(last) == NULL)
2565 break;
2566 last = mbuf_nextpkt(last);
2567 }
2568 m_tail = last;
2569 } else {
2570 #if IFNET_INPUT_SANITY_CHK
2571 if (dlil_input_sanity_check != 0) {
2572 last = m_head;
2573 while (1) {
2574 DLIL_INPUT_CHECK(last, ifp);
2575 m_cnt++;
2576 m_size += m_length(last);
2577 if (mbuf_nextpkt(last) == NULL)
2578 break;
2579 last = mbuf_nextpkt(last);
2580 }
2581 } else {
2582 m_cnt = s->packets_in;
2583 m_size = s->bytes_in;
2584 last = m_tail;
2585 }
2586 #else
2587 m_cnt = s->packets_in;
2588 m_size = s->bytes_in;
2589 last = m_tail;
2590 #endif /* IFNET_INPUT_SANITY_CHK */
2591 }
2592
2593 if (last != m_tail) {
2594 panic_plain("%s: invalid input packet chain for %s, "
2595 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2596 m_tail, last);
2597 }
2598
2599 /*
2600 * Assert packet count only for the extended variant, for backwards
2601 * compatibility, since this came directly from the device driver.
2602 * Relax this assertion for input bytes, as the driver may have
2603 * included the link-layer headers in the computation; hence
2604 * m_size is just an approximation.
2605 */
2606 if (ext && s->packets_in != m_cnt) {
2607 panic_plain("%s: input packet count mismatch for %s, "
2608 "%d instead of %d\n", __func__, if_name(ifp),
2609 s->packets_in, m_cnt);
2610 }
2611
2612 if (s == NULL) {
2613 bzero(&_s, sizeof (_s));
2614 s = &_s;
2615 } else {
2616 _s = *s;
2617 }
2618 _s.packets_in = m_cnt;
2619 _s.bytes_in = m_size;
2620
2621 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2622
2623 if (ifp != lo_ifp) {
2624 /* Release the IO refcnt */
2625 ifnet_decr_iorefcnt(ifp);
2626 }
2627
2628 return (err);
2629 }
2630
2631
2632 errno_t
2633 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2634 {
2635 return (ifp->if_output(ifp, m));
2636 }
2637
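/*
 * dlil_input_handler() is the handler normally installed as
 * if_input_dlil (invoked from ifnet_input_common() above): it charges
 * the chain to the receive queue of the interface's dedicated input
 * thread (or of the main input thread when there is none, with lo0
 * traffic kept on its own queue), updates the input stats, and then
 * either wakes the input thread or, for Ethernet/cellular interfaces
 * with a moderate backlog, arms the input mitigation thread call so
 * the wakeup can be batched.
 */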
2638 errno_t
2639 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2640 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2641 boolean_t poll, struct thread *tp)
2642 {
2643 struct dlil_threading_info *inp;
2644 u_int32_t m_cnt = s->packets_in;
2645 u_int32_t m_size = s->bytes_in;
2646
2647 if ((inp = ifp->if_inp) == NULL)
2648 inp = dlil_main_input_thread;
2649
2650 /*
2651 * If there is a matching DLIL input thread associated with an
2652 * affinity set, associate this thread with the same set. We
2653 * will only do this once.
2654 */
2655 lck_mtx_lock_spin(&inp->input_lck);
2656 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2657 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2658 (poll && inp->poll_thr == THREAD_NULL))) {
2659 u_int32_t tag = inp->tag;
2660
2661 if (poll) {
2662 VERIFY(inp->poll_thr == THREAD_NULL);
2663 inp->poll_thr = tp;
2664 } else {
2665 VERIFY(inp->wloop_thr == THREAD_NULL);
2666 inp->wloop_thr = tp;
2667 }
2668 lck_mtx_unlock(&inp->input_lck);
2669
2670 /* Associate the current thread with the new affinity tag */
2671 (void) dlil_affinity_set(tp, tag);
2672
2673 /*
2674 * Take a reference on the current thread; during detach,
2675 * we will need to refer to it in order to tear down its
2676 * affinity.
2677 */
2678 thread_reference(tp);
2679 lck_mtx_lock_spin(&inp->input_lck);
2680 }
2681
2682 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2683
2684 /*
2685 * Because of loopbacked multicast we cannot stuff the ifp in
2686 * the rcvif of the packet header: loopback (lo0) packets use a
2687 * dedicated list so that we can later associate them with lo_ifp
2688 * on their way up the stack. Packets for other interfaces without
2689 * dedicated input threads go to the regular list.
2690 */
2691 if (m_head != NULL) {
2692 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2693 struct dlil_main_threading_info *inpm =
2694 (struct dlil_main_threading_info *)inp;
2695 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2696 m_cnt, m_size);
2697 } else {
2698 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2699 m_cnt, m_size);
2700 }
2701 }
2702
2703 #if IFNET_INPUT_SANITY_CHK
2704 if (dlil_input_sanity_check != 0) {
2705 u_int32_t count;
2706 struct mbuf *m0;
2707
2708 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2709 count++;
2710
2711 if (count != m_cnt) {
2712 panic_plain("%s: invalid packet count %d "
2713 "(expected %d)\n", if_name(ifp),
2714 count, m_cnt);
2715 /* NOTREACHED */
2716 }
2717
2718 inp->input_mbuf_cnt += m_cnt;
2719 }
2720 #endif /* IFNET_INPUT_SANITY_CHK */
2721
2722 dlil_input_stats_add(s, inp, poll);
2723 /*
2724 * If we're using the main input thread, synchronize the
2725 * stats now since we have the interface context. All
2726 * other cases involving dedicated input threads will
2727 * have their stats synchronized there.
2728 */
2729 if (inp == dlil_main_input_thread)
2730 dlil_input_stats_sync(ifp, inp);
2731
2732 if (inp->input_mit_tcall &&
2733 qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
2734 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2735 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2736 ifp->if_type == IFT_CELLULAR)
2737 ) {
2738 if (!thread_call_isactive(inp->input_mit_tcall)) {
2739 uint64_t deadline;
2740 clock_interval_to_deadline(dlil_rcv_mit_interval,
2741 1, &deadline);
2742 (void) thread_call_enter_delayed(
2743 inp->input_mit_tcall, deadline);
2744 }
2745 } else {
2746 inp->input_waiting |= DLIL_INPUT_WAITING;
2747 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2748 inp->wtot++;
2749 wakeup_one((caddr_t)&inp->input_waiting);
2750 }
2751 }
2752 lck_mtx_unlock(&inp->input_lck);
2753
2754 return (0);
2755 }
2756
2757
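/*
 * ifnet_start_common() signals the per-interface starter thread that
 * there is work in the send queue; when resetfc is TRUE it also clears
 * IFSF_FLOW_CONTROLLED (i.e. output is being resumed) before deciding
 * whether to wake the thread.
 */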
2758 static void
2759 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
2760 {
2761 if (!(ifp->if_eflags & IFEF_TXSTART))
2762 return;
2763 /*
2764 * If the starter thread is inactive, signal it to do work,
2765 * unless the interface is being flow controlled from below,
2766 * e.g. a virtual interface being flow controlled by a real
2767 * network interface beneath it, or it's been disabled via
2768 * a call to ifnet_disable_output().
2769 */
2770 lck_mtx_lock_spin(&ifp->if_start_lock);
2771 if (resetfc) {
2772 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2773 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2774 lck_mtx_unlock(&ifp->if_start_lock);
2775 return;
2776 }
2777 ifp->if_start_req++;
2778 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2779 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2780 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2781 ifp->if_start_delayed == 0)) {
2782 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2783 ifp->if_start_thread);
2784 }
2785 lck_mtx_unlock(&ifp->if_start_lock);
2786 }
2787
2788 void
2789 ifnet_start(struct ifnet *ifp)
2790 {
2791 ifnet_start_common(ifp, FALSE);
2792 }
2793
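/*
 * Dedicated per-interface starter thread.  It sleeps on
 * if_start_thread until ifnet_start() wakes it (or until the thread
 * pointer is cleared at detach time), then repeatedly invokes the
 * driver's if_start callback while new requests keep arriving,
 * honoring the delayed-start window for IFEF_ENQUEUE_MULTI interfaces
 * and re-arming itself according to if_start_cycle when a TBR is
 * pacing the queue.
 */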
2794 static void
2795 ifnet_start_thread_fn(void *v, wait_result_t w)
2796 {
2797 #pragma unused(w)
2798 struct ifnet *ifp = v;
2799 char ifname[IFNAMSIZ + 1];
2800 char thread_name[MAXTHREADNAMESIZE];
2801 struct timespec *ts = NULL;
2802 struct ifclassq *ifq = &ifp->if_snd;
2803 struct timespec delay_start_ts;
2804
2805 /* Construct the name for this thread, and then apply it. */
2806 bzero(thread_name, sizeof(thread_name));
2807 (void) snprintf(thread_name, sizeof (thread_name),
2808 "ifnet_start_%s", ifp->if_xname);
2809 thread_set_thread_name(ifp->if_start_thread, thread_name);
2810
2811 /*
2812 * Treat the dedicated starter thread for lo0 as equivalent to
2813 	 * the main input thread, associate this starter thread with it
2814 * the main input thread, associate this starter thread to it
2815 * by binding them with the same affinity tag. This is done
2816 * only once (as we only have one lo_ifp which never goes away.)
2817 */
2818 if (ifp == lo_ifp) {
2819 struct dlil_threading_info *inp = dlil_main_input_thread;
2820 struct thread *tp = current_thread();
2821
2822 lck_mtx_lock(&inp->input_lck);
2823 if (inp->net_affinity) {
2824 u_int32_t tag = inp->tag;
2825
2826 VERIFY(inp->wloop_thr == THREAD_NULL);
2827 VERIFY(inp->poll_thr == THREAD_NULL);
2828 inp->wloop_thr = tp;
2829 lck_mtx_unlock(&inp->input_lck);
2830
2831 /* Associate this thread with the affinity tag */
2832 (void) dlil_affinity_set(tp, tag);
2833 } else {
2834 lck_mtx_unlock(&inp->input_lck);
2835 }
2836 }
2837
2838 (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));
2839
2840 lck_mtx_lock_spin(&ifp->if_start_lock);
2841
2842 for (;;) {
2843 if (ifp->if_start_thread != NULL) {
2844 (void) msleep(&ifp->if_start_thread,
2845 &ifp->if_start_lock,
2846 (PZERO - 1) | PSPIN, ifname, ts);
2847 }
2848 /* interface is detached? */
2849 if (ifp->if_start_thread == THREAD_NULL) {
2850 ifnet_set_start_cycle(ifp, NULL);
2851 lck_mtx_unlock(&ifp->if_start_lock);
2852 ifnet_purge(ifp);
2853
2854 if (dlil_verbose) {
2855 printf("%s: starter thread terminated\n",
2856 if_name(ifp));
2857 }
2858
2859 /* for the extra refcnt from kernel_thread_start() */
2860 thread_deallocate(current_thread());
2861 /* this is the end */
2862 thread_terminate(current_thread());
2863 /* NOTREACHED */
2864 return;
2865 }
2866
2867 ifp->if_start_active = 1;
2868
2869 for (;;) {
2870 u_int32_t req = ifp->if_start_req;
2871 if (!IFCQ_IS_EMPTY(ifq) &&
2872 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2873 ifp->if_start_delayed == 0 &&
2874 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2875 (ifp->if_eflags & IFEF_DELAY_START)) {
2876 ifp->if_start_delayed = 1;
2877 ifnet_start_delayed++;
2878 break;
2879 } else {
2880 ifp->if_start_delayed = 0;
2881 }
2882 lck_mtx_unlock(&ifp->if_start_lock);
2883
2884 /*
2885 * If no longer attached, don't call start because ifp
2886 * is being destroyed; else hold an IO refcnt to
2887 * prevent the interface from being detached (will be
2888 * released below.)
2889 */
2890 if (!ifnet_is_attached(ifp, 1)) {
2891 lck_mtx_lock_spin(&ifp->if_start_lock);
2892 break;
2893 }
2894
2895 /* invoke the driver's start routine */
2896 ((*ifp->if_start)(ifp));
2897
2898 /*
2899 * Release the io ref count taken by ifnet_is_attached.
2900 */
2901 ifnet_decr_iorefcnt(ifp);
2902
2903 lck_mtx_lock_spin(&ifp->if_start_lock);
2904
2905 /*
2906 * If there's no pending request or if the
2907 * interface has been disabled, we're done.
2908 */
2909 if (req == ifp->if_start_req ||
2910 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
2911 break;
2912 }
2913 }
2914
2915 ifp->if_start_req = 0;
2916 ifp->if_start_active = 0;
2917
2918 /*
2919 * Wakeup N ns from now if rate-controlled by TBR, and if
2920 * there are still packets in the send queue which haven't
2921 * been dequeued so far; else sleep indefinitely (ts = NULL)
2922 * until ifnet_start() is called again.
2923 */
2924 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2925 &ifp->if_start_cycle : NULL);
2926
2927 if (ts == NULL && ifp->if_start_delayed == 1) {
2928 delay_start_ts.tv_sec = 0;
2929 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2930 ts = &delay_start_ts;
2931 }
2932
2933 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2934 ts = NULL;
2935 }
2936
2937 /* NOTREACHED */
2938 }
2939
2940 void
2941 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2942 {
2943 if (ts == NULL)
2944 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2945 else
2946 *(&ifp->if_start_cycle) = *ts;
2947
2948 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2949 printf("%s: restart interval set to %lu nsec\n",
2950 if_name(ifp), ts->tv_nsec);
2951 }
2952
2953 static void
2954 ifnet_poll(struct ifnet *ifp)
2955 {
2956 /*
2957 * If the poller thread is inactive, signal it to do work.
2958 */
2959 lck_mtx_lock_spin(&ifp->if_poll_lock);
2960 ifp->if_poll_req++;
2961 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2962 wakeup_one((caddr_t)&ifp->if_poll_thread);
2963 }
2964 lck_mtx_unlock(&ifp->if_poll_lock);
2965 }
2966
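/*
 * Dedicated poller thread for interfaces in the opportunistic polling
 * model.  Once signalled via ifnet_poll(), it repeatedly calls the
 * driver's if_input_poll callback to pull up to rxpoll_plim packets at
 * a time and feeds them back into the input path through
 * ifnet_input_common(), until no further poll requests are pending.
 */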
2967 static void
2968 ifnet_poll_thread_fn(void *v, wait_result_t w)
2969 {
2970 #pragma unused(w)
2971 struct dlil_threading_info *inp;
2972 struct ifnet *ifp = v;
2973 char ifname[IFNAMSIZ + 1];
2974 struct timespec *ts = NULL;
2975 struct ifnet_stat_increment_param s;
2976
2977 snprintf(ifname, sizeof (ifname), "%s_poller",
2978 if_name(ifp));
2979 bzero(&s, sizeof (s));
2980
2981 lck_mtx_lock_spin(&ifp->if_poll_lock);
2982
2983 inp = ifp->if_inp;
2984 VERIFY(inp != NULL);
2985
2986 for (;;) {
2987 if (ifp->if_poll_thread != THREAD_NULL) {
2988 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2989 (PZERO - 1) | PSPIN, ifname, ts);
2990 }
2991
2992 /* interface is detached (maybe while asleep)? */
2993 if (ifp->if_poll_thread == THREAD_NULL) {
2994 ifnet_set_poll_cycle(ifp, NULL);
2995 lck_mtx_unlock(&ifp->if_poll_lock);
2996
2997 if (dlil_verbose) {
2998 printf("%s: poller thread terminated\n",
2999 if_name(ifp));
3000 }
3001
3002 /* for the extra refcnt from kernel_thread_start() */
3003 thread_deallocate(current_thread());
3004 /* this is the end */
3005 thread_terminate(current_thread());
3006 /* NOTREACHED */
3007 return;
3008 }
3009
3010 ifp->if_poll_active = 1;
3011 for (;;) {
3012 struct mbuf *m_head, *m_tail;
3013 u_int32_t m_lim, m_cnt, m_totlen;
3014 u_int16_t req = ifp->if_poll_req;
3015
3016 lck_mtx_unlock(&ifp->if_poll_lock);
3017
3018 /*
3019 * If no longer attached, there's nothing to do;
3020 * else hold an IO refcnt to prevent the interface
3021 * from being detached (will be released below.)
3022 */
3023 if (!ifnet_is_attached(ifp, 1)) {
3024 lck_mtx_lock_spin(&ifp->if_poll_lock);
3025 break;
3026 }
3027
3028 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
3029 MAX((qlimit(&inp->rcvq_pkts)),
3030 (inp->rxpoll_phiwat << 2));
3031
3032 if (dlil_verbose > 1) {
3033 printf("%s: polling up to %d pkts, "
3034 "pkts avg %d max %d, wreq avg %d, "
3035 "bytes avg %d\n",
3036 if_name(ifp), m_lim,
3037 inp->rxpoll_pavg, inp->rxpoll_pmax,
3038 inp->rxpoll_wavg, inp->rxpoll_bavg);
3039 }
3040
3041 /* invoke the driver's input poll routine */
3042 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3043 &m_cnt, &m_totlen));
3044
3045 if (m_head != NULL) {
3046 VERIFY(m_tail != NULL && m_cnt > 0);
3047
3048 if (dlil_verbose > 1) {
3049 printf("%s: polled %d pkts, "
3050 "pkts avg %d max %d, wreq avg %d, "
3051 "bytes avg %d\n",
3052 if_name(ifp), m_cnt,
3053 inp->rxpoll_pavg, inp->rxpoll_pmax,
3054 inp->rxpoll_wavg, inp->rxpoll_bavg);
3055 }
3056
3057 /* stats are required for extended variant */
3058 s.packets_in = m_cnt;
3059 s.bytes_in = m_totlen;
3060
3061 (void) ifnet_input_common(ifp, m_head, m_tail,
3062 &s, TRUE, TRUE);
3063 } else {
3064 if (dlil_verbose > 1) {
3065 printf("%s: no packets, "
3066 "pkts avg %d max %d, wreq avg %d, "
3067 "bytes avg %d\n",
3068 if_name(ifp), inp->rxpoll_pavg,
3069 inp->rxpoll_pmax, inp->rxpoll_wavg,
3070 inp->rxpoll_bavg);
3071 }
3072
3073 (void) ifnet_input_common(ifp, NULL, NULL,
3074 NULL, FALSE, TRUE);
3075 }
3076
3077 /* Release the io ref count */
3078 ifnet_decr_iorefcnt(ifp);
3079
3080 lck_mtx_lock_spin(&ifp->if_poll_lock);
3081
3082 /* if there's no pending request, we're done */
3083 if (req == ifp->if_poll_req) {
3084 break;
3085 }
3086 }
3087 ifp->if_poll_req = 0;
3088 ifp->if_poll_active = 0;
3089
3090 /*
3091 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3092 * until ifnet_poll() is called again.
3093 */
3094 ts = &ifp->if_poll_cycle;
3095 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
3096 ts = NULL;
3097 }
3098
3099 /* NOTREACHED */
3100 }
3101
3102 void
3103 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3104 {
3105 if (ts == NULL)
3106 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
3107 else
3108 *(&ifp->if_poll_cycle) = *ts;
3109
3110 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
3111 printf("%s: poll interval set to %lu nsec\n",
3112 if_name(ifp), ts->tv_nsec);
3113 }
3114
3115 void
3116 ifnet_purge(struct ifnet *ifp)
3117 {
3118 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
3119 if_qflush(ifp, 0);
3120 }
3121
3122 void
3123 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3124 {
3125 IFCQ_LOCK_ASSERT_HELD(ifq);
3126
3127 if (!(IFCQ_IS_READY(ifq)))
3128 return;
3129
3130 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3131 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
3132 ifq->ifcq_tbr.tbr_percent, 0 };
3133 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3134 }
3135
3136 ifclassq_update(ifq, ev);
3137 }
3138
3139 void
3140 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3141 {
3142 switch (ev) {
3143 case CLASSQ_EV_LINK_BANDWIDTH:
3144 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
3145 ifp->if_poll_update++;
3146 break;
3147
3148 default:
3149 break;
3150 }
3151 }
3152
3153 errno_t
3154 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3155 {
3156 struct ifclassq *ifq;
3157 u_int32_t omodel;
3158 errno_t err;
3159
3160 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
3161 return (EINVAL);
3162 else if (!(ifp->if_eflags & IFEF_TXSTART))
3163 return (ENXIO);
3164
3165 ifq = &ifp->if_snd;
3166 IFCQ_LOCK(ifq);
3167 omodel = ifp->if_output_sched_model;
3168 ifp->if_output_sched_model = model;
3169 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
3170 ifp->if_output_sched_model = omodel;
3171 IFCQ_UNLOCK(ifq);
3172
3173 return (err);
3174 }
3175
3176 errno_t
3177 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3178 {
3179 if (ifp == NULL)
3180 return (EINVAL);
3181 else if (!(ifp->if_eflags & IFEF_TXSTART))
3182 return (ENXIO);
3183
3184 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3185
3186 return (0);
3187 }
3188
3189 errno_t
3190 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3191 {
3192 if (ifp == NULL || maxqlen == NULL)
3193 return (EINVAL);
3194 else if (!(ifp->if_eflags & IFEF_TXSTART))
3195 return (ENXIO);
3196
3197 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3198
3199 return (0);
3200 }
3201
3202 errno_t
3203 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3204 {
3205 errno_t err;
3206
3207 if (ifp == NULL || pkts == NULL)
3208 err = EINVAL;
3209 else if (!(ifp->if_eflags & IFEF_TXSTART))
3210 err = ENXIO;
3211 else
3212 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3213 pkts, NULL);
3214
3215 return (err);
3216 }
3217
3218 errno_t
3219 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3220 u_int32_t *pkts, u_int32_t *bytes)
3221 {
3222 errno_t err;
3223
3224 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3225 (pkts == NULL && bytes == NULL))
3226 err = EINVAL;
3227 else if (!(ifp->if_eflags & IFEF_TXSTART))
3228 err = ENXIO;
3229 else
3230 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3231
3232 return (err);
3233 }
3234
3235 errno_t
3236 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3237 {
3238 struct dlil_threading_info *inp;
3239
3240 if (ifp == NULL)
3241 return (EINVAL);
3242 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3243 return (ENXIO);
3244
3245 if (maxqlen == 0)
3246 maxqlen = if_rcvq_maxlen;
3247 else if (maxqlen < IF_RCVQ_MINLEN)
3248 maxqlen = IF_RCVQ_MINLEN;
3249
3250 inp = ifp->if_inp;
3251 lck_mtx_lock(&inp->input_lck);
3252 qlimit(&inp->rcvq_pkts) = maxqlen;
3253 lck_mtx_unlock(&inp->input_lck);
3254
3255 return (0);
3256 }
3257
3258 errno_t
3259 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3260 {
3261 struct dlil_threading_info *inp;
3262
3263 if (ifp == NULL || maxqlen == NULL)
3264 return (EINVAL);
3265 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3266 return (ENXIO);
3267
3268 inp = ifp->if_inp;
3269 lck_mtx_lock(&inp->input_lck);
3270 *maxqlen = qlimit(&inp->rcvq_pkts);
3271 lck_mtx_unlock(&inp->input_lck);
3272 return (0);
3273 }
3274
3275 void
3276 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3277 uint16_t delay_timeout)
3278 {
3279 if (delay_qlen > 0 && delay_timeout > 0) {
3280 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3281 ifp->if_start_delay_qlen = min(100, delay_qlen);
3282 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3283 /* convert timeout to nanoseconds */
3284 ifp->if_start_delay_timeout *= 1000;
3285 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3286 ifp->if_xname, (uint32_t)delay_qlen,
3287 (uint32_t)delay_timeout);
3288 } else {
3289 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3290 }
3291 }
3292
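/*
 * Common enqueue path for packets headed to if_snd: stamp the packet
 * with an enqueue timestamp if it doesn't already carry one, run the
 * delayed-start heuristic for IFEF_ENQUEUE_MULTI interfaces, hand the
 * packet to the classq via ifclassq_enqueue(), and finally kick the
 * starter thread unless the start callback is being coalesced.
 */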
3293 static inline errno_t
3294 ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
3295 boolean_t flush, boolean_t *pdrop)
3296 {
3297 volatile uint64_t *fg_ts = NULL;
3298 volatile uint64_t *rt_ts = NULL;
3299 struct mbuf *m = p;
3300 struct timespec now;
3301 u_int64_t now_nsec = 0;
3302 int error = 0;
3303
3304 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3305
3306 /*
3307 	 * If the packet already carries a timestamp, either from dlil_output()
3308 	 * or from the flowswitch, use it here. Otherwise, record one now.
3309 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3310 * the timestamp value is used internally there.
3311 */
3312 switch (ptype) {
3313 case QP_MBUF:
3314 ASSERT(m->m_flags & M_PKTHDR);
3315 ASSERT(m->m_nextpkt == NULL);
3316
3317 if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3318 m->m_pkthdr.pkt_timestamp == 0) {
3319 nanouptime(&now);
3320 net_timernsec(&now, &now_nsec);
3321 m->m_pkthdr.pkt_timestamp = now_nsec;
3322 }
3323 m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3324 /*
3325 * If the packet service class is not background,
3326 * update the timestamp to indicate recent activity
3327 * on a foreground socket.
3328 */
3329 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3330 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3331 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
3332 ifp->if_fg_sendts = _net_uptime;
3333 if (fg_ts != NULL)
3334 *fg_ts = _net_uptime;
3335 }
3336 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3337 ifp->if_rt_sendts = _net_uptime;
3338 if (rt_ts != NULL)
3339 *rt_ts = _net_uptime;
3340 }
3341 }
3342 break;
3343
3344
3345 default:
3346 VERIFY(0);
3347 /* NOTREACHED */
3348 }
3349
3350 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3351 if (now_nsec == 0) {
3352 nanouptime(&now);
3353 net_timernsec(&now, &now_nsec);
3354 }
3355 /*
3356 		 * If the driver chose to delay the start callback for
3357 		 * coalescing multiple packets, then use the following
3358 		 * heuristics to make sure that the start callback will
3359 		 * be delayed only when a bulk data transfer is detected.
3360 		 * 1. The number of packets enqueued in (delay_win * 2) is
3361 		 * greater than or equal to the delay qlen.
3362 		 * 2. If delay_start is enabled, it will stay enabled for
3363 		 * another 10 idle windows. This is to take into account
3364 		 * variable RTT and burst traffic.
3365 		 * 3. If the time elapsed since the last enqueue is more
3366 		 * than 200ms, we disable delaying the start callback. This
3367 		 * is to take idle time into account.
3368 */
3369 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3370 if (ifp->if_start_delay_swin > 0) {
3371 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3372 ifp->if_start_delay_cnt++;
3373 } else if ((now_nsec - ifp->if_start_delay_swin)
3374 >= (200 * 1000 * 1000)) {
3375 ifp->if_start_delay_swin = now_nsec;
3376 ifp->if_start_delay_cnt = 1;
3377 ifp->if_start_delay_idle = 0;
3378 if (ifp->if_eflags & IFEF_DELAY_START) {
3379 ifp->if_eflags &=
3380 ~(IFEF_DELAY_START);
3381 ifnet_delay_start_disabled++;
3382 }
3383 } else {
3384 if (ifp->if_start_delay_cnt >=
3385 ifp->if_start_delay_qlen) {
3386 ifp->if_eflags |= IFEF_DELAY_START;
3387 ifp->if_start_delay_idle = 0;
3388 } else {
3389 if (ifp->if_start_delay_idle >= 10) {
3390 ifp->if_eflags &= ~(IFEF_DELAY_START);
3391 ifnet_delay_start_disabled++;
3392 } else {
3393 ifp->if_start_delay_idle++;
3394 }
3395 }
3396 ifp->if_start_delay_swin = now_nsec;
3397 ifp->if_start_delay_cnt = 1;
3398 }
3399 } else {
3400 ifp->if_start_delay_swin = now_nsec;
3401 ifp->if_start_delay_cnt = 1;
3402 ifp->if_start_delay_idle = 0;
3403 ifp->if_eflags &= ~(IFEF_DELAY_START);
3404 }
3405 } else {
3406 ifp->if_eflags &= ~(IFEF_DELAY_START);
3407 }
3408
3409 switch (ptype) {
3410 case QP_MBUF:
3411 /* enqueue the packet (caller consumes object) */
3412 error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
3413 m = NULL;
3414 break;
3415
3416
3417 default:
3418 break;
3419 }
3420
3421 /*
3422 * Tell the driver to start dequeueing; do this even when the queue
3423 * for the packet is suspended (EQSUSPENDED), as the driver could still
3424 * be dequeueing from other unsuspended queues.
3425 */
3426 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3427 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
3428 ifnet_start(ifp);
3429
3430 return (error);
3431 }
3432
3433 errno_t
3434 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3435 {
3436 boolean_t pdrop;
3437 return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
3438 }
3439
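/*
 * ifnet_enqueue_mbuf() always consumes the mbuf when one is passed in:
 * it is either handed to the send queue or freed on error, and *pdrop
 * tells the caller whether the packet ended up being dropped (either
 * here or by the classq), so the caller must not touch m afterwards.
 */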
3440 errno_t
3441 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3442 boolean_t *pdrop)
3443 {
3444 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3445 m->m_nextpkt != NULL) {
3446 if (m != NULL) {
3447 m_freem_list(m);
3448 *pdrop = TRUE;
3449 }
3450 return (EINVAL);
3451 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3452 !IF_FULLY_ATTACHED(ifp)) {
3453 /* flag tested without lock for performance */
3454 m_freem(m);
3455 *pdrop = TRUE;
3456 return (ENXIO);
3457 } else if (!(ifp->if_flags & IFF_UP)) {
3458 m_freem(m);
3459 *pdrop = TRUE;
3460 return (ENETDOWN);
3461 }
3462
3463 return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
3464 }
3465
3466
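/*
 * ifnet_dequeue() and the variants that follow let a driver pull packets
 * from the interface's send queue (if_snd).  Each variant fails with
 * ENXIO unless the interface uses the new output model (IFEF_TXSTART)
 * and has a valid output scheduling model, takes a transient IO
 * reference for the duration of the call, and verifies that whatever was
 * dequeued is an mbuf (QP_MBUF).
 */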
3467 errno_t
3468 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3469 {
3470 errno_t rc;
3471 classq_pkt_type_t ptype;
3472 if (ifp == NULL || mp == NULL)
3473 return (EINVAL);
3474 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3475 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3476 return (ENXIO);
3477 if (!ifnet_is_attached(ifp, 1))
3478 return (ENXIO);
3479
3480 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3481 (void **)mp, NULL, NULL, NULL, &ptype);
3482 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3483 ifnet_decr_iorefcnt(ifp);
3484
3485 return (rc);
3486 }
3487
3488 errno_t
3489 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3490 struct mbuf **mp)
3491 {
3492 errno_t rc;
3493 classq_pkt_type_t ptype;
3494 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3495 return (EINVAL);
3496 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3497 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3498 return (ENXIO);
3499 if (!ifnet_is_attached(ifp, 1))
3500 return (ENXIO);
3501
3502 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
3503 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
3504 NULL, &ptype);
3505 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3506 ifnet_decr_iorefcnt(ifp);
3507 return (rc);
3508 }
3509
3510 errno_t
3511 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3512 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3513 {
3514 errno_t rc;
3515 classq_pkt_type_t ptype;
3516 if (ifp == NULL || head == NULL || pkt_limit < 1)
3517 return (EINVAL);
3518 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3519 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3520 return (ENXIO);
3521 if (!ifnet_is_attached(ifp, 1))
3522 return (ENXIO);
3523
3524 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3525 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
3526 len, &ptype);
3527 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3528 ifnet_decr_iorefcnt(ifp);
3529 return (rc);
3530 }
3531
3532 errno_t
3533 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3534 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3535 {
3536 errno_t rc;
3537 classq_pkt_type_t ptype;
3538 if (ifp == NULL || head == NULL || byte_limit < 1)
3539 return (EINVAL);
3540 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3541 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3542 return (ENXIO);
3543 if (!ifnet_is_attached(ifp, 1))
3544 return (ENXIO);
3545
3546 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3547 byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
3548 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3549 ifnet_decr_iorefcnt(ifp);
3550 return (rc);
3551 }
3552
3553 errno_t
3554 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3555 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3556 u_int32_t *len)
3557 {
3558 errno_t rc;
3559 classq_pkt_type_t ptype;
3560 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3561 !MBUF_VALID_SC(sc))
3562 return (EINVAL);
3563 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3564 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3565 return (ENXIO);
3566 if (!ifnet_is_attached(ifp, 1))
3567 return (ENXIO);
3568
3569 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
3570 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
3571 (void **)tail, cnt, len, &ptype);
3572 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3573 ifnet_decr_iorefcnt(ifp);
3574 return (rc);
3575 }
3576
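/*
 * A puller-model driver typically drains its send queue from its
 * if_start callback using the dequeue KPIs above.  A minimal sketch,
 * assuming a hypothetical hardware submit routine my_tx_submit():
 *
 *	static void
 *	my_if_start(ifnet_t ifp)
 *	{
 *		mbuf_t m;
 *
 *		while (ifnet_dequeue(ifp, &m) == 0) {
 *			if (my_tx_submit(ifp, m) != 0) {
 *				mbuf_freem(m);
 *				break;
 *			}
 *		}
 *	}
 */
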
3577 #if !CONFIG_EMBEDDED
3578 errno_t
3579 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3580 const struct sockaddr *dest, const char *dest_linkaddr,
3581 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3582 {
3583 if (pre != NULL)
3584 *pre = 0;
3585 if (post != NULL)
3586 *post = 0;
3587
3588 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3589 }
3590 #endif /* !CONFIG_EMBEDDED */
3591
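/*
 * The two filter walkers below drop if_flt_lock around each filter
 * callback; if_flt_monitor_busy()/if_flt_monitor_unbusy() keep the
 * filter list from being torn down while the lock is not held.  A
 * non-zero result from any filter stops the walk and is propagated to
 * the caller, which frees the packet unless the result is EJUSTRETURN.
 */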
3592 static int
3593 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3594 char **frame_header_p, protocol_family_t protocol_family)
3595 {
3596 struct ifnet_filter *filter;
3597
3598 /*
3599 * Pass the inbound packet to the interface filters
3600 */
3601 lck_mtx_lock_spin(&ifp->if_flt_lock);
3602 /* prevent filter list from changing in case we drop the lock */
3603 if_flt_monitor_busy(ifp);
3604 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3605 int result;
3606
3607 if (!filter->filt_skip && filter->filt_input != NULL &&
3608 (filter->filt_protocol == 0 ||
3609 filter->filt_protocol == protocol_family)) {
3610 lck_mtx_unlock(&ifp->if_flt_lock);
3611
3612 result = (*filter->filt_input)(filter->filt_cookie,
3613 ifp, protocol_family, m_p, frame_header_p);
3614
3615 lck_mtx_lock_spin(&ifp->if_flt_lock);
3616 if (result != 0) {
3617 /* we're done with the filter list */
3618 if_flt_monitor_unbusy(ifp);
3619 lck_mtx_unlock(&ifp->if_flt_lock);
3620 return (result);
3621 }
3622 }
3623 }
3624 /* we're done with the filter list */
3625 if_flt_monitor_unbusy(ifp);
3626 lck_mtx_unlock(&ifp->if_flt_lock);
3627
3628 /*
3629 * Strip away M_PROTO1 bit prior to sending packet up the stack as
3630 * it is meant to be local to a subsystem -- if_bridge uses M_PROTO1
3631 */
3632 if (*m_p != NULL)
3633 (*m_p)->m_flags &= ~M_PROTO1;
3634
3635 return (0);
3636 }
3637
3638 static int
3639 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3640 protocol_family_t protocol_family)
3641 {
3642 struct ifnet_filter *filter;
3643
3644 /*
3645 * Pass the outbound packet to the interface filters
3646 */
3647 lck_mtx_lock_spin(&ifp->if_flt_lock);
3648 /* prevent filter list from changing in case we drop the lock */
3649 if_flt_monitor_busy(ifp);
3650 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3651 int result;
3652
3653 if (!filter->filt_skip && filter->filt_output != NULL &&
3654 (filter->filt_protocol == 0 ||
3655 filter->filt_protocol == protocol_family)) {
3656 lck_mtx_unlock(&ifp->if_flt_lock);
3657
3658 result = filter->filt_output(filter->filt_cookie, ifp,
3659 protocol_family, m_p);
3660
3661 lck_mtx_lock_spin(&ifp->if_flt_lock);
3662 if (result != 0) {
3663 /* we're done with the filter list */
3664 if_flt_monitor_unbusy(ifp);
3665 lck_mtx_unlock(&ifp->if_flt_lock);
3666 return (result);
3667 }
3668 }
3669 }
3670 /* we're done with the filter list */
3671 if_flt_monitor_unbusy(ifp);
3672 lck_mtx_unlock(&ifp->if_flt_lock);
3673
3674 return (0);
3675 }
3676
3677 static void
3678 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3679 {
3680 int error;
3681
3682 if (ifproto->proto_kpi == kProtoKPI_v1) {
3683 /* Version 1 protocols get one packet at a time */
3684 while (m != NULL) {
3685 char * frame_header;
3686 mbuf_t next_packet;
3687
3688 next_packet = m->m_nextpkt;
3689 m->m_nextpkt = NULL;
3690 frame_header = m->m_pkthdr.pkt_hdr;
3691 m->m_pkthdr.pkt_hdr = NULL;
3692 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3693 ifproto->protocol_family, m, frame_header);
3694 if (error != 0 && error != EJUSTRETURN)
3695 m_freem(m);
3696 m = next_packet;
3697 }
3698 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3699 /* Version 2 protocols support packet lists */
3700 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3701 ifproto->protocol_family, m);
3702 if (error != 0 && error != EJUSTRETURN)
3703 m_freem_list(m);
3704 }
3705 }
3706
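/*
 * Per-input-thread statistics: dlil_input_stats_add() accumulates a
 * driver-supplied increment into the thread-local dlil_threading_info
 * without atomics, and dlil_input_stats_sync() later folds those
 * accumulators into the ifnet's 64-bit counters using atomic adds,
 * since the same counters can also be bumped from other contexts.
 */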
3707 static void
3708 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3709 struct dlil_threading_info *inp, boolean_t poll)
3710 {
3711 struct ifnet_stat_increment_param *d = &inp->stats;
3712
3713 if (s->packets_in != 0)
3714 d->packets_in += s->packets_in;
3715 if (s->bytes_in != 0)
3716 d->bytes_in += s->bytes_in;
3717 if (s->errors_in != 0)
3718 d->errors_in += s->errors_in;
3719
3720 if (s->packets_out != 0)
3721 d->packets_out += s->packets_out;
3722 if (s->bytes_out != 0)
3723 d->bytes_out += s->bytes_out;
3724 if (s->errors_out != 0)
3725 d->errors_out += s->errors_out;
3726
3727 if (s->collisions != 0)
3728 d->collisions += s->collisions;
3729 if (s->dropped != 0)
3730 d->dropped += s->dropped;
3731
3732 if (poll)
3733 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3734 }
3735
3736 static void
3737 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3738 {
3739 struct ifnet_stat_increment_param *s = &inp->stats;
3740
3741 /*
3742 * Use of atomic operations is unavoidable here because
3743 * these stats may also be incremented elsewhere via KPIs.
3744 */
3745 if (s->packets_in != 0) {
3746 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3747 s->packets_in = 0;
3748 }
3749 if (s->bytes_in != 0) {
3750 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3751 s->bytes_in = 0;
3752 }
3753 if (s->errors_in != 0) {
3754 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3755 s->errors_in = 0;
3756 }
3757
3758 if (s->packets_out != 0) {
3759 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3760 s->packets_out = 0;
3761 }
3762 if (s->bytes_out != 0) {
3763 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3764 s->bytes_out = 0;
3765 }
3766 if (s->errors_out != 0) {
3767 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3768 s->errors_out = 0;
3769 }
3770
3771 if (s->collisions != 0) {
3772 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3773 s->collisions = 0;
3774 }
3775 if (s->dropped != 0) {
3776 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3777 s->dropped = 0;
3778 }
3779
3780 if (ifp->if_data_threshold != 0) {
3781 lck_mtx_convert_spin(&inp->input_lck);
3782 ifnet_notify_data_threshold(ifp);
3783 }
3784
3785 /*
3786 * No need for atomic operations as they are modified here
3787 * only from within the DLIL input thread context.
3788 */
3789 if (inp->tstats.packets != 0) {
3790 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3791 inp->tstats.packets = 0;
3792 }
3793 if (inp->tstats.bytes != 0) {
3794 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3795 inp->tstats.bytes = 0;
3796 }
3797 }
3798
3799 __private_extern__ void
3800 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3801 {
3802 return (dlil_input_packet_list_common(ifp, m, 0,
3803 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3804 }
3805
3806 __private_extern__ void
3807 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3808 u_int32_t cnt, ifnet_model_t mode)
3809 {
3810 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3811 }
3812
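/*
 * Common inbound path: walk the packet chain, demux each packet to a
 * protocol family, run the interface filters, and batch consecutive
 * packets destined for the same protocol so they can be handed up in a
 * single dlil_ifproto_input() call.  An IO reference is taken for each
 * non-loopback packet's receive interface and dropped once the packet
 * has been dispatched.
 */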
3813 static void
3814 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3815 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3816 {
3817 int error = 0;
3818 protocol_family_t protocol_family;
3819 mbuf_t next_packet;
3820 ifnet_t ifp = ifp_param;
3821 char * frame_header;
3822 struct if_proto * last_ifproto = NULL;
3823 mbuf_t pkt_first = NULL;
3824 mbuf_t * pkt_next = NULL;
3825 u_int32_t poll_thresh = 0, poll_ival = 0;
3826
3827 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3828
3829 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3830 (poll_ival = if_rxpoll_interval_pkts) > 0)
3831 poll_thresh = cnt;
3832
3833 while (m != NULL) {
3834 struct if_proto *ifproto = NULL;
3835 int iorefcnt = 0;
3836 uint32_t pktf_mask; /* pkt flags to preserve */
3837
3838 if (ifp_param == NULL)
3839 ifp = m->m_pkthdr.rcvif;
3840
3841 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3842 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3843 ifnet_poll(ifp);
3844
3845 /* Check if this mbuf looks valid */
3846 MBUF_INPUT_CHECK(m, ifp);
3847
3848 next_packet = m->m_nextpkt;
3849 m->m_nextpkt = NULL;
3850 frame_header = m->m_pkthdr.pkt_hdr;
3851 m->m_pkthdr.pkt_hdr = NULL;
3852
3853 /*
3854 * Get an IO reference count if the interface is not
3855 * loopback (lo0) and it is attached; lo0 never goes
3856 * away, so optimize for that.
3857 */
3858 if (ifp != lo_ifp) {
3859 if (!ifnet_is_attached(ifp, 1)) {
3860 m_freem(m);
3861 goto next;
3862 }
3863 iorefcnt = 1;
3864 /*
3865 * Preserve the time stamp if it was set.
3866 */
3867 pktf_mask = PKTF_TS_VALID;
3868 } else {
3869 /*
3870 * If this arrived on lo0, preserve interface addr
3871 * info to allow for connectivity between loopback
3872 * and local interface addresses.
3873 */
3874 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3875 }
3876
3877 /* make sure packet comes in clean */
3878 m_classifier_init(m, pktf_mask);
3879
3880 ifp_inc_traffic_class_in(ifp, m);
3881
3882 /* find which protocol family this packet is for */
3883 ifnet_lock_shared(ifp);
3884 error = (*ifp->if_demux)(ifp, m, frame_header,
3885 &protocol_family);
3886 ifnet_lock_done(ifp);
3887 if (error != 0) {
3888 if (error == EJUSTRETURN)
3889 goto next;
3890 protocol_family = 0;
3891 }
3892
3893 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3894 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3895 dlil_input_cksum_dbg(ifp, m, frame_header,
3896 protocol_family);
3897
3898 /*
3899 * For partial checksum offload, we expect the driver to
3900 * set the start offset indicating the start of the span
3901 * that is covered by the hardware-computed checksum;
3902 * adjust this start offset accordingly because the data
3903 * pointer has been advanced beyond the link-layer header.
3904 *
3905 * Don't adjust if the interface is a bridge member, as
3906 * the adjustment will occur from the context of the
3907 * bridge interface during input.
3908 */
3909 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3910 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3911 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3912 int adj;
3913
3914 if (frame_header == NULL ||
3915 frame_header < (char *)mbuf_datastart(m) ||
3916 frame_header > (char *)m->m_data ||
3917 (adj = (m->m_data - frame_header)) >
3918 m->m_pkthdr.csum_rx_start) {
3919 m->m_pkthdr.csum_data = 0;
3920 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3921 hwcksum_in_invalidated++;
3922 } else {
3923 m->m_pkthdr.csum_rx_start -= adj;
3924 }
3925 }
3926
3927 pktap_input(ifp, protocol_family, m, frame_header);
3928
3929 if (m->m_flags & (M_BCAST|M_MCAST))
3930 atomic_add_64(&ifp->if_imcasts, 1);
3931
3932 /* run interface filters, exclude VLAN packets PR-3586856 */
3933 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
3934 error = dlil_interface_filters_input(ifp, &m,
3935 &frame_header, protocol_family);
3936 if (error != 0) {
3937 if (error != EJUSTRETURN)
3938 m_freem(m);
3939 goto next;
3940 }
3941 }
3942 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
3943 m_freem(m);
3944 goto next;
3945 }
3946
3947 /* Lookup the protocol attachment to this interface */
3948 if (protocol_family == 0) {
3949 ifproto = NULL;
3950 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3951 (last_ifproto->protocol_family == protocol_family)) {
3952 VERIFY(ifproto == NULL);
3953 ifproto = last_ifproto;
3954 if_proto_ref(last_ifproto);
3955 } else {
3956 VERIFY(ifproto == NULL);
3957 ifnet_lock_shared(ifp);
3958 /* callee holds a proto refcnt upon success */
3959 ifproto = find_attached_proto(ifp, protocol_family);
3960 ifnet_lock_done(ifp);
3961 }
3962 if (ifproto == NULL) {
3963 /* no protocol for this packet, discard */
3964 m_freem(m);
3965 goto next;
3966 }
3967 if (ifproto != last_ifproto) {
3968 if (last_ifproto != NULL) {
3969 /* pass up the list for the previous protocol */
3970 dlil_ifproto_input(last_ifproto, pkt_first);
3971 pkt_first = NULL;
3972 if_proto_free(last_ifproto);
3973 }
3974 last_ifproto = ifproto;
3975 if_proto_ref(ifproto);
3976 }
3977 /* extend the list */
3978 m->m_pkthdr.pkt_hdr = frame_header;
3979 if (pkt_first == NULL) {
3980 pkt_first = m;
3981 } else {
3982 *pkt_next = m;
3983 }
3984 pkt_next = &m->m_nextpkt;
3985
3986 next:
3987 if (next_packet == NULL && last_ifproto != NULL) {
3988 /* pass up the last list of packets */
3989 dlil_ifproto_input(last_ifproto, pkt_first);
3990 if_proto_free(last_ifproto);
3991 last_ifproto = NULL;
3992 }
3993 if (ifproto != NULL) {
3994 if_proto_free(ifproto);
3995 ifproto = NULL;
3996 }
3997
3998 m = next_packet;
3999
4000 /* update the driver's multicast filter, if needed */
4001 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4002 ifp->if_updatemcasts = 0;
4003 if (iorefcnt == 1)
4004 ifnet_decr_iorefcnt(ifp);
4005 }
4006
4007 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4008 }
4009
4010 errno_t
4011 if_mcasts_update(struct ifnet *ifp)
4012 {
4013 errno_t err;
4014
4015 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4016 if (err == EAFNOSUPPORT)
4017 err = 0;
4018 printf("%s: %s %d suspended link-layer multicast membership(s) "
4019 "(err=%d)\n", if_name(ifp),
4020 (err == 0 ? "successfully restored" : "failed to restore"),
4021 ifp->if_updatemcasts, err);
4022
4023 /* just return success */
4024 return (0);
4025 }
4026
4027 /* If ifp is set, we will increment the generation for the interface */
4028 int
4029 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4030 {
4031 if (ifp != NULL) {
4032 ifnet_increment_generation(ifp);
4033 }
4034
4035 #if NECP
4036 necp_update_all_clients();
4037 #endif /* NECP */
4038
4039 return (kev_post_msg(event));
4040 }
4041
4042 __private_extern__ void
4043 dlil_post_sifflags_msg(struct ifnet * ifp)
4044 {
4045 struct kev_msg ev_msg;
4046 struct net_event_data ev_data;
4047
4048 bzero(&ev_data, sizeof (ev_data));
4049 bzero(&ev_msg, sizeof (ev_msg));
4050 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4051 ev_msg.kev_class = KEV_NETWORK_CLASS;
4052 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4053 ev_msg.event_code = KEV_DL_SIFFLAGS;
4054 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4055 ev_data.if_family = ifp->if_family;
4056 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4057 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4058 ev_msg.dv[0].data_ptr = &ev_data;
4059 ev_msg.dv[1].data_length = 0;
4060 dlil_post_complete_msg(ifp, &ev_msg);
4061 }
4062
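/*
 * dlil_event_internal() fans an interface event out to the attached
 * filters, then to every attached protocol's event callback (the
 * protocols are snapshotted into a temporary array so the ifnet lock
 * need not be held across the callbacks), then to the interface's own
 * if_event handler, and finally posts the kernel event message via
 * dlil_post_complete_msg().
 */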
4063 #define TMP_IF_PROTO_ARR_SIZE 10
4064 static int
4065 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4066 {
4067 struct ifnet_filter *filter = NULL;
4068 struct if_proto *proto = NULL;
4069 int if_proto_count = 0;
4070 struct if_proto **tmp_ifproto_arr = NULL;
4071 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4072 int tmp_ifproto_arr_idx = 0;
4073 bool tmp_malloc = false;
4074
4075 /*
4076 * Pass the event to the interface filters
4077 */
4078 lck_mtx_lock_spin(&ifp->if_flt_lock);
4079 /* prevent filter list from changing in case we drop the lock */
4080 if_flt_monitor_busy(ifp);
4081 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4082 if (filter->filt_event != NULL) {
4083 lck_mtx_unlock(&ifp->if_flt_lock);
4084
4085 filter->filt_event(filter->filt_cookie, ifp,
4086 filter->filt_protocol, event);
4087
4088 lck_mtx_lock_spin(&ifp->if_flt_lock);
4089 }
4090 }
4091 /* we're done with the filter list */
4092 if_flt_monitor_unbusy(ifp);
4093 lck_mtx_unlock(&ifp->if_flt_lock);
4094
4095 /* Get an io ref count if the interface is attached */
4096 if (!ifnet_is_attached(ifp, 1))
4097 goto done;
4098
4099 /*
4100 * An embedded tmp_list_entry in if_proto may still get
4101 * over-written by another thread after giving up the ifnet lock,
4102 * therefore we avoid embedded pointers here.
4103 */
4104 ifnet_lock_shared(ifp);
4105 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
4106 if (if_proto_count) {
4107 int i;
4108 VERIFY(ifp->if_proto_hash != NULL);
4109 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4110 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4111 } else {
4112 MALLOC(tmp_ifproto_arr, struct if_proto **,
4113 sizeof (*tmp_ifproto_arr) * if_proto_count,
4114 M_TEMP, M_ZERO);
4115 if (tmp_ifproto_arr == NULL) {
4116 ifnet_lock_done(ifp);
4117 goto cleanup;
4118 }
4119 tmp_malloc = true;
4120 }
4121
4122 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4123 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4124 next_hash) {
4125 if_proto_ref(proto);
4126 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4127 tmp_ifproto_arr_idx++;
4128 }
4129 }
4130 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4131 }
4132 ifnet_lock_done(ifp);
4133
4134 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4135 tmp_ifproto_arr_idx++) {
4136 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4137 VERIFY(proto != NULL);
4138 proto_media_event eventp =
4139 (proto->proto_kpi == kProtoKPI_v1 ?
4140 proto->kpi.v1.event :
4141 proto->kpi.v2.event);
4142
4143 if (eventp != NULL) {
4144 eventp(ifp, proto->protocol_family,
4145 event);
4146 }
4147 if_proto_free(proto);
4148 }
4149
4150 cleanup:
4151 if (tmp_malloc) {
4152 FREE(tmp_ifproto_arr, M_TEMP);
4153 }
4154
4155 /* Pass the event to the interface */
4156 if (ifp->if_event != NULL)
4157 ifp->if_event(ifp, event);
4158
4159 /* Release the io ref count */
4160 ifnet_decr_iorefcnt(ifp);
4161 done:
4162 return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
4163 }
4164
4165 errno_t
4166 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4167 {
4168 struct kev_msg kev_msg;
4169 int result = 0;
4170
4171 if (ifp == NULL || event == NULL)
4172 return (EINVAL);
4173
4174 bzero(&kev_msg, sizeof (kev_msg));
4175 kev_msg.vendor_code = event->vendor_code;
4176 kev_msg.kev_class = event->kev_class;
4177 kev_msg.kev_subclass = event->kev_subclass;
4178 kev_msg.event_code = event->event_code;
4179 kev_msg.dv[0].data_ptr = &event->event_data[0];
4180 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4181 kev_msg.dv[1].data_length = 0;
4182
4183 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4184
4185 return (result);
4186 }
4187
4188 #if CONFIG_MACF_NET
4189 #include <netinet/ip6.h>
4190 #include <netinet/ip.h>
4191 static int
4192 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4193 {
4194 struct mbuf *m;
4195 struct ip *ip;
4196 struct ip6_hdr *ip6;
4197 int type = SOCK_RAW;
4198
4199 if (!raw) {
4200 switch (family) {
4201 case PF_INET:
4202 m = m_pullup(*mp, sizeof(struct ip));
4203 if (m == NULL)
4204 break;
4205 *mp = m;
4206 ip = mtod(m, struct ip *);
4207 if (ip->ip_p == IPPROTO_TCP)
4208 type = SOCK_STREAM;
4209 else if (ip->ip_p == IPPROTO_UDP)
4210 type = SOCK_DGRAM;
4211 break;
4212 case PF_INET6:
4213 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4214 if (m == NULL)
4215 break;
4216 *mp = m;
4217 ip6 = mtod(m, struct ip6_hdr *);
4218 if (ip6->ip6_nxt == IPPROTO_TCP)
4219 type = SOCK_STREAM;
4220 else if (ip6->ip6_nxt == IPPROTO_UDP)
4221 type = SOCK_DGRAM;
4222 break;
4223 }
4224 }
4225
4226 return (type);
4227 }
4228 #endif
4229
4230 static void
4231 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4232 {
4233 mbuf_t n = m;
4234 int chainlen = 0;
4235
4236 while (n != NULL) {
4237 chainlen++;
4238 n = n->m_next;
4239 }
4240 switch (chainlen) {
4241 case 0:
4242 break;
4243 case 1:
4244 atomic_add_64(&cls->cls_one, 1);
4245 break;
4246 case 2:
4247 atomic_add_64(&cls->cls_two, 1);
4248 break;
4249 case 3:
4250 atomic_add_64(&cls->cls_three, 1);
4251 break;
4252 case 4:
4253 atomic_add_64(&cls->cls_four, 1);
4254 break;
4255 case 5:
4256 default:
4257 atomic_add_64(&cls->cls_five_or_more, 1);
4258 break;
4259 }
4260 }
4261
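/*
 * Callers of dlil_output() below that care about flow control pass a
 * struct flowadv and examine its code on return.  A minimal sketch
 * (assumes the caller initialized the advisory to FADV_SUCCESS):
 *
 *	struct flowadv adv = { .code = FADV_SUCCESS };
 *	errno_t err = dlil_output(ifp, PF_INET, m, NULL, dest, 0, &adv);
 *
 * On return, FADV_FLOW_CONTROLLED or FADV_SUSPENDED tells the caller to
 * back off until the interface queue drains.
 */
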
4262 /*
4263 * dlil_output
4264 *
4265 * Caller should have a lock on the protocol domain if the protocol
4266 * doesn't support finer grained locking. In most cases, the lock
4267 * will be held from the socket layer and won't be released until
4268 * we return back to the socket layer.
4269 *
4270 * This does mean that we must take a protocol lock before we take
4271 * an interface lock if we're going to take both. This makes sense
4272 * because a protocol is likely to interact with an ifp while it
4273 * is under the protocol lock.
4274 *
4275 * An advisory code will be returned if adv is not null. This
4276 * can be used to provide feedback about interface queues to the
4277 * application.
4278 */
4279 errno_t
4280 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4281 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4282 {
4283 char *frame_type = NULL;
4284 char *dst_linkaddr = NULL;
4285 int retval = 0;
4286 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4287 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4288 struct if_proto *proto = NULL;
4289 mbuf_t m;
4290 mbuf_t send_head = NULL;
4291 mbuf_t *send_tail = &send_head;
4292 int iorefcnt = 0;
4293 u_int32_t pre = 0, post = 0;
4294 u_int32_t fpkts = 0, fbytes = 0;
4295 int32_t flen = 0;
4296 struct timespec now;
4297 u_int64_t now_nsec;
4298
4299 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4300
4301 /*
4302 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4303 * from happening while this operation is in progress
4304 */
4305 if (!ifnet_is_attached(ifp, 1)) {
4306 retval = ENXIO;
4307 goto cleanup;
4308 }
4309 iorefcnt = 1;
4310
4311 VERIFY(ifp->if_output_dlil != NULL);
4312
4313 /* update the driver's multicast filter, if needed */
4314 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4315 ifp->if_updatemcasts = 0;
4316
4317 frame_type = frame_type_buffer;
4318 dst_linkaddr = dst_linkaddr_buffer;
4319
4320 if (raw == 0) {
4321 ifnet_lock_shared(ifp);
4322 /* callee holds a proto refcnt upon success */
4323 proto = find_attached_proto(ifp, proto_family);
4324 if (proto == NULL) {
4325 ifnet_lock_done(ifp);
4326 retval = ENXIO;
4327 goto cleanup;
4328 }
4329 ifnet_lock_done(ifp);
4330 }
4331
4332 preout_again:
4333 if (packetlist == NULL)
4334 goto cleanup;
4335
4336 m = packetlist;
4337 packetlist = packetlist->m_nextpkt;
4338 m->m_nextpkt = NULL;
4339
4340 if (raw == 0) {
4341 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4342 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4343 retval = 0;
4344 if (preoutp != NULL) {
4345 retval = preoutp(ifp, proto_family, &m, dest, route,
4346 frame_type, dst_linkaddr);
4347
4348 if (retval != 0) {
4349 if (retval == EJUSTRETURN)
4350 goto preout_again;
4351 m_freem(m);
4352 goto cleanup;
4353 }
4354 }
4355 }
4356
4357 #if CONFIG_MACF_NET
4358 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4359 dlil_get_socket_type(&m, proto_family, raw));
4360 if (retval != 0) {
4361 m_freem(m);
4362 goto cleanup;
4363 }
4364 #endif
4365
4366 do {
4367 #if CONFIG_DTRACE
4368 if (!raw && proto_family == PF_INET) {
4369 struct ip *ip = mtod(m, struct ip *);
4370 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4371 struct ip *, ip, struct ifnet *, ifp,
4372 struct ip *, ip, struct ip6_hdr *, NULL);
4373
4374 } else if (!raw && proto_family == PF_INET6) {
4375 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4376 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4377 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4378 struct ip *, NULL, struct ip6_hdr *, ip6);
4379 }
4380 #endif /* CONFIG_DTRACE */
4381
4382 if (raw == 0 && ifp->if_framer != NULL) {
4383 int rcvif_set = 0;
4384
4385 /*
4386 * If this is a broadcast packet that needs to be
4387 * looped back into the system, set the inbound ifp
4388 * to that of the outbound ifp. This will allow
4389 * us to determine that it is a legitimate packet
4390 * for the system. Only set the ifp if it's not
4391 * already set, just to be safe.
4392 */
4393 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4394 m->m_pkthdr.rcvif == NULL) {
4395 m->m_pkthdr.rcvif = ifp;
4396 rcvif_set = 1;
4397 }
4398
4399 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4400 frame_type, &pre, &post);
4401 if (retval != 0) {
4402 if (retval != EJUSTRETURN)
4403 m_freem(m);
4404 goto next;
4405 }
4406
4407 /*
4408 * For partial checksum offload, adjust the start
4409 * and stuff offsets based on the prepended header.
4410 */
4411 if ((m->m_pkthdr.csum_flags &
4412 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4413 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4414 m->m_pkthdr.csum_tx_stuff += pre;
4415 m->m_pkthdr.csum_tx_start += pre;
4416 }
4417
4418 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4419 dlil_output_cksum_dbg(ifp, m, pre,
4420 proto_family);
4421
4422 /*
4423 * Clear the ifp if it was set above, and to be
4424 * safe, only if it is still the same as the
4425 * outbound ifp we have in context. If it was
4426 * looped back, then a copy of it was sent to the
4427 * loopback interface with the rcvif set, and we
4428 * are clearing the one that will go down to the
4429 * layer below.
4430 */
4431 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4432 m->m_pkthdr.rcvif = NULL;
4433 }
4434
4435 /*
4436 * Let interface filters (if any) do their thing ...
4437 */
4438 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4439 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4440 retval = dlil_interface_filters_output(ifp,
4441 &m, proto_family);
4442 if (retval != 0) {
4443 if (retval != EJUSTRETURN)
4444 m_freem(m);
4445 goto next;
4446 }
4447 }
4448 /*
4449 * Strip away M_PROTO1 bit prior to sending packet
4450 * to the driver as this field may be used by the driver
4451 */
4452 m->m_flags &= ~M_PROTO1;
4453
4454 /*
4455 * If the underlying interface is not capable of handling a
4456 * packet whose data portion spans across physically disjoint
4457 * pages, we need to "normalize" the packet so that we pass
4458 * down a chain of mbufs where each mbuf points to a span that
4459 * resides within a system page boundary. If the packet does
4460 * not cross page(s), the following is a no-op.
4461 */
4462 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4463 if ((m = m_normalize(m)) == NULL)
4464 goto next;
4465 }
4466
4467 /*
4468 * If this is a TSO packet, make sure the interface still
4469 * advertises TSO capability.
4470 */
4471 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4472 retval = EMSGSIZE;
4473 m_freem(m);
4474 goto cleanup;
4475 }
4476
4477 ifp_inc_traffic_class_out(ifp, m);
4478 pktap_output(ifp, proto_family, m, pre, post);
4479
4480 /*
4481 * Count the number of elements in the mbuf chain
4482 */
4483 if (tx_chain_len_count) {
4484 dlil_count_chain_len(m, &tx_chain_len_stats);
4485 }
4486
4487 /*
4488 * Record timestamp; ifnet_enqueue() will use this info
4489 * rather than redoing the work. An optimization could
4490 * involve doing this just once at the top, if there are
4491 * no interface filters attached, but that's probably
4492 * not a big deal.
4493 */
4494 nanouptime(&now);
4495 net_timernsec(&now, &now_nsec);
4496 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
4497
4498 /*
4499 * Discard partial sum information if this packet originated
4500 * from another interface; the packet would already have the
4501 * final checksum and we shouldn't recompute it.
4502 */
4503 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
4504 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
4505 (CSUM_DATA_VALID|CSUM_PARTIAL)) {
4506 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4507 m->m_pkthdr.csum_data = 0;
4508 }
4509
4510 /*
4511 * Finally, call the driver.
4512 */
4513 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4514 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4515 flen += (m_pktlen(m) - (pre + post));
4516 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4517 }
4518 *send_tail = m;
4519 send_tail = &m->m_nextpkt;
4520 } else {
4521 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4522 flen = (m_pktlen(m) - (pre + post));
4523 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4524 } else {
4525 flen = 0;
4526 }
4527 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4528 0, 0, 0, 0, 0);
4529 retval = (*ifp->if_output_dlil)(ifp, m);
4530 if (retval == EQFULL || retval == EQSUSPENDED) {
4531 if (adv != NULL && adv->code == FADV_SUCCESS) {
4532 adv->code = (retval == EQFULL ?
4533 FADV_FLOW_CONTROLLED :
4534 FADV_SUSPENDED);
4535 }
4536 retval = 0;
4537 }
4538 if (retval == 0 && flen > 0) {
4539 fbytes += flen;
4540 fpkts++;
4541 }
4542 if (retval != 0 && dlil_verbose) {
4543 printf("%s: output error on %s retval = %d\n",
4544 __func__, if_name(ifp),
4545 retval);
4546 }
4547 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4548 0, 0, 0, 0, 0);
4549 }
4550 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4551
4552 next:
4553 m = packetlist;
4554 if (m != NULL) {
4555 packetlist = packetlist->m_nextpkt;
4556 m->m_nextpkt = NULL;
4557 }
4558 } while (m != NULL);
4559
4560 if (send_head != NULL) {
4561 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4562 0, 0, 0, 0, 0);
4563 if (ifp->if_eflags & IFEF_SENDLIST) {
4564 retval = (*ifp->if_output_dlil)(ifp, send_head);
4565 if (retval == EQFULL || retval == EQSUSPENDED) {
4566 if (adv != NULL) {
4567 adv->code = (retval == EQFULL ?
4568 FADV_FLOW_CONTROLLED :
4569 FADV_SUSPENDED);
4570 }
4571 retval = 0;
4572 }
4573 if (retval == 0 && flen > 0) {
4574 fbytes += flen;
4575 fpkts++;
4576 }
4577 if (retval != 0 && dlil_verbose) {
4578 printf("%s: output error on %s retval = %d\n",
4579 __func__, if_name(ifp), retval);
4580 }
4581 } else {
4582 struct mbuf *send_m;
4583 int enq_cnt = 0;
4584 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4585 while (send_head != NULL) {
4586 send_m = send_head;
4587 send_head = send_m->m_nextpkt;
4588 send_m->m_nextpkt = NULL;
4589 retval = (*ifp->if_output_dlil)(ifp, send_m);
4590 if (retval == EQFULL || retval == EQSUSPENDED) {
4591 if (adv != NULL) {
4592 adv->code = (retval == EQFULL ?
4593 FADV_FLOW_CONTROLLED :
4594 FADV_SUSPENDED);
4595 }
4596 retval = 0;
4597 }
4598 if (retval == 0) {
4599 enq_cnt++;
4600 if (flen > 0)
4601 fpkts++;
4602 }
4603 if (retval != 0 && dlil_verbose) {
4604 printf("%s: output error on %s "
4605 "retval = %d\n",
4606 __func__, if_name(ifp), retval);
4607 }
4608 }
4609 if (enq_cnt > 0) {
4610 fbytes += flen;
4611 ifnet_start(ifp);
4612 }
4613 }
4614 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4615 }
4616
4617 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4618
4619 cleanup:
4620 if (fbytes > 0)
4621 ifp->if_fbytes += fbytes;
4622 if (fpkts > 0)
4623 ifp->if_fpackets += fpkts;
4624 if (proto != NULL)
4625 if_proto_free(proto);
4626 if (packetlist) /* if any packets are left, clean up */
4627 mbuf_freem_list(packetlist);
4628 if (retval == EJUSTRETURN)
4629 retval = 0;
4630 if (iorefcnt == 1)
4631 ifnet_decr_iorefcnt(ifp);
4632
4633 return (retval);
4634 }
4635
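/*
 * ifnet_ioctl() offers the ioctl to the interface filters first, then to
 * the attached protocol (if proto_fam is non-zero), and finally to the
 * interface itself.  ENOTSUP results are normalized to EOPNOTSUPP; a
 * handler that returns a real error (anything other than 0 or
 * EOPNOTSUPP) ends the walk early, and EJUSTRETURN is mapped to 0
 * before returning.
 */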
4636 errno_t
4637 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4638 void *ioctl_arg)
4639 {
4640 struct ifnet_filter *filter;
4641 int retval = EOPNOTSUPP;
4642 int result = 0;
4643
4644 if (ifp == NULL || ioctl_code == 0)
4645 return (EINVAL);
4646
4647 /* Get an io ref count if the interface is attached */
4648 if (!ifnet_is_attached(ifp, 1))
4649 return (EOPNOTSUPP);
4650
4651 /*
4652 * Run the interface filters first.
4653 * We want to run all filters before calling the protocol,
4654 * interface family, or interface.
4655 */
4656 lck_mtx_lock_spin(&ifp->if_flt_lock);
4657 /* prevent filter list from changing in case we drop the lock */
4658 if_flt_monitor_busy(ifp);
4659 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4660 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4661 filter->filt_protocol == proto_fam)) {
4662 lck_mtx_unlock(&ifp->if_flt_lock);
4663
4664 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4665 proto_fam, ioctl_code, ioctl_arg);
4666
4667 lck_mtx_lock_spin(&ifp->if_flt_lock);
4668
4669 /* Only update retval if no one has handled the ioctl */
4670 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4671 if (result == ENOTSUP)
4672 result = EOPNOTSUPP;
4673 retval = result;
4674 if (retval != 0 && retval != EOPNOTSUPP) {
4675 /* we're done with the filter list */
4676 if_flt_monitor_unbusy(ifp);
4677 lck_mtx_unlock(&ifp->if_flt_lock);
4678 goto cleanup;
4679 }
4680 }
4681 }
4682 }
4683 /* we're done with the filter list */
4684 if_flt_monitor_unbusy(ifp);
4685 lck_mtx_unlock(&ifp->if_flt_lock);
4686
4687 /* Allow the protocol to handle the ioctl */
4688 if (proto_fam != 0) {
4689 struct if_proto *proto;
4690
4691 /* callee holds a proto refcnt upon success */
4692 ifnet_lock_shared(ifp);
4693 proto = find_attached_proto(ifp, proto_fam);
4694 ifnet_lock_done(ifp);
4695 if (proto != NULL) {
4696 proto_media_ioctl ioctlp =
4697 (proto->proto_kpi == kProtoKPI_v1 ?
4698 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
4699 result = EOPNOTSUPP;
4700 if (ioctlp != NULL)
4701 result = ioctlp(ifp, proto_fam, ioctl_code,
4702 ioctl_arg);
4703 if_proto_free(proto);
4704
4705 /* Only update retval if no one has handled the ioctl */
4706 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4707 if (result == ENOTSUP)
4708 result = EOPNOTSUPP;
4709 retval = result;
4710 if (retval && retval != EOPNOTSUPP)
4711 goto cleanup;
4712 }
4713 }
4714 }
4715
4716 /* retval is either 0 or EOPNOTSUPP */
4717
4718 /*
4719 * Let the interface handle this ioctl.
4720 * If it returns EOPNOTSUPP, ignore that, we may have
4721 * already handled this in the protocol or family.
4722 */
4723 if (ifp->if_ioctl)
4724 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
4725
4726 /* Only update retval if no one has handled the ioctl */
4727 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4728 if (result == ENOTSUP)
4729 result = EOPNOTSUPP;
4730 retval = result;
4731 if (retval && retval != EOPNOTSUPP) {
4732 goto cleanup;
4733 }
4734 }
4735
4736 cleanup:
4737 if (retval == EJUSTRETURN)
4738 retval = 0;
4739
4740 ifnet_decr_iorefcnt(ifp);
4741
4742 return (retval);
4743 }
4744
4745 __private_extern__ errno_t
4746 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4747 {
4748 errno_t error = 0;
4749
4750
4751 if (ifp->if_set_bpf_tap) {
4752 /* Get an io reference on the interface if it is attached */
4753 if (!ifnet_is_attached(ifp, 1))
4754 return (ENXIO);
4755 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4756 ifnet_decr_iorefcnt(ifp);
4757 }
4758 return (error);
4759 }
4760
4761 errno_t
4762 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4763 struct sockaddr *ll_addr, size_t ll_len)
4764 {
4765 errno_t result = EOPNOTSUPP;
4766 struct if_proto *proto;
4767 const struct sockaddr *verify;
4768 proto_media_resolve_multi resolvep;
4769
4770 if (!ifnet_is_attached(ifp, 1))
4771 return (result);
4772
4773 bzero(ll_addr, ll_len);
4774
4775 /* Call the protocol first; callee holds a proto refcnt upon success */
4776 ifnet_lock_shared(ifp);
4777 proto = find_attached_proto(ifp, proto_addr->sa_family);
4778 ifnet_lock_done(ifp);
4779 if (proto != NULL) {
4780 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4781 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
4782 if (resolvep != NULL)
4783 result = resolvep(ifp, proto_addr,
4784 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
4785 if_proto_free(proto);
4786 }
4787
4788 /* Let the interface verify the multicast address */
4789 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4790 if (result == 0)
4791 verify = ll_addr;
4792 else
4793 verify = proto_addr;
4794 result = ifp->if_check_multi(ifp, verify);
4795 }
4796
4797 ifnet_decr_iorefcnt(ifp);
4798 return (result);
4799 }
4800
4801 __private_extern__ errno_t
4802 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4803 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4804 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4805 {
4806 struct if_proto *proto;
4807 errno_t result = 0;
4808
4809 /* callee holds a proto refcnt upon success */
4810 ifnet_lock_shared(ifp);
4811 proto = find_attached_proto(ifp, target_proto->sa_family);
4812 ifnet_lock_done(ifp);
4813 if (proto == NULL) {
4814 result = ENOTSUP;
4815 } else {
4816 proto_media_send_arp arpp;
4817 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4818 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4819 if (arpp == NULL) {
4820 result = ENOTSUP;
4821 } else {
4822 switch (arpop) {
4823 case ARPOP_REQUEST:
4824 arpstat.txrequests++;
4825 if (target_hw != NULL)
4826 arpstat.txurequests++;
4827 break;
4828 case ARPOP_REPLY:
4829 arpstat.txreplies++;
4830 break;
4831 }
4832 result = arpp(ifp, arpop, sender_hw, sender_proto,
4833 target_hw, target_proto);
4834 }
4835 if_proto_free(proto);
4836 }
4837
4838 return (result);
4839 }
4840
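/*
 * Network thread marks: each uthread carries a bitmask
 * (uu_network_marks) recording which networking subsystems the thread
 * has entered.  The push/unmark-push routines return an opaque cookie
 * encoding only the bits that actually changed, so the matching pop
 * restores exactly those bits even when calls nest.  A minimal sketch,
 * assuming a mark bit MY_MARK defined elsewhere:
 *
 *	net_thread_marks_t marks = net_thread_marks_push(MY_MARK);
 *	...
 *	net_thread_marks_pop(marks);
 */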
4841 struct net_thread_marks { };
4842 static const struct net_thread_marks net_thread_marks_base = { };
4843
4844 __private_extern__ const net_thread_marks_t net_thread_marks_none =
4845 &net_thread_marks_base;
4846
4847 __private_extern__ net_thread_marks_t
4848 net_thread_marks_push(u_int32_t push)
4849 {
4850 static const char *const base = (const void*)&net_thread_marks_base;
4851 u_int32_t pop = 0;
4852
4853 if (push != 0) {
4854 struct uthread *uth = get_bsdthread_info(current_thread());
4855
4856 pop = push & ~uth->uu_network_marks;
4857 if (pop != 0)
4858 uth->uu_network_marks |= pop;
4859 }
4860
4861 return ((net_thread_marks_t)&base[pop]);
4862 }
4863
4864 __private_extern__ net_thread_marks_t
4865 net_thread_unmarks_push(u_int32_t unpush)
4866 {
4867 static const char *const base = (const void*)&net_thread_marks_base;
4868 u_int32_t unpop = 0;
4869
4870 if (unpush != 0) {
4871 struct uthread *uth = get_bsdthread_info(current_thread());
4872
4873 unpop = unpush & uth->uu_network_marks;
4874 if (unpop != 0)
4875 uth->uu_network_marks &= ~unpop;
4876 }
4877
4878 return ((net_thread_marks_t)&base[unpop]);
4879 }
4880
4881 __private_extern__ void
4882 net_thread_marks_pop(net_thread_marks_t popx)
4883 {
4884 static const char *const base = (const void*)&net_thread_marks_base;
4885 const ptrdiff_t pop = (const char *)popx - (const char *)base;
4886
4887 if (pop != 0) {
4888 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4889 struct uthread *uth = get_bsdthread_info(current_thread());
4890
4891 VERIFY((pop & ones) == pop);
4892 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4893 uth->uu_network_marks &= ~pop;
4894 }
4895 }
4896
4897 __private_extern__ void
4898 net_thread_unmarks_pop(net_thread_marks_t unpopx)
4899 {
4900 static const char *const base = (const void*)&net_thread_marks_base;
4901 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
4902
4903 if (unpop != 0) {
4904 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4905 struct uthread *uth = get_bsdthread_info(current_thread());
4906
4907 VERIFY((unpop & ones) == unpop);
4908 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4909 uth->uu_network_marks |= unpop;
4910 }
4911 }
4912
4913 __private_extern__ u_int32_t
4914 net_thread_is_marked(u_int32_t check)
4915 {
4916 if (check != 0) {
4917 struct uthread *uth = get_bsdthread_info(current_thread());
4918 return (uth->uu_network_marks & check);
4919 }
4920 else
4921 return (0);
4922 }
4923
4924 __private_extern__ u_int32_t
4925 net_thread_is_unmarked(u_int32_t check)
4926 {
4927 if (check != 0) {
4928 struct uthread *uth = get_bsdthread_info(current_thread());
4929 return (~uth->uu_network_marks & check);
4930 }
4931 else
4932 return (0);
4933 }
4934
4935 static __inline__ int
4936 _is_announcement(const struct sockaddr_in * sender_sin,
4937 const struct sockaddr_in * target_sin)
4938 {
4939 if (sender_sin == NULL) {
4940 return (FALSE);
4941 }
4942 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4943 }
4944
4945 __private_extern__ errno_t
4946 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
4947 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
4948 const struct sockaddr *target_proto0, u_int32_t rtflags)
4949 {
4950 errno_t result = 0;
4951 const struct sockaddr_in * sender_sin;
4952 const struct sockaddr_in * target_sin;
4953 struct sockaddr_inarp target_proto_sinarp;
4954 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
4955
4956 if (target_proto == NULL || (sender_proto != NULL &&
4957 sender_proto->sa_family != target_proto->sa_family))
4958 return (EINVAL);
4959
4960 /*
4961 * If the target is a (default) router, provide that
4962 * information to the send_arp callback routine.
4963 */
4964 if (rtflags & RTF_ROUTER) {
4965 bcopy(target_proto, &target_proto_sinarp,
4966 sizeof (struct sockaddr_in));
4967 target_proto_sinarp.sin_other |= SIN_ROUTER;
4968 target_proto = (struct sockaddr *)&target_proto_sinarp;
4969 }
4970
4971 /*
4972 * If this is an ARP request and the target IP is IPv4LL,
4973 * send the request on all interfaces. The exception is
4974 * an announcement, which must only appear on the specific
4975 * interface.
4976 */
4977 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4978 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
4979 if (target_proto->sa_family == AF_INET &&
4980 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4981 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4982 !_is_announcement(target_sin, sender_sin)) {
4983 ifnet_t *ifp_list;
4984 u_int32_t count;
4985 u_int32_t ifp_on;
4986
4987 result = ENOTSUP;
4988
4989 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4990 for (ifp_on = 0; ifp_on < count; ifp_on++) {
4991 errno_t new_result;
4992 ifaddr_t source_hw = NULL;
4993 ifaddr_t source_ip = NULL;
4994 struct sockaddr_in source_ip_copy;
4995 struct ifnet *cur_ifp = ifp_list[ifp_on];
4996
4997 /*
4998 * Only arp on interfaces marked for IPv4LL
4999 * ARPing. This may mean that we don't ARP on
5000 * the interface the subnet route points to.
5001 */
5002 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
5003 continue;
5004
5005 /* Find the source IP address */
5006 ifnet_lock_shared(cur_ifp);
5007 source_hw = cur_ifp->if_lladdr;
5008 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
5009 ifa_link) {
5010 IFA_LOCK(source_ip);
5011 if (source_ip->ifa_addr != NULL &&
5012 source_ip->ifa_addr->sa_family ==
5013 AF_INET) {
5014 /* Copy the source IP address */
5015 source_ip_copy =
5016 *(struct sockaddr_in *)
5017 (void *)source_ip->ifa_addr;
5018 IFA_UNLOCK(source_ip);
5019 break;
5020 }
5021 IFA_UNLOCK(source_ip);
5022 }
5023
5024 /* No IP source, don't ARP */
5025 if (source_ip == NULL) {
5026 ifnet_lock_done(cur_ifp);
5027 continue;
5028 }
5029
5030 IFA_ADDREF(source_hw);
5031 ifnet_lock_done(cur_ifp);
5032
5033 /* Send the ARP */
5034 new_result = dlil_send_arp_internal(cur_ifp,
5035 arpop, (struct sockaddr_dl *)(void *)
5036 source_hw->ifa_addr,
5037 (struct sockaddr *)&source_ip_copy, NULL,
5038 target_proto);
5039
5040 IFA_REMREF(source_hw);
5041 if (result == ENOTSUP) {
5042 result = new_result;
5043 }
5044 }
5045 ifnet_list_free(ifp_list);
5046 }
5047 } else {
5048 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
5049 sender_proto, target_hw, target_proto);
5050 }
5051
5052 return (result);
5053 }
5054
5055 /*
5056 * Caller must hold ifnet head lock.
5057 */
5058 static int
5059 ifnet_lookup(struct ifnet *ifp)
5060 {
5061 struct ifnet *_ifp;
5062
5063 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
5064 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
5065 if (_ifp == ifp)
5066 break;
5067 }
5068 return (_ifp != NULL);
5069 }
5070
5071 /*
5072 * Caller has to pass a non-zero refio argument to get an
5073 * IO reference count. This will prevent ifnet_detach from
5074 * being called when there are outstanding io reference counts.
5075 */
5076 int
5077 ifnet_is_attached(struct ifnet *ifp, int refio)
5078 {
5079 int ret;
5080
5081 lck_mtx_lock_spin(&ifp->if_ref_lock);
5082 if ((ret = IF_FULLY_ATTACHED(ifp))) {
5083 if (refio > 0)
5084 ifp->if_refio++;
5085 }
5086 lck_mtx_unlock(&ifp->if_ref_lock);
5087
5088 return (ret);
5089 }
5090
5091 /*
5092 * Caller must ensure the interface is attached; the assumption is that
5093 * there is at least an outstanding IO reference count held already.
5094 * Most callers would call ifnet_is_attached() instead.
5095 */
5096 void
5097 ifnet_incr_iorefcnt(struct ifnet *ifp)
5098 {
5099 lck_mtx_lock_spin(&ifp->if_ref_lock);
5100 VERIFY(IF_FULLY_ATTACHED(ifp));
5101 VERIFY(ifp->if_refio > 0);
5102 ifp->if_refio++;
5103 lck_mtx_unlock(&ifp->if_ref_lock);
5104 }
5105
5106 void
5107 ifnet_decr_iorefcnt(struct ifnet *ifp)
5108 {
5109 lck_mtx_lock_spin(&ifp->if_ref_lock);
5110 VERIFY(ifp->if_refio > 0);
5111 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
5112 ifp->if_refio--;
5113
5114 /*
5115 * If there are no more outstanding IO references, wake up the
5116 * ifnet_detach thread if the detaching flag is set.
5117 */
5118 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
5119 wakeup(&(ifp->if_refio));
5120
5121 lck_mtx_unlock(&ifp->if_ref_lock);
5122 }
5123
5124 static void
5125 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
5126 {
5127 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
5128 ctrace_t *tr;
5129 u_int32_t idx;
5130 u_int16_t *cnt;
5131
5132 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
5133 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
5134 /* NOTREACHED */
5135 }
5136
5137 if (refhold) {
5138 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
5139 tr = dl_if_dbg->dldbg_if_refhold;
5140 } else {
5141 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
5142 tr = dl_if_dbg->dldbg_if_refrele;
5143 }
5144
5145 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
5146 ctrace_record(&tr[idx]);
5147 }
5148
5149 errno_t
5150 dlil_if_ref(struct ifnet *ifp)
5151 {
5152 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5153
5154 if (dl_if == NULL)
5155 return (EINVAL);
5156
5157 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5158 ++dl_if->dl_if_refcnt;
5159 if (dl_if->dl_if_refcnt == 0) {
5160 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
5161 /* NOTREACHED */
5162 }
5163 if (dl_if->dl_if_trace != NULL)
5164 (*dl_if->dl_if_trace)(dl_if, TRUE);
5165 lck_mtx_unlock(&dl_if->dl_if_lock);
5166
5167 return (0);
5168 }
5169
5170 errno_t
5171 dlil_if_free(struct ifnet *ifp)
5172 {
5173 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5174 bool need_release = FALSE;
5175
5176 if (dl_if == NULL)
5177 return (EINVAL);
5178
5179 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5180 switch (dl_if->dl_if_refcnt) {
5181 case 0:
5182 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
5183 /* NOTREACHED */
5184 break;
5185 case 1:
5186 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
5187 need_release = TRUE;
5188 }
5189 break;
5190 default:
5191 break;
5192 }
5193 --dl_if->dl_if_refcnt;
5194 if (dl_if->dl_if_trace != NULL)
5195 (*dl_if->dl_if_trace)(dl_if, FALSE);
5196 lck_mtx_unlock(&dl_if->dl_if_lock);
5197 if (need_release) {
5198 dlil_if_release(ifp);
5199 }
5200 return (0);
5201 }
5202
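/*
 * dlil_attach_protocol_internal() fails with EEXIST if the protocol is
 * already attached, lets the family module refine the demux descriptors
 * via if_add_proto, inserts the if_proto at the tail of its hash bucket,
 * and posts KEV_DL_PROTO_ATTACHED carrying the updated count of attached
 * protocols.
 */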
5203 static errno_t
5204 dlil_attach_protocol_internal(struct if_proto *proto,
5205 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
5206 uint32_t * proto_count)
5207 {
5208 struct kev_dl_proto_data ev_pr_data;
5209 struct ifnet *ifp = proto->ifp;
5210 int retval = 0;
5211 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
5212 struct if_proto *prev_proto;
5213 struct if_proto *_proto;
5214
5215 /* callee holds a proto refcnt upon success */
5216 ifnet_lock_exclusive(ifp);
5217 _proto = find_attached_proto(ifp, proto->protocol_family);
5218 if (_proto != NULL) {
5219 ifnet_lock_done(ifp);
5220 if_proto_free(_proto);
5221 return (EEXIST);
5222 }
5223
5224 /*
5225 * Call family module add_proto routine so it can refine the
5226 * demux descriptors as it wishes.
5227 */
5228 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5229 demux_count);
5230 if (retval) {
5231 ifnet_lock_done(ifp);
5232 return (retval);
5233 }
5234
5235 /*
5236 * Insert the protocol in the hash
5237 */
5238 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
5239 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
5240 prev_proto = SLIST_NEXT(prev_proto, next_hash);
5241 if (prev_proto)
5242 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
5243 else
5244 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5245 proto, next_hash);
5246
5247 /* hold a proto refcnt for attach */
5248 if_proto_ref(proto);
5249
5250 /*
5251 * The reserved field carries the number of protocols still attached
5252 * (subject to change)
5253 */
5254 ev_pr_data.proto_family = proto->protocol_family;
5255 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
5256
5257 ifnet_lock_done(ifp);
5258
5259 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5260 (struct net_event_data *)&ev_pr_data,
5261 sizeof (struct kev_dl_proto_data));
5262 if (proto_count != NULL) {
5263 *proto_count = ev_pr_data.proto_remaining_count;
5264 }
5265 return (retval);
5266 }
5267
5268 errno_t
5269 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
5270 const struct ifnet_attach_proto_param *proto_details)
5271 {
5272 int retval = 0;
5273 struct if_proto *ifproto = NULL;
5274 uint32_t proto_count = 0;
5275
5276 ifnet_head_lock_shared();
5277 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5278 retval = EINVAL;
5279 goto end;
5280 }
5281 /* Check that the interface is in the global list */
5282 if (!ifnet_lookup(ifp)) {
5283 retval = ENXIO;
5284 goto end;
5285 }
5286
5287 ifproto = zalloc(dlif_proto_zone);
5288 if (ifproto == NULL) {
5289 retval = ENOMEM;
5290 goto end;
5291 }
5292 bzero(ifproto, dlif_proto_size);
5293
5294 /* refcnt held above during lookup */
5295 ifproto->ifp = ifp;
5296 ifproto->protocol_family = protocol;
5297 ifproto->proto_kpi = kProtoKPI_v1;
5298 ifproto->kpi.v1.input = proto_details->input;
5299 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5300 ifproto->kpi.v1.event = proto_details->event;
5301 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5302 ifproto->kpi.v1.detached = proto_details->detached;
5303 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5304 ifproto->kpi.v1.send_arp = proto_details->send_arp;
5305
5306 retval = dlil_attach_protocol_internal(ifproto,
5307 proto_details->demux_list, proto_details->demux_count,
5308 &proto_count);
5309
5310 end:
5311 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5312 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5313 if_name(ifp), protocol, retval);
5314 } else {
5315 if (dlil_verbose) {
5316 printf("%s: attached v1 protocol %d (count = %d)\n",
5317 if_name(ifp),
5318 protocol, proto_count);
5319 }
5320 }
5321 ifnet_head_done();
5322 if (retval == 0) {
5323 /*
5324 * A protocol has been attached, mark the interface up.
5325 * This used to be done by configd.KernelEventMonitor, but that
5326 * is inherently prone to races (rdar://problem/30810208).
5327 */
5328 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
5329 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
5330 dlil_post_sifflags_msg(ifp);
5331 } else if (ifproto != NULL) {
5332 zfree(dlif_proto_zone, ifproto);
5333 }
5334 return (retval);
5335 }
5336
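/*
 * A protocol module typically fills in an ifnet_attach_proto_param and
 * calls ifnet_attach_protocol() above (or the v2 variant below for
 * packet-list input).  A minimal sketch; my_proto_input and my_demux
 * are hypothetical:
 *
 *	struct ifnet_attach_proto_param pr;
 *	bzero(&pr, sizeof (pr));
 *	pr.input = my_proto_input;
 *	pr.demux_list = &my_demux;
 *	pr.demux_count = 1;
 *	errno_t err = ifnet_attach_protocol(ifp, PF_INET, &pr);
 */
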
5337 errno_t
5338 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
5339 const struct ifnet_attach_proto_param_v2 *proto_details)
5340 {
5341 int retval = 0;
5342 struct if_proto *ifproto = NULL;
5343 uint32_t proto_count = 0;
5344
5345 ifnet_head_lock_shared();
5346 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5347 retval = EINVAL;
5348 goto end;
5349 }
5350 /* Check that the interface is in the global list */
5351 if (!ifnet_lookup(ifp)) {
5352 retval = ENXIO;
5353 goto end;
5354 }
5355
5356 ifproto = zalloc(dlif_proto_zone);
5357 if (ifproto == NULL) {
5358 retval = ENOMEM;
5359 goto end;
5360 }
5361 bzero(ifproto, sizeof(*ifproto));
5362
5363 /* refcnt held above during lookup */
5364 ifproto->ifp = ifp;
5365 ifproto->protocol_family = protocol;
5366 ifproto->proto_kpi = kProtoKPI_v2;
5367 ifproto->kpi.v2.input = proto_details->input;
5368 ifproto->kpi.v2.pre_output = proto_details->pre_output;
5369 ifproto->kpi.v2.event = proto_details->event;
5370 ifproto->kpi.v2.ioctl = proto_details->ioctl;
5371 ifproto->kpi.v2.detached = proto_details->detached;
5372 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
5373 ifproto->kpi.v2.send_arp = proto_details->send_arp;
5374
5375 retval = dlil_attach_protocol_internal(ifproto,
5376 proto_details->demux_list, proto_details->demux_count,
5377 &proto_count);
5378
5379 end:
5380 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5381 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
5382 if_name(ifp), protocol, retval);
5383 } else {
5384 if (dlil_verbose) {
5385 printf("%s: attached v2 protocol %d (count = %d)\n",
5386 if_name(ifp),
5387 protocol, proto_count);
5388 }
5389 }
5390 ifnet_head_done();
5391 if (retval == 0) {
5392 /*
5393 * A protocol has been attached, mark the interface up.
5394 * This used to be done by configd.KernelEventMonitor, but that
5395 * is inherently prone to races (rdar://problem/30810208).
5396 */
5397 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
5398 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
5399 dlil_post_sifflags_msg(ifp);
5400 } else if (ifproto != NULL) {
5401 zfree(dlif_proto_zone, ifproto);
5402 }
5403 return (retval);
5404 }
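/*
 * Illustrative caller sketch (editorial addition, not part of the
 * original source): a protocol module would typically use the v2 KPI
 * roughly as follows.  PF_FOO, my_proto_input and my_proto_event are
 * hypothetical placeholders; only the KPIs and the parameter structure
 * shown above are real.
 *
 *	struct ifnet_attach_proto_param_v2 proto;
 *
 *	bzero(&proto, sizeof (proto));
 *	proto.input = my_proto_input;		(packet input handler)
 *	proto.event = my_proto_event;		(interface event handler)
 *	if (ifnet_attach_protocol_v2(ifp, PF_FOO, &proto) == 0) {
 *		...
 *		(void) ifnet_detach_protocol(ifp, PF_FOO);
 *	}
 */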
5405
5406 errno_t
5407 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
5408 {
5409 struct if_proto *proto = NULL;
5410 int retval = 0;
5411
5412 if (ifp == NULL || proto_family == 0) {
5413 retval = EINVAL;
5414 goto end;
5415 }
5416
5417 ifnet_lock_exclusive(ifp);
5418 /* callee holds a proto refcnt upon success */
5419 proto = find_attached_proto(ifp, proto_family);
5420 if (proto == NULL) {
5421 retval = ENXIO;
5422 ifnet_lock_done(ifp);
5423 goto end;
5424 }
5425
5426 /* call family module del_proto */
5427 if (ifp->if_del_proto)
5428 ifp->if_del_proto(ifp, proto->protocol_family);
5429
5430 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
5431 proto, if_proto, next_hash);
5432
5433 if (proto->proto_kpi == kProtoKPI_v1) {
5434 proto->kpi.v1.input = ifproto_media_input_v1;
5435 proto->kpi.v1.pre_output = ifproto_media_preout;
5436 proto->kpi.v1.event = ifproto_media_event;
5437 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5438 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5439 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5440 } else {
5441 proto->kpi.v2.input = ifproto_media_input_v2;
5442 proto->kpi.v2.pre_output = ifproto_media_preout;
5443 proto->kpi.v2.event = ifproto_media_event;
5444 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5445 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5446 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5447 }
5448 proto->detached = 1;
5449 ifnet_lock_done(ifp);
5450
5451 if (dlil_verbose) {
5452 printf("%s: detached %s protocol %d\n", if_name(ifp),
5453 (proto->proto_kpi == kProtoKPI_v1) ?
5454 "v1" : "v2", proto_family);
5455 }
5456
5457 /* release proto refcnt held during protocol attach */
5458 if_proto_free(proto);
5459
5460 /*
5461 * Release proto refcnt held during lookup; the rest of the
5462 * protocol detach steps will happen when the last proto
5463 * reference is released.
5464 */
5465 if_proto_free(proto);
5466
5467 end:
5468 return (retval);
5469 }
5470
5471
5472 static errno_t
5473 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
5474 struct mbuf *packet, char *header)
5475 {
5476 #pragma unused(ifp, protocol, packet, header)
5477 return (ENXIO);
5478 }
5479
5480 static errno_t
5481 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
5482 struct mbuf *packet)
5483 {
5484 #pragma unused(ifp, protocol, packet)
5485 return (ENXIO);
5486
5487 }
5488
5489 static errno_t
5490 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
5491 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
5492 char *link_layer_dest)
5493 {
5494 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
5495 return (ENXIO);
5496
5497 }
5498
5499 static void
5500 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
5501 const struct kev_msg *event)
5502 {
5503 #pragma unused(ifp, protocol, event)
5504 }
5505
5506 static errno_t
5507 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
5508 unsigned long command, void *argument)
5509 {
5510 #pragma unused(ifp, protocol, command, argument)
5511 return (ENXIO);
5512 }
5513
5514 static errno_t
5515 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
5516 struct sockaddr_dl *out_ll, size_t ll_len)
5517 {
5518 #pragma unused(ifp, proto_addr, out_ll, ll_len)
5519 return (ENXIO);
5520 }
5521
5522 static errno_t
5523 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
5524 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5525 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5526 {
5527 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
5528 return (ENXIO);
5529 }
5530
5531 extern int if_next_index(void);
5532 extern int tcp_ecn_outbound;
5533
5534 errno_t
5535 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
5536 {
5537 struct ifnet *tmp_if;
5538 struct ifaddr *ifa;
5539 struct if_data_internal if_data_saved;
5540 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5541 struct dlil_threading_info *dl_inp;
5542 u_int32_t sflags = 0;
5543 int err;
5544
5545 if (ifp == NULL)
5546 return (EINVAL);
5547
5548 /*
5549 * Serialize ifnet attach using dlil_ifnet_lock, in order to
5550 * prevent the interface from being configured while it is
5551 * embryonic, as ifnet_head_lock is dropped and reacquired
5552 * below prior to marking the ifnet with IFRF_ATTACHED.
5553 */
5554 dlil_if_lock();
5555 ifnet_head_lock_exclusive();
5556 /* Verify we aren't already on the list */
5557 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
5558 if (tmp_if == ifp) {
5559 ifnet_head_done();
5560 dlil_if_unlock();
5561 return (EEXIST);
5562 }
5563 }
5564
5565 lck_mtx_lock_spin(&ifp->if_ref_lock);
5566 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
5567 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
5568 __func__, ifp);
5569 /* NOTREACHED */
5570 }
5571 lck_mtx_unlock(&ifp->if_ref_lock);
5572
5573 ifnet_lock_exclusive(ifp);
5574
5575 /* Sanity check */
5576 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5577 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5578
5579 if (ll_addr != NULL) {
5580 if (ifp->if_addrlen == 0) {
5581 ifp->if_addrlen = ll_addr->sdl_alen;
5582 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
5583 ifnet_lock_done(ifp);
5584 ifnet_head_done();
5585 dlil_if_unlock();
5586 return (EINVAL);
5587 }
5588 }
5589
5590 /*
5591 * Allow interfaces without protocol families to attach
5592 * only if they have the necessary fields filled out.
5593 */
5594 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
5595 DLIL_PRINTF("%s: Attempt to attach interface without "
5596 "family module - %d\n", __func__, ifp->if_family);
5597 ifnet_lock_done(ifp);
5598 ifnet_head_done();
5599 dlil_if_unlock();
5600 return (ENODEV);
5601 }
5602
5603 /* Allocate protocol hash table */
5604 VERIFY(ifp->if_proto_hash == NULL);
5605 ifp->if_proto_hash = zalloc(dlif_phash_zone);
5606 if (ifp->if_proto_hash == NULL) {
5607 ifnet_lock_done(ifp);
5608 ifnet_head_done();
5609 dlil_if_unlock();
5610 return (ENOBUFS);
5611 }
5612 bzero(ifp->if_proto_hash, dlif_phash_size);
5613
5614 lck_mtx_lock_spin(&ifp->if_flt_lock);
5615 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5616 TAILQ_INIT(&ifp->if_flt_head);
5617 VERIFY(ifp->if_flt_busy == 0);
5618 VERIFY(ifp->if_flt_waiters == 0);
5619 lck_mtx_unlock(&ifp->if_flt_lock);
5620
5621 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
5622 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
5623 LIST_INIT(&ifp->if_multiaddrs);
5624 }
5625
5626 VERIFY(ifp->if_allhostsinm == NULL);
5627 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5628 TAILQ_INIT(&ifp->if_addrhead);
5629
5630 if (ifp->if_index == 0) {
5631 int idx = if_next_index();
5632
5633 if (idx == -1) {
5634 ifp->if_index = 0;
5635 ifnet_lock_done(ifp);
5636 ifnet_head_done();
5637 dlil_if_unlock();
5638 return (ENOBUFS);
5639 }
5640 ifp->if_index = idx;
5641 }
5642 /* There should not be anything occupying this slot */
5643 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5644
5645 /* allocate (if needed) and initialize a link address */
5646 ifa = dlil_alloc_lladdr(ifp, ll_addr);
5647 if (ifa == NULL) {
5648 ifnet_lock_done(ifp);
5649 ifnet_head_done();
5650 dlil_if_unlock();
5651 return (ENOBUFS);
5652 }
5653
5654 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
5655 ifnet_addrs[ifp->if_index - 1] = ifa;
5656
5657 /* make this address the first on the list */
5658 IFA_LOCK(ifa);
5659 /* hold a reference for ifnet_addrs[] */
5660 IFA_ADDREF_LOCKED(ifa);
5661 /* if_attach_link_ifa() holds a reference for ifa_link */
5662 if_attach_link_ifa(ifp, ifa);
5663 IFA_UNLOCK(ifa);
5664
5665 #if CONFIG_MACF_NET
5666 mac_ifnet_label_associate(ifp);
5667 #endif
5668
5669 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
5670 ifindex2ifnet[ifp->if_index] = ifp;
5671
5672 /* Hold a reference to the underlying dlil_ifnet */
5673 ifnet_reference(ifp);
5674
5675 /* Clear stats (save and restore other fields that we care about) */
5676 if_data_saved = ifp->if_data;
5677 bzero(&ifp->if_data, sizeof (ifp->if_data));
5678 ifp->if_data.ifi_type = if_data_saved.ifi_type;
5679 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
5680 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
5681 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
5682 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
5683 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
5684 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
5685 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
5686 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
5687 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
5688 ifnet_touch_lastchange(ifp);
5689
5690 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
5691 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
5692 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
5693
5694 /* By default, use SFB and enable flow advisory */
5695 sflags = PKTSCHEDF_QALG_SFB;
5696 if (if_flowadv)
5697 sflags |= PKTSCHEDF_QALG_FLOWCTL;
5698
5699 if (if_delaybased_queue)
5700 sflags |= PKTSCHEDF_QALG_DELAYBASED;
5701
5702 if (ifp->if_output_sched_model ==
5703 IFNET_SCHED_MODEL_DRIVER_MANAGED)
5704 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
5705
5706 /* Initialize transmit queue(s) */
5707 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
5708 if (err != 0) {
5709 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
5710 "err=%d", __func__, ifp, err);
5711 /* NOTREACHED */
5712 }
5713
5714 /* Sanity checks on the input thread storage */
5715 dl_inp = &dl_if->dl_if_inpstorage;
5716 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
5717 VERIFY(dl_inp->input_waiting == 0);
5718 VERIFY(dl_inp->wtot == 0);
5719 VERIFY(dl_inp->ifp == NULL);
5720 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
5721 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
5722 VERIFY(!dl_inp->net_affinity);
5723 VERIFY(ifp->if_inp == NULL);
5724 VERIFY(dl_inp->input_thr == THREAD_NULL);
5725 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
5726 VERIFY(dl_inp->poll_thr == THREAD_NULL);
5727 VERIFY(dl_inp->tag == 0);
5728 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
5729 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
5730 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
5731 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
5732 #if IFNET_INPUT_SANITY_CHK
5733 VERIFY(dl_inp->input_mbuf_cnt == 0);
5734 #endif /* IFNET_INPUT_SANITY_CHK */
5735
5736 /*
5737 * A specific DLIL input thread is created per Ethernet/cellular
5738 * interface or for an interface which supports opportunistic
5739 * input polling. Pseudo interfaces or other types of interfaces
5740 * use the main input thread instead.
5741 */
5742 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
5743 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
5744 ifp->if_inp = dl_inp;
5745 err = dlil_create_input_thread(ifp, ifp->if_inp);
5746 if (err != 0) {
5747 panic_plain("%s: ifp=%p couldn't get an input thread; "
5748 "err=%d", __func__, ifp, err);
5749 /* NOTREACHED */
5750 }
5751 }
5752
5753 if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
5754 ifp->if_inp->input_mit_tcall =
5755 thread_call_allocate_with_priority(dlil_mit_tcall_fn,
5756 ifp, THREAD_CALL_PRIORITY_KERNEL);
5757 }
5758
5759 /*
5760 * If the driver supports the new transmit model, calculate flow hash
5761 * and create a workloop starter thread to invoke the if_start callback
5762 * where the packets may be dequeued and transmitted.
5763 */
5764 if (ifp->if_eflags & IFEF_TXSTART) {
5765 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
5766 VERIFY(ifp->if_flowhash != 0);
5767 VERIFY(ifp->if_start_thread == THREAD_NULL);
5768
5769 ifnet_set_start_cycle(ifp, NULL);
5770 ifp->if_start_active = 0;
5771 ifp->if_start_req = 0;
5772 ifp->if_start_flags = 0;
5773 VERIFY(ifp->if_start != NULL);
5774 if ((err = kernel_thread_start(ifnet_start_thread_fn,
5775 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
5776 panic_plain("%s: "
5777 "ifp=%p couldn't get a start thread; "
5778 "err=%d", __func__, ifp, err);
5779 /* NOTREACHED */
5780 }
5781 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
5782 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5783 } else {
5784 ifp->if_flowhash = 0;
5785 }
5786
5787 /*
5788 * If the driver supports the new receive model, create a poller
5789 * thread to invoke if_input_poll callback where the packets may
5790 * be dequeued from the driver and processed for reception.
5791 */
5792 if (ifp->if_eflags & IFEF_RXPOLL) {
5793 VERIFY(ifp->if_input_poll != NULL);
5794 VERIFY(ifp->if_input_ctl != NULL);
5795 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5796
5797 ifnet_set_poll_cycle(ifp, NULL);
5798 ifp->if_poll_update = 0;
5799 ifp->if_poll_active = 0;
5800 ifp->if_poll_req = 0;
5801 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
5802 &ifp->if_poll_thread)) != KERN_SUCCESS) {
5803 panic_plain("%s: ifp=%p couldn't get a poll thread; "
5804 "err=%d", __func__, ifp, err);
5805 /* NOTREACHED */
5806 }
5807 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
5808 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5809 }
5810
5811 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5812 VERIFY(ifp->if_desc.ifd_len == 0);
5813 VERIFY(ifp->if_desc.ifd_desc != NULL);
5814
5815 /* Record attach PC stacktrace */
5816 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
5817
5818 ifp->if_updatemcasts = 0;
5819 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
5820 struct ifmultiaddr *ifma;
5821 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
5822 IFMA_LOCK(ifma);
5823 if (ifma->ifma_addr->sa_family == AF_LINK ||
5824 ifma->ifma_addr->sa_family == AF_UNSPEC)
5825 ifp->if_updatemcasts++;
5826 IFMA_UNLOCK(ifma);
5827 }
5828
5829 printf("%s: attached with %d suspended link-layer multicast "
5830 "membership(s)\n", if_name(ifp),
5831 ifp->if_updatemcasts);
5832 }
5833
5834 /* Clear logging parameters */
5835 bzero(&ifp->if_log, sizeof (ifp->if_log));
5836
5837 /* Clear foreground/realtime activity timestamps */
5838 ifp->if_fg_sendts = 0;
5839 ifp->if_rt_sendts = 0;
5840
5841 VERIFY(ifp->if_delegated.ifp == NULL);
5842 VERIFY(ifp->if_delegated.type == 0);
5843 VERIFY(ifp->if_delegated.family == 0);
5844 VERIFY(ifp->if_delegated.subfamily == 0);
5845 VERIFY(ifp->if_delegated.expensive == 0);
5846
5847 VERIFY(ifp->if_agentids == NULL);
5848 VERIFY(ifp->if_agentcount == 0);
5849
5850 /* Reset interface state */
5851 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5852 ifp->if_interface_state.valid_bitmask |=
5853 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
5854 ifp->if_interface_state.interface_availability =
5855 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
5856
5857 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
5858 if (ifp == lo_ifp) {
5859 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
5860 ifp->if_interface_state.valid_bitmask |=
5861 IF_INTERFACE_STATE_LQM_STATE_VALID;
5862 } else {
5863 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
5864 }
5865
5866 /*
5867 * Enable ECN capability on this interface depending on the
5868 * value of the global ECN setting
5869 */
5870 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
5871 ifp->if_eflags |= IFEF_ECN_ENABLE;
5872 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5873 }
5874
5875 /*
5876 * Built-in Cyclops always on policy for WiFi infra
5877 */
5878 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
5879 errno_t error;
5880
5881 error = if_set_qosmarking_mode(ifp,
5882 IFRTYPE_QOSMARKING_FASTLANE);
5883 if (error != 0) {
5884 printf("%s if_set_qosmarking_mode(%s) error %d\n",
5885 __func__, ifp->if_xname, error);
5886 } else {
5887 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
5888 #if (DEVELOPMENT || DEBUG)
5889 printf("%s fastlane enabled on %s\n",
5890 __func__, ifp->if_xname);
5891 #endif /* (DEVELOPMENT || DEBUG) */
5892 }
5893 }
5894
5895 ifnet_lock_done(ifp);
5896 ifnet_head_done();
5897
5898
5899 lck_mtx_lock(&ifp->if_cached_route_lock);
5900 /* Enable forwarding cached route */
5901 ifp->if_fwd_cacheok = 1;
5902 /* Clean up any existing cached routes */
5903 ROUTE_RELEASE(&ifp->if_fwd_route);
5904 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
5905 ROUTE_RELEASE(&ifp->if_src_route);
5906 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
5907 ROUTE_RELEASE(&ifp->if_src_route6);
5908 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5909 lck_mtx_unlock(&ifp->if_cached_route_lock);
5910
5911 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
5912
5913 /*
5914 * Allocate and attach IGMPv3/MLDv2 interface specific variables
5915 * and trees; do this before the ifnet is marked as attached.
5916 * The ifnet keeps the reference to the info structures even after
5917 * the ifnet is detached, since the network-layer records still
5918 * refer to the info structures even after that. This also
5919 * makes it possible for them to still function after the ifnet
5920 * is recycled or reattached.
5921 */
5922 #if INET
5923 if (IGMP_IFINFO(ifp) == NULL) {
5924 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
5925 VERIFY(IGMP_IFINFO(ifp) != NULL);
5926 } else {
5927 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
5928 igmp_domifreattach(IGMP_IFINFO(ifp));
5929 }
5930 #endif /* INET */
5931 #if INET6
5932 if (MLD_IFINFO(ifp) == NULL) {
5933 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
5934 VERIFY(MLD_IFINFO(ifp) != NULL);
5935 } else {
5936 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
5937 mld_domifreattach(MLD_IFINFO(ifp));
5938 }
5939 #endif /* INET6 */
5940
5941 VERIFY(ifp->if_data_threshold == 0);
5942 VERIFY(ifp->if_dt_tcall != NULL);
5943
5944 /*
5945 * Finally, mark this ifnet as attached.
5946 */
5947 lck_mtx_lock(rnh_lock);
5948 ifnet_lock_exclusive(ifp);
5949 lck_mtx_lock_spin(&ifp->if_ref_lock);
5950 ifp->if_refflags = IFRF_ATTACHED; /* clears embryonic */
5951 lck_mtx_unlock(&ifp->if_ref_lock);
5952 if (net_rtref) {
5953 /* boot-args override; enable idle notification */
5954 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
5955 IFRF_IDLE_NOTIFY);
5956 } else {
5957 /* apply previous request(s) to set the idle flags, if any */
5958 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
5959 ifp->if_idle_new_flags_mask);
5960
5961 }
5962 ifnet_lock_done(ifp);
5963 lck_mtx_unlock(rnh_lock);
5964 dlil_if_unlock();
5965
5966 #if PF
5967 /*
5968 * Attach packet filter to this interface, if enabled.
5969 */
5970 pf_ifnet_hook(ifp, 1);
5971 #endif /* PF */
5972
5973 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
5974
5975 if (dlil_verbose) {
5976 printf("%s: attached%s\n", if_name(ifp),
5977 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
5978 }
5979
5980 return (0);
5981 }
5982
5983 /*
5984 * Prepare the storage for the first/permanent link address, which
5985 * must have the same lifetime as the ifnet itself. Although the link
5986 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
5987 * its location in memory must never change as it may still be referred
5988 * to by some parts of the system afterwards (unfortunate implementation
5989 * artifacts inherited from BSD.)
5990 *
5991 * Caller must hold ifnet lock as writer.
5992 */
5993 static struct ifaddr *
5994 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
5995 {
5996 struct ifaddr *ifa, *oifa;
5997 struct sockaddr_dl *asdl, *msdl;
5998 char workbuf[IFNAMSIZ*2];
5999 int namelen, masklen, socksize;
6000 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6001
6002 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
6003 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
6004
6005 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
6006 if_name(ifp));
6007 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
6008 + ((namelen > 0) ? namelen : 0);
6009 socksize = masklen + ifp->if_addrlen;
6010 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
6011 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
6012 socksize = sizeof(struct sockaddr_dl);
6013 socksize = ROUNDUP(socksize);
6014 #undef ROUNDUP
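/*
 * Worked sizing example (editorial note): assuming the usual 20-byte
 * struct sockaddr_dl layout with an 8-byte header before sdl_data, an
 * Ethernet interface named "en0" gives namelen = 3, masklen = 8 + 3 =
 * 11 and socksize = 11 + 6 = 17; that is smaller than sizeof (struct
 * sockaddr_dl), so socksize is bumped to 20, which ROUNDUP() leaves
 * unchanged.  As long as DLIL_SDLMAXLEN (defined earlier in this file)
 * is at least that large, the in-line dl_if_lladdr storage below is
 * used rather than a separate allocation.
 */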
6015
6016 ifa = ifp->if_lladdr;
6017 if (socksize > DLIL_SDLMAXLEN ||
6018 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
6019 /*
6020 * Rare, but in the event that the link address requires
6021 * more storage space than DLIL_SDLMAXLEN, allocate the
6022 * largest possible storage for address and mask, such
6023 * that we can reuse the same space when if_addrlen grows.
6024 * This same space will be used when if_addrlen shrinks.
6025 */
6026 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
6027 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
6028 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
6029 if (ifa == NULL)
6030 return (NULL);
6031 ifa_lock_init(ifa);
6032 /* Don't set IFD_ALLOC, as this is permanent */
6033 ifa->ifa_debug = IFD_LINK;
6034 }
6035 IFA_LOCK(ifa);
6036 /* address and mask sockaddr_dl locations */
6037 asdl = (struct sockaddr_dl *)(ifa + 1);
6038 bzero(asdl, SOCK_MAXADDRLEN);
6039 msdl = (struct sockaddr_dl *)(void *)
6040 ((char *)asdl + SOCK_MAXADDRLEN);
6041 bzero(msdl, SOCK_MAXADDRLEN);
6042 } else {
6043 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
6044 /*
6045 * Use the storage areas for address and mask within the
6046 * dlil_ifnet structure. This is the most common case.
6047 */
6048 if (ifa == NULL) {
6049 ifa = &dl_if->dl_if_lladdr.ifa;
6050 ifa_lock_init(ifa);
6051 /* Don't set IFD_ALLOC, as this is permanent */
6052 ifa->ifa_debug = IFD_LINK;
6053 }
6054 IFA_LOCK(ifa);
6055 /* address and mask sockaddr_dl locations */
6056 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
6057 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
6058 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
6059 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
6060 }
6061
6062 /* hold a permanent reference for the ifnet itself */
6063 IFA_ADDREF_LOCKED(ifa);
6064 oifa = ifp->if_lladdr;
6065 ifp->if_lladdr = ifa;
6066
6067 VERIFY(ifa->ifa_debug == IFD_LINK);
6068 ifa->ifa_ifp = ifp;
6069 ifa->ifa_rtrequest = link_rtrequest;
6070 ifa->ifa_addr = (struct sockaddr *)asdl;
6071 asdl->sdl_len = socksize;
6072 asdl->sdl_family = AF_LINK;
6073 if (namelen > 0) {
6074 bcopy(workbuf, asdl->sdl_data, min(namelen,
6075 sizeof (asdl->sdl_data)));
6076 asdl->sdl_nlen = namelen;
6077 } else {
6078 asdl->sdl_nlen = 0;
6079 }
6080 asdl->sdl_index = ifp->if_index;
6081 asdl->sdl_type = ifp->if_type;
6082 if (ll_addr != NULL) {
6083 asdl->sdl_alen = ll_addr->sdl_alen;
6084 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
6085 } else {
6086 asdl->sdl_alen = 0;
6087 }
6088 ifa->ifa_netmask = (struct sockaddr *)msdl;
6089 msdl->sdl_len = masklen;
6090 while (namelen > 0)
6091 msdl->sdl_data[--namelen] = 0xff;
6092 IFA_UNLOCK(ifa);
6093
6094 if (oifa != NULL)
6095 IFA_REMREF(oifa);
6096
6097 return (ifa);
6098 }
6099
6100 static void
6101 if_purgeaddrs(struct ifnet *ifp)
6102 {
6103 #if INET
6104 in_purgeaddrs(ifp);
6105 #endif /* INET */
6106 #if INET6
6107 in6_purgeaddrs(ifp);
6108 #endif /* INET6 */
6109 }
6110
6111 errno_t
6112 ifnet_detach(ifnet_t ifp)
6113 {
6114 struct ifnet *delegated_ifp;
6115 struct nd_ifinfo *ndi = NULL;
6116
6117 if (ifp == NULL)
6118 return (EINVAL);
6119
6120 ndi = ND_IFINFO(ifp);
6121 if (NULL != ndi)
6122 ndi->cga_initialized = FALSE;
6123
6124 lck_mtx_lock(rnh_lock);
6125 ifnet_head_lock_exclusive();
6126 ifnet_lock_exclusive(ifp);
6127
6128 /*
6129 * Check to see if this interface has previously triggered
6130 * aggressive protocol draining; if so, decrement the global
6131 * refcnt and clear PR_AGGDRAIN on the route domain if
6132 * there are no more such interfaces around.
6133 */
6134 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
6135
6136 lck_mtx_lock_spin(&ifp->if_ref_lock);
6137 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
6138 lck_mtx_unlock(&ifp->if_ref_lock);
6139 ifnet_lock_done(ifp);
6140 ifnet_head_done();
6141 lck_mtx_unlock(rnh_lock);
6142 return (EINVAL);
6143 } else if (ifp->if_refflags & IFRF_DETACHING) {
6144 /* Interface has already been detached */
6145 lck_mtx_unlock(&ifp->if_ref_lock);
6146 ifnet_lock_done(ifp);
6147 ifnet_head_done();
6148 lck_mtx_unlock(rnh_lock);
6149 return (ENXIO);
6150 }
6151 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
6152 /* Indicate this interface is being detached */
6153 ifp->if_refflags &= ~IFRF_ATTACHED;
6154 ifp->if_refflags |= IFRF_DETACHING;
6155 lck_mtx_unlock(&ifp->if_ref_lock);
6156
6157 if (dlil_verbose) {
6158 printf("%s: detaching\n", if_name(ifp));
6159 }
6160
6161 /* clean up flow control entry object if there's any */
6162 if (ifp->if_eflags & IFEF_TXSTART) {
6163 ifnet_flowadv(ifp->if_flowhash);
6164 }
6165
6166 /* Reset ECN enable/disable flags */
6167 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6168 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
6169
6170 /*
6171 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
6172 * no longer be visible during lookups from this point.
6173 */
6174 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
6175 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
6176 ifp->if_link.tqe_next = NULL;
6177 ifp->if_link.tqe_prev = NULL;
6178 if (ifp->if_ordered_link.tqe_next != NULL ||
6179 ifp->if_ordered_link.tqe_prev != NULL) {
6180 ifnet_remove_from_ordered_list(ifp);
6181 }
6182 ifindex2ifnet[ifp->if_index] = NULL;
6183
6184 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
6185 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
6186
6187 /* Record detach PC stacktrace */
6188 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
6189
6190 /* Clear logging parameters */
6191 bzero(&ifp->if_log, sizeof (ifp->if_log));
6192
6193 /* Clear delegated interface info (reference released below) */
6194 delegated_ifp = ifp->if_delegated.ifp;
6195 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
6196
6197 /* Reset interface state */
6198 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6199
6200 ifnet_lock_done(ifp);
6201 ifnet_head_done();
6202 lck_mtx_unlock(rnh_lock);
6203
6204
6205 /* Release reference held on the delegated interface */
6206 if (delegated_ifp != NULL)
6207 ifnet_release(delegated_ifp);
6208
6209 /* Reset Link Quality Metric (unless loopback [lo0]) */
6210 if (ifp != lo_ifp)
6211 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
6212
6213 /* Reset TCP local statistics */
6214 if (ifp->if_tcp_stat != NULL)
6215 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
6216
6217 /* Reset UDP local statistics */
6218 if (ifp->if_udp_stat != NULL)
6219 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
6220
6221 /* Reset ifnet IPv4 stats */
6222 if (ifp->if_ipv4_stat != NULL)
6223 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
6224
6225 /* Reset ifnet IPv6 stats */
6226 if (ifp->if_ipv6_stat != NULL)
6227 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
6228
6229 /* Release memory held for interface link status report */
6230 if (ifp->if_link_status != NULL) {
6231 FREE(ifp->if_link_status, M_TEMP);
6232 ifp->if_link_status = NULL;
6233 }
6234
6235 /* Clear agent IDs */
6236 if (ifp->if_agentids != NULL) {
6237 FREE(ifp->if_agentids, M_NETAGENT);
6238 ifp->if_agentids = NULL;
6239 }
6240 ifp->if_agentcount = 0;
6241
6242
6243 /* Let BPF know we're detaching */
6244 bpfdetach(ifp);
6245
6246 /* Mark the interface as DOWN */
6247 if_down(ifp);
6248
6249 /* Disable forwarding cached route */
6250 lck_mtx_lock(&ifp->if_cached_route_lock);
6251 ifp->if_fwd_cacheok = 0;
6252 lck_mtx_unlock(&ifp->if_cached_route_lock);
6253
6254 /* Disable data threshold and wait for any pending event posting */
6255 ifp->if_data_threshold = 0;
6256 VERIFY(ifp->if_dt_tcall != NULL);
6257 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
6258
6259 /*
6260 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
6261 * references to the info structures and leave them attached to
6262 * this ifnet.
6263 */
6264 #if INET
6265 igmp_domifdetach(ifp);
6266 #endif /* INET */
6267 #if INET6
6268 mld_domifdetach(ifp);
6269 #endif /* INET6 */
6270
6271 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
6272
6273 /* Let worker thread take care of the rest, to avoid reentrancy */
6274 dlil_if_lock();
6275 ifnet_detaching_enqueue(ifp);
6276 dlil_if_unlock();
6277
6278 return (0);
6279 }
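/*
 * Editorial note: ifnet_detach() above only marks the interface as
 * IFRF_DETACHING and removes it from the global lists; the remaining
 * teardown (interface filters, attached protocols, the input/start/poll
 * threads and the permanent link address) is performed asynchronously
 * by the detacher thread below, which dequeues the ifnet and calls
 * ifnet_detach_final().
 */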
6280
6281 static void
6282 ifnet_detaching_enqueue(struct ifnet *ifp)
6283 {
6284 dlil_if_lock_assert();
6285
6286 ++ifnet_detaching_cnt;
6287 VERIFY(ifnet_detaching_cnt != 0);
6288 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6289 wakeup((caddr_t)&ifnet_delayed_run);
6290 }
6291
6292 static struct ifnet *
6293 ifnet_detaching_dequeue(void)
6294 {
6295 struct ifnet *ifp;
6296
6297 dlil_if_lock_assert();
6298
6299 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6300 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6301 if (ifp != NULL) {
6302 VERIFY(ifnet_detaching_cnt != 0);
6303 --ifnet_detaching_cnt;
6304 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6305 ifp->if_detaching_link.tqe_next = NULL;
6306 ifp->if_detaching_link.tqe_prev = NULL;
6307 }
6308 return (ifp);
6309 }
6310
6311 static int
6312 ifnet_detacher_thread_cont(int err)
6313 {
6314 #pragma unused(err)
6315 struct ifnet *ifp;
6316
6317 for (;;) {
6318 dlil_if_lock_assert();
6319 while (ifnet_detaching_cnt == 0) {
6320 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6321 (PZERO - 1), "ifnet_detacher_cont", 0,
6322 ifnet_detacher_thread_cont);
6323 /* NOTREACHED */
6324 }
6325
6326 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
6327
6328 /* Take care of detaching ifnet */
6329 ifp = ifnet_detaching_dequeue();
6330 if (ifp != NULL) {
6331 dlil_if_unlock();
6332 ifnet_detach_final(ifp);
6333 dlil_if_lock();
6334 }
6335 }
6336 }
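/*
 * Editorial note on the continuation idiom used above: when msleep0()
 * is given a continuation function, the blocking thread's stack is not
 * preserved and the call never returns; on wakeup, execution restarts
 * at ifnet_detacher_thread_cont().  That is why the call sites are
 * annotated NOTREACHED, and why ifnet_detacher_thread_func() below
 * asserts should msleep0() ever return.
 */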
6337
6338 static void
6339 ifnet_detacher_thread_func(void *v, wait_result_t w)
6340 {
6341 #pragma unused(v, w)
6342 dlil_if_lock();
6343 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6344 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
6345 /*
6346 * msleep0() shouldn't have returned as PCATCH was not set;
6347 * therefore assert in this case.
6348 */
6349 dlil_if_unlock();
6350 VERIFY(0);
6351 }
6352
6353 static void
6354 ifnet_detach_final(struct ifnet *ifp)
6355 {
6356 struct ifnet_filter *filter, *filter_next;
6357 struct ifnet_filter_head fhead;
6358 struct dlil_threading_info *inp;
6359 struct ifaddr *ifa;
6360 ifnet_detached_func if_free;
6361 int i;
6362
6363 lck_mtx_lock(&ifp->if_ref_lock);
6364 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6365 panic("%s: flags mismatch (detaching not set) ifp=%p",
6366 __func__, ifp);
6367 /* NOTREACHED */
6368 }
6369
6370 /*
6371 * Wait until the existing IO references get released
6372 * before we proceed with ifnet_detach. This is not a
6373 * common case, so block without using a continuation.
6374 */
6375 while (ifp->if_refio > 0) {
6376 printf("%s: Waiting for IO references on %s interface "
6377 "to be released\n", __func__, if_name(ifp));
6378 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
6379 (PZERO - 1), "ifnet_ioref_wait", NULL);
6380 }
6381 lck_mtx_unlock(&ifp->if_ref_lock);
6382
6383 /* Drain and destroy send queue */
6384 ifclassq_teardown(ifp);
6385
6386 /* Detach interface filters */
6387 lck_mtx_lock(&ifp->if_flt_lock);
6388 if_flt_monitor_enter(ifp);
6389
6390 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6391 fhead = ifp->if_flt_head;
6392 TAILQ_INIT(&ifp->if_flt_head);
6393
6394 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
6395 filter_next = TAILQ_NEXT(filter, filt_next);
6396 lck_mtx_unlock(&ifp->if_flt_lock);
6397
6398 dlil_detach_filter_internal(filter, 1);
6399 lck_mtx_lock(&ifp->if_flt_lock);
6400 }
6401 if_flt_monitor_leave(ifp);
6402 lck_mtx_unlock(&ifp->if_flt_lock);
6403
6404 /* Tell upper layers to drop their network addresses */
6405 if_purgeaddrs(ifp);
6406
6407 ifnet_lock_exclusive(ifp);
6408
6409 /* Unplumb all protocols */
6410 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6411 struct if_proto *proto;
6412
6413 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6414 while (proto != NULL) {
6415 protocol_family_t family = proto->protocol_family;
6416 ifnet_lock_done(ifp);
6417 proto_unplumb(family, ifp);
6418 ifnet_lock_exclusive(ifp);
6419 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6420 }
6421 /* There should not be any protocols left */
6422 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
6423 }
6424 zfree(dlif_phash_zone, ifp->if_proto_hash);
6425 ifp->if_proto_hash = NULL;
6426
6427 /* Detach (permanent) link address from if_addrhead */
6428 ifa = TAILQ_FIRST(&ifp->if_addrhead);
6429 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
6430 IFA_LOCK(ifa);
6431 if_detach_link_ifa(ifp, ifa);
6432 IFA_UNLOCK(ifa);
6433
6434 /* Remove (permanent) link address from ifnet_addrs[] */
6435 IFA_REMREF(ifa);
6436 ifnet_addrs[ifp->if_index - 1] = NULL;
6437
6438 /* This interface should not be on {ifnet_head,detaching} */
6439 VERIFY(ifp->if_link.tqe_next == NULL);
6440 VERIFY(ifp->if_link.tqe_prev == NULL);
6441 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6442 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6443 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
6444 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6445
6446 /* The slot should have been emptied */
6447 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6448
6449 /* There should not be any addresses left */
6450 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6451
6452 /*
6453 * Signal the starter thread to terminate itself.
6454 */
6455 if (ifp->if_start_thread != THREAD_NULL) {
6456 lck_mtx_lock_spin(&ifp->if_start_lock);
6457 ifp->if_start_flags = 0;
6458 ifp->if_start_thread = THREAD_NULL;
6459 wakeup_one((caddr_t)&ifp->if_start_thread);
6460 lck_mtx_unlock(&ifp->if_start_lock);
6461 }
6462
6463 /*
6464 * Signal the poller thread to terminate itself.
6465 */
6466 if (ifp->if_poll_thread != THREAD_NULL) {
6467 lck_mtx_lock_spin(&ifp->if_poll_lock);
6468 ifp->if_poll_thread = THREAD_NULL;
6469 wakeup_one((caddr_t)&ifp->if_poll_thread);
6470 lck_mtx_unlock(&ifp->if_poll_lock);
6471 }
6472
6473 /*
6474 * If thread affinity was set for the workloop thread, we will need
6475 * to tear down the affinity and release the extra reference count
6476 * taken at attach time. Does not apply to lo0 or other interfaces
6477 * without dedicated input threads.
6478 */
6479 if ((inp = ifp->if_inp) != NULL) {
6480 VERIFY(inp != dlil_main_input_thread);
6481
6482 if (inp->net_affinity) {
6483 struct thread *tp, *wtp, *ptp;
6484
6485 lck_mtx_lock_spin(&inp->input_lck);
6486 wtp = inp->wloop_thr;
6487 inp->wloop_thr = THREAD_NULL;
6488 ptp = inp->poll_thr;
6489 inp->poll_thr = THREAD_NULL;
6490 tp = inp->input_thr; /* don't nullify now */
6491 inp->tag = 0;
6492 inp->net_affinity = FALSE;
6493 lck_mtx_unlock(&inp->input_lck);
6494
6495 /* Tear down poll thread affinity */
6496 if (ptp != NULL) {
6497 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
6498 (void) dlil_affinity_set(ptp,
6499 THREAD_AFFINITY_TAG_NULL);
6500 thread_deallocate(ptp);
6501 }
6502
6503 /* Tear down workloop thread affinity */
6504 if (wtp != NULL) {
6505 (void) dlil_affinity_set(wtp,
6506 THREAD_AFFINITY_TAG_NULL);
6507 thread_deallocate(wtp);
6508 }
6509
6510 /* Tear down DLIL input thread affinity */
6511 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
6512 thread_deallocate(tp);
6513 }
6514
6515 /* disassociate ifp DLIL input thread */
6516 ifp->if_inp = NULL;
6517
6518 /* tell the input thread to terminate */
6519 lck_mtx_lock_spin(&inp->input_lck);
6520 inp->input_waiting |= DLIL_INPUT_TERMINATE;
6521 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
6522 wakeup_one((caddr_t)&inp->input_waiting);
6523 }
6524 lck_mtx_unlock(&inp->input_lck);
6525 ifnet_lock_done(ifp);
6526
6527 /* wait for the input thread to terminate */
6528 lck_mtx_lock_spin(&inp->input_lck);
6529 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
6530 == 0) {
6531 (void) msleep(&inp->input_waiting, &inp->input_lck,
6532 (PZERO - 1) | PSPIN, inp->input_name, NULL);
6533 }
6534 lck_mtx_unlock(&inp->input_lck);
6535 ifnet_lock_exclusive(ifp);
6536
6537 /* clean-up input thread state */
6538 dlil_clean_threading_info(inp);
6539
6540 }
6541
6542 /* The driver might unload, so point these to ourselves */
6543 if_free = ifp->if_free;
6544 ifp->if_output_dlil = ifp_if_output;
6545 ifp->if_output = ifp_if_output;
6546 ifp->if_pre_enqueue = ifp_if_output;
6547 ifp->if_start = ifp_if_start;
6548 ifp->if_output_ctl = ifp_if_ctl;
6549 ifp->if_input_dlil = ifp_if_input;
6550 ifp->if_input_poll = ifp_if_input_poll;
6551 ifp->if_input_ctl = ifp_if_ctl;
6552 ifp->if_ioctl = ifp_if_ioctl;
6553 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
6554 ifp->if_free = ifp_if_free;
6555 ifp->if_demux = ifp_if_demux;
6556 ifp->if_event = ifp_if_event;
6557 ifp->if_framer_legacy = ifp_if_framer;
6558 ifp->if_framer = ifp_if_framer_extended;
6559 ifp->if_add_proto = ifp_if_add_proto;
6560 ifp->if_del_proto = ifp_if_del_proto;
6561 ifp->if_check_multi = ifp_if_check_multi;
6562
6563 /* wipe out interface description */
6564 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6565 ifp->if_desc.ifd_len = 0;
6566 VERIFY(ifp->if_desc.ifd_desc != NULL);
6567 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
6568
6569 /* there shouldn't be any delegation by now */
6570 VERIFY(ifp->if_delegated.ifp == NULL);
6571 VERIFY(ifp->if_delegated.type == 0);
6572 VERIFY(ifp->if_delegated.family == 0);
6573 VERIFY(ifp->if_delegated.subfamily == 0);
6574 VERIFY(ifp->if_delegated.expensive == 0);
6575
6576 /* QoS marking gets cleared */
6577 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
6578 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
6579
6580
6581 ifnet_lock_done(ifp);
6582
6583 #if PF
6584 /*
6585 * Detach this interface from packet filter, if enabled.
6586 */
6587 pf_ifnet_hook(ifp, 0);
6588 #endif /* PF */
6589
6590 /* Filter list should be empty */
6591 lck_mtx_lock_spin(&ifp->if_flt_lock);
6592 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6593 VERIFY(ifp->if_flt_busy == 0);
6594 VERIFY(ifp->if_flt_waiters == 0);
6595 lck_mtx_unlock(&ifp->if_flt_lock);
6596
6597 /* Last chance to drain send queue */
6598 if_qflush(ifp, 0);
6599
6600 /* Last chance to cleanup any cached route */
6601 lck_mtx_lock(&ifp->if_cached_route_lock);
6602 VERIFY(!ifp->if_fwd_cacheok);
6603 ROUTE_RELEASE(&ifp->if_fwd_route);
6604 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
6605 ROUTE_RELEASE(&ifp->if_src_route);
6606 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
6607 ROUTE_RELEASE(&ifp->if_src_route6);
6608 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
6609 lck_mtx_unlock(&ifp->if_cached_route_lock);
6610
6611 VERIFY(ifp->if_data_threshold == 0);
6612 VERIFY(ifp->if_dt_tcall != NULL);
6613 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
6614
6615 ifnet_llreach_ifdetach(ifp);
6616
6617 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
6618
6619 /*
6620 * Finally, mark this ifnet as detached.
6621 */
6622 lck_mtx_lock_spin(&ifp->if_ref_lock);
6623 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6624 panic("%s: flags mismatch (detaching not set) ifp=%p",
6625 __func__, ifp);
6626 /* NOTREACHED */
6627 }
6628 ifp->if_refflags &= ~IFRF_DETACHING;
6629 lck_mtx_unlock(&ifp->if_ref_lock);
6630 if (if_free != NULL)
6631 if_free(ifp);
6632
6633 if (dlil_verbose)
6634 printf("%s: detached\n", if_name(ifp));
6635
6636 /* Release reference held during ifnet attach */
6637 ifnet_release(ifp);
6638 }
6639
6640 errno_t
6641 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6642 {
6643 #pragma unused(ifp)
6644 m_freem_list(m);
6645 return (0);
6646 }
6647
6648 void
6649 ifp_if_start(struct ifnet *ifp)
6650 {
6651 ifnet_purge(ifp);
6652 }
6653
6654 static errno_t
6655 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6656 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6657 boolean_t poll, struct thread *tp)
6658 {
6659 #pragma unused(ifp, m_tail, s, poll, tp)
6660 m_freem_list(m_head);
6661 return (ENXIO);
6662 }
6663
6664 static void
6665 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6666 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6667 {
6668 #pragma unused(ifp, flags, max_cnt)
6669 if (m_head != NULL)
6670 *m_head = NULL;
6671 if (m_tail != NULL)
6672 *m_tail = NULL;
6673 if (cnt != NULL)
6674 *cnt = 0;
6675 if (len != NULL)
6676 *len = 0;
6677 }
6678
6679 static errno_t
6680 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6681 {
6682 #pragma unused(ifp, cmd, arglen, arg)
6683 return (EOPNOTSUPP);
6684 }
6685
6686 static errno_t
6687 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6688 {
6689 #pragma unused(ifp, fh, pf)
6690 m_freem(m);
6691 return (EJUSTRETURN);
6692 }
6693
6694 static errno_t
6695 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6696 const struct ifnet_demux_desc *da, u_int32_t dc)
6697 {
6698 #pragma unused(ifp, pf, da, dc)
6699 return (EINVAL);
6700 }
6701
6702 static errno_t
6703 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6704 {
6705 #pragma unused(ifp, pf)
6706 return (EINVAL);
6707 }
6708
6709 static errno_t
6710 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6711 {
6712 #pragma unused(ifp, sa)
6713 return (EOPNOTSUPP);
6714 }
6715
6716 #if CONFIG_EMBEDDED
6717 static errno_t
6718 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6719 const struct sockaddr *sa, const char *ll, const char *t,
6720 u_int32_t *pre, u_int32_t *post)
6721 #else
6722 static errno_t
6723 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6724 const struct sockaddr *sa, const char *ll, const char *t)
6725 #endif /* !CONFIG_EMBEDDED */
6726 {
6727 #pragma unused(ifp, m, sa, ll, t)
6728 #if CONFIG_EMBEDDED
6729 return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
6730 #else
6731 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
6732 #endif /* !CONFIG_EMBEDDED */
6733 }
6734
6735 static errno_t
6736 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6737 const struct sockaddr *sa, const char *ll, const char *t,
6738 u_int32_t *pre, u_int32_t *post)
6739 {
6740 #pragma unused(ifp, sa, ll, t)
6741 m_freem(*m);
6742 *m = NULL;
6743
6744 if (pre != NULL)
6745 *pre = 0;
6746 if (post != NULL)
6747 *post = 0;
6748
6749 return (EJUSTRETURN);
6750 }
6751
6752 errno_t
6753 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6754 {
6755 #pragma unused(ifp, cmd, arg)
6756 return (EOPNOTSUPP);
6757 }
6758
6759 static errno_t
6760 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6761 {
6762 #pragma unused(ifp, tm, f)
6763 /* XXX not sure what to do here */
6764 return (0);
6765 }
6766
6767 static void
6768 ifp_if_free(struct ifnet *ifp)
6769 {
6770 #pragma unused(ifp)
6771 }
6772
6773 static void
6774 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
6775 {
6776 #pragma unused(ifp, e)
6777 }
6778
6779 int dlil_if_acquire(u_int32_t family, const void *uniqueid,
6780 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
6781 {
6782 struct ifnet *ifp1 = NULL;
6783 struct dlil_ifnet *dlifp1 = NULL;
6784 void *buf, *base, **pbuf;
6785 int ret = 0;
6786
6787 VERIFY(*ifp == NULL);
6788 dlil_if_lock();
6789 /*
6790 * We absolutely can't have two in-use interfaces with the same
6791 * name. To make sure of that, the list has to be traversed
6792 * completely.
6793 */
6794 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
6795 ifp1 = (struct ifnet *)dlifp1;
6796
6797 if (ifp1->if_family != family)
6798 continue;
6799
6800 /*
6801 * If the interface is in use, return EBUSY if either the unique
6802 * id or the extended interface name matches
6803 */
6804 lck_mtx_lock(&dlifp1->dl_if_lock);
6805 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
6806 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6807 lck_mtx_unlock(&dlifp1->dl_if_lock);
6808 ret = EBUSY;
6809 goto end;
6810 }
6811 }
6812
6813 if (uniqueid_len) {
6814 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
6815 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
6816 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6817 lck_mtx_unlock(&dlifp1->dl_if_lock);
6818 ret = EBUSY;
6819 goto end;
6820 } else {
6821 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
6822 /* Cache the first interface that can be recycled */
6823 if (*ifp == NULL)
6824 *ifp = ifp1;
6825 /*
6826 * XXX Do not break or jump to end as we have to traverse
6827 * the whole list to ensure there are no name collisions
6828 */
6829 }
6830 }
6831 }
6832 lck_mtx_unlock(&dlifp1->dl_if_lock);
6833 }
6834
6835 /* If there's an interface that can be recycled, use that */
6836 if (*ifp != NULL)
6837 goto end;
6838
6839 /* no interface found, allocate a new one */
6840 buf = zalloc(dlif_zone);
6841 if (buf == NULL) {
6842 ret = ENOMEM;
6843 goto end;
6844 }
6845 bzero(buf, dlif_bufsize);
6846
6847 /* Get the 64-bit aligned base address for this object */
6848 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
6849 sizeof (u_int64_t));
6850 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
6851
6852 /*
6853 * Wind back a pointer size from the aligned base and
6854 * save the original address so we can free it later.
6855 */
6856 pbuf = (void **)((intptr_t)base - sizeof (void *));
6857 *pbuf = buf;
6858 dlifp1 = base;
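/*
 * Resulting allocation layout (editorial sketch of the arithmetic
 * above):
 *
 *	buf (from zalloc)            base (64-bit aligned)
 *	|                            |
 *	v                            v
 *	[ alignment slack ][ saved buf pointer ][ struct dlil_ifnet ... ]
 *	                   ^
 *	                   pbuf = base - sizeof (void *)
 *
 * The saved pointer lets the free path recover the original zalloc()'d
 * address when the dlil_ifnet is finally released.
 */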
6859
6860 if (uniqueid_len) {
6861 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
6862 M_NKE, M_WAITOK);
6863 if (dlifp1->dl_if_uniqueid == NULL) {
6864 zfree(dlif_zone, buf);
6865 ret = ENOMEM;
6866 goto end;
6867 }
6868 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
6869 dlifp1->dl_if_uniqueid_len = uniqueid_len;
6870 }
6871
6872 ifp1 = (struct ifnet *)dlifp1;
6873 dlifp1->dl_if_flags = DLIF_INUSE;
6874 if (ifnet_debug) {
6875 dlifp1->dl_if_flags |= DLIF_DEBUG;
6876 dlifp1->dl_if_trace = dlil_if_trace;
6877 }
6878 ifp1->if_name = dlifp1->dl_if_namestorage;
6879 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
6880
6881 /* initialize interface description */
6882 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
6883 ifp1->if_desc.ifd_len = 0;
6884 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
6885
6886
6887 #if CONFIG_MACF_NET
6888 mac_ifnet_label_init(ifp1);
6889 #endif
6890
6891 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
6892 DLIL_PRINTF("%s: failed to allocate if local stats, "
6893 "error: %d\n", __func__, ret);
6894 /* This probably shouldn't be fatal */
6895 ret = 0;
6896 }
6897
6898 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
6899 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
6900 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
6901 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6902 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
6903 ifnet_lock_attr);
6904 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
6905 #if INET
6906 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
6907 ifnet_lock_attr);
6908 ifp1->if_inetdata = NULL;
6909 #endif
6910 #if INET6
6911 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
6912 ifnet_lock_attr);
6913 ifp1->if_inet6data = NULL;
6914 #endif
6915 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
6916 ifnet_lock_attr);
6917 ifp1->if_link_status = NULL;
6918
6919 /* for send data paths */
6920 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
6921 ifnet_lock_attr);
6922 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
6923 ifnet_lock_attr);
6924 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
6925 ifnet_lock_attr);
6926
6927 /* for receive data paths */
6928 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
6929 ifnet_lock_attr);
6930
6931 /* thread call allocation is done with sleeping zalloc */
6932 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
6933 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
6934 if (ifp1->if_dt_tcall == NULL) {
6935 panic_plain("%s: couldn't create if_dt_tcall", __func__);
6936 /* NOTREACHED */
6937 }
6938
6939 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
6940
6941 *ifp = ifp1;
6942
6943 end:
6944 dlil_if_unlock();
6945
6946 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
6947 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
6948
6949 return (ret);
6950 }
6951
6952 __private_extern__ void
6953 dlil_if_release(ifnet_t ifp)
6954 {
6955 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
6956
6957 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
6958 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
6959 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
6960 }
6961
6962 ifnet_lock_exclusive(ifp);
6963 lck_mtx_lock(&dlifp->dl_if_lock);
6964 dlifp->dl_if_flags &= ~DLIF_INUSE;
6965 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6966 ifp->if_name = dlifp->dl_if_namestorage;
6967 /* Reset external name (name + unit) */
6968 ifp->if_xname = dlifp->dl_if_xnamestorage;
6969 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
6970 "%s?", ifp->if_name);
6971 lck_mtx_unlock(&dlifp->dl_if_lock);
6972 #if CONFIG_MACF_NET
6973 /*
6974 * We can either recycle the MAC label here or in dlil_if_acquire().
6975 * It seems logical to do it here but this means that anything that
6976 * still has a handle on ifp will now see it as unlabeled.
6977 * Since the interface is "dead" that may be OK. Revisit later.
6978 */
6979 mac_ifnet_label_recycle(ifp);
6980 #endif
6981 ifnet_lock_done(ifp);
6982 }
6983
6984 __private_extern__ void
6985 dlil_if_lock(void)
6986 {
6987 lck_mtx_lock(&dlil_ifnet_lock);
6988 }
6989
6990 __private_extern__ void
6991 dlil_if_unlock(void)
6992 {
6993 lck_mtx_unlock(&dlil_ifnet_lock);
6994 }
6995
6996 __private_extern__ void
6997 dlil_if_lock_assert(void)
6998 {
6999 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
7000 }
7001
7002 __private_extern__ void
7003 dlil_proto_unplumb_all(struct ifnet *ifp)
7004 {
7005 /*
7006 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
7007 * each bucket contains exactly one entry; PF_VLAN does not need an
7008 * explicit unplumb.
7009 *
7010 * if_proto_hash[3] is for other protocols; we expect anything
7011 * in this bucket to respond to the DETACHING event (which would
7012 * have happened by now) and do the unplumb then.
7013 */
7014 (void) proto_unplumb(PF_INET, ifp);
7015 #if INET6
7016 (void) proto_unplumb(PF_INET6, ifp);
7017 #endif /* INET6 */
7018 }
7019
7020 static void
7021 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
7022 {
7023 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7024 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7025
7026 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
7027
7028 lck_mtx_unlock(&ifp->if_cached_route_lock);
7029 }
7030
7031 static void
7032 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
7033 {
7034 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7035 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7036
7037 if (ifp->if_fwd_cacheok) {
7038 route_copyin(src, &ifp->if_src_route, sizeof (*src));
7039 } else {
7040 ROUTE_RELEASE(src);
7041 }
7042 lck_mtx_unlock(&ifp->if_cached_route_lock);
7043 }
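/*
 * Editorial note: these copyout/copyin helpers implement a one-entry,
 * per-ifnet cache of the most recently used source route.  A caller
 * copies the cached route out under the lock, validates or replaces it
 * without holding the lock, and then copies it back in.  Because
 * route_copyin() consumes a reference, callers such as
 * ifnet_cached_rtlookup_inet() below take an extra RT_ADDREF() before
 * publishing the route so they can keep using it afterwards.
 */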
7044
7045 #if INET6
7046 static void
7047 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
7048 {
7049 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7050 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7051
7052 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
7053 sizeof (*dst));
7054
7055 lck_mtx_unlock(&ifp->if_cached_route_lock);
7056 }
7057
7058 static void
7059 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
7060 {
7061 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7062 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7063
7064 if (ifp->if_fwd_cacheok) {
7065 route_copyin((struct route *)src,
7066 (struct route *)&ifp->if_src_route6, sizeof (*src));
7067 } else {
7068 ROUTE_RELEASE(src);
7069 }
7070 lck_mtx_unlock(&ifp->if_cached_route_lock);
7071 }
7072 #endif /* INET6 */
7073
7074 struct rtentry *
7075 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
7076 {
7077 struct route src_rt;
7078 struct sockaddr_in *dst;
7079
7080 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
7081
7082 ifp_src_route_copyout(ifp, &src_rt);
7083
7084 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
7085 ROUTE_RELEASE(&src_rt);
7086 if (dst->sin_family != AF_INET) {
7087 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
7088 dst->sin_len = sizeof (src_rt.ro_dst);
7089 dst->sin_family = AF_INET;
7090 }
7091 dst->sin_addr = src_ip;
7092
7093 VERIFY(src_rt.ro_rt == NULL);
7094 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
7095 0, 0, ifp->if_index);
7096
7097 if (src_rt.ro_rt != NULL) {
7098 /* retain a ref, copyin consumes one */
7099 struct rtentry *rte = src_rt.ro_rt;
7100 RT_ADDREF(rte);
7101 ifp_src_route_copyin(ifp, &src_rt);
7102 src_rt.ro_rt = rte;
7103 }
7104 }
7105
7106 return (src_rt.ro_rt);
7107 }
7108
7109 #if INET6
7110 struct rtentry *
7111 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
7112 {
7113 struct route_in6 src_rt;
7114
7115 ifp_src_route6_copyout(ifp, &src_rt);
7116
7117 if (ROUTE_UNUSABLE(&src_rt) ||
7118 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
7119 ROUTE_RELEASE(&src_rt);
7120 if (src_rt.ro_dst.sin6_family != AF_INET6) {
7121 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
7122 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
7123 src_rt.ro_dst.sin6_family = AF_INET6;
7124 }
7125 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
7126 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
7127 sizeof (src_rt.ro_dst.sin6_addr));
7128
7129 if (src_rt.ro_rt == NULL) {
7130 src_rt.ro_rt = rtalloc1_scoped(
7131 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
7132 ifp->if_index);
7133
7134 if (src_rt.ro_rt != NULL) {
7135 /* retain a ref, copyin consumes one */
7136 struct rtentry *rte = src_rt.ro_rt;
7137 RT_ADDREF(rte);
7138 ifp_src_route6_copyin(ifp, &src_rt);
7139 src_rt.ro_rt = rte;
7140 }
7141 }
7142 }
7143
7144 return (src_rt.ro_rt);
7145 }
7146 #endif /* INET6 */
7147
7148 void
7149 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
7150 {
7151 struct kev_dl_link_quality_metric_data ev_lqm_data;
7152
7153 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
7154
7155 /* Normalize to edge */
7156 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
7157 lqm = IFNET_LQM_THRESH_ABORT;
7158 atomic_bitset_32(&tcbinfo.ipi_flags,
7159 INPCBINFO_HANDLE_LQM_ABORT);
7160 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
7161 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
7162 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
7163 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
7164 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
7165 lqm <= IFNET_LQM_THRESH_POOR) {
7166 lqm = IFNET_LQM_THRESH_POOR;
7167 } else if (lqm > IFNET_LQM_THRESH_POOR &&
7168 lqm <= IFNET_LQM_THRESH_GOOD) {
7169 lqm = IFNET_LQM_THRESH_GOOD;
7170 }
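/*
 * Summary of the normalization above (editorial note): the raw metric
 * is collapsed onto the nearest threshold edge:
 *	[0, ABORT]                -> IFNET_LQM_THRESH_ABORT (also flags
 *	                             tcbinfo and kicks the fast inpcb
 *	                             timer so TCP can react quickly),
 *	(ABORT, MINIMALLY_VIABLE] -> IFNET_LQM_THRESH_MINIMALLY_VIABLE,
 *	(MINIMALLY_VIABLE, POOR]  -> IFNET_LQM_THRESH_POOR,
 *	(POOR, GOOD]              -> IFNET_LQM_THRESH_GOOD.
 * Negative (unknown/off) values and values above GOOD pass through
 * unchanged.
 */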
7171
7172 /*
7173 * Take the lock if needed
7174 */
7175 if (!locked)
7176 ifnet_lock_exclusive(ifp);
7177
7178 if (lqm == ifp->if_interface_state.lqm_state &&
7179 (ifp->if_interface_state.valid_bitmask &
7180 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
7181 /*
7182 * Release the lock if it was not held by the caller
7183 */
7184 if (!locked)
7185 ifnet_lock_done(ifp);
7186 return; /* nothing to update */
7187 }
7188 ifp->if_interface_state.valid_bitmask |=
7189 IF_INTERFACE_STATE_LQM_STATE_VALID;
7190 ifp->if_interface_state.lqm_state = lqm;
7191
7192 /*
7193 * Don't want to hold the lock when issuing kernel events
7194 */
7195 ifnet_lock_done(ifp);
7196
7197 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
7198 ev_lqm_data.link_quality_metric = lqm;
7199
7200 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
7201 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
7202
7203 /*
7204 * Reacquire the lock for the caller
7205 */
7206 if (locked)
7207 ifnet_lock_exclusive(ifp);
7208 }
7209
7210 static void
7211 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
7212 {
7213 struct kev_dl_rrc_state kev;
7214
7215 if (rrc_state == ifp->if_interface_state.rrc_state &&
7216 (ifp->if_interface_state.valid_bitmask &
7217 IF_INTERFACE_STATE_RRC_STATE_VALID))
7218 return;
7219
7220 ifp->if_interface_state.valid_bitmask |=
7221 IF_INTERFACE_STATE_RRC_STATE_VALID;
7222
7223 ifp->if_interface_state.rrc_state = rrc_state;
7224
7225 /*
7226 * Don't want to hold the lock when issuing kernel events
7227 */
7228 ifnet_lock_done(ifp);
7229
7230 bzero(&kev, sizeof(struct kev_dl_rrc_state));
7231 kev.rrc_state = rrc_state;
7232
7233 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
7234 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
7235
7236 ifnet_lock_exclusive(ifp);
7237 }
7238
7239 errno_t
7240 if_state_update(struct ifnet *ifp,
7241 struct if_interface_state *if_interface_state)
7242 {
7243 u_short if_index_available = 0;
7244
7245 ifnet_lock_exclusive(ifp);
7246
7247 if ((ifp->if_type != IFT_CELLULAR) &&
7248 (if_interface_state->valid_bitmask &
7249 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
7250 ifnet_lock_done(ifp);
7251 return (ENOTSUP);
7252 }
7253 if ((if_interface_state->valid_bitmask &
7254 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
7255 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
7256 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
7257 ifnet_lock_done(ifp);
7258 return (EINVAL);
7259 }
7260 if ((if_interface_state->valid_bitmask &
7261 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
7262 if_interface_state->rrc_state !=
7263 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
7264 if_interface_state->rrc_state !=
7265 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
7266 ifnet_lock_done(ifp);
7267 return (EINVAL);
7268 }
7269
7270 if (if_interface_state->valid_bitmask &
7271 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7272 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
7273 }
7274 if (if_interface_state->valid_bitmask &
7275 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7276 if_rrc_state_update(ifp, if_interface_state->rrc_state);
7277 }
7278 if (if_interface_state->valid_bitmask &
7279 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7280 ifp->if_interface_state.valid_bitmask |=
7281 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7282 ifp->if_interface_state.interface_availability =
7283 if_interface_state->interface_availability;
7284
7285 if (ifp->if_interface_state.interface_availability ==
7286 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
7287 if_index_available = ifp->if_index;
7288 }
7289 }
7290 ifnet_lock_done(ifp);
7291
7292 /*
7293 * Check if the TCP connections going over this interface should be
7294 * forced to send probe packets instead of waiting for TCP timers
7295 * to fire. This will be done when there is an explicit
7296 * notification that the interface became available.
7297 */
7298 if (if_index_available > 0)
7299 tcp_interface_send_probe(if_index_available);
7300
7301 return (0);
7302 }
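
/*
 * Usage sketch (hypothetical caller, values chosen for illustration): a
 * cellular driver could report link quality and RRC state in one call:
 *
 *	struct if_interface_state st;
 *
 *	bzero(&st, sizeof (st));
 *	st.valid_bitmask = IF_INTERFACE_STATE_LQM_STATE_VALID |
 *	    IF_INTERFACE_STATE_RRC_STATE_VALID;
 *	st.lqm_state = IFNET_LQM_THRESH_GOOD;
 *	st.rrc_state = IF_INTERFACE_STATE_RRC_STATE_CONNECTED;
 *	(void) if_state_update(ifp, &st);
 *
 * RRC state is honored only for IFT_CELLULAR interfaces; any other type
 * gets ENOTSUP, as checked above.
 */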
7303
7304 void
7305 if_get_state(struct ifnet *ifp,
7306 struct if_interface_state *if_interface_state)
7307 {
7308 ifnet_lock_shared(ifp);
7309
7310 if_interface_state->valid_bitmask = 0;
7311
7312 if (ifp->if_interface_state.valid_bitmask &
7313 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7314 if_interface_state->valid_bitmask |=
7315 IF_INTERFACE_STATE_RRC_STATE_VALID;
7316 if_interface_state->rrc_state =
7317 ifp->if_interface_state.rrc_state;
7318 }
7319 if (ifp->if_interface_state.valid_bitmask &
7320 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7321 if_interface_state->valid_bitmask |=
7322 IF_INTERFACE_STATE_LQM_STATE_VALID;
7323 if_interface_state->lqm_state =
7324 ifp->if_interface_state.lqm_state;
7325 }
7326 if (ifp->if_interface_state.valid_bitmask &
7327 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7328 if_interface_state->valid_bitmask |=
7329 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7330 if_interface_state->interface_availability =
7331 ifp->if_interface_state.interface_availability;
7332 }
7333
7334 ifnet_lock_done(ifp);
7335 }
7336
7337 errno_t
7338 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
7339 {
7340 ifnet_lock_exclusive(ifp);
7341 if (conn_probe > 1) {
7342 ifnet_lock_done(ifp);
7343 return (EINVAL);
7344 }
7345 if (conn_probe == 0)
7346 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
7347 else
7348 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
7349 ifnet_lock_done(ifp);
7350
7351 #if NECP
7352 necp_update_all_clients();
7353 #endif /* NECP */
7354
7355 tcp_probe_connectivity(ifp, conn_probe);
7356 return (0);
7357 }
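
/*
 * Usage sketch: conn_probe is effectively a boolean (values other than 0
 * or 1 return EINVAL), so a hypothetical caller toggles probing with:
 *
 *	(void) if_probe_connectivity(ifp, 1);	-- start probing
 *	(void) if_probe_connectivity(ifp, 0);	-- stop probing
 */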
7358
7359 /* for uuid.c */
7360 int
7361 uuid_get_ethernet(u_int8_t *node)
7362 {
7363 struct ifnet *ifp;
7364 struct sockaddr_dl *sdl;
7365
7366 ifnet_head_lock_shared();
7367 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
7368 ifnet_lock_shared(ifp);
7369 IFA_LOCK_SPIN(ifp->if_lladdr);
7370 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
7371 if (sdl->sdl_type == IFT_ETHER) {
7372 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
7373 IFA_UNLOCK(ifp->if_lladdr);
7374 ifnet_lock_done(ifp);
7375 ifnet_head_done();
7376 return (0);
7377 }
7378 IFA_UNLOCK(ifp->if_lladdr);
7379 ifnet_lock_done(ifp);
7380 }
7381 ifnet_head_done();
7382
7383 return (-1);
7384 }
7385
7386 static int
7387 sysctl_rxpoll SYSCTL_HANDLER_ARGS
7388 {
7389 #pragma unused(arg1, arg2)
7390 uint32_t i;
7391 int err;
7392
7393 i = if_rxpoll;
7394
7395 err = sysctl_handle_int(oidp, &i, 0, req);
7396 if (err != 0 || req->newptr == USER_ADDR_NULL)
7397 return (err);
7398
7399 if (net_rxpoll == 0)
7400 return (ENXIO);
7401
7402 if_rxpoll = i;
7403 return (err);
7404 }
7405
7406 static int
7407 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
7408 {
7409 #pragma unused(arg1, arg2)
7410 uint64_t q;
7411 int err;
7412
7413 q = if_rxpoll_mode_holdtime;
7414
7415 err = sysctl_handle_quad(oidp, &q, 0, req);
7416 if (err != 0 || req->newptr == USER_ADDR_NULL)
7417 return (err);
7418
7419 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
7420 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
7421
7422 if_rxpoll_mode_holdtime = q;
7423
7424 return (err);
7425 }
7426
7427 static int
7428 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
7429 {
7430 #pragma unused(arg1, arg2)
7431 uint64_t q;
7432 int err;
7433
7434 q = if_rxpoll_sample_holdtime;
7435
7436 err = sysctl_handle_quad(oidp, &q, 0, req);
7437 if (err != 0 || req->newptr == USER_ADDR_NULL)
7438 return (err);
7439
7440 if (q < IF_RXPOLL_SAMPLETIME_MIN)
7441 q = IF_RXPOLL_SAMPLETIME_MIN;
7442
7443 if_rxpoll_sample_holdtime = q;
7444
7445 return (err);
7446 }
7447
7448 static int
7449 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
7450 {
7451 #pragma unused(arg1, arg2)
7452 uint64_t q;
7453 int err;
7454
7455 q = if_rxpoll_interval_time;
7456
7457 err = sysctl_handle_quad(oidp, &q, 0, req);
7458 if (err != 0 || req->newptr == USER_ADDR_NULL)
7459 return (err);
7460
7461 if (q < IF_RXPOLL_INTERVALTIME_MIN)
7462 q = IF_RXPOLL_INTERVALTIME_MIN;
7463
7464 if_rxpoll_interval_time = q;
7465
7466 return (err);
7467 }
7468
7469 static int
7470 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
7471 {
7472 #pragma unused(arg1, arg2)
7473 uint32_t i;
7474 int err;
7475
7476 i = if_rxpoll_wlowat;
7477
7478 err = sysctl_handle_int(oidp, &i, 0, req);
7479 if (err != 0 || req->newptr == USER_ADDR_NULL)
7480 return (err);
7481
7482 if (i == 0 || i >= if_rxpoll_whiwat)
7483 return (EINVAL);
7484
7485 if_rxpoll_wlowat = i;
7486 return (err);
7487 }
7488
7489 static int
7490 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
7491 {
7492 #pragma unused(arg1, arg2)
7493 uint32_t i;
7494 int err;
7495
7496 i = if_rxpoll_whiwat;
7497
7498 err = sysctl_handle_int(oidp, &i, 0, req);
7499 if (err != 0 || req->newptr == USER_ADDR_NULL)
7500 return (err);
7501
7502 if (i <= if_rxpoll_wlowat)
7503 return (EINVAL);
7504
7505 if_rxpoll_whiwat = i;
7506 return (err);
7507 }
7508
7509 static int
7510 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
7511 {
7512 #pragma unused(arg1, arg2)
7513 int i, err;
7514
7515 i = if_sndq_maxlen;
7516
7517 err = sysctl_handle_int(oidp, &i, 0, req);
7518 if (err != 0 || req->newptr == USER_ADDR_NULL)
7519 return (err);
7520
7521 if (i < IF_SNDQ_MINLEN)
7522 i = IF_SNDQ_MINLEN;
7523
7524 if_sndq_maxlen = i;
7525 return (err);
7526 }
7527
7528 static int
7529 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
7530 {
7531 #pragma unused(arg1, arg2)
7532 int i, err;
7533
7534 i = if_rcvq_maxlen;
7535
7536 err = sysctl_handle_int(oidp, &i, 0, req);
7537 if (err != 0 || req->newptr == USER_ADDR_NULL)
7538 return (err);
7539
7540 if (i < IF_RCVQ_MINLEN)
7541 i = IF_RCVQ_MINLEN;
7542
7543 if_rcvq_maxlen = i;
7544 return (err);
7545 }
7546
7547 void
7548 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
7549 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
7550 {
7551 struct kev_dl_node_presence kev;
7552 struct sockaddr_dl *sdl;
7553 struct sockaddr_in6 *sin6;
7554
7555 VERIFY(ifp);
7556 VERIFY(sa);
7557 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7558
7559 bzero(&kev, sizeof (kev));
7560 sin6 = &kev.sin6_node_address;
7561 sdl = &kev.sdl_node_address;
7562 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7563 kev.rssi = rssi;
7564 kev.link_quality_metric = lqm;
7565 kev.node_proximity_metric = npm;
7566 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
7567
7568 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
7569 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
7570 &kev.link_data, sizeof (kev));
7571 }
7572
7573 void
7574 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
7575 {
7576 struct kev_dl_node_absence kev;
7577 struct sockaddr_in6 *sin6;
7578 struct sockaddr_dl *sdl;
7579
7580 VERIFY(ifp);
7581 VERIFY(sa);
7582 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7583
7584 bzero(&kev, sizeof (kev));
7585 sin6 = &kev.sin6_node_address;
7586 sdl = &kev.sdl_node_address;
7587 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7588
7589 nd6_alt_node_absent(ifp, sin6);
7590 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
7591 &kev.link_data, sizeof (kev));
7592 }
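
/*
 * Usage sketch (hypothetical driver code): a link layer that tracks peers
 * could publish and later retract a neighbor this way; sa must be AF_LINK
 * or AF_INET6, svc is an opaque 48-byte service-info blob supplied by the
 * driver, and rssi/npm are driver-measured values:
 *
 *	u_int8_t svc[48];
 *
 *	dlil_node_present(ifp, sa, rssi, IFNET_LQM_THRESH_GOOD, npm, svc);
 *	...
 *	dlil_node_absent(ifp, sa);
 */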
7593
7594 const void *
7595 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
7596 kauth_cred_t *credp)
7597 {
7598 const u_int8_t *bytes;
7599 size_t size;
7600
7601 bytes = CONST_LLADDR(sdl);
7602 size = sdl->sdl_alen;
7603
7604 #if CONFIG_MACF
7605 if (dlil_lladdr_ckreq) {
7606 switch (sdl->sdl_type) {
7607 case IFT_ETHER:
7608 case IFT_IEEE1394:
7609 break;
7610 default:
7611 credp = NULL;
7612 break;
7613 }
7614
7615 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
7616 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
7617 [0] = 2
7618 };
7619
7620 bytes = unspec;
7621 }
7622 }
7623 #else
7624 #pragma unused(credp)
7625 #endif
7626
7627 if (sizep != NULL) *sizep = size;
7628 return (bytes);
7629 }
7630
7631 void
7632 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
7633 u_int8_t info[DLIL_MODARGLEN])
7634 {
7635 struct kev_dl_issues kev;
7636 struct timeval tv;
7637
7638 VERIFY(ifp != NULL);
7639 VERIFY(modid != NULL);
7640 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
7641 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
7642
7643 bzero(&kev, sizeof (kev));
7644
7645 microtime(&tv);
7646 kev.timestamp = tv.tv_sec;
7647 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
7648 if (info != NULL)
7649 bcopy(info, &kev.info, DLIL_MODARGLEN);
7650
7651 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
7652 &kev.link_data, sizeof (kev));
7653 }
7654
7655 errno_t
7656 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7657 struct proc *p)
7658 {
7659 u_int32_t level = IFNET_THROTTLE_OFF;
7660 errno_t result = 0;
7661
7662 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
7663
7664 if (cmd == SIOCSIFOPPORTUNISTIC) {
7665 /*
7666 * XXX: Use priv_check_cred() instead of root check?
7667 */
7668 if ((result = proc_suser(p)) != 0)
7669 return (result);
7670
7671 if (ifr->ifr_opportunistic.ifo_flags ==
7672 IFRIFOF_BLOCK_OPPORTUNISTIC)
7673 level = IFNET_THROTTLE_OPPORTUNISTIC;
7674 else if (ifr->ifr_opportunistic.ifo_flags == 0)
7675 level = IFNET_THROTTLE_OFF;
7676 else
7677 result = EINVAL;
7678
7679 if (result == 0)
7680 result = ifnet_set_throttle(ifp, level);
7681 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
7682 ifr->ifr_opportunistic.ifo_flags = 0;
7683 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
7684 ifr->ifr_opportunistic.ifo_flags |=
7685 IFRIFOF_BLOCK_OPPORTUNISTIC;
7686 }
7687 }
7688
7689 /*
7690 * Return the count of current opportunistic connections
7691 * over the interface.
7692 */
7693 if (result == 0) {
7694 uint32_t flags = 0;
7695 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
7696 INPCB_OPPORTUNISTIC_SETCMD : 0;
7697 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
7698 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
7699 ifr->ifr_opportunistic.ifo_inuse =
7700 udp_count_opportunistic(ifp->if_index, flags) +
7701 tcp_count_opportunistic(ifp->if_index, flags);
7702 }
7703
7704 if (result == EALREADY)
7705 result = 0;
7706
7707 return (result);
7708 }
7709
7710 int
7711 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
7712 {
7713 struct ifclassq *ifq;
7714 int err = 0;
7715
7716 if (!(ifp->if_eflags & IFEF_TXSTART))
7717 return (ENXIO);
7718
7719 *level = IFNET_THROTTLE_OFF;
7720
7721 ifq = &ifp->if_snd;
7722 IFCQ_LOCK(ifq);
7723 /* Throttling works only for IFCQ, not ALTQ instances */
7724 if (IFCQ_IS_ENABLED(ifq))
7725 IFCQ_GET_THROTTLE(ifq, *level, err);
7726 IFCQ_UNLOCK(ifq);
7727
7728 return (err);
7729 }
7730
7731 int
7732 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7733 {
7734 struct ifclassq *ifq;
7735 int err = 0;
7736
7737 if (!(ifp->if_eflags & IFEF_TXSTART))
7738 return (ENXIO);
7739
7740 ifq = &ifp->if_snd;
7741
7742 switch (level) {
7743 case IFNET_THROTTLE_OFF:
7744 case IFNET_THROTTLE_OPPORTUNISTIC:
7745 break;
7746 default:
7747 return (EINVAL);
7748 }
7749
7750 IFCQ_LOCK(ifq);
7751 if (IFCQ_IS_ENABLED(ifq))
7752 IFCQ_SET_THROTTLE(ifq, level, err);
7753 IFCQ_UNLOCK(ifq);
7754
7755 if (err == 0) {
7756 printf("%s: throttling level set to %d\n", if_name(ifp),
7757 level);
7758 if (level == IFNET_THROTTLE_OFF)
7759 ifnet_start(ifp);
7760 }
7761
7762 return (err);
7763 }
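
/*
 * Usage sketch (hypothetical in-kernel caller): throttling is normally
 * driven through SIOCSIFOPPORTUNISTIC via ifnet_getset_opportunistic()
 * above, but the level can also be toggled directly:
 *
 *	u_int32_t level;
 *
 *	if (ifnet_set_throttle(ifp, IFNET_THROTTLE_OPPORTUNISTIC) == 0 &&
 *	    ifnet_get_throttle(ifp, &level) == 0)
 *		VERIFY(level == IFNET_THROTTLE_OPPORTUNISTIC);
 *	(void) ifnet_set_throttle(ifp, IFNET_THROTTLE_OFF);
 *
 * Both calls return ENXIO unless the interface uses the new output model
 * (IFEF_TXSTART).
 */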
7764
7765 errno_t
7766 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7767 struct proc *p)
7768 {
7769 #pragma unused(p)
7770 errno_t result = 0;
7771 uint32_t flags;
7772 int level, category, subcategory;
7773
7774 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7775
7776 if (cmd == SIOCSIFLOG) {
7777 if ((result = priv_check_cred(kauth_cred_get(),
7778 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
7779 return (result);
7780
7781 level = ifr->ifr_log.ifl_level;
7782 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
7783 result = EINVAL;
7784
7785 flags = ifr->ifr_log.ifl_flags;
7786 if ((flags &= IFNET_LOGF_MASK) == 0)
7787 result = EINVAL;
7788
7789 category = ifr->ifr_log.ifl_category;
7790 subcategory = ifr->ifr_log.ifl_subcategory;
7791
7792 if (result == 0)
7793 result = ifnet_set_log(ifp, level, flags,
7794 category, subcategory);
7795 } else {
7796 result = ifnet_get_log(ifp, &level, &flags, &category,
7797 &subcategory);
7798 if (result == 0) {
7799 ifr->ifr_log.ifl_level = level;
7800 ifr->ifr_log.ifl_flags = flags;
7801 ifr->ifr_log.ifl_category = category;
7802 ifr->ifr_log.ifl_subcategory = subcategory;
7803 }
7804 }
7805
7806 return (result);
7807 }
7808
7809 int
7810 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
7811 int32_t category, int32_t subcategory)
7812 {
7813 int err = 0;
7814
7815 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
7816 VERIFY(flags & IFNET_LOGF_MASK);
7817
7818 /*
7819 * The logging level applies to all facilities; make sure to
7820 * update them all with the most current level.
7821 */
7822 flags |= ifp->if_log.flags;
7823
7824 if (ifp->if_output_ctl != NULL) {
7825 struct ifnet_log_params l;
7826
7827 bzero(&l, sizeof (l));
7828 l.level = level;
7829 l.flags = flags;
7830 l.flags &= ~IFNET_LOGF_DLIL;
7831 l.category = category;
7832 l.subcategory = subcategory;
7833
7834 /* Send this request to lower layers */
7835 if (l.flags != 0) {
7836 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7837 sizeof (l), &l);
7838 }
7839 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7840 /*
7841 * If targeted to the lower layers without an output
7842 * control callback registered on the interface, just
7843 * silently ignore facilities other than ours.
7844 */
7845 flags &= IFNET_LOGF_DLIL;
7846 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
7847 level = 0;
7848 }
7849
7850 if (err == 0) {
7851 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
7852 ifp->if_log.flags = 0;
7853 else
7854 ifp->if_log.flags |= flags;
7855
7856 log(LOG_INFO, "%s: logging level set to %d flags=%b "
7857 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
7858 ifp->if_log.level, ifp->if_log.flags,
7859 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
7860 category, subcategory);
7861 }
7862
7863 return (err);
7864 }
7865
7866 int
7867 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7868 int32_t *category, int32_t *subcategory)
7869 {
7870 if (level != NULL)
7871 *level = ifp->if_log.level;
7872 if (flags != NULL)
7873 *flags = ifp->if_log.flags;
7874 if (category != NULL)
7875 *category = ifp->if_log.category;
7876 if (subcategory != NULL)
7877 *subcategory = ifp->if_log.subcategory;
7878
7879 return (0);
7880 }
7881
7882 int
7883 ifnet_notify_address(struct ifnet *ifp, int af)
7884 {
7885 struct ifnet_notify_address_params na;
7886
7887 #if PF
7888 (void) pf_ifaddr_hook(ifp);
7889 #endif /* PF */
7890
7891 if (ifp->if_output_ctl == NULL)
7892 return (EOPNOTSUPP);
7893
7894 bzero(&na, sizeof (na));
7895 na.address_family = af;
7896
7897 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7898 sizeof (na), &na));
7899 }
7900
7901 errno_t
7902 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7903 {
7904 if (ifp == NULL || flowid == NULL) {
7905 return (EINVAL);
7906 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7907 !IF_FULLY_ATTACHED(ifp)) {
7908 return (ENXIO);
7909 }
7910
7911 *flowid = ifp->if_flowhash;
7912
7913 return (0);
7914 }
7915
7916 errno_t
7917 ifnet_disable_output(struct ifnet *ifp)
7918 {
7919 int err;
7920
7921 if (ifp == NULL) {
7922 return (EINVAL);
7923 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7924 !IF_FULLY_ATTACHED(ifp)) {
7925 return (ENXIO);
7926 }
7927
7928 if ((err = ifnet_fc_add(ifp)) == 0) {
7929 lck_mtx_lock_spin(&ifp->if_start_lock);
7930 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7931 lck_mtx_unlock(&ifp->if_start_lock);
7932 }
7933 return (err);
7934 }
7935
7936 errno_t
7937 ifnet_enable_output(struct ifnet *ifp)
7938 {
7939 if (ifp == NULL) {
7940 return (EINVAL);
7941 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7942 !IF_FULLY_ATTACHED(ifp)) {
7943 return (ENXIO);
7944 }
7945
7946 ifnet_start_common(ifp, TRUE);
7947 return (0);
7948 }
7949
7950 void
7951 ifnet_flowadv(uint32_t flowhash)
7952 {
7953 struct ifnet_fc_entry *ifce;
7954 struct ifnet *ifp;
7955
7956 ifce = ifnet_fc_get(flowhash);
7957 if (ifce == NULL)
7958 return;
7959
7960 VERIFY(ifce->ifce_ifp != NULL);
7961 ifp = ifce->ifce_ifp;
7962
7963 /* flow hash gets recalculated per attach, so check */
7964 if (ifnet_is_attached(ifp, 1)) {
7965 if (ifp->if_flowhash == flowhash)
7966 (void) ifnet_enable_output(ifp);
7967 ifnet_decr_iorefcnt(ifp);
7968 }
7969 ifnet_fc_entry_free(ifce);
7970 }
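
/*
 * Usage sketch (hypothetical driver logic): a driver whose transmit ring
 * fills up can assert flow control and rely on a later flow advisory,
 * carrying the same if_flowhash, to resume output.  tx_ring_full below is
 * a placeholder for whatever congestion signal the driver uses:
 *
 *	if (tx_ring_full)
 *		(void) ifnet_disable_output(ifp);
 *	...
 *	ifnet_flowadv(ifp->if_flowhash);
 *
 * ifnet_disable_output() registers the flow hash in ifnet_fc_tree and sets
 * IFSF_FLOW_CONTROLLED; ifnet_flowadv() re-enables output only if the
 * interface is still attached and its flow hash still matches.
 */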
7971
7972 /*
7973 * Function to compare ifnet_fc_entries in ifnet flow control tree
7974 */
7975 static inline int
7976 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7977 {
7978 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
7979 }
7980
7981 static int
7982 ifnet_fc_add(struct ifnet *ifp)
7983 {
7984 struct ifnet_fc_entry keyfc, *ifce;
7985 uint32_t flowhash;
7986
7987 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
7988 VERIFY(ifp->if_flowhash != 0);
7989 flowhash = ifp->if_flowhash;
7990
7991 bzero(&keyfc, sizeof (keyfc));
7992 keyfc.ifce_flowhash = flowhash;
7993
7994 lck_mtx_lock_spin(&ifnet_fc_lock);
7995 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7996 if (ifce != NULL && ifce->ifce_ifp == ifp) {
7997 /* Entry is already in ifnet_fc_tree, return */
7998 lck_mtx_unlock(&ifnet_fc_lock);
7999 return (0);
8000 }
8001
8002 if (ifce != NULL) {
8003 /*
8004 * There is a different fc entry with the same flow hash
8005 * but different ifp pointer. There can be a collision
8006 * on flow hash but the probability is low. Let's just
8007 * avoid adding a second one when there is a collision.
8008 */
8009 lck_mtx_unlock(&ifnet_fc_lock);
8010 return (EAGAIN);
8011 }
8012
8013 /* become regular mutex */
8014 lck_mtx_convert_spin(&ifnet_fc_lock);
8015
8016 ifce = zalloc(ifnet_fc_zone);
8017 if (ifce == NULL) {
8018 /* memory allocation failed */
8019 lck_mtx_unlock(&ifnet_fc_lock);
8020 return (ENOMEM);
8021 }
8022 bzero(ifce, ifnet_fc_zone_size);
8023
8024 ifce->ifce_flowhash = flowhash;
8025 ifce->ifce_ifp = ifp;
8026
8027 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
8028 lck_mtx_unlock(&ifnet_fc_lock);
8029 return (0);
8030 }
8031
8032 static struct ifnet_fc_entry *
8033 ifnet_fc_get(uint32_t flowhash)
8034 {
8035 struct ifnet_fc_entry keyfc, *ifce;
8036 struct ifnet *ifp;
8037
8038 bzero(&keyfc, sizeof (keyfc));
8039 keyfc.ifce_flowhash = flowhash;
8040
8041 lck_mtx_lock_spin(&ifnet_fc_lock);
8042 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
8043 if (ifce == NULL) {
8044 /* Entry is not present in ifnet_fc_tree, return */
8045 lck_mtx_unlock(&ifnet_fc_lock);
8046 return (NULL);
8047 }
8048
8049 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
8050
8051 VERIFY(ifce->ifce_ifp != NULL);
8052 ifp = ifce->ifce_ifp;
8053
8054 /* become regular mutex */
8055 lck_mtx_convert_spin(&ifnet_fc_lock);
8056
8057 if (!ifnet_is_attached(ifp, 0)) {
8058 /*
8059 * This ifp is not attached or in the process of being
8060 * detached; just don't process it.
8061 */
8062 ifnet_fc_entry_free(ifce);
8063 ifce = NULL;
8064 }
8065 lck_mtx_unlock(&ifnet_fc_lock);
8066
8067 return (ifce);
8068 }
8069
8070 static void
8071 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
8072 {
8073 zfree(ifnet_fc_zone, ifce);
8074 }
8075
8076 static uint32_t
8077 ifnet_calc_flowhash(struct ifnet *ifp)
8078 {
8079 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
8080 uint32_t flowhash = 0;
8081
8082 if (ifnet_flowhash_seed == 0)
8083 ifnet_flowhash_seed = RandomULong();
8084
8085 bzero(&fh, sizeof (fh));
8086
8087 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
8088 fh.ifk_unit = ifp->if_unit;
8089 fh.ifk_flags = ifp->if_flags;
8090 fh.ifk_eflags = ifp->if_eflags;
8091 fh.ifk_capabilities = ifp->if_capabilities;
8092 fh.ifk_capenable = ifp->if_capenable;
8093 fh.ifk_output_sched_model = ifp->if_output_sched_model;
8094 fh.ifk_rand1 = RandomULong();
8095 fh.ifk_rand2 = RandomULong();
8096
8097 try_again:
8098 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
8099 if (flowhash == 0) {
8100 /* try to get a non-zero flowhash */
8101 ifnet_flowhash_seed = RandomULong();
8102 goto try_again;
8103 }
8104
8105 return (flowhash);
8106 }
8107
8108 int
8109 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
8110 uint16_t flags, uint8_t *data)
8111 {
8112 #pragma unused(flags)
8113 int error = 0;
8114
8115 switch (family) {
8116 case AF_INET:
8117 if_inetdata_lock_exclusive(ifp);
8118 if (IN_IFEXTRA(ifp) != NULL) {
8119 if (len == 0) {
8120 /* Allow clearing the signature */
8121 IN_IFEXTRA(ifp)->netsig_len = 0;
8122 bzero(IN_IFEXTRA(ifp)->netsig,
8123 sizeof (IN_IFEXTRA(ifp)->netsig));
8124 if_inetdata_lock_done(ifp);
8125 break;
8126 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
8127 error = EINVAL;
8128 if_inetdata_lock_done(ifp);
8129 break;
8130 }
8131 IN_IFEXTRA(ifp)->netsig_len = len;
8132 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
8133 } else {
8134 error = ENOMEM;
8135 }
8136 if_inetdata_lock_done(ifp);
8137 break;
8138
8139 case AF_INET6:
8140 if_inet6data_lock_exclusive(ifp);
8141 if (IN6_IFEXTRA(ifp) != NULL) {
8142 if (len == 0) {
8143 /* Allow clearing the signature */
8144 IN6_IFEXTRA(ifp)->netsig_len = 0;
8145 bzero(IN6_IFEXTRA(ifp)->netsig,
8146 sizeof (IN6_IFEXTRA(ifp)->netsig));
8147 if_inet6data_lock_done(ifp);
8148 break;
8149 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
8150 error = EINVAL;
8151 if_inet6data_lock_done(ifp);
8152 break;
8153 }
8154 IN6_IFEXTRA(ifp)->netsig_len = len;
8155 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
8156 } else {
8157 error = ENOMEM;
8158 }
8159 if_inet6data_lock_done(ifp);
8160 break;
8161
8162 default:
8163 error = EINVAL;
8164 break;
8165 }
8166
8167 return (error);
8168 }
8169
8170 int
8171 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
8172 uint16_t *flags, uint8_t *data)
8173 {
8174 int error = 0;
8175
8176 if (ifp == NULL || len == NULL || data == NULL)
8177 return (EINVAL);
8178
8179 switch (family) {
8180 case AF_INET:
8181 if_inetdata_lock_shared(ifp);
8182 if (IN_IFEXTRA(ifp) != NULL) {
8183 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
8184 error = EINVAL;
8185 if_inetdata_lock_done(ifp);
8186 break;
8187 }
8188 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
8189 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
8190 else
8191 error = ENOENT;
8192 } else {
8193 error = ENOMEM;
8194 }
8195 if_inetdata_lock_done(ifp);
8196 break;
8197
8198 case AF_INET6:
8199 if_inet6data_lock_shared(ifp);
8200 if (IN6_IFEXTRA(ifp) != NULL) {
8201 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
8202 error = EINVAL;
8203 if_inet6data_lock_done(ifp);
8204 break;
8205 }
8206 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
8207 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
8208 else
8209 error = ENOENT;
8210 } else {
8211 error = ENOMEM;
8212 }
8213 if_inet6data_lock_done(ifp);
8214 break;
8215
8216 default:
8217 error = EINVAL;
8218 break;
8219 }
8220
8221 if (error == 0 && flags != NULL)
8222 *flags = 0;
8223
8224 return (error);
8225 }
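
/*
 * Usage sketch (hypothetical values): publishing an opaque network
 * signature learned for the attached IPv6 network, then reading it back:
 *
 *	uint8_t sig[4] = { 0xde, 0xad, 0xbe, 0xef };
 *	uint8_t buf[sizeof (sig)];
 *	uint8_t len = sizeof (buf);
 *	uint16_t flags;
 *
 *	(void) ifnet_set_netsignature(ifp, AF_INET6, sizeof (sig), 0, sig);
 *	(void) ifnet_get_netsignature(ifp, AF_INET6, &len, &flags, buf);
 *
 * The getter requires *len to be at least the stored signature length and
 * passes the actual length back through it.
 */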
8226
8227 #if INET6
8228 int
8229 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8230 {
8231 int i, error = 0, one_set = 0;
8232
8233 if_inet6data_lock_exclusive(ifp);
8234
8235 if (IN6_IFEXTRA(ifp) == NULL) {
8236 error = ENOMEM;
8237 goto out;
8238 }
8239
8240 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8241 uint32_t prefix_len =
8242 prefixes[i].prefix_len;
8243 struct in6_addr *prefix =
8244 &prefixes[i].ipv6_prefix;
8245
8246 if (prefix_len == 0) {
8247 /* Allow clearing the prefix */
8248 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
8249 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8250 sizeof(struct in6_addr));
8251
8252 continue;
8253 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
8254 prefix_len != NAT64_PREFIX_LEN_40 &&
8255 prefix_len != NAT64_PREFIX_LEN_48 &&
8256 prefix_len != NAT64_PREFIX_LEN_56 &&
8257 prefix_len != NAT64_PREFIX_LEN_64 &&
8258 prefix_len != NAT64_PREFIX_LEN_96) {
8259 error = EINVAL;
8260 goto out;
8261 }
8262
8263 if (IN6_IS_SCOPE_EMBED(prefix)) {
8264 error = EINVAL;
8265 goto out;
8266 }
8267
8268 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
8269 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8270 sizeof(struct in6_addr));
8271 one_set = 1;
8272 }
8273
8274 out:
8275 if_inet6data_lock_done(ifp);
8276
8277 if (error == 0 && one_set != 0)
8278 necp_update_all_clients();
8279
8280 return (error);
8281 }
8282
8283 int
8284 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8285 {
8286 int i, found_one = 0, error = 0;
8287
8288 if (ifp == NULL)
8289 return (EINVAL);
8290
8291 if_inet6data_lock_shared(ifp);
8292
8293 if (IN6_IFEXTRA(ifp) == NULL) {
8294 error = ENOMEM;
8295 goto out;
8296 }
8297
8298 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8299 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
8300 found_one = 1;
8301 }
8302
8303 if (found_one == 0) {
8304 error = ENOENT;
8305 goto out;
8306 }
8307
8308 if (prefixes)
8309 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
8310 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
8311
8312 out:
8313 if_inet6data_lock_done(ifp);
8314
8315 return (error);
8316 }
8317 #endif
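
/*
 * Usage sketch (hypothetical values): installing a single NAT64 prefix,
 * e.g. the well-known 64:ff9b::/96, while clearing the remaining slots;
 * wkp is assumed to be a struct in6_addr already holding that prefix:
 *
 *	struct ipv6_prefix prefixes[NAT64_MAX_NUM_PREFIXES];
 *
 *	bzero(prefixes, sizeof (prefixes));
 *	prefixes[0].prefix_len = NAT64_PREFIX_LEN_96;
 *	prefixes[0].ipv6_prefix = wkp;
 *	(void) ifnet_set_nat64prefix(ifp, prefixes);
 *
 * Slots with prefix_len == 0 are cleared, and only the prefix lengths
 * checked above (32/40/48/56/64/96 bits) are accepted.
 */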
8318
8319 static void
8320 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
8321 protocol_family_t pf)
8322 {
8323 #pragma unused(ifp)
8324 uint32_t did_sw;
8325
8326 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
8327 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
8328 return;
8329
8330 switch (pf) {
8331 case PF_INET:
8332 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
8333 if (did_sw & CSUM_DELAY_IP)
8334 hwcksum_dbg_finalized_hdr++;
8335 if (did_sw & CSUM_DELAY_DATA)
8336 hwcksum_dbg_finalized_data++;
8337 break;
8338 #if INET6
8339 case PF_INET6:
8340 /*
8341 * Checksum offload should not have been enabled when
8342 * extension headers exist; that also means that we
8343 * cannot force-finalize packets with extension headers.
8344 * Indicate to the callee that it should skip such a case
8345 * by setting optlen to -1.
8346 */
8347 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
8348 m->m_pkthdr.csum_flags);
8349 if (did_sw & CSUM_DELAY_IPV6_DATA)
8350 hwcksum_dbg_finalized_data++;
8351 break;
8352 #endif /* INET6 */
8353 default:
8354 return;
8355 }
8356 }
8357
8358 static void
8359 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
8360 protocol_family_t pf)
8361 {
8362 uint16_t sum = 0;
8363 uint32_t hlen;
8364
8365 if (frame_header == NULL ||
8366 frame_header < (char *)mbuf_datastart(m) ||
8367 frame_header > (char *)m->m_data) {
8368 printf("%s: frame header pointer 0x%llx out of range "
8369 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
8370 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
8371 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
8372 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
8373 (uint64_t)VM_KERNEL_ADDRPERM(m));
8374 return;
8375 }
8376 hlen = (m->m_data - frame_header);
8377
8378 switch (pf) {
8379 case PF_INET:
8380 #if INET6
8381 case PF_INET6:
8382 #endif /* INET6 */
8383 break;
8384 default:
8385 return;
8386 }
8387
8388 /*
8389 * Force partial checksum offload; useful to simulate cases
8390 * where the hardware does not support partial checksum offload,
8391 * in order to validate correctness throughout the layers above.
8392 */
8393 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
8394 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
8395
8396 if (foff > (uint32_t)m->m_pkthdr.len)
8397 return;
8398
8399 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
8400
8401 /* Compute 16-bit 1's complement sum from forced offset */
8402 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
8403
8404 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
8405 m->m_pkthdr.csum_rx_val = sum;
8406 m->m_pkthdr.csum_rx_start = (foff + hlen);
8407
8408 hwcksum_dbg_partial_forced++;
8409 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
8410 }
8411
8412 /*
8413 * Partial checksum offload verification (and adjustment);
8414 * useful to validate and test cases where the hardware
8415 * supports partial checksum offload.
8416 */
8417 if ((m->m_pkthdr.csum_flags &
8418 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
8419 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
8420 uint32_t rxoff;
8421
8422 /* Start offset must begin after frame header */
8423 rxoff = m->m_pkthdr.csum_rx_start;
8424 if (hlen > rxoff) {
8425 hwcksum_dbg_bad_rxoff++;
8426 if (dlil_verbose) {
8427 printf("%s: partial cksum start offset %d "
8428 "is less than frame header length %d for "
8429 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
8430 (uint64_t)VM_KERNEL_ADDRPERM(m));
8431 }
8432 return;
8433 }
8434 rxoff -= hlen;
8435
8436 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
8437 /*
8438 * Compute the expected 16-bit 1's complement sum;
8439 * skip this if we've already computed it above
8440 * when partial checksum offload is forced.
8441 */
8442 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
8443
8444 /* Hardware or driver is buggy */
8445 if (sum != m->m_pkthdr.csum_rx_val) {
8446 hwcksum_dbg_bad_cksum++;
8447 if (dlil_verbose) {
8448 printf("%s: bad partial cksum value "
8449 "0x%x (expected 0x%x) for mbuf "
8450 "0x%llx [rx_start %d]\n",
8451 if_name(ifp),
8452 m->m_pkthdr.csum_rx_val, sum,
8453 (uint64_t)VM_KERNEL_ADDRPERM(m),
8454 m->m_pkthdr.csum_rx_start);
8455 }
8456 return;
8457 }
8458 }
8459 hwcksum_dbg_verified++;
8460
8461 /*
8462 * This code allows us to emulate various hardwares that
8463 * perform 16-bit 1's complement sum beginning at various
8464 * start offset values.
8465 */
8466 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
8467 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
8468
8469 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
8470 return;
8471
8472 sum = m_adj_sum16(m, rxoff, aoff,
8473 m_pktlen(m) - aoff, sum);
8474
8475 m->m_pkthdr.csum_rx_val = sum;
8476 m->m_pkthdr.csum_rx_start = (aoff + hlen);
8477
8478 hwcksum_dbg_adjusted++;
8479 }
8480 }
8481 }
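
/*
 * Sketch of what the verification path above expects from a driver whose
 * hardware computes a partial 16-bit one's complement sum over a received
 * frame; hw_sum16 and rx_start are hypothetical driver-provided values,
 * with rx_start measured from the start of the frame header:
 *
 *	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
 *	m->m_pkthdr.csum_rx_val = hw_sum16;
 *	m->m_pkthdr.csum_rx_start = rx_start;
 *
 * With hwcksum_dbg enabled, dlil_input_cksum_dbg() recomputes the sum via
 * m_sum16() and counts any mismatch in hwcksum_dbg_bad_cksum.
 */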
8482
8483 static int
8484 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
8485 {
8486 #pragma unused(arg1, arg2)
8487 u_int32_t i;
8488 int err;
8489
8490 i = hwcksum_dbg_mode;
8491
8492 err = sysctl_handle_int(oidp, &i, 0, req);
8493 if (err != 0 || req->newptr == USER_ADDR_NULL)
8494 return (err);
8495
8496 if (hwcksum_dbg == 0)
8497 return (ENODEV);
8498
8499 if ((i & ~HWCKSUM_DBG_MASK) != 0)
8500 return (EINVAL);
8501
8502 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
8503
8504 return (err);
8505 }
8506
8507 static int
8508 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
8509 {
8510 #pragma unused(arg1, arg2)
8511 u_int32_t i;
8512 int err;
8513
8514 i = hwcksum_dbg_partial_rxoff_forced;
8515
8516 err = sysctl_handle_int(oidp, &i, 0, req);
8517 if (err != 0 || req->newptr == USER_ADDR_NULL)
8518 return (err);
8519
8520 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
8521 return (ENODEV);
8522
8523 hwcksum_dbg_partial_rxoff_forced = i;
8524
8525 return (err);
8526 }
8527
8528 static int
8529 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
8530 {
8531 #pragma unused(arg1, arg2)
8532 u_int32_t i;
8533 int err;
8534
8535 i = hwcksum_dbg_partial_rxoff_adj;
8536
8537 err = sysctl_handle_int(oidp, &i, 0, req);
8538 if (err != 0 || req->newptr == USER_ADDR_NULL)
8539 return (err);
8540
8541 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
8542 return (ENODEV);
8543
8544 hwcksum_dbg_partial_rxoff_adj = i;
8545
8546 return (err);
8547 }
8548
8549 static int
8550 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
8551 {
8552 #pragma unused(oidp, arg1, arg2)
8553 int err;
8554
8555 if (req->oldptr == USER_ADDR_NULL) {
8556 /* NULL oldptr is a length probe; SYSCTL_OUT() below reports the size */
8557 }
8558 if (req->newptr != USER_ADDR_NULL) {
8559 return (EPERM);
8560 }
8561 err = SYSCTL_OUT(req, &tx_chain_len_stats,
8562 sizeof(struct chain_len_stats));
8563
8564 return (err);
8565 }
8566
8567
8568 #if DEBUG || DEVELOPMENT
8569 /* Blob for sum16 verification */
8570 static uint8_t sumdata[] = {
8571 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
8572 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
8573 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
8574 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
8575 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
8576 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
8577 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
8578 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
8579 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
8580 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
8581 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
8582 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
8583 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
8584 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
8585 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
8586 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
8587 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
8588 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
8589 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
8590 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
8591 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
8592 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
8593 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
8594 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
8595 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
8596 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
8597 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
8598 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
8599 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
8600 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
8601 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
8602 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
8603 0xc8, 0x28, 0x02, 0x00, 0x00
8604 };
8605
8606 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
8607 static struct {
8608 boolean_t init;
8609 uint16_t len;
8610 uint16_t sumr; /* reference */
8611 uint16_t sumrp; /* reference, precomputed */
8612 } sumtbl[] = {
8613 { FALSE, 0, 0, 0x0000 },
8614 { FALSE, 1, 0, 0x001f },
8615 { FALSE, 2, 0, 0x8b1f },
8616 { FALSE, 3, 0, 0x8b27 },
8617 { FALSE, 7, 0, 0x790e },
8618 { FALSE, 11, 0, 0xcb6d },
8619 { FALSE, 20, 0, 0x20dd },
8620 { FALSE, 27, 0, 0xbabd },
8621 { FALSE, 32, 0, 0xf3e8 },
8622 { FALSE, 37, 0, 0x197d },
8623 { FALSE, 43, 0, 0x9eae },
8624 { FALSE, 64, 0, 0x4678 },
8625 { FALSE, 127, 0, 0x9399 },
8626 { FALSE, 256, 0, 0xd147 },
8627 { FALSE, 325, 0, 0x0358 },
8628 };
8629 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
8630
8631 static void
8632 dlil_verify_sum16(void)
8633 {
8634 struct mbuf *m;
8635 uint8_t *buf;
8636 int n;
8637
8638 /* Make sure test data plus extra room for alignment fits in cluster */
8639 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
8640
8641 kprintf("DLIL: running SUM16 self-tests ... ");
8642
8643 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
8644 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
8645 buf = mtod(m, uint8_t *); /* base address */
8646
8647 for (n = 0; n < SUMTBL_MAX; n++) {
8648 uint16_t len = sumtbl[n].len;
8649 int i;
8650
8651 /* Verify for all possible alignments */
8652 for (i = 0; i < (int)sizeof (uint64_t); i++) {
8653 uint16_t sum, sumr;
8654 uint8_t *c;
8655
8656 /* Copy over test data to mbuf */
8657 VERIFY(len <= sizeof (sumdata));
8658 c = buf + i;
8659 bcopy(sumdata, c, len);
8660
8661 /* Zero-offset test (align by data pointer) */
8662 m->m_data = (caddr_t)c;
8663 m->m_len = len;
8664 sum = m_sum16(m, 0, len);
8665
8666 if (!sumtbl[n].init) {
8667 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
8668 sumtbl[n].sumr = sumr;
8669 sumtbl[n].init = TRUE;
8670 } else {
8671 sumr = sumtbl[n].sumr;
8672 }
8673
8674 /* Something is horribly broken; stop now */
8675 if (sumr != sumtbl[n].sumrp) {
8676 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
8677 "for len=%d align=%d sum=0x%04x "
8678 "[expected=0x%04x]\n", __func__,
8679 len, i, sumr, sumtbl[n].sumrp);
8680 /* NOTREACHED */
8681 } else if (sum != sumr) {
8682 panic_plain("\n%s: broken m_sum16() for len=%d "
8683 "align=%d sum=0x%04x [expected=0x%04x]\n",
8684 __func__, len, i, sum, sumr);
8685 /* NOTREACHED */
8686 }
8687
8688 /* Alignment test by offset (fixed data pointer) */
8689 m->m_data = (caddr_t)buf;
8690 m->m_len = i + len;
8691 sum = m_sum16(m, i, len);
8692
8693 /* Something is horribly broken; stop now */
8694 if (sum != sumr) {
8695 panic_plain("\n%s: broken m_sum16() for len=%d "
8696 "offset=%d sum=0x%04x [expected=0x%04x]\n",
8697 __func__, len, i, sum, sumr);
8698 /* NOTREACHED */
8699 }
8700 #if INET
8701 /* Simple sum16 contiguous buffer test by alignment */
8702 sum = b_sum16(c, len);
8703
8704 /* Something is horribly broken; stop now */
8705 if (sum != sumr) {
8706 panic_plain("\n%s: broken b_sum16() for len=%d "
8707 "align=%d sum=0x%04x [expected=0x%04x]\n",
8708 __func__, len, i, sum, sumr);
8709 /* NOTREACHED */
8710 }
8711 #endif /* INET */
8712 }
8713 }
8714 m_freem(m);
8715
8716 kprintf("PASSED\n");
8717 }
8718 #endif /* DEBUG || DEVELOPMENT */
8719
8720 #define CASE_STRINGIFY(x) case x: return #x
8721
8722 __private_extern__ const char *
8723 dlil_kev_dl_code_str(u_int32_t event_code)
8724 {
8725 switch (event_code) {
8726 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
8727 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
8728 CASE_STRINGIFY(KEV_DL_SIFMTU);
8729 CASE_STRINGIFY(KEV_DL_SIFPHYS);
8730 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
8731 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
8732 CASE_STRINGIFY(KEV_DL_ADDMULTI);
8733 CASE_STRINGIFY(KEV_DL_DELMULTI);
8734 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
8735 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
8736 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
8737 CASE_STRINGIFY(KEV_DL_LINK_OFF);
8738 CASE_STRINGIFY(KEV_DL_LINK_ON);
8739 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
8740 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
8741 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
8742 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
8743 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
8744 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
8745 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
8746 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
8747 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
8748 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
8749 CASE_STRINGIFY(KEV_DL_ISSUES);
8750 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
8751 default:
8752 break;
8753 }
8754 return ("");
8755 }
8756
8757 static void
8758 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
8759 {
8760 #pragma unused(arg1)
8761 struct ifnet *ifp = arg0;
8762
8763 if (ifnet_is_attached(ifp, 1)) {
8764 nstat_ifnet_threshold_reached(ifp->if_index);
8765 ifnet_decr_iorefcnt(ifp);
8766 }
8767 }
8768
8769 void
8770 ifnet_notify_data_threshold(struct ifnet *ifp)
8771 {
8772 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
8773 uint64_t oldbytes = ifp->if_dt_bytes;
8774
8775 ASSERT(ifp->if_dt_tcall != NULL);
8776
8777 /*
8778 * If we went over the threshold, notify NetworkStatistics.
8779 * We rate-limit it based on the threshold interval value.
8780 */
8781 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
8782 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
8783 !thread_call_isactive(ifp->if_dt_tcall)) {
8784 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
8785 uint64_t now = mach_absolute_time(), deadline = now;
8786 uint64_t ival;
8787
8788 if (tival != 0) {
8789 nanoseconds_to_absolutetime(tival, &ival);
8790 clock_deadline_for_periodic_event(ival, now, &deadline);
8791 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
8792 deadline);
8793 } else {
8794 (void) thread_call_enter(ifp->if_dt_tcall);
8795 }
8796 }
8797 }
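
/*
 * Usage sketch (hypothetical caller): a data path that has just updated
 * the interface byte counters can nudge the notification machinery once
 * a threshold has been armed:
 *
 *	if (ifp->if_data_threshold != 0)
 *		ifnet_notify_data_threshold(ifp);
 *
 * The thread call set up above rate-limits the actual nstat notification
 * to at most one per threshold_interval seconds.
 */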
8798
8799 #if (DEVELOPMENT || DEBUG)
8800 /*
8801 * The sysctl variable name contains the input parameters of
8802 * ifnet_get_keepalive_offload_frames()
8803 * ifp (interface index): name[0]
8804 * frames_array_count: name[1]
8805 * frame_data_offset: name[2]
8806 * The return length gives used_frames_count
8807 */
8808 static int
8809 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
8810 {
8811 #pragma unused(oidp)
8812 int *name = (int *)arg1;
8813 u_int namelen = arg2;
8814 int idx;
8815 ifnet_t ifp = NULL;
8816 u_int32_t frames_array_count;
8817 size_t frame_data_offset;
8818 u_int32_t used_frames_count;
8819 struct ifnet_keepalive_offload_frame *frames_array = NULL;
8820 int error = 0;
8821 u_int32_t i;
8822
8823 /*
8824 * Only root can look at other people's TCP frames
8825 */
8826 error = proc_suser(current_proc());
8827 if (error != 0)
8828 goto done;
8829 /*
8830 * Validate the input parameters
8831 */
8832 if (req->newptr != USER_ADDR_NULL) {
8833 error = EPERM;
8834 goto done;
8835 }
8836 if (namelen != 3) {
8837 error = EINVAL;
8838 goto done;
8839 }
8840 if (req->oldptr == USER_ADDR_NULL) {
8841 error = EINVAL;
8842 goto done;
8843 }
8844 if (req->oldlen == 0) {
8845 error = EINVAL;
8846 goto done;
8847 }
8848 idx = name[0];
8849 frames_array_count = name[1];
8850 frame_data_offset = name[2];
8851
8852 /* Make sure the passed buffer is large enough */
8853 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
8854 req->oldlen) {
8855 error = ENOMEM;
8856 goto done;
8857 }
8858
8859 ifnet_head_lock_shared();
8860 if (!IF_INDEX_IN_RANGE(idx)) {
8861 ifnet_head_done();
8862 error = ENOENT;
8863 goto done;
8864 }
8865 ifp = ifindex2ifnet[idx];
8866 ifnet_head_done();
8867
8868 frames_array = _MALLOC(frames_array_count *
8869 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
8870 if (frames_array == NULL) {
8871 error = ENOMEM;
8872 goto done;
8873 }
8874
8875 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
8876 frames_array_count, frame_data_offset, &used_frames_count);
8877 if (error != 0) {
8878 printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
8879 __func__, error);
8880 goto done;
8881 }
8882
8883 for (i = 0; i < used_frames_count; i++) {
8884 error = SYSCTL_OUT(req, frames_array + i,
8885 sizeof(struct ifnet_keepalive_offload_frame));
8886 if (error != 0) {
8887 goto done;
8888 }
8889 }
8890 done:
8891 if (frames_array != NULL)
8892 _FREE(frames_array, M_TEMP);
8893 return (error);
8894 }
8895 #endif /* DEVELOPMENT || DEBUG */
8896
8897 void
8898 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
8899 struct ifnet *ifp)
8900 {
8901 tcp_update_stats_per_flow(ifs, ifp);
8902 }
8903
8904 static void
8905 dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
8906 {
8907 #pragma unused(arg1)
8908 struct ifnet *ifp = (struct ifnet *)arg0;
8909 struct dlil_threading_info *inp = ifp->if_inp;
8910
8911 ifnet_lock_shared(ifp);
8912 if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
8913 ifnet_lock_done(ifp);
8914 return;
8915 }
8916
8917 lck_mtx_lock_spin(&inp->input_lck);
8918 inp->input_waiting |= DLIL_INPUT_WAITING;
8919 if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
8920 !qempty(&inp->rcvq_pkts)) {
8921 inp->wtot++;
8922 wakeup_one((caddr_t)&inp->input_waiting);
8923 }
8924 lck_mtx_unlock(&inp->input_lck);
8925 ifnet_lock_done(ifp);
8926 }