/*
 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Data Link Interface Layer
 */

/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <net/kpi_protocol.h>

#include <net/if_types.h>
#include <net/kpi_interfacefilter.h>

#include <libkern/OSAtomic.h>

#include <machine/machine_routines.h>

#include <mach/thread_act.h>

#if CONFIG_MACF_NET
#include <security/mac_framework.h>
#endif /* CONFIG_MACF_NET */

#if PF
#include <net/pfvar.h>
#endif /* PF */
#define DBG_LAYER_BEG           DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END           DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE 4   /* LONGWORDS */
#define MAX_LINKADDR        4   /* LONGWORDS */
#define M_NKE               M_IFADDR

#if 1
#define DLIL_PRINTF printf
#else
#define DLIL_PRINTF kprintf
#endif
#define atomic_add_32(a, n)                                             \
    ((void) OSAddAtomic(n, (volatile SInt32 *)a))

#if PKT_PRIORITY
#define _CASSERT(x)                                                     \
    switch (0) { case 0: case (x): ; }

#define IF_DATA_REQUIRE_ALIGNED_32(f)                                   \
    _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int32_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_32(f)                             \
    _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int32_t)))
#endif /* PKT_PRIORITY */
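
/*
 * _CASSERT() above is a compile-time assertion: when "x" evaluates to 0 the
 * switch gains a duplicate "case 0:" label and compilation fails.  For
 * example (illustrative only):
 *
 *    IF_DATA_REQUIRE_ALIGNED_32(ifi_ibytes);
 *
 * compiles only when that if_data_internal field is 32-bit aligned, which
 * the atomic counter updates in this file rely on.
 */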
struct if_proto {
    SLIST_ENTRY(if_proto)       next_hash;
    int                         refcount;
    int                         detaching;
    struct ifnet                *ifp;
    struct domain               *dl_domain;
    protocol_family_t           protocol_family;
    int                         proto_kpi;
    union {
        struct {
            proto_media_input           input;
            proto_media_preout          pre_output;
            proto_media_event           event;
            proto_media_ioctl           ioctl;
            proto_media_detached        detached;
            proto_media_resolve_multi   resolve_multi;
            proto_media_send_arp        send_arp;
        } v1;
        struct {
            proto_media_input_v2        input;
            proto_media_preout          pre_output;
            proto_media_event           event;
            proto_media_ioctl           ioctl;
            proto_media_detached        detached;
            proto_media_resolve_multi   resolve_multi;
            proto_media_send_arp        send_arp;
        } v2;
    } kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
struct dlil_ifnet {
    /* ifnet and drvr_ext are used by the stack and drivers
       drvr_ext extends the public ifnet and must follow dl_if */
    struct ifnet    dl_if;                  /* public ifnet */

    /* dlil private fields */
    TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnets are linked together */
                                            /* it is not the ifnet list */
    void            *if_uniqueid;           /* unique id identifying the interface */
    size_t          if_uniqueid_len;        /* length of the unique id */
    char            if_namestorage[IFNAMSIZ]; /* interface name storage */
};
struct ifnet_filter {
    TAILQ_ENTRY(ifnet_filter)   filt_next;
    ifnet_t                     filt_ifp;
    int                         filt_detaching;

    const char                  *filt_name;
    void                        *filt_cookie;
    protocol_family_t           filt_protocol;
    iff_input_func              filt_input;
    iff_output_func             filt_output;
    iff_event_func              filt_event;
    iff_ioctl_func              filt_ioctl;
    iff_detached_func           filt_detached;
};

struct proto_input_entry;
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t    *dlil_lock_group;
static lck_grp_t    *ifnet_lock_group;
static lck_grp_t    *ifnet_head_lock_group;
static lck_attr_t   *ifnet_lock_attr;
static lck_rw_t     *ifnet_head_mutex;
static lck_mtx_t    *dlil_ifnet_mutex;
static lck_mtx_t    *dlil_mutex;
static u_int32_t    dlil_read_count = 0;
static u_int32_t    dlil_detach_waiting = 0;
u_int32_t           dlil_filter_count = 0;
extern u_int32_t    ipv4_ll_arp_aware;
#if IFNET_ROUTE_REFCNT
/*
 * Updating this variable should be done by first acquiring the global
 * radix node head (rnh_lock), in tandem with setting/clearing the
 * PR_AGGDRAIN for routedomain.
 */
u_int32_t ifnet_aggressive_drainers;
static u_int32_t net_rtref;
#endif /* IFNET_ROUTE_REFCNT */
static struct dlil_threading_info dlil_lo_thread;
__private_extern__ struct dlil_threading_info *dlil_lo_thread_ptr = &dlil_lo_thread;

static struct mbuf *dlil_lo_input_mbuf_head = NULL;
static struct mbuf *dlil_lo_input_mbuf_tail = NULL;

#if IFNET_INPUT_SANITY_CHK
static int dlil_lo_input_mbuf_count = 0;
int dlil_input_sanity_check = 0;    /* sanity checking of input packet lists received */
#endif
int dlil_multithreaded_input = 1;
static int cur_dlil_input_threads = 0;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_call_delayed_detach_thread(void);

static void dlil_read_begin(void);
static __inline__ void dlil_read_end(void);
static int dlil_write_begin(void);
static void dlil_write_end(void);

#if DEBUG
__private_extern__ int dlil_verbose = 1;
#else
__private_extern__ int dlil_verbose = 0;
#endif /* DEBUG */
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern void bpfdetach(struct ifnet*);
extern void proto_input_run(void); // new run_netisr

void dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m);
static void dlil_input_thread_func(struct dlil_threading_info *inpthread);
__private_extern__ int dlil_create_input_thread(
    ifnet_t, struct dlil_threading_info *);
__private_extern__ void dlil_terminate_input_thread(
    struct dlil_threading_info *);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if PF
extern u_int32_t inject_buckets;
#endif /* PF */
static const u_int32_t dlil_writer_waiting = 0x80000000;
static lck_grp_attr_t   *dlil_grp_attributes = NULL;
static lck_attr_t       *dlil_lck_attributes = NULL;
static lck_grp_t        *dlil_input_lock_grp = NULL;

static inline void*
_cast_non_const(const void * ptr) {
    union {
        const void* cval;
        void*       val;
    } ret;

    ret.cval = ptr;
    return (ret.val);
}
/* Should these be inline? */
static void
dlil_read_begin(void)
{
    u_int32_t new_value;
    u_int32_t old_value;
    struct uthread *uth = get_bsdthread_info(current_thread());

    if (uth->dlil_incremented_read == dlil_writer_waiting)
        panic("dlil_read_begin - thread is already a writer");

    do {
again:
        old_value = dlil_read_count;

        if ((old_value & dlil_writer_waiting) != 0 && uth->dlil_incremented_read == 0)
        {
            tsleep(&dlil_read_count, PRIBIO, "dlil_read_count", 1);
            goto again;
        }

        new_value = old_value + 1;
    } while (!OSCompareAndSwap((UInt32)old_value, (UInt32)new_value, (UInt32*)&dlil_read_count));

    uth->dlil_incremented_read++;
}
static __inline__ void
dlil_read_end(void)
{
    struct uthread *uth = get_bsdthread_info(current_thread());

    OSDecrementAtomic(&dlil_read_count);
    uth->dlil_incremented_read--;
    if (dlil_read_count == dlil_writer_waiting)
        wakeup(_cast_non_const(&dlil_writer_waiting));
}
static int
dlil_write_begin(void)
{
    struct uthread *uth = get_bsdthread_info(current_thread());

    if (uth->dlil_incremented_read != 0) {
        return EDEADLK;
    }
    lck_mtx_lock(dlil_mutex);
    OSBitOrAtomic((UInt32)dlil_writer_waiting, &dlil_read_count);
again:
    if (dlil_read_count == dlil_writer_waiting) {
        uth->dlil_incremented_read = dlil_writer_waiting;
        return 0;
    }
    else {
        tsleep(_cast_non_const(&dlil_writer_waiting), PRIBIO, "dlil_writer_waiting", 1);
        goto again;
    }
}
static void
dlil_write_end(void)
{
    struct uthread *uth = get_bsdthread_info(current_thread());

    if (uth->dlil_incremented_read != dlil_writer_waiting)
        panic("dlil_write_end - thread is not a writer");
    OSBitAndAtomic((UInt32)~dlil_writer_waiting, &dlil_read_count);
    lck_mtx_unlock(dlil_mutex);
    uth->dlil_incremented_read = 0;
    wakeup(&dlil_read_count);
}
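
/*
 * Usage sketch (illustrative only): readers bracket their work with
 * dlil_read_begin()/dlil_read_end().  dlil_write_begin() returns EDEADLK
 * when the calling thread already holds the read side; the caller must
 * then defer the mutation rather than block, e.g.:
 *
 *    if ((err = dlil_write_begin()) == EDEADLK) {
 *        // mark the item detaching and wake dlil_delayed_detach_thread
 *    } else {
 *        // ... mutate the filter/protocol lists ...
 *        dlil_write_end();
 *    }
 */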
#define PROTO_HASH_SLOTS    0x5

/*
 * Internal functions.
 */

static int
proto_hash_value(u_int32_t protocol_family)
{
    /*
     * dlil_proto_unplumb_all() depends on the mapping between
     * the hash bucket index and the protocol family defined
     * here; future changes must be applied there as well.
     */
    switch(protocol_family) {
        case PF_INET:
            return 0;
        case PF_INET6:
            return 1;
        case PF_APPLETALK:
            return 2;
        case PF_VLAN:
            return 3;
        default:
            return 4;
    }
}
static struct if_proto*
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
    struct if_proto *proto = NULL;
    u_int32_t i = proto_hash_value(protocol_family);
    if (ifp->if_proto_hash) {
        proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
    }

    while(proto && proto->protocol_family != protocol_family) {
        proto = SLIST_NEXT(proto, next_hash);
    }

    return proto;
}
static void
if_proto_ref(struct if_proto *proto)
{
    OSAddAtomic(1, &proto->refcount);
}

static void
if_proto_free(struct if_proto *proto)
{
    int oldval = OSAddAtomic(-1, &proto->refcount);

    if (oldval == 1) { /* This was the last reference */
        FREE(proto, M_IFADDR);
    }
}
__private_extern__ void
ifnet_lock_assert(
    __unused struct ifnet *ifp,
    __unused int what)
{
#if IFNET_RW_LOCK
    /*
     * Not implemented for rw locks.
     *
     * Function exists so when/if we use mutex we can
     * enable this check.
     */
#else
    lck_mtx_assert(ifp->if_lock, what);
#endif
}
__private_extern__ void
ifnet_lock_shared(
    struct ifnet *ifp)
{
#if IFNET_RW_LOCK
    lck_rw_lock_shared(ifp->if_lock);
#else
    lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_NOTOWNED);
    lck_mtx_lock(ifp->if_lock);
#endif
}

__private_extern__ void
ifnet_lock_exclusive(
    struct ifnet *ifp)
{
#if IFNET_RW_LOCK
    lck_rw_lock_exclusive(ifp->if_lock);
#else
    lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_NOTOWNED);
    lck_mtx_lock(ifp->if_lock);
#endif
}

__private_extern__ void
ifnet_lock_done(
    struct ifnet *ifp)
{
#if IFNET_RW_LOCK
    lck_rw_done(ifp->if_lock);
#else
    lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_OWNED);
    lck_mtx_unlock(ifp->if_lock);
#endif
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
    lck_rw_lock_shared(ifnet_head_mutex);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
    lck_rw_lock_exclusive(ifnet_head_mutex);
}

__private_extern__ void
ifnet_head_done(void)
{
    lck_rw_done(ifnet_head_mutex);
}
static int dlil_ifp_proto_count(struct ifnet * ifp)
{
    int count = 0;
    int i;

    if (ifp->if_proto_hash != NULL) {
        for (i = 0; i < PROTO_HASH_SLOTS; i++) {
            struct if_proto *proto;
            SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
                count++;
            }
        }
    }

    return count;
}
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, u_int32_t event_code,
    struct net_event_data *event_data, u_int32_t event_data_len)
{
    struct net_event_data   ev_data;
    struct kev_msg          ev_msg;

    /*
     * a net event always starts with a net_event_data structure
     * but the caller can generate a simple net event or
     * provide a longer event structure to post
     */

    ev_msg.vendor_code  = KEV_VENDOR_APPLE;
    ev_msg.kev_class    = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = event_subclass;
    ev_msg.event_code   = event_code;

    if (event_data == 0) {
        event_data = &ev_data;
        event_data_len = sizeof(struct net_event_data);
    }

    strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
    event_data->if_family = ifp->if_family;
    event_data->if_unit   = (u_int32_t) ifp->if_unit;

    ev_msg.dv[0].data_length = event_data_len;
    ev_msg.dv[0].data_ptr    = event_data;
    ev_msg.dv[1].data_length = 0;

    dlil_event_internal(ifp, &ev_msg);
}
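
/*
 * Illustrative call (hypothetical event choice): posting a simple
 * link-status event with no extra payload lets dlil_post_msg fill in the
 * interface name, family and unit before delivery:
 *
 *    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 */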
__private_extern__ int
dlil_create_input_thread(
    ifnet_t ifp, struct dlil_threading_info *inputthread)
{
    int error;

    bzero(inputthread, sizeof(*inputthread));
    // loopback ifp may not be configured at dlil_init time.
    if (ifp == lo_ifp)
        strlcat(inputthread->input_name, "dlil_input_main_thread_mtx", 32);
    else
        snprintf(inputthread->input_name, 32, "dlil_input_%s%d_mtx", ifp->if_name, ifp->if_unit);

    inputthread->lck_grp = lck_grp_alloc_init(inputthread->input_name, dlil_grp_attributes);
    inputthread->input_lck = lck_mtx_alloc_init(inputthread->lck_grp, dlil_lck_attributes);

    error = kernel_thread_start((thread_continue_t)dlil_input_thread_func, inputthread, &inputthread->input_thread);
    if (error == 0) {
        ml_thread_policy(inputthread->input_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
        /*
         * Except for the loopback dlil input thread, we create
         * an affinity set so that the matching workloop thread
         * can be scheduled on the same processor set.
         */
        if (net_affinity && inputthread != dlil_lo_thread_ptr) {
            struct thread *tp = inputthread->input_thread;
            u_int32_t tag;
            /*
             * Randomize to reduce the probability
             * of affinity tag namespace collision.
             */
            read_random(&tag, sizeof (tag));
            if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
                thread_reference(tp);
                inputthread->tag = tag;
                inputthread->net_affinity = TRUE;
            }
        }
    } else {
        panic("dlil_create_input_thread: couldn't create thread\n");
    }
    OSAddAtomic(1, &cur_dlil_input_threads);

    printf("dlil_create_input_thread: threadinfo: %p input_thread=%p threads: cur=%d max=%d\n",
        inputthread, inputthread->input_thread, dlil_multithreaded_input, cur_dlil_input_threads);

    return error;
}
__private_extern__ void
dlil_terminate_input_thread(
    struct dlil_threading_info *inputthread)
{
    OSAddAtomic(-1, &cur_dlil_input_threads);

    lck_mtx_unlock(inputthread->input_lck);
    lck_mtx_free(inputthread->input_lck, inputthread->lck_grp);
    lck_grp_free(inputthread->lck_grp);

    FREE(inputthread, M_NKE);

    /* For the extra reference count from kernel_thread_start() */
    thread_deallocate(current_thread());

    thread_terminate(current_thread());
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
    thread_affinity_policy_data_t policy;

    bzero(&policy, sizeof (policy));
    policy.affinity_tag = tag;
    return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
        (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}
void
dlil_init(void)
{
    thread_t thread = THREAD_NULL;

    PE_parse_boot_argn("net_affinity", &net_affinity, sizeof (net_affinity));
#if IFNET_ROUTE_REFCNT
    PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
#endif /* IFNET_ROUTE_REFCNT */

    TAILQ_INIT(&dlil_ifnet_head);
    TAILQ_INIT(&ifnet_head);

    /* Setup the lock groups we will use */
    dlil_grp_attributes = lck_grp_attr_alloc_init();

    dlil_lock_group = lck_grp_alloc_init("dlil internal locks", dlil_grp_attributes);
    ifnet_lock_group = lck_grp_alloc_init("ifnet locks", dlil_grp_attributes);
    ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock", dlil_grp_attributes);
    dlil_input_lock_grp = lck_grp_alloc_init("dlil input lock", dlil_grp_attributes);

    /* Setup the lock attributes we will use */
    dlil_lck_attributes = lck_attr_alloc_init();

    ifnet_lock_attr = lck_attr_alloc_init();

    ifnet_head_mutex = lck_rw_alloc_init(ifnet_head_lock_group, dlil_lck_attributes);
    dlil_ifnet_mutex = lck_mtx_alloc_init(dlil_lock_group, dlil_lck_attributes);
    dlil_mutex = lck_mtx_alloc_init(dlil_lock_group, dlil_lck_attributes);

    lck_attr_free(dlil_lck_attributes);
    dlil_lck_attributes = NULL;

    /*
     * Create and start up the first dlil input thread once everything is initialized
     */
    dlil_create_input_thread(0, dlil_lo_thread_ptr);

    (void) kernel_thread_start((thread_continue_t)dlil_call_delayed_detach_thread, NULL, &thread);
    thread_deallocate(thread);
#if PF
    /* Initialize the packet filter */
    pf_init();
#endif /* PF */
}
__private_extern__ int
dlil_attach_filter(
    ifnet_t ifp,
    const struct iff_filter *if_filter,
    interface_filter_t *filter_ref)
{
    int retval = 0;
    struct ifnet_filter *filter;

    MALLOC(filter, struct ifnet_filter *, sizeof(*filter), M_NKE, M_WAITOK);
    if (filter == NULL)
        return ENOMEM;
    bzero(filter, sizeof(*filter));

    filter->filt_ifp = ifp;
    filter->filt_cookie = if_filter->iff_cookie;
    filter->filt_name = if_filter->iff_name;
    filter->filt_protocol = if_filter->iff_protocol;
    filter->filt_input = if_filter->iff_input;
    filter->filt_output = if_filter->iff_output;
    filter->filt_event = if_filter->iff_event;
    filter->filt_ioctl = if_filter->iff_ioctl;
    filter->filt_detached = if_filter->iff_detached;

    if ((retval = dlil_write_begin()) != 0) {
        /* Failed to acquire the write lock */
        FREE(filter, M_NKE);
        return retval;
    }
    TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
    dlil_write_end();
    *filter_ref = filter;

    /*
     * Bump filter count and route_generation ID to let TCP
     * know it shouldn't do TSO on this connection
     */
    OSAddAtomic(1, &dlil_filter_count);
    if (use_routegenid)
        routegenid_update();

    return retval;
}
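
/*
 * Sketch of a caller (hypothetical filter; iflt_attach() is the public
 * KPI wrapper that lands here).  Only the callbacks a filter cares about
 * need to be non-NULL:
 *
 *    struct iff_filter my_filt = {
 *        .iff_cookie   = my_ctx,
 *        .iff_name     = "com.example.filter",
 *        .iff_protocol = 0,            // 0 matches all protocols
 *        .iff_input    = my_input_fn,
 *    };
 *    interface_filter_t ref;
 *    errno_t err = iflt_attach(ifp, &my_filt, &ref);
 */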
static int
dlil_detach_filter_internal(
    interface_filter_t filter,
    int detached)
{
    int retval = 0;

    if (detached == 0) {
        ifnet_t ifp = NULL;
        interface_filter_t entry = NULL;

        /* Take the write lock */
        retval = dlil_write_begin();
        if (retval != 0 && retval != EDEADLK)
            return retval;

        /*
         * At this point either we have the write lock (retval == 0)
         * or we couldn't get it (retval == EDEADLK) because someone
         * else up the stack is holding the read lock. It is safe to
         * read, either the read or write is held. Verify the filter
         * parameter before proceeding.
         */
        ifnet_head_lock_shared();
        TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
            TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
                if (entry == filter)
                    break;
            }
            if (entry == filter)
                break;
        }
        ifnet_head_done();

        if (entry != filter) {
            /* filter parameter is not a valid filter ref */
            if (retval == 0) {
                dlil_write_end();
            }
            return EINVAL;
        }

        if (retval == EDEADLK) {
            /* Perform a delayed detach */
            filter->filt_detaching = 1;
            dlil_detach_waiting = 1;
            wakeup(&dlil_detach_waiting);
            return 0;
        }

        /* Remove the filter from the list */
        TAILQ_REMOVE(&ifp->if_flt_head, filter, filt_next);
        dlil_write_end();
    }

    /* Call the detached function if there is one */
    if (filter->filt_detached)
        filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

    /* Free the filter */
    FREE(filter, M_NKE);

    /*
     * Decrease filter count and route_generation ID to let TCP
     * know it should reevaluate doing TSO or not
     */
    OSAddAtomic(-1, &dlil_filter_count);
    if (use_routegenid)
        routegenid_update();

    return retval;
}

__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
    if (filter == NULL)
        return;
    dlil_detach_filter_internal(filter, 0);
}
static void
dlil_input_thread_func(
    struct dlil_threading_info *inputthread)
{
    while (TRUE) {
        struct mbuf *m = NULL, *m_loop = NULL;
#if IFNET_INPUT_SANITY_CHK
        int loop_cnt = 0, mbuf_cnt;
        int count;
        struct mbuf *m1;
#endif /* IFNET_INPUT_SANITY_CHK */

        lck_mtx_lock(inputthread->input_lck);

        /* Wait until there is work to be done */
        while ((inputthread->input_waiting & ~DLIL_INPUT_RUNNING) == 0) {
            inputthread->input_waiting &= ~DLIL_INPUT_RUNNING;
            msleep(&inputthread->input_waiting, inputthread->input_lck, 0, inputthread->input_name, 0);
        }

        lck_mtx_assert(inputthread->input_lck, LCK_MTX_ASSERT_OWNED);

        m = inputthread->mbuf_head;
        inputthread->mbuf_head = NULL;
        inputthread->mbuf_tail = NULL;

        if (inputthread->input_waiting & DLIL_INPUT_TERMINATE) {
            if (m)
                mbuf_freem_list(m);
            /* this is the end */
            dlil_terminate_input_thread(inputthread);
            return;
        }

        inputthread->input_waiting |= DLIL_INPUT_RUNNING;
        inputthread->input_waiting &= ~DLIL_INPUT_WAITING;

        if (inputthread == dlil_lo_thread_ptr) {
            m_loop = dlil_lo_input_mbuf_head;
            dlil_lo_input_mbuf_head = NULL;
            dlil_lo_input_mbuf_tail = NULL;
        }

#if IFNET_INPUT_SANITY_CHK
        if (dlil_input_sanity_check != 0) {
            mbuf_cnt = inputthread->mbuf_count;
            inputthread->mbuf_count = 0;
            if (inputthread == dlil_lo_thread_ptr) {
                loop_cnt = dlil_lo_input_mbuf_count;
                dlil_lo_input_mbuf_count = 0;
            }

            lck_mtx_unlock(inputthread->input_lck);

            for (m1 = m, count = 0; m1; m1 = mbuf_nextpkt(m1)) {
                count++;
            }
            if (count != mbuf_cnt) {
                panic("dlil_input_func - thread=%p reg. loop queue has %d packets, should have %d\n",
                    inputthread, count, mbuf_cnt);
            }

            if (inputthread == dlil_lo_thread_ptr) {
                for (m1 = m_loop, count = 0; m1; m1 = mbuf_nextpkt(m1)) {
                    count++;
                }
                if (count != loop_cnt) {
                    panic("dlil_input_func - thread=%p loop queue has %d packets, should have %d\n",
                        inputthread, count, loop_cnt);
                }
            }
        } else
#endif /* IFNET_INPUT_SANITY_CHK */
        {
            lck_mtx_unlock(inputthread->input_lck);
        }

        /*
         * NOTE warning %%% attention !!!!
         * We should think about putting some thread starvation safeguards if
         * we deal with long chains of packets.
         */
        if (m_loop) {
            if (inputthread == dlil_lo_thread_ptr)
                dlil_input_packet_list(lo_ifp, m_loop);
#if IFNET_INPUT_SANITY_CHK
            else
                panic("dlil_input_func - thread=%p loop queue has %d packets, should have none!\n",
                    inputthread, loop_cnt);
#endif /* IFNET_INPUT_SANITY_CHK */
        }

        if (m)
            dlil_input_packet_list(0, m);

        lck_mtx_lock(inputthread->input_lck);

        if ((inputthread->input_waiting & (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)) != 0) {
            lck_mtx_unlock(inputthread->input_lck);
            proto_input_run();
        }
        else
            lck_mtx_unlock(inputthread->input_lck);
    }
}
errno_t
ifnet_input(
    ifnet_t ifp,
    mbuf_t m_head,
    const struct ifnet_stat_increment_param *stats)
{
    struct thread *tp = current_thread();
    mbuf_t m_tail;
    struct dlil_threading_info *inp;
#if IFNET_INPUT_SANITY_CHK
    u_int32_t pkt_count = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

    if (ifp == NULL || m_head == NULL) {
        if (m_head)
            mbuf_freem_list(m_head);
        return EINVAL;
    }

    m_tail = m_head;
    while (1) {
#if IFNET_INPUT_SANITY_CHK
        if (dlil_input_sanity_check != 0) {
            ifnet_t rcvif;

            rcvif = mbuf_pkthdr_rcvif(m_tail);
            pkt_count++;

            if (rcvif == NULL ||
                (ifp->if_type != IFT_LOOP && rcvif != ifp) ||
                (mbuf_flags(m_head) & MBUF_PKTHDR) == 0) {
                panic("ifnet_input - invalid mbuf %p\n", m_tail);
            }
        }
#endif /* IFNET_INPUT_SANITY_CHK */
        if (mbuf_nextpkt(m_tail) == NULL)
            break;
        m_tail = mbuf_nextpkt(m_tail);
    }

    inp = ifp->if_input_thread;

    if (dlil_multithreaded_input == 0 || inp == NULL)
        inp = dlil_lo_thread_ptr;

    /*
     * If there is a matching dlil input thread associated with an
     * affinity set, associate this workloop thread with the same set.
     * We will only do this once.
     */
    lck_mtx_lock(inp->input_lck);
    if (inp->net_affinity && inp->workloop_thread == NULL) {
        u_int32_t tag = inp->tag;
        inp->workloop_thread = tp;
        lck_mtx_unlock(inp->input_lck);

        /* Associate the current thread with the new affinity tag */
        (void) dlil_affinity_set(tp, tag);

        /*
         * Take a reference on the workloop (current) thread; during
         * detach, we will need to refer to it in order to tear down
         * its affinity set.
         */
        thread_reference(tp);
        lck_mtx_lock(inp->input_lck);
    }

    /*
     * Because of loopbacked multicast we cannot stuff the ifp in
     * the rcvif of the packet header: loopback has its own dlil
     * input queue
     */

    if (inp == dlil_lo_thread_ptr && ifp->if_type == IFT_LOOP) {
        if (dlil_lo_input_mbuf_head == NULL)
            dlil_lo_input_mbuf_head = m_head;
        else if (dlil_lo_input_mbuf_tail != NULL)
            dlil_lo_input_mbuf_tail->m_nextpkt = m_head;
        dlil_lo_input_mbuf_tail = m_tail;
#if IFNET_INPUT_SANITY_CHK
        if (dlil_input_sanity_check != 0) {
            dlil_lo_input_mbuf_count += pkt_count;
            inp->input_mbuf_cnt += pkt_count;
            inp->input_wake_cnt++;

            lck_mtx_assert(inp->input_lck, LCK_MTX_ASSERT_OWNED);
        }
#endif
    }
    else {
        if (inp->mbuf_head == NULL)
            inp->mbuf_head = m_head;
        else if (inp->mbuf_tail != NULL)
            inp->mbuf_tail->m_nextpkt = m_head;
        inp->mbuf_tail = m_tail;
#if IFNET_INPUT_SANITY_CHK
        if (dlil_input_sanity_check != 0) {
            inp->mbuf_count += pkt_count;
            inp->input_mbuf_cnt += pkt_count;
            inp->input_wake_cnt++;

            lck_mtx_assert(inp->input_lck, LCK_MTX_ASSERT_OWNED);
        }
#endif
    }

    inp->input_waiting |= DLIL_INPUT_WAITING;
    if ((inp->input_waiting & DLIL_INPUT_RUNNING) == 0) {
        wakeup((caddr_t)&inp->input_waiting);
    }
    if (stats) {
        ifp->if_data.ifi_ipackets += stats->packets_in;
        ifp->if_data.ifi_ibytes += stats->bytes_in;
        ifp->if_data.ifi_ierrors += stats->errors_in;

        ifp->if_data.ifi_opackets += stats->packets_out;
        ifp->if_data.ifi_obytes += stats->bytes_out;
        ifp->if_data.ifi_oerrors += stats->errors_out;

        ifp->if_data.ifi_collisions += stats->collisions;
        ifp->if_data.ifi_iqdrops += stats->dropped;
    }

    lck_mtx_unlock(inp->input_lck);

    return 0;
}
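
/*
 * Illustrative driver-side call (hypothetical counts): a driver chains
 * received packets through m_nextpkt and hands the whole list in at once;
 * the stat increments are folded into if_data above:
 *
 *    struct ifnet_stat_increment_param stats;
 *    bzero(&stats, sizeof (stats));
 *    stats.packets_in = npkts;
 *    stats.bytes_in   = nbytes;
 *    (void) ifnet_input(ifp, m_head, &stats);
 */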
static int
dlil_interface_filters_input(struct ifnet * ifp, struct mbuf * * m_p,
    char * * frame_header_p,
    protocol_family_t protocol_family)
{
    struct ifnet_filter * filter;

    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        int result;

        if (filter->filt_input
            && (filter->filt_protocol == 0
                || filter->filt_protocol == protocol_family)) {
            result = (*filter->filt_input)(filter->filt_cookie,
                ifp, protocol_family,
                m_p, frame_header_p);
            if (result != 0) {
                return (result);
            }
        }
    }

    /*
     * Strip away M_PROTO1 bit prior to sending packet up the stack as
     * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
     */
    if (*m_p != NULL)
        (*m_p)->m_flags &= ~M_PROTO1;

    return (0);
}
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
    int error;

    if (ifproto->proto_kpi == kProtoKPI_v1) {
        /* Version 1 protocols get one packet at a time */
        while (m != NULL) {
            char *  frame_header;
            mbuf_t  next_packet;

            next_packet = m->m_nextpkt;
            m->m_nextpkt = NULL;
            frame_header = m->m_pkthdr.header;
            m->m_pkthdr.header = NULL;
            error = (*ifproto->kpi.v1.input)(ifproto->ifp,
                ifproto->protocol_family,
                m, frame_header);
            if (error != 0 && error != EJUSTRETURN)
                m_freem(m);
            m = next_packet;
        }
    }
    else if (ifproto->proto_kpi == kProtoKPI_v2) {
        /* Version 2 protocols support packet lists */
        error = (*ifproto->kpi.v2.input)(ifproto->ifp,
            ifproto->protocol_family,
            m);
        if (error != 0 && error != EJUSTRETURN)
            m_freem_list(m);
    }
    return;
}
__private_extern__ void
dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
{
    int error;
    protocol_family_t protocol_family;
    mbuf_t next_packet;
    ifnet_t ifp = ifp_param;
    char * frame_header;
    struct if_proto * last_ifproto = NULL;
    mbuf_t pkt_first = NULL;
    mbuf_t * pkt_next = NULL;

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);

    while (m != NULL) {
        struct if_proto * ifproto = NULL;

        next_packet = m->m_nextpkt;
        m->m_nextpkt = NULL;
        if (ifp_param == NULL)
            ifp = m->m_pkthdr.rcvif;
        frame_header = m->m_pkthdr.header;
        m->m_pkthdr.header = NULL;

        /* dlil lock protects the demux and interface filters */
        dlil_read_begin();

#if PKT_PRIORITY
        switch (m->m_pkthdr.prio) {
            case MBUF_TC_BK:
                ifp->if_tc.ifi_ibkpackets++;
                ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
                break;
            case MBUF_TC_VI:
                ifp->if_tc.ifi_ivipackets++;
                ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
                break;
            case MBUF_TC_VO:
                ifp->if_tc.ifi_ivopackets++;
                ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
                break;
            default:
                break;
        }
#endif /* PKT_PRIORITY */

        /* find which protocol family this packet is for */
        error = (*ifp->if_demux)(ifp, m, frame_header,
            &protocol_family);
        if (error != 0) {
            if (error == EJUSTRETURN) {
                goto next;
            }
            protocol_family = 0;
        }

        if (m->m_flags & (M_BCAST|M_MCAST))
            ifp->if_imcasts++;

        /* run interface filters, exclude VLAN packets PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            int filter_result;

            filter_result = dlil_interface_filters_input(ifp, &m,
                &frame_header,
                protocol_family);
            if (filter_result != 0) {
                if (filter_result != EJUSTRETURN) {
                    m_freem(m);
                }
                goto next;
            }
        }
        if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) {
            m_freem(m);
            goto next;
        }

        /* Lookup the protocol attachment to this interface */
        if (protocol_family == 0) {
            ifproto = NULL;
        }
        else if (last_ifproto != NULL
            && last_ifproto->ifp == ifp
            && (last_ifproto->protocol_family
                == protocol_family)) {
            ifproto = last_ifproto;
        }
        else {
            ifproto = find_attached_proto(ifp, protocol_family);
        }
        if (ifproto == NULL) {
            /* no protocol for this packet, discard */
            m_freem(m);
            goto next;
        }
        if (ifproto != last_ifproto) {
            /* make sure ifproto can't go away during input */
            if_proto_ref(ifproto);
            if (last_ifproto != NULL) {
                /* pass up the list for the previous protocol */
                dlil_read_end();

                dlil_ifproto_input(last_ifproto, pkt_first);
                pkt_first = NULL;
                if_proto_free(last_ifproto);
                dlil_read_begin();
            }
            last_ifproto = ifproto;
        }
        /* extend the list */
        m->m_pkthdr.header = frame_header;
        if (pkt_first == NULL) {
            pkt_first = m;
        } else {
            *pkt_next = m;
        }
        pkt_next = &m->m_nextpkt;

    next:
        if (next_packet == NULL && last_ifproto != NULL) {
            /* pass up the last list of packets */
            dlil_read_end();

            dlil_ifproto_input(last_ifproto, pkt_first);
            if_proto_free(last_ifproto);
            dlil_read_begin();

            last_ifproto = NULL;
        }
        dlil_read_end();
        m = next_packet;
    }
    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
    return;
}
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
{
    struct ifnet_filter *filter;

    if (ifp_use(ifp, kIfNetUseCount_MustNotBeZero) == 0) {
        dlil_read_begin();

        /* Pass the event to the interface filters */
        TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
            if (filter->filt_event)
                filter->filt_event(filter->filt_cookie, ifp, filter->filt_protocol, event);
        }

        if (ifp->if_proto_hash) {
            int i;

            for (i = 0; i < PROTO_HASH_SLOTS; i++) {
                struct if_proto *proto;

                SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
                    proto_media_event eventp = proto->proto_kpi == kProtoKPI_v1
                        ? proto->kpi.v1.event : proto->kpi.v2.event;

                    if (eventp)
                        eventp(ifp, proto->protocol_family, event);
                }
            }
        }

        dlil_read_end();

        /* Pass the event to the interface */
        if (ifp->if_event)
            ifp->if_event(ifp, event);

        if (ifp_unuse(ifp))
            ifp_use_reached_zero(ifp);
    }

    return kev_post_msg(event);
}
errno_t
ifnet_event(
    ifnet_t ifp,
    struct kern_event_msg *event)
{
    struct kev_msg kev_msg;
    int result = 0;

    if (ifp == NULL || event == NULL) return EINVAL;

    kev_msg.vendor_code    = event->vendor_code;
    kev_msg.kev_class      = event->kev_class;
    kev_msg.kev_subclass   = event->kev_subclass;
    kev_msg.event_code     = event->event_code;
    kev_msg.dv[0].data_ptr = &event->event_data[0];
    kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
    kev_msg.dv[1].data_length = 0;

    result = dlil_event_internal(ifp, &kev_msg);

    return result;
}
#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
    struct mbuf *m;
    struct ip *ip;
    struct ip6_hdr *ip6;
    int type = SOCK_RAW;

    if (!raw) {
        switch (family) {
        case PF_INET:
            m = m_pullup(*mp, sizeof(struct ip));
            if (m == NULL)
                break;
            *mp = m;
            ip = mtod(m, struct ip *);
            if (ip->ip_p == IPPROTO_TCP)
                type = SOCK_STREAM;
            else if (ip->ip_p == IPPROTO_UDP)
                type = SOCK_DGRAM;
            break;
        case PF_INET6:
            m = m_pullup(*mp, sizeof(struct ip6_hdr));
            if (m == NULL)
                break;
            *mp = m;
            ip6 = mtod(m, struct ip6_hdr *);
            if (ip6->ip6_nxt == IPPROTO_TCP)
                type = SOCK_STREAM;
            else if (ip6->ip6_nxt == IPPROTO_UDP)
                type = SOCK_DGRAM;
            break;
        }
    }

    return (type);
}
#endif /* CONFIG_MACF_NET */
#if PKT_PRIORITY
static void
if_inc_traffic_class_out(ifnet_t ifp, mbuf_t m)
{
    if (!(m->m_flags & M_PKTHDR))
        return;

    switch (m->m_pkthdr.prio) {
        case MBUF_TC_BK:
            ifp->if_tc.ifi_obkpackets++;
            ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
            break;
        case MBUF_TC_VI:
            ifp->if_tc.ifi_ovipackets++;
            ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
            break;
        case MBUF_TC_VO:
            ifp->if_tc.ifi_ovopackets++;
            ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
            break;
        default:
            break;
    }
}
#endif /* PKT_PRIORITY */
static int
dlil_output_list(
    struct ifnet *ifp,
    u_long proto_family,
    struct mbuf *packetlist,
    caddr_t route,
    const struct sockaddr *dest,
    int raw)
{
    char            *frame_type = NULL;
    char            *dst_linkaddr = NULL;
    int             retval = 0;
    char            frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
    char            dst_linkaddr_buffer[MAX_LINKADDR * 4];
    struct ifnet_filter *filter;
    struct if_proto *proto = 0;
    mbuf_t          m;
    mbuf_t          send_head = NULL;
    mbuf_t          *send_tail = &send_head;

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);

    dlil_read_begin();

    frame_type = frame_type_buffer;
    dst_linkaddr = dst_linkaddr_buffer;

    if (raw == 0) {
        proto = find_attached_proto(ifp, proto_family);
        if (proto == NULL) {
            retval = ENXIO;
            goto cleanup;
        }
    }

preout_again:
    if (packetlist == NULL)
        goto cleanup;
    m = packetlist;
    packetlist = packetlist->m_nextpkt;
    m->m_nextpkt = NULL;

    if (raw == 0) {
        proto_media_preout preoutp = proto->proto_kpi == kProtoKPI_v1
            ? proto->kpi.v1.pre_output : proto->kpi.v2.pre_output;
        retval = 0;
        if (preoutp)
            retval = preoutp(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr);

        if (retval) {
            if (retval == EJUSTRETURN) {
                goto preout_again;
            }
            m_freem(m);
            goto cleanup;
        }
    }

    do {
#if CONFIG_MACF_NET
        retval = mac_ifnet_check_transmit(ifp, m, proto_family,
            dlil_get_socket_type(&m, proto_family, raw));
        if (retval) {
            m_freem(m);
            goto cleanup;
        }
#endif

        if (raw == 0 && ifp->if_framer) {
            retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type);
            if (retval) {
                if (retval != EJUSTRETURN) {
                    m_freem(m);
                }
                goto next;
            }
        }

        /*
         * Let interface filters (if any) do their thing ...
         */
        /* Do not pass VLAN tagged packets to filters PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
                if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_family)) &&
                    filter->filt_output) {
                    retval = filter->filt_output(filter->filt_cookie, ifp, proto_family, &m);
                    if (retval) {
                        if (retval != EJUSTRETURN)
                            m_freem(m);
                        goto next;
                    }
                }
            }
        }

        /*
         * Strip away M_PROTO1 bit prior to sending packet to the driver
         * as this field may be used by the driver
         */
        m->m_flags &= ~M_PROTO1;

        /*
         * Finally, call the driver.
         */
        if ((ifp->if_eflags & IFEF_SENDLIST) != 0) {
            *send_tail = m;
            send_tail = &m->m_nextpkt;
        }
        else {
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
            retval = ifp->if_output(ifp, m);
            if (retval && dlil_verbose) {
                printf("dlil_output: output error on %s%d retval = %d\n",
                    ifp->if_name, ifp->if_unit, retval);
            }
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);

next:
        m = packetlist;
        if (m) {
            packetlist = packetlist->m_nextpkt;
            m->m_nextpkt = NULL;
        }
    } while (m);

    if (send_head) {
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
        retval = ifp->if_output(ifp, send_head);
        if (retval && dlil_verbose) {
            printf("dlil_output: output error on %s%d retval = %d\n",
                ifp->if_name, ifp->if_unit, retval);
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
    }

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0);

cleanup:
    dlil_read_end();
    if (packetlist) /* if any packet left, clean up */
        mbuf_freem_list(packetlist);
    if (retval == EJUSTRETURN)
        retval = 0;
    return retval;
}
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 */
__private_extern__ errno_t
dlil_output(
    ifnet_t ifp,
    protocol_family_t proto_family,
    mbuf_t packetlist,
    void *route,
    const struct sockaddr *dest,
    int raw)
{
    char            *frame_type = NULL;
    char            *dst_linkaddr = NULL;
    int             retval = 0;
    char            frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
    char            dst_linkaddr_buffer[MAX_LINKADDR * 4];
    struct ifnet_filter *filter;
    struct if_proto *proto = 0;
    mbuf_t          m;
    mbuf_t          send_head = NULL;
    mbuf_t          *send_tail = &send_head;

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);

    dlil_read_begin();

    frame_type = frame_type_buffer;
    dst_linkaddr = dst_linkaddr_buffer;

    if (raw == 0) {
        proto = find_attached_proto(ifp, proto_family);
        if (proto == NULL) {
            retval = ENXIO;
            goto cleanup;
        }
    }

preout_again:
    if (packetlist == NULL)
        goto cleanup;
    m = packetlist;
    packetlist = packetlist->m_nextpkt;
    m->m_nextpkt = NULL;

    if (raw == 0) {
        proto_media_preout preoutp = proto->proto_kpi == kProtoKPI_v1
            ? proto->kpi.v1.pre_output : proto->kpi.v2.pre_output;
        retval = 0;
        if (preoutp)
            retval = preoutp(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr);

        if (retval) {
            if (retval == EJUSTRETURN) {
                goto preout_again;
            }
            m_freem(m);
            goto cleanup;
        }
    }

    do {
#if CONFIG_MACF_NET
        retval = mac_ifnet_check_transmit(ifp, m, proto_family,
            dlil_get_socket_type(&m, proto_family, raw));
        if (retval) {
            m_freem(m);
            goto cleanup;
        }
#endif

        if (raw == 0 && ifp->if_framer) {
            int rcvif_set = 0;

            /*
             * If this is a broadcast packet that needs to be
             * looped back into the system, set the inbound ifp
             * to that of the outbound ifp.  This will allow
             * us to determine that it is a legitimate packet
             * for the system.  Only set the ifp if it's not
             * already set, just to be safe.
             */
            if ((m->m_flags & (M_BCAST | M_LOOP)) &&
                m->m_pkthdr.rcvif == NULL) {
                m->m_pkthdr.rcvif = ifp;
                rcvif_set = 1;
            }

            retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type);
            if (retval) {
                if (retval != EJUSTRETURN) {
                    m_freem(m);
                }
                goto next;
            }

            /*
             * Clear the ifp if it was set above, and to be
             * safe, only if it is still the same as the
             * outbound ifp we have in context.  If it was
             * looped back, then a copy of it was sent to the
             * loopback interface with the rcvif set, and we
             * are clearing the one that will go down to the
             * layer below.
             */
            if (rcvif_set && m->m_pkthdr.rcvif == ifp)
                m->m_pkthdr.rcvif = NULL;
        }

        /*
         * Let interface filters (if any) do their thing ...
         */
        /* Do not pass VLAN tagged packets to filters PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
                if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_family)) &&
                    filter->filt_output) {
                    retval = filter->filt_output(filter->filt_cookie, ifp, proto_family, &m);
                    if (retval) {
                        if (retval != EJUSTRETURN)
                            m_freem(m);
                        goto next;
                    }
                }
            }
        }

        /*
         * Strip away M_PROTO1 bit prior to sending packet to the driver
         * as this field may be used by the driver
         */
        m->m_flags &= ~M_PROTO1;

        /*
         * If the underlying interface is not capable of handling a
         * packet whose data portion spans across physically disjoint
         * pages, we need to "normalize" the packet so that we pass
         * down a chain of mbufs where each mbuf points to a span that
         * resides in the system page boundary.  If the packet does
         * not cross page(s), the following is a no-op.
         */
        if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
            if ((m = m_normalize(m)) == NULL)
                goto next;
        }

        /*
         * If this is a TSO packet, make sure the interface still
         * advertises TSO capability.
         */
        if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) && !(ifp->if_hwassist & IFNET_TSO_IPV4)) {
            retval = EMSGSIZE;
            m_freem(m);
            goto cleanup;
        }

        if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && !(ifp->if_hwassist & IFNET_TSO_IPV6)) {
            retval = EMSGSIZE;
            m_freem(m);
            goto cleanup;
        }

        /*
         * Finally, call the driver.
         */
        if ((ifp->if_eflags & IFEF_SENDLIST) != 0) {
            *send_tail = m;
            send_tail = &m->m_nextpkt;
        }
        else {
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
#if PKT_PRIORITY
            if_inc_traffic_class_out(ifp, m);
#endif /* PKT_PRIORITY */
            retval = ifp->if_output(ifp, m);
            if (retval && dlil_verbose) {
                printf("dlil_output: output error on %s%d retval = %d\n",
                    ifp->if_name, ifp->if_unit, retval);
            }
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);

next:
        m = packetlist;
        if (m) {
            packetlist = packetlist->m_nextpkt;
            m->m_nextpkt = NULL;
        }
    } while (m);

    if (send_head) {
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
#if PKT_PRIORITY
        if_inc_traffic_class_out(ifp, send_head);
#endif /* PKT_PRIORITY */
        retval = ifp->if_output(ifp, send_head);
        if (retval && dlil_verbose) {
            printf("dlil_output: output error on %s%d retval = %d\n",
                ifp->if_name, ifp->if_unit, retval);
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
    }

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0);

cleanup:
    dlil_read_end();
    if (packetlist) /* if any packet left, clean up */
        mbuf_freem_list(packetlist);
    if (retval == EJUSTRETURN)
        retval = 0;
    return retval;
}
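
/*
 * Design note: drivers that set IFEF_SENDLIST are handed the entire framed
 * chain in a single if_output() call (send_head above); all other drivers
 * receive one packet per call.  Batching amortizes the per-call overhead
 * for hardware that can enqueue several descriptors at once.
 */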
errno_t
dlil_ioctl(
    protocol_family_t proto_fam,
    ifnet_t ifp,
    u_long ioctl_code,
    void *ioctl_arg)
{
    struct ifnet_filter *filter;
    int retval = EOPNOTSUPP;
    int result = 0;
    int holding_read = 0;

    if (ifp == NULL || ioctl_code == 0)
        return EINVAL;

    /* Attempt to increment the use count. If it's zero, bail out, the ifp is invalid */
    result = ifp_use(ifp, kIfNetUseCount_MustNotBeZero);
    if (result != 0)
        return EOPNOTSUPP;

    dlil_read_begin();
    holding_read = 1;

    /* Run the interface filters first.
     * We want to run all filters before calling the protocol,
     * interface family, or interface.
     */
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_fam)) &&
            filter->filt_ioctl != NULL) {
            result = filter->filt_ioctl(filter->filt_cookie, ifp, proto_fam, ioctl_code, ioctl_arg);
            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP)
                    result = EOPNOTSUPP;
                retval = result;
                if (retval && retval != EOPNOTSUPP) {
                    goto cleanup;
                }
            }
        }
    }

    /* Allow the protocol to handle the ioctl */
    if (proto_fam) {
        struct if_proto *proto = find_attached_proto(ifp, proto_fam);

        if (proto != 0) {
            proto_media_ioctl ioctlp = proto->proto_kpi == kProtoKPI_v1
                ? proto->kpi.v1.ioctl : proto->kpi.v2.ioctl;
            result = EOPNOTSUPP;
            if (ioctlp)
                result = ioctlp(ifp, proto_fam, ioctl_code, ioctl_arg);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP)
                    result = EOPNOTSUPP;
                retval = result;
                if (retval && retval != EOPNOTSUPP) {
                    goto cleanup;
                }
            }
        }
    }

    /*
     * Since we have incremented the use count on the ifp, we are guaranteed
     * that the ifp will not go away (the function pointers may not be changed).
     * We release the dlil read lock so the interface ioctl may trigger a
     * protocol attach. This happens with vlan and may occur with other virtual
     * interfaces.
     */
    dlil_read_end();
    holding_read = 0;

    /* retval is either 0 or EOPNOTSUPP */

    /*
     * Let the interface handle this ioctl.
     * If it returns EOPNOTSUPP, ignore that, we may have
     * already handled this in the protocol or family.
     */
    if (ifp->if_ioctl)
        result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

    /* Only update retval if no one has handled the ioctl */
    if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
        if (result == ENOTSUP)
            result = EOPNOTSUPP;
        retval = result;
        if (retval && retval != EOPNOTSUPP) {
            goto cleanup;
        }
    }

cleanup:
    if (holding_read)
        dlil_read_end();
    if (ifp_unuse(ifp))
        ifp_use_reached_zero(ifp);

    if (retval == EJUSTRETURN)
        retval = 0;
    return retval;
}
1846 bpf_packet_func callback
)
1851 if (ifp
->if_set_bpf_tap
)
1852 error
= ifp
->if_set_bpf_tap(ifp
, mode
, callback
);
errno_t
dlil_resolve_multi(
    struct ifnet *ifp,
    const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr,
    size_t ll_len)
{
    errno_t result = EOPNOTSUPP;
    struct if_proto *proto;
    const struct sockaddr *verify;
    proto_media_resolve_multi resolvep;

    dlil_read_begin();

    bzero(ll_addr, ll_len);

    /* Call the protocol first */
    proto = find_attached_proto(ifp, proto_addr->sa_family);
    if (proto != NULL) {
        resolvep = proto->proto_kpi == kProtoKPI_v1
            ? proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi;
        if (resolvep != NULL)
            result = resolvep(ifp, proto_addr,(struct sockaddr_dl*)ll_addr,
                ll_len);
    }

    /* Let the interface verify the multicast address */
    if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
        if (result == 0)
            verify = ll_addr;
        else
            verify = proto_addr;
        result = ifp->if_check_multi(ifp, verify);
    }

    dlil_read_end();

    return result;
}
__private_extern__ errno_t
dlil_send_arp_internal(
    ifnet_t ifp,
    u_short arpop,
    const struct sockaddr_dl* sender_hw,
    const struct sockaddr* sender_proto,
    const struct sockaddr_dl* target_hw,
    const struct sockaddr* target_proto)
{
    struct if_proto *proto;
    errno_t result = 0;

    dlil_read_begin();

    proto = find_attached_proto(ifp, target_proto->sa_family);
    if (proto == NULL) {
        result = ENOTSUP;
    }
    else {
        proto_media_send_arp arpp;
        arpp = proto->proto_kpi == kProtoKPI_v1
            ? proto->kpi.v1.send_arp : proto->kpi.v2.send_arp;
        if (arpp == NULL)
            result = ENOTSUP;
        else
            result = arpp(ifp, arpop, sender_hw, sender_proto, target_hw,
                target_proto);
    }

    dlil_read_end();

    return result;
}
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
    if (sender_sin == NULL) {
        return FALSE;
    }
    return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
__private_extern__ errno_t
dlil_send_arp(
    ifnet_t ifp,
    u_short arpop,
    const struct sockaddr_dl* sender_hw,
    const struct sockaddr* sender_proto,
    const struct sockaddr_dl* target_hw,
    const struct sockaddr* target_proto)
{
    errno_t result = 0;
    const struct sockaddr_in * sender_sin;
    const struct sockaddr_in * target_sin;

    if (target_proto == NULL || (sender_proto &&
        sender_proto->sa_family != target_proto->sa_family))
        return EINVAL;

    /*
     * If this is an ARP request and the target IP is IPv4LL,
     * send the request on all interfaces.  The exception is
     * an announcement, which must only appear on the specific
     * interface.
     */
    sender_sin = (const struct sockaddr_in *)sender_proto;
    target_sin = (const struct sockaddr_in *)target_proto;
    if (target_proto->sa_family == AF_INET
        && IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr))
        && ipv4_ll_arp_aware != 0
        && arpop == ARPOP_REQUEST
        && !_is_announcement(target_sin, sender_sin)) {
        ifnet_t     *ifp_list;
        u_int32_t   count;
        u_int32_t   ifp_on;

        result = ENOTSUP;

        if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
            for (ifp_on = 0; ifp_on < count; ifp_on++) {
                errno_t             new_result;
                ifaddr_t            source_hw = NULL;
                ifaddr_t            source_ip = NULL;
                struct sockaddr_in  source_ip_copy;

                /*
                 * Only arp on interfaces marked for IPv4LL ARPing. This may
                 * mean that we don't ARP on the interface the subnet route
                 * points to.
                 */
                if ((ifp_list[ifp_on]->if_eflags & IFEF_ARPLL) == 0) {
                    continue;
                }

                /* Find the source IP address */
                ifnet_lock_shared(ifp_list[ifp_on]);
                source_hw = TAILQ_FIRST(&ifp_list[ifp_on]->if_addrhead);
                TAILQ_FOREACH(source_ip, &ifp_list[ifp_on]->if_addrhead,
                    ifa_link) {
                    if (source_ip->ifa_addr &&
                        source_ip->ifa_addr->sa_family == AF_INET) {
                        break;
                    }
                }

                /* No IP Source, don't arp */
                if (source_ip == NULL) {
                    ifnet_lock_done(ifp_list[ifp_on]);
                    continue;
                }

                /* Copy the source IP address */
                source_ip_copy = *(struct sockaddr_in *)source_ip->ifa_addr;
                ifaref(source_hw);
                ifnet_lock_done(ifp_list[ifp_on]);

                /* Send the ARP */
                new_result = dlil_send_arp_internal(ifp_list[ifp_on], arpop,
                    (struct sockaddr_dl*)source_hw->ifa_addr,
                    (struct sockaddr*)&source_ip_copy, NULL,
                    target_proto);

                ifafree(source_hw);
                if (result == ENOTSUP) {
                    result = new_result;
                }
            }
        }

        ifnet_list_free(ifp_list);
    }
    else {
        result = dlil_send_arp_internal(ifp, arpop, sender_hw, sender_proto,
            target_hw, target_proto);
    }

    return result;
}
__private_extern__ int
ifp_use(
    struct ifnet *ifp,
    int handle_zero)
{
    int old_value;
    int retval = 0;

    do {
        old_value = ifp->if_usecnt;
        if (old_value == 0 && handle_zero == kIfNetUseCount_MustNotBeZero) {
            retval = ENXIO; // ifp is invalid
            break;
        }
    } while (!OSCompareAndSwap((UInt32)old_value, (UInt32)old_value + 1, (UInt32*)&ifp->if_usecnt));

    return retval;
}
/* ifp_unuse is broken into two pieces.
 *
 * ifp_use and ifp_unuse must be called between when the caller calls
 * dlil_write_begin and dlil_write_end. ifp_unuse needs to perform some
 * operations after dlil_write_end has been called. For this reason,
 * anyone calling ifp_unuse must call ifp_use_reached_zero if ifp_unuse
 * returns a non-zero value. The caller must call ifp_use_reached_zero
 * after the caller has called dlil_write_end.
 */
__private_extern__ void
ifp_use_reached_zero(
    struct ifnet *ifp)
{
    ifnet_detached_func free_func;

    dlil_read_begin();

    if (ifp->if_usecnt != 0)
        panic("ifp_use_reached_zero: ifp->if_usecnt != 0");

    ifnet_head_lock_exclusive();
    ifnet_lock_exclusive(ifp);

    /* Remove ourselves from the list */
    TAILQ_REMOVE(&ifnet_head, ifp, if_link);
    ifnet_addrs[ifp->if_index - 1] = NULL;

    /* ifp should be removed from the interface list */
    while (ifp->if_multiaddrs.lh_first) {
        struct ifmultiaddr *ifma = ifp->if_multiaddrs.lh_first;

        /*
         * When the interface is gone, we will no longer
         * be listening on these multicasts. Various bits
         * of the stack may be referencing these multicasts,
         * release only our reference.
         */
        LIST_REMOVE(ifma, ifma_link);
        ifma->ifma_ifp = NULL;
        ifma_release(ifma);
    }

    ifp->if_eflags &= ~IFEF_DETACHING; // clear the detaching flag
    ifnet_lock_done(ifp);
    ifnet_head_done();

    free_func = ifp->if_free;
    dlil_read_end();
    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

    if (free_func)
        free_func(ifp);
}
__private_extern__ int
ifp_unuse(
    struct ifnet *ifp)
{
    int oldval;
    oldval = OSDecrementAtomic(&ifp->if_usecnt);
    if (oldval == 0)
        panic("ifp_unuse: ifp(%s%d)->if_usecnt was zero\n", ifp->if_name, ifp->if_unit);

    if (oldval > 1)
        return 0;

    if ((ifp->if_eflags & IFEF_DETACHING) == 0)
        panic("ifp_unuse: use count reached zero but detaching flag is not set!");

    return 1; /* caller must call ifp_use_reached_zero */
}
extern lck_mtx_t *domain_proto_mtx;

static errno_t
dlil_attach_protocol_internal(
    struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list,
    u_int32_t demux_count)
{
    struct kev_dl_proto_data ev_pr_data;
    struct ifnet *ifp = proto->ifp;
    int retval = 0;
    u_int32_t hash_value = proto_hash_value(proto->protocol_family);

    /* setup some of the common values */
    {
        struct domain *dp;
        lck_mtx_lock(domain_proto_mtx);
        dp = domains;
        while (dp && (protocol_family_t)dp->dom_family != proto->protocol_family)
            dp = dp->dom_next;
        proto->dl_domain = dp;
        lck_mtx_unlock(domain_proto_mtx);
    }

    /*
     * Take the write lock to protect readers and exclude other writers.
     */
    if ((retval = dlil_write_begin()) != 0) {
        printf("dlil_attach_protocol_internal - dlil_write_begin returned %d\n", retval);
        return retval;
    }

    /* Check that the interface isn't currently detaching */
    ifnet_lock_shared(ifp);
    if ((ifp->if_eflags & IFEF_DETACHING) != 0) {
        ifnet_lock_done(ifp);
        dlil_write_end();
        return ENXIO;
    }
    ifnet_lock_done(ifp);

    if (find_attached_proto(ifp, proto->protocol_family) != NULL) {
        dlil_write_end();
        return EEXIST;
    }

    /*
     * Call family module add_proto routine so it can refine the
     * demux descriptors as it wishes.
     */
    retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list, demux_count);
    if (retval) {
        dlil_write_end();
        return retval;
    }

    /*
     * We can't fail from this point on.
     * Increment the number of uses (protocol attachments + interface attached).
     */
    ifp_use(ifp, kIfNetUseCount_MustNotBeZero);

    /*
     * Insert the protocol in the hash
     */
    {
        struct if_proto *prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
        while (prev_proto && SLIST_NEXT(prev_proto, next_hash) != NULL)
            prev_proto = SLIST_NEXT(prev_proto, next_hash);
        if (prev_proto)
            SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
        else
            SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value], proto, next_hash);
    }

    /*
     * Add to if_proto list for this interface
     */
    if_proto_ref(proto);
    dlil_write_end();

    /* the reserved field carries the number of protocol still attached (subject to change) */
    ev_pr_data.proto_family = proto->protocol_family;
    ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
        (struct net_event_data*)&ev_pr_data,
        sizeof(struct kev_dl_proto_data));

    DLIL_PRINTF("dlil. Attached protocol %d to %s%d - %d\n", proto->protocol_family,
        ifp->if_name, ifp->if_unit, retval);

    return retval;
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
    int retval = 0;
    struct if_proto *ifproto = NULL;

    if (ifp == NULL || protocol == 0 || proto_details == NULL)
        return EINVAL;

    ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK);
    if (ifproto == 0) {
        DLIL_PRINTF("ERROR - dlil failed if_proto allocation\n");
        retval = ENOMEM;
        goto end;
    }
    bzero(ifproto, sizeof(*ifproto));

    ifproto->ifp = ifp;
    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v1;
    ifproto->kpi.v1.input = proto_details->input;
    ifproto->kpi.v1.pre_output = proto_details->pre_output;
    ifproto->kpi.v1.event = proto_details->event;
    ifproto->kpi.v1.ioctl = proto_details->ioctl;
    ifproto->kpi.v1.detached = proto_details->detached;
    ifproto->kpi.v1.resolve_multi = proto_details->resolve;
    ifproto->kpi.v1.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count);

end:
    if (retval && ifproto)
        FREE(ifproto, M_IFADDR);
    return retval;
}
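
/*
 * Sketch of a caller (hypothetical protocol module): a v1 attachment
 * supplies a per-packet input routine plus demux descriptors for the
 * frames it wants; unused callbacks may stay NULL:
 *
 *    struct ifnet_attach_proto_param proto;
 *    bzero(&proto, sizeof (proto));
 *    proto.input       = my_proto_input;    // one mbuf per call
 *    proto.pre_output  = my_proto_preout;
 *    proto.demux_list  = my_demux_array;
 *    proto.demux_count = my_demux_len;
 *    errno_t err = ifnet_attach_protocol(ifp, PF_INET, &proto);
 */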
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
    int retval = 0;
    struct if_proto *ifproto = NULL;

    if (ifp == NULL || protocol == 0 || proto_details == NULL)
        return EINVAL;

    ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK);
    if (ifproto == 0) {
        DLIL_PRINTF("ERROR - dlil failed if_proto allocation\n");
        retval = ENOMEM;
        goto end;
    }
    bzero(ifproto, sizeof(*ifproto));

    ifproto->ifp = ifp;
    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v2;
    ifproto->kpi.v2.input = proto_details->input;
    ifproto->kpi.v2.pre_output = proto_details->pre_output;
    ifproto->kpi.v2.event = proto_details->event;
    ifproto->kpi.v2.ioctl = proto_details->ioctl;
    ifproto->kpi.v2.detached = proto_details->detached;
    ifproto->kpi.v2.resolve_multi = proto_details->resolve;
    ifproto->kpi.v2.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count);

end:
    if (retval && ifproto)
        FREE(ifproto, M_IFADDR);
    return retval;
}
extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static int
dlil_detach_protocol_internal(
    struct if_proto *proto)
{
    struct ifnet *ifp = proto->ifp;
    u_int32_t proto_family = proto->protocol_family;
    struct kev_dl_proto_data ev_pr_data;

    if (proto->proto_kpi == kProtoKPI_v1) {
        if (proto->kpi.v1.detached)
            proto->kpi.v1.detached(ifp, proto->protocol_family);
    }
    if (proto->proto_kpi == kProtoKPI_v2) {
        if (proto->kpi.v2.detached)
            proto->kpi.v2.detached(ifp, proto->protocol_family);
    }
    if_proto_free(proto);

    /*
     * Cleanup routes that may still be in the routing table for that interface/protocol pair.
     */

    if_rtproto_del(ifp, proto_family);

    /* the reserved field carries the number of protocol still attached (subject to change) */
    ev_pr_data.proto_family = proto_family;
    ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
        (struct net_event_data*)&ev_pr_data,
        sizeof(struct kev_dl_proto_data));
    return 0;
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
    struct if_proto *proto = NULL;
    int retval = 0;
    int use_reached_zero = 0;

    if (ifp == NULL || proto_family == 0) return EINVAL;

    if ((retval = dlil_write_begin()) != 0) {
        if (retval == EDEADLK) {
            retval = 0;
            dlil_read_begin();
            proto = find_attached_proto(ifp, proto_family);
            if (proto == 0) {
                retval = ENXIO;
            }
            else {
                proto->detaching = 1;
                dlil_detach_waiting = 1;
                wakeup(&dlil_detach_waiting);
            }
            dlil_read_end();
        }
        goto end;
    }

    proto = find_attached_proto(ifp, proto_family);

    if (proto == NULL) {
        retval = ENXIO;
        dlil_write_end();
        goto end;
    }

    /*
     * Call family module del_proto
     */

    if (ifp->if_del_proto)
        ifp->if_del_proto(ifp, proto->protocol_family);

    SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)], proto, if_proto, next_hash);

    /*
     * We can do the rest of the work outside of the write lock.
     */
    use_reached_zero = ifp_unuse(ifp);
    dlil_write_end();

    dlil_detach_protocol_internal(proto);

    /*
     * Only handle the case where the interface will go away after
     * we've sent the message. This way post message can send the
     * message to the interface safely.
     */

    if (use_reached_zero)
        ifp_use_reached_zero(ifp);

end:
    return retval;
}
/*
 * dlil_delayed_detach_thread is responsible for detaching
 * protocols, protocol filters, and interface filters after
 * an attempt was made to detach one of those items while
 * it was not safe to do so (i.e. called dlil_read_begin).
 *
 * This function will take the dlil write lock and walk
 * through each of the interfaces looking for items with
 * the detaching flag set. When an item is found, it is
 * detached from the interface and placed on a local list.
 * After all of the items have been collected, we drop the
 * write lock and perform the post detach. This is done
 * so we only have to take the write lock once.
 *
 * When detaching a protocol filter, if we find that we
 * have detached the very last protocol and we need to call
 * ifp_use_reached_zero, we have to break out of our work
 * to drop the write lock so we can call ifp_use_reached_zero.
 */
static void
dlil_delayed_detach_thread(__unused void* foo, __unused wait_result_t wait)
{
    thread_t self = current_thread();
    int asserted = 0;

    ml_thread_policy(self, MACHINE_GROUP,
        (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));

    while (1) {
        if (dlil_detach_waiting != 0 && dlil_write_begin() == 0) {
            struct ifnet *ifp;
            struct proto_hash_entry detached_protos;
            struct ifnet_filter_head detached_filters;
            struct if_proto *proto;
            struct if_proto *next_proto;
            struct ifnet_filter *filt;
            struct ifnet_filter *next_filt;
            int reached_zero;

            reached_zero = 0;

            /* Clear the detach waiting flag */
            dlil_detach_waiting = 0;
            TAILQ_INIT(&detached_filters);
            SLIST_INIT(&detached_protos);

            ifnet_head_lock_shared();
            TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
                int i;

                // Look for protocols and protocol filters
                for (i = 0; i < PROTO_HASH_SLOTS && !reached_zero; i++) {
                    struct if_proto **prev_nextptr = &SLIST_FIRST(&ifp->if_proto_hash[i]);
                    for (proto = *prev_nextptr; proto; proto = *prev_nextptr) {

                        // Detach this protocol
                        if (proto->detaching) {
                            if (ifp->if_del_proto)
                                ifp->if_del_proto(ifp, proto->protocol_family);
                            *prev_nextptr = SLIST_NEXT(proto, next_hash);
                            SLIST_INSERT_HEAD(&detached_protos, proto, next_hash);
                            reached_zero = ifp_unuse(ifp);
                            if (reached_zero) {
                                break;
                            }
                        }
                        else {
                            // Update prev_nextptr to point to our next ptr
                            prev_nextptr = &SLIST_NEXT(proto, next_hash);
                        }
                    }
                }

                // look for interface filters that need to be detached
                for (filt = TAILQ_FIRST(&ifp->if_flt_head); filt; filt = next_filt) {
                    next_filt = TAILQ_NEXT(filt, filt_next);
                    if (filt->filt_detaching != 0) {
                        // take this interface filter off the interface filter list
                        TAILQ_REMOVE(&ifp->if_flt_head, filt, filt_next);

                        // put this interface filter on the detached filters list
                        TAILQ_INSERT_TAIL(&detached_filters, filt, filt_next);
                    }
                }

                if (ifp->if_delayed_detach) {
                    ifp->if_delayed_detach = 0;
                    reached_zero = ifp_unuse(ifp);
                }
                if (reached_zero)
                    break;
            }
            ifnet_head_done();
            dlil_write_end();

            for (filt = TAILQ_FIRST(&detached_filters); filt; filt = next_filt) {
                next_filt = TAILQ_NEXT(filt, filt_next);
                /*
                 * dlil_detach_filter_internal won't remove an item from
                 * the list if it is already detached (second parameter).
                 * The item will be freed though.
                 */
                dlil_detach_filter_internal(filt, 1);
            }

            for (proto = SLIST_FIRST(&detached_protos); proto; proto = next_proto) {
                next_proto = SLIST_NEXT(proto, next_hash);
                dlil_detach_protocol_internal(proto);
            }

            if (reached_zero) {
                ifp_use_reached_zero(ifp);
                dlil_detach_waiting = 1; // we may have missed something
            }
        }

        if (!asserted && dlil_detach_waiting == 0) {
            asserted = 1;
            assert_wait(&dlil_detach_waiting, THREAD_UNINT);
        }

        if (dlil_detach_waiting == 0) {
            asserted = 0;
            thread_block(dlil_delayed_detach_thread);
        }
    }
}

static void
dlil_call_delayed_detach_thread(void) {
    dlil_delayed_detach_thread(NULL, THREAD_RESTART);
}
extern int if_next_index(void);

errno_t
ifnet_attach(
	ifnet_t ifp,
	const struct sockaddr_dl *ll_addr)
{
	u_int32_t interface_family;
	struct ifnet *tmp_if;
	struct proto_hash_entry *new_proto_list = NULL;

	if (ifp == NULL) return EINVAL;
	if (ll_addr && ifp->if_addrlen == 0) {
		ifp->if_addrlen = ll_addr->sdl_alen;
	}
	else if (ll_addr && ll_addr->sdl_alen != ifp->if_addrlen) {
		return EINVAL;
	}

	interface_family = ifp->if_family;

	ifnet_head_lock_shared();

	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			return EEXIST;
		}
	}

	ifnet_head_done();

	if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_lock == 0)
#if IFNET_RW_LOCK
		ifp->if_lock = lck_rw_alloc_init(ifnet_lock_group, ifnet_lock_attr);
#else
		ifp->if_lock = lck_mtx_alloc_init(ifnet_lock_group, ifnet_lock_attr);
#endif

	if (ifp->if_lock == 0) {
		return ENOMEM;
	}

	if (!(ifp->if_eflags & IFEF_REUSE) || ifp->if_fwd_route_lock == NULL) {
		if (ifp->if_fwd_route_lock == NULL)
			ifp->if_fwd_route_lock = lck_mtx_alloc_init(
			    ifnet_lock_group, ifnet_lock_attr);

		if (ifp->if_fwd_route_lock == NULL) {
#if IFNET_RW_LOCK
			lck_rw_free(ifp->if_lock, ifnet_lock_group);
#else
			lck_mtx_free(ifp->if_lock, ifnet_lock_group);
#endif
			ifp->if_lock = NULL;
			return (ENOMEM);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == 0 || ifp->if_del_proto == 0) {
		DLIL_PRINTF("dlil Attempt to attach interface without family module - %d\n",
		    interface_family);
		return ENODEV;
	}

	if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_proto_hash == NULL) {
		MALLOC(new_proto_list, struct proto_hash_entry *,
		    sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS,
		    M_NKE, M_WAITOK);

		if (new_proto_list == 0) {
			return ENOBUFS;
		}
	}

	dlil_write_begin();

	TAILQ_INIT(&ifp->if_flt_head);

	if (new_proto_list) {
		bzero(new_proto_list, (PROTO_HASH_SLOTS * sizeof(struct proto_hash_entry)));
		ifp->if_proto_hash = new_proto_list;
		new_proto_list = NULL;
	}
	/* old_if_attach */
	{
		char workbuf[64];
		int namelen, masklen, socksize, ifasize;
		struct ifaddr *ifa = NULL;

		if (ifp->if_snd.ifq_maxlen == 0)
			ifp->if_snd.ifq_maxlen = ifqmaxlen;
		TAILQ_INIT(&ifp->if_prefixhead);
		LIST_INIT(&ifp->if_multiaddrs);
		ifnet_touch_lastchange(ifp);

		/* usecount to track attachment to the ifnet list */
		ifp_use(ifp, kIfNetUseCount_MayBeZero);

		/* Lock the list of interfaces */
		ifnet_head_lock_exclusive();
		ifnet_lock_exclusive(ifp);

		if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_index == 0) {
			int idx = if_next_index();

			if (idx == -1) {
				ifnet_lock_done(ifp);
				ifnet_head_done();
				ifp_unuse(ifp);
				dlil_write_end();

				return ENOBUFS;
			}
			ifp->if_index = idx;
		} else {
			ifa = TAILQ_FIRST(&ifp->if_addrhead);
		}
		namelen = snprintf(workbuf, sizeof(workbuf), "%s%d", ifp->if_name, ifp->if_unit);
#define _offsetof(t, m) ((uintptr_t)((caddr_t)&((t *)0)->m))
		masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
		socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(u_int32_t) - 1)))
		if ((u_int32_t)socksize < sizeof(struct sockaddr_dl))
			socksize = sizeof(struct sockaddr_dl);
		socksize = ROUNDUP(socksize);
		ifasize = sizeof(struct ifaddr) + 2 * socksize;
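
		/*
		 * Worked example (illustrative; the exact numbers assume the
		 * usual 8-byte fixed header and 12-byte sdl_data of struct
		 * sockaddr_dl): for "en0" with a 6-byte Ethernet address,
		 * namelen = 3, so masklen = 8 + 3 = 11 and
		 * socksize = 11 + 6 = 17; that is below
		 * sizeof(struct sockaddr_dl) == 20, so it is raised to 20,
		 * and ROUNDUP(20) stays 20 since it is already a multiple of
		 * sizeof(u_int32_t). ifasize then reserves one ifaddr plus
		 * two such sockaddrs: the link address and its netmask.
		 */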
		/*
		 * Allocate a new ifa if we don't have one
		 * or the old one is too small.
		 */
		if (ifa == NULL || socksize > ifa->ifa_addr->sa_len) {
			if (ifa)
				if_detach_ifa(ifp, ifa);
			ifa = (struct ifaddr *)_MALLOC(ifasize, M_IFADDR, M_WAITOK);
		}

		if (ifa) {
			struct sockaddr_dl *sdl = (struct sockaddr_dl *)(ifa + 1);
			ifnet_addrs[ifp->if_index - 1] = ifa;
			bzero(ifa, ifasize);
			ifa->ifa_debug |= IFD_ALLOC;
			sdl->sdl_len = socksize;
			sdl->sdl_family = AF_LINK;
			bcopy(workbuf, sdl->sdl_data, namelen);
			sdl->sdl_nlen = namelen;
			sdl->sdl_index = ifp->if_index;
			sdl->sdl_type = ifp->if_type;
			if (ll_addr) {
				sdl->sdl_alen = ll_addr->sdl_alen;
				if (ll_addr->sdl_alen != ifp->if_addrlen)
					panic("ifnet_attach - ll_addr->sdl_alen != ifp->if_addrlen");
				bcopy(CONST_LLADDR(ll_addr), LLADDR(sdl), sdl->sdl_alen);
			}
			ifa->ifa_ifp = ifp;
			ifa->ifa_rtrequest = link_rtrequest;
			ifa->ifa_addr = (struct sockaddr *)sdl;
			sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
			ifa->ifa_netmask = (struct sockaddr *)sdl;
			sdl->sdl_len = masklen;
			while (namelen != 0)
				sdl->sdl_data[--namelen] = 0xff;
		}

		TAILQ_INIT(&ifp->if_addrhead);
		ifa = ifnet_addrs[ifp->if_index - 1];

		if (ifa) {
			/*
			 * We don't use if_attach_ifa because we want
			 * this address to be first on the list.
			 */
			ifaref(ifa);
			ifa->ifa_debug |= IFD_ATTACHED;
			TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		}
#if CONFIG_MACF_NET
		mac_ifnet_label_associate(ifp);
#endif

		TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
		ifindex2ifnet[ifp->if_index] = ifp;
	}
	/*
	 * A specific dlil input thread is created per Ethernet/PDP interface.
	 * Pseudo interfaces and other types of interfaces use the main
	 * ("loopback") thread. If the sysctl
	 * "net.link.generic.system.multi_threaded_input" is set to zero, all
	 * packets are handled by the main loopback thread, reverting to the
	 * 10.4.x behaviour.
	 */
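
	/*
	 * Illustrative userland sketch (not part of this file): the sysctl
	 * named above can be read with the standard sysctlbyname(3) API.
	 *
	 *	#include <sys/types.h>
	 *	#include <sys/sysctl.h>
	 *
	 *	int
	 *	get_multi_threaded_input(void)
	 *	{
	 *		int val = 0;
	 *		size_t len = sizeof (val);
	 *
	 *		if (sysctlbyname(
	 *		    "net.link.generic.system.multi_threaded_input",
	 *		    &val, &len, NULL, 0) == -1)
	 *			return (-1);
	 *		return (val);	// 0 => all input on the loopback thread
	 *	}
	 */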
	if (ifp->if_type == IFT_ETHER || ifp->if_type == IFT_PDP) {
		int err;

		if (dlil_multithreaded_input > 0) {
			ifp->if_input_thread = _MALLOC(sizeof(struct dlil_threading_info), M_NKE, M_WAITOK);
			if (ifp->if_input_thread == NULL)
				panic("ifnet_attach ifp=%p couldn't alloc threading\n", ifp);
			if ((err = dlil_create_input_thread(ifp, ifp->if_input_thread)) != 0)
				panic("ifnet_attach ifp=%p couldn't get a thread. err=%d\n", ifp, err);
#ifdef DLIL_DEBUG
			printf("ifnet_attach: dlil thread for ifp=%p if_index=%d\n", ifp, ifp->if_index);
#endif
		}
	}
	ifnet_lock_done(ifp);
	ifnet_head_done();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_write_end();

#if IFNET_ROUTE_REFCNT
	if (net_rtref) {
		(void) ifnet_set_idle_flags(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	}
#endif /* IFNET_ROUTE_REFCNT */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	return 0;
}
errno_t
ifnet_detach(
	ifnet_t ifp)
{
	struct ifnet_filter *filter;
	struct ifnet_filter *filter_next;
	int retval = 0;
	int zeroed = 0;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inputthread;

	if (ifp == NULL) return EINVAL;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_eflags & IFEF_DETACHING) != 0) {
		/* Interface has already been detached */
		ifnet_lock_done(ifp);
		return ENXIO;
	}

	/*
	 * Indicate this interface is being detached.
	 *
	 * This should prevent protocols from attaching
	 * from this point on. Interface will remain on
	 * the list until all of the protocols are detached.
	 */
	ifp->if_eflags |= IFEF_DETACHING;
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

#if IFNET_ROUTE_REFCNT
	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	if (ifp->if_want_aggressive_drain != 0)
		(void) ifnet_set_idle_flags(ifp, 0, ~0);
#endif /* IFNET_ROUTE_REFCNT */

	if ((retval = dlil_write_begin()) != 0) {
		if (retval == EDEADLK) {
			retval = 0;

			/* We need to perform a delayed detach */
			ifp->if_delayed_detach = 1;
			dlil_detach_waiting = 1;
			wakeup(&dlil_detach_waiting);
		}
		return retval;
	}

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Steal the list of interface filters */
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	/* unuse the interface */
	zeroed = ifp_unuse(ifp);

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.
	 */
	if ((inputthread = ifp->if_input_thread) != NULL) {
		if (inputthread->net_affinity) {
			struct thread *tp;

			if (inputthread == dlil_lo_thread_ptr)
				panic("Thread affinity should not be enabled "
				    "on the loopback dlil input thread\n");

			lck_mtx_lock(inputthread->input_lck);
			tp = inputthread->workloop_thread;
			inputthread->workloop_thread = NULL;
			inputthread->tag = 0;
			inputthread->net_affinity = FALSE;
			lck_mtx_unlock(inputthread->input_lck);

			/* Tear down workloop thread affinity */
			if (tp != NULL) {
				(void) dlil_affinity_set(tp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(tp);
			}

			/* Tear down dlil input thread affinity */
			tp = inputthread->input_thread;
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* cleanup ifp dlil input thread, if any */
		ifp->if_input_thread = NULL;

		if (inputthread != dlil_lo_thread_ptr) {
#ifdef DLIL_DEBUG
			printf("ifnet_detach: wakeup thread threadinfo: %p "
			    "input_thread=%p threads: cur=%d max=%d\n",
			    inputthread, inputthread->input_thread,
			    dlil_multithreaded_input, cur_dlil_input_threads);
#endif
			lck_mtx_lock(inputthread->input_lck);

			inputthread->input_waiting |= DLIL_INPUT_TERMINATE;
			if ((inputthread->input_waiting & DLIL_INPUT_RUNNING) == 0) {
				wakeup((caddr_t)&inputthread->input_waiting);
			}
			lck_mtx_unlock(inputthread->input_lck);
		}
	}
	/* last chance to clean up IPv4 forwarding cached route */
	lck_mtx_lock(ifp->if_fwd_route_lock);
	if (ifp->if_fwd_route.ro_rt != NULL) {
		rtfree(ifp->if_fwd_route.ro_rt);
		ifp->if_fwd_route.ro_rt = NULL;
	}
	lck_mtx_unlock(ifp->if_fwd_route_lock);
	dlil_write_end();

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		dlil_detach_filter_internal(filter, 1);
	}

	if (zeroed != 0) {
		ifp_use_reached_zero(ifp);
	}

	return retval;
}
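
/*
 * Illustrative caller-side sketch (not part of this file): because
 * ifnet_detach() may complete asynchronously via the delayed detach
 * thread above, a driver should not free its ifnet immediately after
 * this call; it should wait until the stack signals completion (its
 * if_free callback, or the KEV_DL_IF_DETACHED event). The driver_stop
 * function and my_softc structure below are hypothetical.
 *
 *	static void
 *	driver_stop(struct my_softc *sc)
 *	{
 *		(void) ifnet_detach(sc->sc_ifp);
 *		// do not release sc->sc_ifp here; defer cleanup to the
 *		// if_free callback / KEV_DL_IF_DETACHED notification
 *	}
 */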
static errno_t
dlil_recycle_ioctl(
	__unused ifnet_t ifnet_ptr,
	__unused u_long ioctl_code,
	__unused void *ioctl_arg)
{
	return EOPNOTSUPP;
}

static int
dlil_recycle_output(
	__unused struct ifnet *ifnet_ptr,
	struct mbuf *m)
{
	m_freem(m);
	return 0;
}

static void
dlil_recycle_free(
	__unused ifnet_t ifnet_ptr)
{
}

static errno_t
dlil_recycle_set_bpf_tap(
	__unused ifnet_t ifp,
	__unused bpf_tap_mode mode,
	__unused bpf_packet_func callback)
{
	/* XXX not sure what to do here */
	return 0;
}
int dlil_if_acquire(
	u_int32_t family,
	const void *uniqueid,
	size_t uniqueid_len,
	struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	int ret = 0;

	lck_mtx_lock(dlil_ifnet_mutex);
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family == family) {
			/* same uniqueid and same len or no unique id specified */
			if ((uniqueid_len == dlifp1->if_uniqueid_len) &&
			    !bcmp(uniqueid, dlifp1->if_uniqueid, uniqueid_len)) {
				/* check for matching interface in use */
				if (ifp1->if_eflags & IFEF_INUSE) {
					if (uniqueid_len) {
						ret = EBUSY;
						goto end;
					}
				}
				else {
					if (!ifp1->if_lock)
						panic("ifp's lock is gone\n");
					ifnet_lock_exclusive(ifp1);
					ifp1->if_eflags |= (IFEF_INUSE | IFEF_REUSE);
					ifnet_lock_done(ifp1);
					*ifp = ifp1;
					goto end;
				}
			}
		}
	}

	/* no interface found, allocate a new one */
	MALLOC(dlifp1, struct dlil_ifnet *, sizeof(*dlifp1), M_NKE, M_WAITOK);
	if (dlifp1 == 0) {
		ret = ENOMEM;
		goto end;
	}

	bzero(dlifp1, sizeof(*dlifp1));

	if (uniqueid_len) {
		MALLOC(dlifp1->if_uniqueid, void *, uniqueid_len, M_NKE, M_WAITOK);
		if (dlifp1->if_uniqueid == 0) {
			FREE(dlifp1, M_NKE);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->if_uniqueid, uniqueid_len);
		dlifp1->if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	ifp1->if_eflags |= IFEF_INUSE;
	ifp1->if_name = dlifp1->if_namestorage;
#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	lck_mtx_unlock(dlil_ifnet_mutex);

	return ret;
}
__private_extern__ void
dlil_if_release(
	ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	/* Interface does not have a lock until it is attached - radar 3713951 */
	if (ifp->if_lock)
		ifnet_lock_exclusive(ifp);
	ifp->if_eflags &= ~IFEF_INUSE;
	ifp->if_ioctl = dlil_recycle_ioctl;
	ifp->if_output = dlil_recycle_output;
	ifp->if_free = dlil_recycle_free;
	ifp->if_set_bpf_tap = dlil_recycle_set_bpf_tap;

	strncpy(dlifp->if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->if_namestorage;
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK. Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	if (ifp->if_lock)
		ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK
	 * and PF_VLAN, where each bucket contains exactly one entry;
	 * PF_VLAN does not need an explicit unplumb.
	 *
	 * if_proto_hash[4] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
#if NETAT
	(void) proto_unplumb(PF_APPLETALK, ifp);
#endif /* NETAT */
}
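
/*
 * Illustrative sketch of the bucket mapping implied by the comment in
 * dlil_proto_unplumb_all() above; proto_hash_value() earlier in this
 * file is the authoritative version, and this example merely mirrors it.
 *
 *	static u_int32_t
 *	example_proto_hash(protocol_family_t pf)
 *	{
 *		switch (pf) {
 *		case PF_INET:      return 0;
 *		case PF_INET6:     return 1;
 *		case PF_APPLETALK: return 2;
 *		case PF_VLAN:      return 3;
 *		default:           return 4;	// shared "other" bucket
 *		}
 *	}
 */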