/*
 * Copyright (c) 1999-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#ifdef KERNEL_PRIVATE

#include <sys/kernel_types.h>
#include <net/kpi_interface.h>
/*
 * DLIL_DESC_ETYPE2 - native_type must point to 2 byte ethernet raw protocol,
 *                    variants.native_type_length must be set to 2
 * DLIL_DESC_SAP - native_type must point to 3 byte SAP protocol
 *                 variants.native_type_length must be set to 3
 * DLIL_DESC_SNAP - native_type must point to 5 byte SNAP protocol
 *                  variants.native_type_length must be set to 5
 *
 * All protocols must be in network byte order.
 *
 * Future interface families may define more protocol types they know about.
 * The type implies the offset and context of the protocol data at native_type.
 * The length of the protocol data specified at native_type must be set in
 * variants.native_type_length.
 */
/* Ethernet specific types */
#define DLIL_DESC_ETYPE2	4
#define DLIL_DESC_SAP		5
#define DLIL_DESC_SNAP		6
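
/*
 * Illustrative sketch (an assumption, not part of this header's API): using
 * the ifnet_demux_desc structure from <net/kpi_interface.h>, a protocol such
 * as IPv4 could be described for demux with DLIL_DESC_ETYPE2, the ethertype
 * stored in network byte order as required above.
 *
 *	u_int8_t ipv4_etype[2] = { 0x08, 0x00 };   // ETHERTYPE_IP, big endian
 *	struct ifnet_demux_desc desc;
 *
 *	bzero(&desc, sizeof (desc));
 *	desc.type = DLIL_DESC_ETYPE2;      // 2-byte raw ethertype descriptor
 *	desc.data = ipv4_etype;
 *	desc.datalen = sizeof (ipv4_etype);  // must be 2 for ETYPE2
 *
 * The descriptor would then go into the demux_list passed to
 * ifnet_attach_protocol().
 */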
#include <net/if_var.h>
#include <net/classq/classq.h>
#include <net/flowadv.h>
#include <sys/kern_event.h>
#include <kern/thread.h>
#include <kern/locks.h>
#ifdef BSD_KERNEL_PRIVATE
/* Operations on timespecs. */
#define net_timerclear(tvp)	(tvp)->tv_sec = (tvp)->tv_nsec = 0

#define net_timerisset(tvp)	((tvp)->tv_sec || (tvp)->tv_nsec)
#define net_timercmp(tvp, uvp, cmp)					\
	(((tvp)->tv_sec == (uvp)->tv_sec) ?				\
	    ((tvp)->tv_nsec cmp (uvp)->tv_nsec) :			\
	    ((tvp)->tv_sec cmp (uvp)->tv_sec))
#define net_timeradd(tvp, uvp, vvp) do {				\
	(vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;			\
	(vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec;		\
	if ((vvp)->tv_nsec >= (long)NSEC_PER_SEC) {			\
		(vvp)->tv_sec++;					\
		(vvp)->tv_nsec -= NSEC_PER_SEC;				\
	}								\
} while (0)
#define net_timersub(tvp, uvp, vvp) do {				\
	(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;			\
	(vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec;		\
	if ((vvp)->tv_nsec < 0) {					\
		(vvp)->tv_sec--;					\
		(vvp)->tv_nsec += NSEC_PER_SEC;				\
	}								\
} while (0)
#define net_timernsec(tvp, nsp) do {					\
	*(nsp) = (tvp)->tv_nsec;					\
	if ((tvp)->tv_sec > 0)						\
		*(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC);		\
} while (0)
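
/*
 * Usage sketch (illustrative only): the macros above keep tv_nsec normalized
 * to [0, NSEC_PER_SEC) when adding or subtracting, and net_timernsec()
 * collapses a timespec back into a nanosecond count.
 *
 *	struct timespec a = { .tv_sec = 1, .tv_nsec = 900000000 };
 *	struct timespec b = { .tv_sec = 0, .tv_nsec = 200000000 };
 *	struct timespec sum;
 *	u_int64_t ns;
 *
 *	net_timeradd(&a, &b, &sum);	// sum = { 2, 100000000 }
 *	net_timernsec(&sum, &ns);	// ns = 2100000000
 *	if (net_timercmp(&a, &b, >))
 *		;			// true: a is the later timespec
 */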
#if defined(__x86_64__) || defined(__arm64__)
#define net_nsectimer(nsp, tvp)	do {					\
	u_int64_t __nsp = *(nsp);					\
	net_timerclear(tvp);						\
	uint64_t __sec = __nsp / NSEC_PER_SEC;				\
	(tvp)->tv_sec = (__darwin_time_t)__sec;				\
	(tvp)->tv_nsec = (long)(__nsp - __sec * NSEC_PER_SEC);		\
} while (0)
#else /* 32 bit */
/*
 * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec
 * because __darwin_time_t is 32 bit on 32-bit platforms. This bound
 * is < 2^61. We get a first approximation to convert into seconds using
 * the following values.
 * a = floor(NSEC / 2^29)
 * inv = floor(2^61 / 10^9)
 *
 * The approximation of seconds is correct or too low by 1 unit.
 * So we fix it by computing the remainder.
 */
#define net_nsectimer(nsp, tvp)	do {					\
	u_int64_t __nsp = *(nsp);					\
	net_timerclear(tvp);						\
	uint32_t __a = (uint32_t)(__nsp >> 29);				\
	const uint32_t __inv = 0x89705F41;				\
	uint32_t __sec = (uint32_t)(((uint64_t)__a * __inv) >> 32);	\
	uint32_t __rem = (uint32_t)(__nsp - __sec * NSEC_PER_SEC);	\
	__sec += ((__rem >= NSEC_PER_SEC) ? 1 : 0);			\
	(tvp)->tv_sec = (__darwin_time_t)__sec;			\
	(tvp)->tv_nsec =						\
	    (long)((__rem >= NSEC_PER_SEC) ? (__rem - NSEC_PER_SEC) : __rem); \
} while (0)
#endif /* 32 bit */
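
/*
 * Sanity sketch for the approximation above (illustrative, not part of this
 * header): __inv = 0x89705F41 = 2305843009 = floor(2^61 / 10^9), so
 * (__a * __inv) >> 32 ~= (NSEC / 2^29) * (2^61 / 10^9) / 2^32 = NSEC / 10^9,
 * underestimating by at most the one unit that the remainder fixup corrects.
 * A round trip through both conversions stays exact:
 *
 *	u_int64_t ns = 3123456789ULL;	// 3.123456789 seconds
 *	struct timespec ts;
 *	u_int64_t back;
 *
 *	net_nsectimer(&ns, &ts);	// ts = { 3, 123456789 }
 *	net_timernsec(&ts, &back);	// back == ns
 */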
#define DLIL_THREADNAME_LEN	32

/*
 * DLIL input thread info
 */
struct dlil_threading_info {
	decl_lck_mtx_data(, input_lck);
	lck_grp_t	*lck_grp;	/* lock group (for lock stats) */
	u_int32_t	input_waiting;	/* DLIL condition of thread */
	u_int32_t	wtot;		/* # of wakeup requests */
	char		input_name[DLIL_THREADNAME_LEN]; /* name storage */
	struct ifnet	*ifp;		/* pointer to interface */
	class_queue_t	rcvq_pkts;	/* queue of pkts */
	struct ifnet_stat_increment_param stats; /* incremental statistics */
	/*
	 * Thread affinity (workloop and DLIL threads).
	 */
	boolean_t	net_affinity;	/* affinity set is available */
	struct thread	*input_thr;	/* input thread */
	struct thread	*wloop_thr;	/* workloop thread */
	struct thread	*poll_thr;	/* poll thread */
	u_int32_t	tag;		/* affinity tag */
	/*
	 * Opportunistic polling.
	 */
	ifnet_model_t	mode;		/* current mode */
	struct pktcntr	tstats;		/* incremental polling statistics */
	struct if_rxpoll_stats pstats;	/* polling statistics */
#define	rxpoll_offreq	pstats.ifi_poll_off_req
#define	rxpoll_offerr	pstats.ifi_poll_off_err
#define	rxpoll_onreq	pstats.ifi_poll_on_req
#define	rxpoll_onerr	pstats.ifi_poll_on_err
#define	rxpoll_wavg	pstats.ifi_poll_wakeups_avg
#define	rxpoll_wlowat	pstats.ifi_poll_wakeups_lowat
#define	rxpoll_whiwat	pstats.ifi_poll_wakeups_hiwat
#define	rxpoll_pavg	pstats.ifi_poll_packets_avg
#define	rxpoll_pmin	pstats.ifi_poll_packets_min
#define	rxpoll_pmax	pstats.ifi_poll_packets_max
#define	rxpoll_plowat	pstats.ifi_poll_packets_lowat
#define	rxpoll_phiwat	pstats.ifi_poll_packets_hiwat
#define	rxpoll_bavg	pstats.ifi_poll_bytes_avg
#define	rxpoll_bmin	pstats.ifi_poll_bytes_min
#define	rxpoll_bmax	pstats.ifi_poll_bytes_max
#define	rxpoll_blowat	pstats.ifi_poll_bytes_lowat
#define	rxpoll_bhiwat	pstats.ifi_poll_bytes_hiwat
#define	rxpoll_plim	pstats.ifi_poll_packets_limit
#define	rxpoll_ival	pstats.ifi_poll_interval_time
	struct pktcntr	sstats;		/* packets and bytes per sampling */
	struct timespec	mode_holdtime;	/* mode holdtime in nsec */
	struct timespec	mode_lasttime;	/* last mode change time in nsec */
	struct timespec	sample_holdtime; /* sampling holdtime in nsec */
	struct timespec	sample_lasttime; /* last sampling time in nsec */
	struct timespec	dbg_lasttime;	/* last debug message time in nsec */
#if IFNET_INPUT_SANITY_CHK
	u_int64_t	input_mbuf_cnt;	/* total # of packets processed */
#endif /* IFNET_INPUT_SANITY_CHK */
	thread_call_t	input_mit_tcall; /* coalescing input processing */
};
/*
 * DLIL input thread info (for main/loopback input thread)
 */
struct dlil_main_threading_info {
	struct dlil_threading_info	inp;
	class_queue_t			lo_rcvq_pkts; /* queue of lo0 pkts */
};
/*
 * The following are shared with kpi_protocol.c so that it may wakeup
 * the input thread to run through packets queued for protocol input.
 */
#define	DLIL_INPUT_RUNNING		0x80000000
#define	DLIL_INPUT_WAITING		0x40000000
#define	DLIL_PROTO_REGISTER		0x20000000
#define	DLIL_PROTO_WAITING		0x10000000
#define	DLIL_INPUT_TERMINATE		0x08000000
#define	DLIL_INPUT_TERMINATE_COMPLETE	0x04000000
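
/*
 * Hypothetical wakeup sketch (illustrative only; the authoritative logic
 * lives in dlil.c and kpi_protocol.c): a caller with queued protocol work
 * would take the thread's lock, mark the condition bits in input_waiting,
 * and wake the thread if it is not already running.
 *
 *	struct dlil_threading_info *inp = dlil_main_input_thread;
 *
 *	lck_mtx_lock(&inp->input_lck);
 *	inp->input_waiting |= DLIL_PROTO_WAITING;
 *	if (!(inp->input_waiting & DLIL_INPUT_RUNNING))
 *		wakeup((caddr_t)&inp->input_waiting);
 *	lck_mtx_unlock(&inp->input_lck);
 */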
/*
 * Flags for dlil_attach_filter()
 */
#define	DLIL_IFF_TSO		0x01	/* Interface filter supports TSO */
#define	DLIL_IFF_INTERNAL	0x02	/* Apple internal -- do not count towards stats */
extern int dlil_verbose;
extern uint32_t hwcksum_dbg;
extern uint32_t hwcksum_tx;
extern uint32_t hwcksum_rx;
extern struct dlil_threading_info *dlil_main_input_thread;
extern void dlil_init(void);

extern errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
extern errno_t ifp_if_output(struct ifnet *, struct mbuf *);
extern void ifp_if_start(struct ifnet *);

extern errno_t dlil_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
/*
 * Send arp internal bypasses the check for IPv4LL.
 */
extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);
/*
 * The following constants are used with the net_thread_mark_apply and
 * net_thread_is_unmarked functions to control the bits in the uu_network_marks
 * field of the uthread structure.
 */
#define	NET_THREAD_HELD_PF	0x1	/* thread is holding PF lock */
#define	NET_THREAD_HELD_DOMAIN	0x2	/* thread is holding domain_proto_mtx */
#define	NET_THREAD_CKREQ_LLADDR	0x4	/* thread reqs MACF check for LLADDR */

/*
 * net_thread_marks_t is a pointer to a phantom structure type used for
 * manipulating the uthread:uu_network_marks field. As an example...
 *
 *   static const u_int32_t bits = NET_THREAD_CKREQ_LLADDR;
 *   struct uthread *uth = get_bsdthread_info(current_thread());
 *
 *   net_thread_marks_t marks = net_thread_marks_push(bits);
 *   VERIFY((uth->uu_network_marks & NET_THREAD_CKREQ_LLADDR) != 0);
 *   net_thread_marks_pop(marks);
 *
 * The net_thread_marks_push() function returns an encoding of the bits
 * that were changed from zero to one in the uu_network_marks field. When
 * the net_thread_marks_pop() function later processes that value, it
 * resets the bits to their previous value.
 *
 * The net_thread_unmarks_push() and net_thread_unmarks_pop() functions
 * are similar to net_thread_marks_push() and net_thread_marks_pop() except
 * they clear the marks bits in the guarded section rather than set them.
 *
 * The net_thread_is_marked() and net_thread_is_unmarked() functions return
 * the subset of the bits that are currently set or cleared (respectively)
 * in the uthread:uu_network_marks field.
 *
 * Finally, the value of the net_thread_marks_none constant is provided for
 * comparing for equality with the value returned when no bits in the marks
 * field are changed by the push.
 *
 * It is not significant that a value of type net_thread_marks_t may
 * compare as equal to the NULL pointer.
 */
struct net_thread_marks;
typedef const struct net_thread_marks *net_thread_marks_t;

extern const net_thread_marks_t net_thread_marks_none;

extern net_thread_marks_t net_thread_marks_push(u_int32_t);
extern net_thread_marks_t net_thread_unmarks_push(u_int32_t);
extern void net_thread_marks_pop(net_thread_marks_t);
extern void net_thread_unmarks_pop(net_thread_marks_t);
extern u_int32_t net_thread_is_marked(u_int32_t);
extern u_int32_t net_thread_is_unmarked(u_int32_t);
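
/*
 * Illustrative sketch (the comment above is the authoritative description):
 * a section of code that holds the PF lock could be bracketed as follows,
 * pairing every push with a pop.
 *
 *	net_thread_marks_t marks;
 *
 *	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
 *	// ... code holding the PF lock; other layers can test for the
 *	// mark with net_thread_is_marked(NET_THREAD_HELD_PF) ...
 *	net_thread_marks_pop(marks);
 *
 * If the mark was already set by an outer caller, the push returns
 * net_thread_marks_none and the pop leaves that bit untouched.
 */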
extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *,
    const struct sockaddr *, int, struct flowadv *);

extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
extern void dlil_input_packet_list_extended(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t);

extern errno_t dlil_resolve_multi(struct ifnet *,
    const struct sockaddr *, struct sockaddr *, size_t);

extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *,
    const struct sockaddr *, const struct sockaddr_dl *,
    const struct sockaddr *, u_int32_t);

extern int dlil_attach_filter(ifnet_t, const struct iff_filter *,
    interface_filter_t *, u_int32_t);
extern void dlil_detach_filter(interface_filter_t);
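
/*
 * Attachment sketch (illustrative; names and values are placeholders): an
 * in-kernel caller would fill in an iff_filter (see
 * <net/kpi_interfacefilter.h>) and pass one of the DLIL_IFF_* flags above.
 *
 *	static errno_t
 *	my_iff_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
 *	    mbuf_t *data, char **frame_ptr)
 *	{
 *		return (0);	// 0 lets the packet continue up the stack
 *	}
 *
 *	struct iff_filter flt = {
 *		.iff_cookie   = NULL,
 *		.iff_name     = "com.example.myfilter",	// hypothetical
 *		.iff_protocol = 0,			// 0 = all protocols
 *		.iff_input    = my_iff_input,
 *	};
 *	interface_filter_t handle;
 *
 *	int err = dlil_attach_filter(ifp, &flt, &handle, DLIL_IFF_TSO);
 *	...
 *	dlil_detach_filter(handle);
 */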
extern void dlil_proto_unplumb_all(ifnet_t);

extern void dlil_post_msg(struct ifnet *, u_int32_t, u_int32_t,
    struct net_event_data *, u_int32_t);

extern void dlil_post_sifflags_msg(struct ifnet *);

extern int dlil_post_complete_msg(struct ifnet *, struct kev_msg *);

extern int dlil_alloc_local_stats(struct ifnet *);
/*
 * dlil_if_acquire is obsolete. Use ifnet_allocate.
 */
extern int dlil_if_acquire(u_int32_t, const void *, size_t, const char *,
    struct ifnet **);
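
/*
 * Migration sketch (illustrative; the hypo_* callbacks are hypothetical
 * driver-supplied functions): new code uses the ifnet_allocate() KPI from
 * <net/kpi_interface.h> instead of dlil_if_acquire().
 *
 *	struct ifnet_init_params init;
 *	ifnet_t ifp;
 *	errno_t err;
 *
 *	bzero(&init, sizeof (init));
 *	init.name      = "hypo";		// hypothetical driver name
 *	init.unit      = 0;
 *	init.family    = IFNET_FAMILY_ETHERNET;
 *	init.type      = IFT_ETHER;
 *	init.output    = hypo_output;
 *	init.demux     = hypo_demux;
 *	init.add_proto = hypo_add_proto;
 *	init.del_proto = hypo_del_proto;
 *
 *	err = ifnet_allocate(&init, &ifp);
 *	if (err == 0)
 *		err = ifnet_attach(ifp, NULL);
 */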
/*
 * dlil_if_release is obsolete. The equivalent is called automatically when
 * an interface is detached.
 */
extern void dlil_if_release(struct ifnet *ifp);

extern errno_t dlil_if_ref(struct ifnet *);
extern errno_t dlil_if_free(struct ifnet *);
extern void dlil_node_present(struct ifnet *, struct sockaddr *, int32_t, int,
    int, u_int8_t [48]);
extern void dlil_node_absent(struct ifnet *, struct sockaddr *);
extern const void *dlil_ifaddr_bytes(const struct sockaddr_dl *, size_t *,
    kauth_cred_t *);
extern void dlil_report_issues(struct ifnet *, u_int8_t[DLIL_MODIDLEN],
    u_int8_t[DLIL_MODARGLEN]);
#define	PROTO_HASH_SLOTS	4

extern int proto_hash_value(u_int32_t);

extern const char *dlil_kev_dl_code_str(u_int32_t);
extern errno_t dlil_rxpoll_set_params(struct ifnet *,
    struct ifnet_poll_params *, boolean_t);
extern errno_t dlil_rxpoll_get_params(struct ifnet *,
    struct ifnet_poll_params *);

extern errno_t dlil_output_handler(struct ifnet *, struct mbuf *);
extern errno_t dlil_input_handler(struct ifnet *, struct mbuf *,
    struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, struct thread *);
/*
 * This is mostly called from the context of the DLIL input thread;
 * because of that there is no need for atomic operations.
 */
__attribute__((always_inline))
static inline void
ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_ibepackets++;
		ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_ibkpackets++;
		ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ivipackets++;
		ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ivopackets++;
		ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_ipvpackets++;
		ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
	}
}
/*
 * This is called from DLIL output, hence multiple threads could end
 * up modifying the statistics. We trade off accuracy for performance
 * by not using atomic operations here.
 */
__attribute__((always_inline))
static inline void
ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_obepackets++;
		ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_obkpackets++;
		ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ovipackets++;
		ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ovopackets++;
		ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_opvpackets++;
		ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
	}
}
#endif /* BSD_KERNEL_PRIVATE */
#endif /* KERNEL_PRIVATE */