2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
31 * A content filter user space agents gets a copy of the data and the data is
32 * also kept in kernel buffer until the user space agents makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
49 * - The current implementation supports all INET/INET6 sockets (i.e. TCP,
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
55 * NECP FILTER CONTROL UNIT
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
66 * 1) The user space filter agent specifies an NECP filter control unit when
67 * in adds its filtering rules to the NECP database.
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may be soon lifted.
85 * THE MESSAGING PROTOCOL
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested to filter.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's a event message "CFM_TYPE_EVENT" or a action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99 * For TCP, flows are per-socket. For UDP and other datagrame protocols, there
100 * could be multiple flows per socket.
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describe the kind of event or action.
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: A INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on a INET/INET6 socket
110 * - CFM_OP_DATA_IN: A span of data is being or received on a INET/INET6 socket
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contains a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64 bits offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123 * CFM_OP_DATA_OUT events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
126 * Note: absolute 64 bits offsets should be large enough for the foreseeable
127 * future. A 64-bits counter will wrap after 468 years at 10 Gbit/sec:
128 * 2E64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
130 * They are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shutdown socket and disallow further data flow
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
161 * Note that peek offsets cannot be smaller than the corresponding pass offset.
162 * Also a peek offsets cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
164 * to set a too small peek value is silently ignored.
167 * PER FLOW "struct cfil_info"
169 * As soon as a INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
181 * - The inject queue for data that passed the filters and that needs
183 * - A content filter specific state in a set of "struct cfil_entry"
186 * CONTENT FILTER STATE "struct cfil_entry"
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to the kernel control socket unit it corresponds to and also has a pointer
193 * to the corresponding "struct content_filter".
195 * For each direction, "struct cfil_entry" maintains the following information:
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
205 * CONTENT FILTER QUEUES
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
215 * The data moves into the three content filter queues according to this
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
224 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
225 * kernel conntrol socket for two reasons:
226 * - The peek offset is less that the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
231 * socket and are waiting for a pass action message fromn the user space
232 * filter agent. An mbuf length must be fully allowed to pass to be removed
233 * from the cfe_pending_q.
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
240 * IMPACT ON FLOW CONTROL
242 * An essential aspect of the content filer subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
245 * The processing overhead of the content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for amount of data pending
255 * in the content filter.
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
265 * The per INET/INET6 socket content filterstate -- "struct cfil_info" -- is
266 * protected by the socket lock.
268 * A INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 lock.
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
279 * Actually "cfe_link" and "cfe_filter" are protected by both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
286 * DATAGRAM SPECIFICS:
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
297 * Each datagram flow is uniquely identified by it's "struct cfil_info" cfi_sock_id.
298 * The highest 32-bits of the cfi_sock_id contains the socket's so_gencnt. This portion
299 * of the cfi_sock_id is used locate the socket during socket lookup. The lowest 32-bits
300 * of the cfi_sock_id contains a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
313 * - Support all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
315 * - Does not support TCP unordered messages
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
339 #include <net/content_filter.h>
340 #include <net/content_filter_crypto.h>
343 #include <netinet/ip.h>
344 #include <netinet/in_pcb.h>
345 #include <netinet/tcp.h>
346 #include <netinet/tcp_var.h>
347 #include <netinet/udp.h>
348 #include <netinet/udp_var.h>
351 #include <libkern/libkern.h>
352 #include <kern/sched_prim.h>
353 #include <kern/task.h>
354 #include <mach/task_info.h>
356 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
357 #define MAX_CONTENT_FILTER 2
359 #define MAX_CONTENT_FILTER 8
362 extern struct inpcbinfo ripcbinfo
;
366 * The structure content_filter represents a user space content filter
367 * It's created and associated with a kernel control socket instance
369 struct content_filter
{
370 kern_ctl_ref cf_kcref
;
374 uint32_t cf_necp_control_unit
;
376 uint32_t cf_sock_count
;
377 TAILQ_HEAD(, cfil_entry
) cf_sock_entries
;
379 cfil_crypto_state_t cf_crypto_state
;
382 #define CFF_ACTIVE 0x01
383 #define CFF_DETACHING 0x02
384 #define CFF_FLOW_CONTROLLED 0x04
386 struct content_filter
**content_filters
= NULL
;
387 uint32_t cfil_active_count
= 0; /* Number of active content filters */
388 uint32_t cfil_sock_attached_count
= 0; /* Number of sockets attachements */
389 uint32_t cfil_sock_udp_attached_count
= 0; /* Number of UDP sockets attachements */
390 uint32_t cfil_sock_attached_stats_count
= 0; /* Number of sockets requested periodic stats report */
391 uint32_t cfil_close_wait_timeout
= 1000; /* in milliseconds */
393 static kern_ctl_ref cfil_kctlref
= NULL
;
395 static lck_grp_attr_t
*cfil_lck_grp_attr
= NULL
;
396 static lck_attr_t
*cfil_lck_attr
= NULL
;
397 static lck_grp_t
*cfil_lck_grp
= NULL
;
398 decl_lck_rw_data(static, cfil_lck_rw
);
400 #define CFIL_RW_LCK_MAX 8
402 int cfil_rw_nxt_lck
= 0;
403 void* cfil_rw_lock_history
[CFIL_RW_LCK_MAX
];
405 int cfil_rw_nxt_unlck
= 0;
406 void* cfil_rw_unlock_history
[CFIL_RW_LCK_MAX
];
408 #define CONTENT_FILTER_ZONE_NAME "content_filter"
409 #define CONTENT_FILTER_ZONE_MAX 10
410 static struct zone
*content_filter_zone
= NULL
; /* zone for content_filter */
413 #define CFIL_INFO_ZONE_NAME "cfil_info"
414 #define CFIL_INFO_ZONE_MAX 1024
415 static struct zone
*cfil_info_zone
= NULL
; /* zone for cfil_info */
417 MBUFQ_HEAD(cfil_mqhead
);
420 uint64_t q_start
; /* offset of first byte in queue */
421 uint64_t q_end
; /* offset of last byte in queue */
422 struct cfil_mqhead q_mq
;
428 * The is one entry per content filter
431 TAILQ_ENTRY(cfil_entry
) cfe_link
;
432 SLIST_ENTRY(cfil_entry
) cfe_order_link
;
433 struct content_filter
*cfe_filter
;
435 struct cfil_info
*cfe_cfil_info
;
437 uint32_t cfe_necp_control_unit
;
438 struct timeval cfe_last_event
; /* To user space */
439 struct timeval cfe_last_action
; /* From user space */
440 uint64_t cfe_byte_inbound_count_reported
; /* stats already been reported */
441 uint64_t cfe_byte_outbound_count_reported
; /* stats already been reported */
442 struct timeval cfe_stats_report_ts
; /* Timestamp for last stats report */
443 uint32_t cfe_stats_report_frequency
; /* Interval for stats report in msecs */
444 boolean_t cfe_laddr_sent
;
448 * cfe_pending_q holds data that has been delivered to
449 * the filter and for which we are waiting for an action
451 struct cfil_queue cfe_pending_q
;
453 * This queue is for data that has not be delivered to
454 * the content filter (new data, pass peek or flow control)
456 struct cfil_queue cfe_ctl_q
;
458 uint64_t cfe_pass_offset
;
459 uint64_t cfe_peek_offset
;
464 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
465 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
466 #define CFEF_DATA_START 0x0004 /* can send data event */
467 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
468 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
469 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
470 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
471 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
474 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
475 struct timeval _tdiff; \
476 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
477 timersub(t1, t0, &_tdiff); \
478 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
479 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
480 (cfil)->cfi_op_list_ctr ++; \
483 struct cfil_hash_entry
;
488 * There is a struct cfil_info per socket
491 TAILQ_ENTRY(cfil_info
) cfi_link
;
492 TAILQ_ENTRY(cfil_info
) cfi_link_stats
;
493 struct socket
*cfi_so
;
495 uint64_t cfi_sock_id
;
496 struct timeval64 cfi_first_event
;
497 uint32_t cfi_op_list_ctr
;
498 uint32_t cfi_op_time
[CFI_MAX_TIME_LOG_ENTRY
]; /* time interval in microseconds since first event */
499 unsigned char cfi_op_list
[CFI_MAX_TIME_LOG_ENTRY
];
500 union sockaddr_in_4_6 cfi_so_attach_faddr
; /* faddr at the time of attach */
501 union sockaddr_in_4_6 cfi_so_attach_laddr
; /* laddr at the time of attach */
504 uint64_t cfi_byte_inbound_count
;
505 uint64_t cfi_byte_outbound_count
;
507 boolean_t cfi_isSignatureLatest
; /* Indicates if signature covers latest flow attributes */
511 * cfi_pending_first and cfi_pending_last describe the total
512 * amount of data outstanding for all the filters on
513 * this socket and data in the flow queue
514 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
516 uint64_t cfi_pending_first
;
517 uint64_t cfi_pending_last
;
518 uint32_t cfi_pending_mbcnt
;
519 uint32_t cfi_pending_mbnum
;
520 uint32_t cfi_tail_drop_cnt
;
522 * cfi_pass_offset is the minimum of all the filters
524 uint64_t cfi_pass_offset
;
526 * cfi_inject_q holds data that needs to be re-injected
527 * into the socket after filtering and that can
528 * be queued because of flow control
530 struct cfil_queue cfi_inject_q
;
533 struct cfil_entry cfi_entries
[MAX_CONTENT_FILTER
];
534 struct cfil_hash_entry
*cfi_hash_entry
;
535 SLIST_HEAD(, cfil_entry
) cfi_ordered_entries
;
536 } __attribute__((aligned(8)));
538 #define CFIF_DROP 0x0001 /* drop action applied */
539 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
540 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
541 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
542 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
543 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
544 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
545 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
546 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
548 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
549 #define CFI_SHIFT_GENCNT 32
550 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
551 #define CFI_SHIFT_FLOWHASH 0
553 #define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
555 TAILQ_HEAD(cfil_sock_head
, cfil_info
) cfil_sock_head
;
556 TAILQ_HEAD(cfil_sock_head_stats
, cfil_info
) cfil_sock_head_stats
;
558 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
559 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
564 LIST_HEAD(cfilhashhead
, cfil_hash_entry
);
565 #define CFILHASHSIZE 16
566 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
568 #define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
569 #define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
570 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
571 #define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
572 (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
573 #define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
575 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
576 #define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
578 #define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
581 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
582 #define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
583 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
585 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
586 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
587 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
588 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
589 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
590 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
591 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
592 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
593 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
596 * Periodic Statistics Report:
598 static struct thread
*cfil_stats_report_thread
;
599 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
600 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
601 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
603 /* This buffer must have same layout as struct cfil_msg_stats_report */
604 struct cfil_stats_report_buffer
{
605 struct cfil_msg_hdr msghdr
;
607 struct cfil_msg_sock_stats stats
[CFIL_STATS_REPORT_MAX_COUNT
];
609 static struct cfil_stats_report_buffer
*global_cfil_stats_report_buffers
[MAX_CONTENT_FILTER
];
610 static uint32_t global_cfil_stats_counts
[MAX_CONTENT_FILTER
];
613 * UDP Garbage Collection:
615 static struct thread
*cfil_udp_gc_thread
;
616 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
617 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
618 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
619 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
622 * UDP flow queue thresholds
624 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
625 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
626 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
628 * UDP flow queue threshold globals:
630 static unsigned int cfil_udp_gc_mbuf_num_max
= UDP_FLOW_GC_MBUF_NUM_MAX
;
631 static unsigned int cfil_udp_gc_mbuf_cnt_max
= UDP_FLOW_GC_MBUF_CNT_MAX
;
634 * struct cfil_hash_entry
636 * Hash entry for cfil_info
638 struct cfil_hash_entry
{
639 LIST_ENTRY(cfil_hash_entry
) cfentry_link
;
640 struct cfil_info
*cfentry_cfil
;
641 u_short cfentry_fport
;
642 u_short cfentry_lport
;
643 sa_family_t cfentry_family
;
644 u_int32_t cfentry_flowhash
;
645 u_int64_t cfentry_lastused
;
647 /* foreign host table entry */
648 struct in_addr_4in6 addr46
;
649 struct in6_addr addr6
;
652 /* local host table entry */
653 struct in_addr_4in6 addr46
;
654 struct in6_addr addr6
;
661 * For each UDP socket, this is a hash table maintaining all cfil_info structs
662 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
665 struct socket
*cfdb_so
;
666 uint32_t cfdb_count
; /* Number of total content filters */
667 struct cfilhashhead
*cfdb_hashbase
;
668 u_long cfdb_hashmask
;
669 struct cfil_hash_entry
*cfdb_only_entry
; /* Optimization for connected UDP */
673 * CFIL specific mbuf tag:
674 * Save state of socket at the point of data entry into cfil.
675 * Use saved state for reinjection at protocol layer.
678 union sockaddr_in_4_6 cfil_faddr
;
679 uint32_t cfil_so_state_change_cnt
;
680 short cfil_so_options
;
684 #define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
685 #define CFIL_HASH_ENTRY_ZONE_MAX 1024
686 static struct zone
*cfil_hash_entry_zone
= NULL
;
688 #define CFIL_DB_ZONE_NAME "cfil_db"
689 #define CFIL_DB_ZONE_MAX 1024
690 static struct zone
*cfil_db_zone
= NULL
;
696 struct cfil_stats cfil_stats
;
699 * For troubleshooting
701 int cfil_log_level
= LOG_ERR
;
704 // Debug controls added for selective debugging.
705 // Disabled for production. If enabled,
706 // these will have performance impact
707 #define LIFECYCLE_DEBUG 0
708 #define VERDICT_DEBUG 0
712 #define STATS_DEBUG 0
715 * Sysctls for logs and statistics
717 static int sysctl_cfil_filter_list(struct sysctl_oid
*, void *, int,
718 struct sysctl_req
*);
719 static int sysctl_cfil_sock_list(struct sysctl_oid
*, void *, int,
720 struct sysctl_req
*);
722 SYSCTL_NODE(_net
, OID_AUTO
, cfil
, CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "cfil");
724 SYSCTL_INT(_net_cfil
, OID_AUTO
, log
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
725 &cfil_log_level
, 0, "");
727 SYSCTL_INT(_net_cfil
, OID_AUTO
, debug
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
730 SYSCTL_UINT(_net_cfil
, OID_AUTO
, sock_attached_count
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
731 &cfil_sock_attached_count
, 0, "");
733 SYSCTL_UINT(_net_cfil
, OID_AUTO
, active_count
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
734 &cfil_active_count
, 0, "");
736 SYSCTL_UINT(_net_cfil
, OID_AUTO
, close_wait_timeout
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
737 &cfil_close_wait_timeout
, 0, "");
739 static int cfil_sbtrim
= 1;
740 SYSCTL_UINT(_net_cfil
, OID_AUTO
, sbtrim
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
741 &cfil_sbtrim
, 0, "");
743 SYSCTL_PROC(_net_cfil
, OID_AUTO
, filter_list
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
744 0, 0, sysctl_cfil_filter_list
, "S,cfil_filter_stat", "");
746 SYSCTL_PROC(_net_cfil
, OID_AUTO
, sock_list
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
747 0, 0, sysctl_cfil_sock_list
, "S,cfil_sock_stat", "");
749 SYSCTL_STRUCT(_net_cfil
, OID_AUTO
, stats
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
750 &cfil_stats
, cfil_stats
, "");
753 * Forward declaration to appease the compiler
755 static int cfil_action_data_pass(struct socket
*, struct cfil_info
*, uint32_t, int,
757 static int cfil_action_drop(struct socket
*, struct cfil_info
*, uint32_t);
758 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr
*);
759 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr
*);
760 static int cfil_dispatch_closed_event(struct socket
*, struct cfil_info
*, int);
761 static int cfil_data_common(struct socket
*, struct cfil_info
*, int, struct sockaddr
*,
762 struct mbuf
*, struct mbuf
*, uint32_t);
763 static int cfil_data_filter(struct socket
*, struct cfil_info
*, uint32_t, int,
764 struct mbuf
*, uint64_t);
765 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6
*,
766 struct in_addr
, u_int16_t
);
767 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6
*,
768 struct in6_addr
*, u_int16_t
);
770 static int cfil_dispatch_attach_event(struct socket
*, struct cfil_info
*, uint32_t, int);
771 static void cfil_info_free(struct cfil_info
*);
772 static struct cfil_info
* cfil_info_alloc(struct socket
*, struct cfil_hash_entry
*);
773 static int cfil_info_attach_unit(struct socket
*, uint32_t, struct cfil_info
*);
774 static struct socket
* cfil_socket_from_sock_id(cfil_sock_id_t
, bool);
775 static struct socket
* cfil_socket_from_client_uuid(uuid_t
, bool *);
776 static int cfil_service_pending_queue(struct socket
*, struct cfil_info
*, uint32_t, int);
777 static int cfil_data_service_ctl_q(struct socket
*, struct cfil_info
*, uint32_t, int);
778 static void cfil_info_verify(struct cfil_info
*);
779 static int cfil_update_data_offsets(struct socket
*, struct cfil_info
*, uint32_t, int,
781 static int cfil_acquire_sockbuf(struct socket
*, struct cfil_info
*, int);
782 static void cfil_release_sockbuf(struct socket
*, int);
783 static int cfil_filters_attached(struct socket
*);
785 static void cfil_rw_lock_exclusive(lck_rw_t
*);
786 static void cfil_rw_unlock_exclusive(lck_rw_t
*);
787 static void cfil_rw_lock_shared(lck_rw_t
*);
788 static void cfil_rw_unlock_shared(lck_rw_t
*);
789 static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t
*);
790 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t
*);
792 static unsigned int cfil_data_length(struct mbuf
*, int *, int *);
793 static errno_t
cfil_db_init(struct socket
*);
794 static void cfil_db_free(struct socket
*so
);
795 struct cfil_hash_entry
*cfil_db_lookup_entry(struct cfil_db
*, struct sockaddr
*, struct sockaddr
*, boolean_t
);
796 struct cfil_hash_entry
*cfil_db_lookup_entry_with_sockid(struct cfil_db
*, u_int64_t
);
797 struct cfil_hash_entry
*cfil_db_add_entry(struct cfil_db
*, struct sockaddr
*, struct sockaddr
*);
798 void cfil_db_update_entry_local(struct cfil_db
*, struct cfil_hash_entry
*, struct sockaddr
*);
799 void cfil_db_delete_entry(struct cfil_db
*, struct cfil_hash_entry
*);
800 struct cfil_hash_entry
*cfil_sock_udp_get_flow(struct socket
*, uint32_t, bool, struct sockaddr
*, struct sockaddr
*, int);
801 struct cfil_info
*cfil_db_get_cfil_info(struct cfil_db
*, cfil_sock_id_t
);
802 static errno_t
cfil_sock_udp_handle_data(bool, struct socket
*, struct sockaddr
*, struct sockaddr
*,
803 struct mbuf
*, struct mbuf
*, uint32_t);
804 static int32_t cfil_sock_udp_data_pending(struct sockbuf
*, bool);
805 static void cfil_sock_udp_is_closed(struct socket
*);
806 static int cfil_sock_udp_notify_shutdown(struct socket
*, int, int, int);
807 static int cfil_sock_udp_shutdown(struct socket
*, int *);
808 static void cfil_sock_udp_close_wait(struct socket
*);
809 static void cfil_sock_udp_buf_update(struct sockbuf
*);
810 static int cfil_filters_udp_attached(struct socket
*, bool);
811 static void cfil_get_flow_address_v6(struct cfil_hash_entry
*, struct inpcb
*,
812 struct in6_addr
**, struct in6_addr
**,
813 u_int16_t
*, u_int16_t
*);
814 static void cfil_get_flow_address(struct cfil_hash_entry
*, struct inpcb
*,
815 struct in_addr
*, struct in_addr
*,
816 u_int16_t
*, u_int16_t
*);
817 static void cfil_info_log(int, struct cfil_info
*, const char *);
818 void cfil_filter_show(u_int32_t
);
819 void cfil_info_show(void);
820 bool cfil_info_idle_timed_out(struct cfil_info
*, int, u_int32_t
);
821 bool cfil_info_action_timed_out(struct cfil_info
*, int);
822 bool cfil_info_buffer_threshold_exceeded(struct cfil_info
*);
823 struct m_tag
*cfil_dgram_save_socket_state(struct cfil_info
*, struct mbuf
*);
824 boolean_t
cfil_dgram_peek_socket_state(struct mbuf
*m
, int *inp_flags
);
825 static void cfil_udp_gc_thread_func(void *, wait_result_t
);
826 static void cfil_info_udp_expire(void *, wait_result_t
);
827 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry
*, bool, struct sockaddr
*);
828 static void cfil_sock_received_verdict(struct socket
*so
);
829 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry
*, struct inpcb
*,
830 union sockaddr_in_4_6
*, union sockaddr_in_4_6
*,
831 boolean_t
, boolean_t
);
832 static void cfil_stats_report_thread_func(void *, wait_result_t
);
833 static void cfil_stats_report(void *v
, wait_result_t w
);
835 bool check_port(struct sockaddr
*, u_short
);
838 * Content filter global read write lock
842 cfil_rw_lock_exclusive(lck_rw_t
*lck
)
846 lr_saved
= __builtin_return_address(0);
848 lck_rw_lock_exclusive(lck
);
850 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
851 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
855 cfil_rw_unlock_exclusive(lck_rw_t
*lck
)
859 lr_saved
= __builtin_return_address(0);
861 lck_rw_unlock_exclusive(lck
);
863 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
864 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
868 cfil_rw_lock_shared(lck_rw_t
*lck
)
872 lr_saved
= __builtin_return_address(0);
874 lck_rw_lock_shared(lck
);
876 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
877 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
881 cfil_rw_unlock_shared(lck_rw_t
*lck
)
885 lr_saved
= __builtin_return_address(0);
887 lck_rw_unlock_shared(lck
);
889 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
890 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
894 cfil_rw_lock_shared_to_exclusive(lck_rw_t
*lck
)
899 lr_saved
= __builtin_return_address(0);
901 upgraded
= lck_rw_lock_shared_to_exclusive(lck
);
903 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
904 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
910 cfil_rw_lock_exclusive_to_shared(lck_rw_t
*lck
)
914 lr_saved
= __builtin_return_address(0);
916 lck_rw_lock_exclusive_to_shared(lck
);
918 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
919 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
923 cfil_rw_lock_assert_held(lck_rw_t
*lck
, int exclusive
)
926 #pragma unused(lck, exclusive)
929 exclusive
? LCK_RW_ASSERT_EXCLUSIVE
: LCK_RW_ASSERT_HELD
);
933 * Return the number of bytes in the mbuf chain using the same
934 * method as m_length() or sballoc()
936 * Returns data len - starting from PKT start
937 * - retmbcnt - optional param to get total mbuf bytes in chain
938 * - retmbnum - optional param to get number of mbufs in chain
941 cfil_data_length(struct mbuf
*m
, int *retmbcnt
, int *retmbnum
)
944 unsigned int pktlen
= 0;
948 // Locate the start of data
949 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
950 if (m0
->m_flags
& M_PKTHDR
) {
955 CFIL_LOG(LOG_ERR
, "cfil_data_length: no M_PKTHDR");
960 if (retmbcnt
== NULL
&& retmbnum
== NULL
) {
967 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
971 if (m0
->m_flags
& M_EXT
) {
972 mbcnt
+= m0
->m_ext
.ext_size
;
985 cfil_data_start(struct mbuf
*m
)
989 // Locate the start of data
990 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
991 if (m0
->m_flags
& M_PKTHDR
) {
999 * Common mbuf queue utilities
1003 cfil_queue_init(struct cfil_queue
*cfq
)
1007 MBUFQ_INIT(&cfq
->q_mq
);
1010 static inline uint64_t
1011 cfil_queue_drain(struct cfil_queue
*cfq
)
1013 uint64_t drained
= cfq
->q_start
- cfq
->q_end
;
1016 MBUFQ_DRAIN(&cfq
->q_mq
);
1021 /* Return 1 when empty, 0 otherwise */
1023 cfil_queue_empty(struct cfil_queue
*cfq
)
1025 return MBUFQ_EMPTY(&cfq
->q_mq
);
1028 static inline uint64_t
1029 cfil_queue_offset_first(struct cfil_queue
*cfq
)
1031 return cfq
->q_start
;
1034 static inline uint64_t
1035 cfil_queue_offset_last(struct cfil_queue
*cfq
)
1040 static inline uint64_t
1041 cfil_queue_len(struct cfil_queue
*cfq
)
1043 return cfq
->q_end
- cfq
->q_start
;
1047 * Routines to verify some fundamental assumptions
1051 cfil_queue_verify(struct cfil_queue
*cfq
)
1056 uint64_t queuesize
= 0;
1058 /* Verify offset are ordered */
1059 VERIFY(cfq
->q_start
<= cfq
->q_end
);
1062 * When queue is empty, the offsets are equal otherwise the offsets
1065 VERIFY((MBUFQ_EMPTY(&cfq
->q_mq
) && cfq
->q_start
== cfq
->q_end
) ||
1066 (!MBUFQ_EMPTY(&cfq
->q_mq
) &&
1067 cfq
->q_start
!= cfq
->q_end
));
1069 MBUFQ_FOREACH(chain
, &cfq
->q_mq
) {
1070 size_t chainsize
= 0;
1072 unsigned int mlen
= cfil_data_length(m
, NULL
, NULL
);
1073 // skip the addr and control stuff if present
1074 m
= cfil_data_start(m
);
1077 m
== (void *)M_TAG_FREE_PATTERN
||
1078 m
->m_next
== (void *)M_TAG_FREE_PATTERN
||
1079 m
->m_nextpkt
== (void *)M_TAG_FREE_PATTERN
) {
1080 panic("%s - mq %p is free at %p", __func__
,
1083 for (n
= m
; n
!= NULL
; n
= n
->m_next
) {
1084 if (n
->m_type
!= MT_DATA
&&
1085 n
->m_type
!= MT_HEADER
&&
1086 n
->m_type
!= MT_OOBDATA
) {
1087 panic("%s - %p unsupported type %u", __func__
,
1090 chainsize
+= n
->m_len
;
1092 if (mlen
!= chainsize
) {
1093 panic("%s - %p m_length() %u != chainsize %lu",
1094 __func__
, m
, mlen
, chainsize
);
1096 queuesize
+= chainsize
;
1098 if (queuesize
!= cfq
->q_end
- cfq
->q_start
) {
1099 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__
,
1100 m
, queuesize
, cfq
->q_end
- cfq
->q_start
);
1105 cfil_queue_enqueue(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
1107 CFIL_QUEUE_VERIFY(cfq
);
1109 MBUFQ_ENQUEUE(&cfq
->q_mq
, m
);
1112 CFIL_QUEUE_VERIFY(cfq
);
1116 cfil_queue_remove(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
1118 CFIL_QUEUE_VERIFY(cfq
);
1120 VERIFY(cfil_data_length(m
, NULL
, NULL
) == len
);
1122 MBUFQ_REMOVE(&cfq
->q_mq
, m
);
1123 MBUFQ_NEXT(m
) = NULL
;
1124 cfq
->q_start
+= len
;
1126 CFIL_QUEUE_VERIFY(cfq
);
1130 cfil_queue_first(struct cfil_queue
*cfq
)
1132 return MBUFQ_FIRST(&cfq
->q_mq
);
1136 cfil_queue_next(struct cfil_queue
*cfq
, mbuf_t m
)
1139 return MBUFQ_NEXT(m
);
1143 cfil_entry_buf_verify(struct cfe_buf
*cfe_buf
)
1145 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_ctl_q
);
1146 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_pending_q
);
1148 /* Verify the queues are ordered so that pending is before ctl */
1149 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
>= cfe_buf
->cfe_pending_q
.q_end
);
1151 /* The peek offset cannot be less than the pass offset */
1152 VERIFY(cfe_buf
->cfe_peek_offset
>= cfe_buf
->cfe_pass_offset
);
1154 /* Make sure we've updated the offset we peeked at */
1155 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
<= cfe_buf
->cfe_peeked
);
1159 cfil_entry_verify(struct cfil_entry
*entry
)
1161 cfil_entry_buf_verify(&entry
->cfe_snd
);
1162 cfil_entry_buf_verify(&entry
->cfe_rcv
);
1166 cfil_info_buf_verify(struct cfi_buf
*cfi_buf
)
1168 CFIL_QUEUE_VERIFY(&cfi_buf
->cfi_inject_q
);
1170 VERIFY(cfi_buf
->cfi_pending_first
<= cfi_buf
->cfi_pending_last
);
1174 cfil_info_verify(struct cfil_info
*cfil_info
)
1178 if (cfil_info
== NULL
) {
1182 cfil_info_buf_verify(&cfil_info
->cfi_snd
);
1183 cfil_info_buf_verify(&cfil_info
->cfi_rcv
);
1185 for (i
= 0; i
< MAX_CONTENT_FILTER
; i
++) {
1186 cfil_entry_verify(&cfil_info
->cfi_entries
[i
]);
1191 verify_content_filter(struct content_filter
*cfc
)
1193 struct cfil_entry
*entry
;
1196 VERIFY(cfc
->cf_sock_count
>= 0);
1198 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1200 VERIFY(cfc
== entry
->cfe_filter
);
1202 VERIFY(count
== cfc
->cf_sock_count
);
1206 * Kernel control socket callbacks
1209 cfil_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
1213 struct content_filter
*cfc
= NULL
;
1215 CFIL_LOG(LOG_NOTICE
, "");
1217 cfc
= zalloc(content_filter_zone
);
1219 CFIL_LOG(LOG_ERR
, "zalloc failed");
1223 bzero(cfc
, sizeof(struct content_filter
));
1225 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1226 if (content_filters
== NULL
) {
1227 struct content_filter
**tmp
;
1229 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1232 struct content_filter
**,
1233 MAX_CONTENT_FILTER
* sizeof(struct content_filter
*),
1237 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1239 if (tmp
== NULL
&& content_filters
== NULL
) {
1241 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1244 /* Another thread may have won the race */
1245 if (content_filters
!= NULL
) {
1248 content_filters
= tmp
;
1252 if (sac
->sc_unit
== 0 || sac
->sc_unit
> MAX_CONTENT_FILTER
) {
1253 CFIL_LOG(LOG_ERR
, "bad sc_unit %u", sac
->sc_unit
);
1255 } else if (content_filters
[sac
->sc_unit
- 1] != NULL
) {
1256 CFIL_LOG(LOG_ERR
, "sc_unit %u in use", sac
->sc_unit
);
1260 * kernel control socket kcunit numbers start at 1
1262 content_filters
[sac
->sc_unit
- 1] = cfc
;
1264 cfc
->cf_kcref
= kctlref
;
1265 cfc
->cf_kcunit
= sac
->sc_unit
;
1266 TAILQ_INIT(&cfc
->cf_sock_entries
);
1269 cfil_active_count
++;
1271 // Allocate periodic stats buffer for this filter
1272 if (global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] == NULL
) {
1273 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1275 struct cfil_stats_report_buffer
*buf
;
1278 struct cfil_stats_report_buffer
*,
1279 sizeof(struct cfil_stats_report_buffer
),
1283 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1287 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1291 /* Another thread may have won the race */
1292 if (global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] != NULL
) {
1295 global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] = buf
;
1299 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1301 if (error
!= 0 && cfc
!= NULL
) {
1302 zfree(content_filter_zone
, cfc
);
1306 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_ok
);
1308 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_fail
);
1311 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1312 error
, cfil_active_count
, sac
->sc_unit
);
1318 cfil_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
)
1320 #pragma unused(kctlref)
1322 struct content_filter
*cfc
;
1323 struct cfil_entry
*entry
;
1324 uint64_t sock_flow_id
= 0;
1326 CFIL_LOG(LOG_NOTICE
, "");
1328 if (content_filters
== NULL
) {
1329 CFIL_LOG(LOG_ERR
, "no content filter");
1333 if (kcunit
> MAX_CONTENT_FILTER
) {
1334 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1335 kcunit
, MAX_CONTENT_FILTER
);
1340 cfc
= (struct content_filter
*)unitinfo
;
1345 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1346 if (content_filters
[kcunit
- 1] != cfc
|| cfc
->cf_kcunit
!= kcunit
) {
1347 CFIL_LOG(LOG_ERR
, "bad unit info %u)",
1349 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1352 cfc
->cf_flags
|= CFF_DETACHING
;
1354 * Remove all sockets from the filter
1356 while ((entry
= TAILQ_FIRST(&cfc
->cf_sock_entries
)) != NULL
) {
1357 cfil_rw_lock_assert_held(&cfil_lck_rw
, 1);
1359 verify_content_filter(cfc
);
1361 * Accept all outstanding data by pushing to next filter
1364 * TBD: Actually we should make sure all data has been pushed
1367 if (entry
->cfe_cfil_info
&& entry
->cfe_cfil_info
->cfi_so
) {
1368 struct cfil_info
*cfil_info
= entry
->cfe_cfil_info
;
1369 struct socket
*so
= cfil_info
->cfi_so
;
1370 sock_flow_id
= cfil_info
->cfi_sock_id
;
1372 /* Need to let data flow immediately */
1373 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
|
1377 * Respect locking hierarchy
1379 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1384 * When cfe_filter is NULL the filter is detached
1385 * and the entry has been removed from cf_sock_entries
1387 if ((so
->so_cfil
== NULL
&& so
->so_cfil_db
== NULL
) || entry
->cfe_filter
== NULL
) {
1388 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1392 (void) cfil_action_data_pass(so
, cfil_info
, kcunit
, 1,
1396 (void) cfil_action_data_pass(so
, cfil_info
, kcunit
, 0,
1400 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1403 * Check again to make sure if the cfil_info is still valid
1404 * as the socket may have been unlocked when when calling
1405 * cfil_acquire_sockbuf()
1407 if (entry
->cfe_filter
== NULL
||
1408 (so
->so_cfil
== NULL
&& cfil_db_get_cfil_info(so
->so_cfil_db
, sock_flow_id
) == NULL
)) {
1412 /* The filter is now detached */
1413 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
1415 cfil_info_log(LOG_DEBUG
, cfil_info
, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1417 CFIL_LOG(LOG_NOTICE
, "so %llx detached %u",
1418 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1419 if ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
1420 cfil_filters_attached(so
) == 0) {
1421 CFIL_LOG(LOG_NOTICE
, "so %llx waking",
1422 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1423 wakeup((caddr_t
)cfil_info
);
1427 * Remove the filter entry from the content filter
1428 * but leave the rest of the state intact as the queues
1429 * may not be empty yet
1431 entry
->cfe_filter
= NULL
;
1432 entry
->cfe_necp_control_unit
= 0;
1434 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1435 cfc
->cf_sock_count
--;
1437 socket_unlock(so
, 1);
1440 verify_content_filter(cfc
);
1442 /* Free the stats buffer for this filter */
1443 if (global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] != NULL
) {
1444 FREE(global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1], M_TEMP
);
1445 global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] = NULL
;
1447 VERIFY(cfc
->cf_sock_count
== 0);
1450 * Make filter inactive
1452 content_filters
[kcunit
- 1] = NULL
;
1453 cfil_active_count
--;
1454 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1456 if (cfc
->cf_crypto_state
!= NULL
) {
1457 cfil_crypto_cleanup_state(cfc
->cf_crypto_state
);
1458 cfc
->cf_crypto_state
= NULL
;
1461 zfree(content_filter_zone
, cfc
);
1464 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_ok
);
1466 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_fail
);
1469 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1470 error
, cfil_active_count
, kcunit
);
1476 * cfil_acquire_sockbuf()
1478 * Prevent any other thread from acquiring the sockbuf
1479 * We use sb_cfil_thread as a semaphore to prevent other threads from
1480 * messing with the sockbuf -- see sblock()
1481 * Note: We do not set SB_LOCK here because the thread may check or modify
1482 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1483 * sblock(), sbunlock() or sodefunct()
1486 cfil_acquire_sockbuf(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
1488 thread_t tp
= current_thread();
1489 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1490 lck_mtx_t
*mutex_held
;
1494 * Wait until no thread is holding the sockbuf and other content
1495 * filter threads have released the sockbuf
1497 while ((sb
->sb_flags
& SB_LOCK
) ||
1498 (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)) {
1499 if (so
->so_proto
->pr_getlock
!= NULL
) {
1500 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1502 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1505 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1508 VERIFY(sb
->sb_wantlock
!= 0);
1510 msleep(&sb
->sb_flags
, mutex_held
, PSOCK
, "cfil_acquire_sockbuf",
1513 VERIFY(sb
->sb_wantlock
!= 0);
1517 * Use reference count for repetitive calls on same thread
1519 if (sb
->sb_cfil_refs
== 0) {
1520 VERIFY(sb
->sb_cfil_thread
== NULL
);
1521 VERIFY((sb
->sb_flags
& SB_LOCK
) == 0);
1523 sb
->sb_cfil_thread
= tp
;
1524 sb
->sb_flags
|= SB_LOCK
;
1528 /* We acquire the socket buffer when we need to cleanup */
1529 if (cfil_info
== NULL
) {
1530 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
1531 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1533 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1534 CFIL_LOG(LOG_ERR
, "so %llx drop set",
1535 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1543 cfil_release_sockbuf(struct socket
*so
, int outgoing
)
1545 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1546 thread_t tp
= current_thread();
1548 socket_lock_assert_owned(so
);
1550 if (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
) {
1551 panic("%s sb_cfil_thread %p not current %p", __func__
,
1552 sb
->sb_cfil_thread
, tp
);
1555 * Don't panic if we are defunct because SB_LOCK has
1556 * been cleared by sodefunct()
1558 if (!(so
->so_flags
& SOF_DEFUNCT
) && !(sb
->sb_flags
& SB_LOCK
)) {
1559 panic("%s SB_LOCK not set on %p", __func__
,
1563 * We can unlock when the thread unwinds to the last reference
1566 if (sb
->sb_cfil_refs
== 0) {
1567 sb
->sb_cfil_thread
= NULL
;
1568 sb
->sb_flags
&= ~SB_LOCK
;
1570 if (sb
->sb_wantlock
> 0) {
1571 wakeup(&sb
->sb_flags
);
1577 cfil_sock_id_from_socket(struct socket
*so
)
1579 if ((so
->so_flags
& SOF_CONTENT_FILTER
) && so
->so_cfil
) {
1580 return so
->so_cfil
->cfi_sock_id
;
1582 return CFIL_SOCK_ID_NONE
;
1587 cfil_socket_safe_lock(struct inpcb
*inp
)
1589 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1590 socket_lock(inp
->inp_socket
, 1);
1591 if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) != WNT_STOPUSING
) {
1594 socket_unlock(inp
->inp_socket
, 1);
1599 static struct socket
*
1600 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id
, bool udp_only
)
1602 struct socket
*so
= NULL
;
1603 u_int64_t gencnt
= cfil_sock_id
>> 32;
1604 u_int32_t flowhash
= (u_int32_t
)(cfil_sock_id
& 0x0ffffffff);
1605 struct inpcb
*inp
= NULL
;
1606 struct inpcbinfo
*pcbinfo
= NULL
;
1609 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id
, gencnt
, flowhash
);
1617 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1618 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1619 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1620 inp
->inp_socket
!= NULL
&&
1621 inp
->inp_flowhash
== flowhash
&&
1622 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
&&
1623 inp
->inp_socket
->so_cfil
!= NULL
) {
1624 if (cfil_socket_safe_lock(inp
)) {
1625 so
= inp
->inp_socket
;
1630 lck_rw_done(pcbinfo
->ipi_lock
);
1638 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1639 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1640 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1641 inp
->inp_socket
!= NULL
&&
1642 inp
->inp_socket
->so_cfil_db
!= NULL
&&
1643 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
) {
1644 if (cfil_socket_safe_lock(inp
)) {
1645 so
= inp
->inp_socket
;
1650 lck_rw_done(pcbinfo
->ipi_lock
);
1652 pcbinfo
= &ripcbinfo
;
1653 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1654 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1655 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1656 inp
->inp_socket
!= NULL
&&
1657 inp
->inp_socket
->so_cfil_db
!= NULL
&&
1658 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
) {
1659 if (cfil_socket_safe_lock(inp
)) {
1660 so
= inp
->inp_socket
;
1665 lck_rw_done(pcbinfo
->ipi_lock
);
1669 OSIncrementAtomic(&cfil_stats
.cfs_sock_id_not_found
);
1671 "no socket for sock_id %llx gencnt %llx flowhash %x",
1672 cfil_sock_id
, gencnt
, flowhash
);
1678 static struct socket
*
1679 cfil_socket_from_client_uuid(uuid_t necp_client_uuid
, bool *cfil_attached
)
1681 struct socket
*so
= NULL
;
1682 struct inpcb
*inp
= NULL
;
1683 struct inpcbinfo
*pcbinfo
= &tcbinfo
;
1685 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1686 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1687 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1688 inp
->inp_socket
!= NULL
&&
1689 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1690 *cfil_attached
= (inp
->inp_socket
->so_cfil
!= NULL
);
1691 if (cfil_socket_safe_lock(inp
)) {
1692 so
= inp
->inp_socket
;
1697 lck_rw_done(pcbinfo
->ipi_lock
);
1703 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1704 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1705 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1706 inp
->inp_socket
!= NULL
&&
1707 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1708 *cfil_attached
= (inp
->inp_socket
->so_cfil_db
!= NULL
);
1709 if (cfil_socket_safe_lock(inp
)) {
1710 so
= inp
->inp_socket
;
1715 lck_rw_done(pcbinfo
->ipi_lock
);
1722 cfil_info_stats_toggle(struct cfil_info
*cfil_info
, struct cfil_entry
*entry
, uint32_t report_frequency
)
1724 struct cfil_info
*cfil
= NULL
;
1725 Boolean found
= FALSE
;
1728 if (cfil_info
== NULL
) {
1732 if (report_frequency
) {
1733 if (entry
== NULL
) {
1737 // Update stats reporting frequency.
1738 if (entry
->cfe_stats_report_frequency
!= report_frequency
) {
1739 entry
->cfe_stats_report_frequency
= report_frequency
;
1740 if (entry
->cfe_stats_report_frequency
< CFIL_STATS_REPORT_INTERVAL_MIN_MSEC
) {
1741 entry
->cfe_stats_report_frequency
= CFIL_STATS_REPORT_INTERVAL_MIN_MSEC
;
1743 microuptime(&entry
->cfe_stats_report_ts
);
1745 // Insert cfil_info into list only if it is not in yet.
1746 TAILQ_FOREACH(cfil
, &cfil_sock_head_stats
, cfi_link_stats
) {
1747 if (cfil
== cfil_info
) {
1752 TAILQ_INSERT_TAIL(&cfil_sock_head_stats
, cfil_info
, cfi_link_stats
);
1754 // Wake up stats thread if this is first flow added
1755 if (cfil_sock_attached_stats_count
== 0) {
1756 thread_wakeup((caddr_t
)&cfil_sock_attached_stats_count
);
1758 cfil_sock_attached_stats_count
++;
1760 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1761 cfil_info
->cfi_so
? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info
->cfi_so
) : 0,
1762 cfil_info
->cfi_sock_id
,
1763 entry
->cfe_stats_report_frequency
);
1767 // Turn off stats reporting for this filter.
1768 if (entry
!= NULL
) {
1769 // Already off, no change.
1770 if (entry
->cfe_stats_report_frequency
== 0) {
1774 entry
->cfe_stats_report_frequency
= 0;
1775 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1776 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
1777 if (cfil_info
->cfi_entries
[kcunit
- 1].cfe_stats_report_frequency
> 0) {
1783 // No more filter asking for stats for this cfil_info, remove from list.
1784 if (!TAILQ_EMPTY(&cfil_sock_head_stats
)) {
1786 TAILQ_FOREACH(cfil
, &cfil_sock_head_stats
, cfi_link_stats
) {
1787 if (cfil
== cfil_info
) {
1793 cfil_sock_attached_stats_count
--;
1794 TAILQ_REMOVE(&cfil_sock_head_stats
, cfil_info
, cfi_link_stats
);
1796 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1797 cfil_info
->cfi_so
? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info
->cfi_so
) : 0,
1798 cfil_info
->cfi_sock_id
);
1806 cfil_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, mbuf_t m
,
1809 #pragma unused(kctlref, flags)
1811 struct cfil_msg_hdr
*msghdr
;
1812 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1814 struct cfil_msg_action
*action_msg
;
1815 struct cfil_entry
*entry
;
1816 struct cfil_info
*cfil_info
= NULL
;
1817 unsigned int data_len
= 0;
1819 CFIL_LOG(LOG_INFO
, "");
1821 if (content_filters
== NULL
) {
1822 CFIL_LOG(LOG_ERR
, "no content filter");
1826 if (kcunit
> MAX_CONTENT_FILTER
) {
1827 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1828 kcunit
, MAX_CONTENT_FILTER
);
1833 CFIL_LOG(LOG_ERR
, "null mbuf");
1837 data_len
= m_length(m
);
1839 if (data_len
< sizeof(struct cfil_msg_hdr
)) {
1840 CFIL_LOG(LOG_ERR
, "too short %u", data_len
);
1844 msghdr
= (struct cfil_msg_hdr
*)mbuf_data(m
);
1845 if (msghdr
->cfm_version
!= CFM_VERSION_CURRENT
) {
1846 CFIL_LOG(LOG_ERR
, "bad version %u", msghdr
->cfm_version
);
1850 if (msghdr
->cfm_type
!= CFM_TYPE_ACTION
) {
1851 CFIL_LOG(LOG_ERR
, "bad type %u", msghdr
->cfm_type
);
1855 if (msghdr
->cfm_len
> data_len
) {
1856 CFIL_LOG(LOG_ERR
, "bad length %u", msghdr
->cfm_len
);
1861 /* Validate action operation */
1862 switch (msghdr
->cfm_op
) {
1863 case CFM_OP_DATA_UPDATE
:
1865 &cfil_stats
.cfs_ctl_action_data_update
);
1868 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_drop
);
1870 case CFM_OP_BLESS_CLIENT
:
1871 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_bless_client
)) {
1872 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1874 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1879 error
= cfil_action_bless_client(kcunit
, msghdr
);
1881 case CFM_OP_SET_CRYPTO_KEY
:
1882 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_set_crypto_key
)) {
1883 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1885 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1890 error
= cfil_action_set_crypto_key(kcunit
, msghdr
);
1893 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_op
);
1894 CFIL_LOG(LOG_ERR
, "bad op %u", msghdr
->cfm_op
);
1898 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_action
)) {
1899 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1901 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1906 cfil_rw_lock_shared(&cfil_lck_rw
);
1907 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1908 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1911 cfil_rw_unlock_shared(&cfil_lck_rw
);
1914 cfil_rw_unlock_shared(&cfil_lck_rw
);
1916 // Search for socket (TCP+UDP and lock so)
1917 so
= cfil_socket_from_sock_id(msghdr
->cfm_sock_id
, false);
1919 CFIL_LOG(LOG_NOTICE
, "bad sock_id %llx",
1920 msghdr
->cfm_sock_id
);
1925 cfil_info
= so
->so_cfil_db
!= NULL
?
1926 cfil_db_get_cfil_info(so
->so_cfil_db
, msghdr
->cfm_sock_id
) : so
->so_cfil
;
1928 if (cfil_info
== NULL
) {
1929 CFIL_LOG(LOG_NOTICE
, "so %llx <id %llu> not attached",
1930 (uint64_t)VM_KERNEL_ADDRPERM(so
), msghdr
->cfm_sock_id
);
1933 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1934 CFIL_LOG(LOG_NOTICE
, "so %llx drop set",
1935 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1940 if (cfil_info
->cfi_debug
) {
1941 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: RECEIVED MSG FROM FILTER");
1944 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1945 if (entry
->cfe_filter
== NULL
) {
1946 CFIL_LOG(LOG_NOTICE
, "so %llx no filter",
1947 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1952 if (entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) {
1953 entry
->cfe_flags
|= CFEF_DATA_START
;
1956 "so %llx attached not sent for %u",
1957 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1962 microuptime(&entry
->cfe_last_action
);
1963 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_action
, &cfil_info
->cfi_first_event
, msghdr
->cfm_op
);
1965 action_msg
= (struct cfil_msg_action
*)msghdr
;
1967 switch (msghdr
->cfm_op
) {
1968 case CFM_OP_DATA_UPDATE
:
1970 if (cfil_info
->cfi_debug
) {
1971 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
1972 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1973 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1974 cfil_info
->cfi_sock_id
,
1975 action_msg
->cfa_in_peek_offset
, action_msg
->cfa_in_pass_offset
,
1976 action_msg
->cfa_out_peek_offset
, action_msg
->cfa_out_pass_offset
);
1980 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1981 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1982 cfil_info
->cfi_sock_id
,
1983 action_msg
->cfa_in_peek_offset
, action_msg
->cfa_in_pass_offset
,
1984 action_msg
->cfa_out_peek_offset
, action_msg
->cfa_out_pass_offset
);
1987 * Received verdict, at this point we know this
1988 * socket connection is allowed. Unblock thread
1989 * immediately before proceeding to process the verdict.
1991 cfil_sock_received_verdict(so
);
1993 if (action_msg
->cfa_out_peek_offset
!= 0 ||
1994 action_msg
->cfa_out_pass_offset
!= 0) {
1995 error
= cfil_action_data_pass(so
, cfil_info
, kcunit
, 1,
1996 action_msg
->cfa_out_pass_offset
,
1997 action_msg
->cfa_out_peek_offset
);
1999 if (error
== EJUSTRETURN
) {
2005 if (action_msg
->cfa_in_peek_offset
!= 0 ||
2006 action_msg
->cfa_in_pass_offset
!= 0) {
2007 error
= cfil_action_data_pass(so
, cfil_info
, kcunit
, 0,
2008 action_msg
->cfa_in_pass_offset
,
2009 action_msg
->cfa_in_peek_offset
);
2011 if (error
== EJUSTRETURN
) {
2015 // Toggle stats reporting according to received verdict.
2016 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2017 cfil_info_stats_toggle(cfil_info
, entry
, action_msg
->cfa_stats_frequency
);
2018 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2023 if (cfil_info
->cfi_debug
) {
2024 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: RECEIVED CFM_OP_DROP");
2025 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2026 (uint64_t)VM_KERNEL_ADDRPERM(so
),
2027 cfil_info
->cfi_sock_id
,
2028 action_msg
->cfa_in_peek_offset
, action_msg
->cfa_in_pass_offset
,
2029 action_msg
->cfa_out_peek_offset
, action_msg
->cfa_out_pass_offset
);
2033 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2034 (uint64_t)VM_KERNEL_ADDRPERM(so
),
2035 cfil_info
->cfi_sock_id
,
2036 action_msg
->cfa_in_peek_offset
, action_msg
->cfa_in_pass_offset
,
2037 action_msg
->cfa_out_peek_offset
, action_msg
->cfa_out_pass_offset
);
2039 error
= cfil_action_drop(so
, cfil_info
, kcunit
);
2040 cfil_sock_received_verdict(so
);
2048 socket_unlock(so
, 1);
2053 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_ok
);
2055 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_bad
);
2062 cfil_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
2063 int opt
, void *data
, size_t *len
)
2065 #pragma unused(kctlref, opt)
2066 struct cfil_info
*cfil_info
= NULL
;
2068 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
2070 CFIL_LOG(LOG_NOTICE
, "");
2072 cfil_rw_lock_shared(&cfil_lck_rw
);
2074 if (content_filters
== NULL
) {
2075 CFIL_LOG(LOG_ERR
, "no content filter");
2079 if (kcunit
> MAX_CONTENT_FILTER
) {
2080 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2081 kcunit
, MAX_CONTENT_FILTER
);
2085 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
2086 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
2092 case CFIL_OPT_NECP_CONTROL_UNIT
:
2093 if (*len
< sizeof(uint32_t)) {
2094 CFIL_LOG(LOG_ERR
, "len too small %lu", *len
);
2099 *(uint32_t *)data
= cfc
->cf_necp_control_unit
;
2102 case CFIL_OPT_GET_SOCKET_INFO
:
2103 if (*len
!= sizeof(struct cfil_opt_sock_info
)) {
2104 CFIL_LOG(LOG_ERR
, "len does not match %lu", *len
);
2109 CFIL_LOG(LOG_ERR
, "data not passed");
2114 struct cfil_opt_sock_info
*sock_info
=
2115 (struct cfil_opt_sock_info
*) data
;
2117 // Unlock here so that we never hold both cfil_lck_rw and the
2118 // socket_lock at the same time. Otherwise, this can deadlock
2119 // because soclose() takes the socket_lock and then exclusive
2120 // cfil_lck_rw and we require the opposite order.
2122 // WARNING: Be sure to never use anything protected
2123 // by cfil_lck_rw beyond this point.
2124 // WARNING: Be sure to avoid fallthrough and
2125 // goto return_already_unlocked from this branch.
2126 cfil_rw_unlock_shared(&cfil_lck_rw
);
2128 // Search (TCP+UDP) and lock socket
2129 struct socket
*sock
=
2130 cfil_socket_from_sock_id(sock_info
->cfs_sock_id
, false);
2133 CFIL_LOG(LOG_ERR
, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2134 sock_info
->cfs_sock_id
);
2137 goto return_already_unlocked
;
2140 cfil_info
= (sock
->so_cfil_db
!= NULL
) ?
2141 cfil_db_get_cfil_info(sock
->so_cfil_db
, sock_info
->cfs_sock_id
) : sock
->so_cfil
;
2143 if (cfil_info
== NULL
) {
2145 CFIL_LOG(LOG_ERR
, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2146 (uint64_t)VM_KERNEL_ADDRPERM(sock
));
2149 socket_unlock(sock
, 1);
2150 goto return_already_unlocked
;
2153 // Fill out family, type, and protocol
2154 sock_info
->cfs_sock_family
= sock
->so_proto
->pr_domain
->dom_family
;
2155 sock_info
->cfs_sock_type
= sock
->so_proto
->pr_type
;
2156 sock_info
->cfs_sock_protocol
= sock
->so_proto
->pr_protocol
;
2158 // Source and destination addresses
2159 struct inpcb
*inp
= sotoinpcb(sock
);
2160 if (inp
->inp_vflag
& INP_IPV6
) {
2161 struct in6_addr
*laddr
= NULL
, *faddr
= NULL
;
2162 u_int16_t lport
= 0, fport
= 0;
2164 cfil_get_flow_address_v6(cfil_info
->cfi_hash_entry
, inp
,
2165 &laddr
, &faddr
, &lport
, &fport
);
2166 fill_ip6_sockaddr_4_6(&sock_info
->cfs_local
, laddr
, lport
);
2167 fill_ip6_sockaddr_4_6(&sock_info
->cfs_remote
, faddr
, fport
);
2168 } else if (inp
->inp_vflag
& INP_IPV4
) {
2169 struct in_addr laddr
= {.s_addr
= 0}, faddr
= {.s_addr
= 0};
2170 u_int16_t lport
= 0, fport
= 0;
2172 cfil_get_flow_address(cfil_info
->cfi_hash_entry
, inp
,
2173 &laddr
, &faddr
, &lport
, &fport
);
2174 fill_ip_sockaddr_4_6(&sock_info
->cfs_local
, laddr
, lport
);
2175 fill_ip_sockaddr_4_6(&sock_info
->cfs_remote
, faddr
, fport
);
2179 sock_info
->cfs_pid
= sock
->last_pid
;
2180 memcpy(sock_info
->cfs_uuid
, sock
->last_uuid
, sizeof(uuid_t
));
2182 if (sock
->so_flags
& SOF_DELEGATED
) {
2183 sock_info
->cfs_e_pid
= sock
->e_pid
;
2184 memcpy(sock_info
->cfs_e_uuid
, sock
->e_uuid
, sizeof(uuid_t
));
2186 sock_info
->cfs_e_pid
= sock
->last_pid
;
2187 memcpy(sock_info
->cfs_e_uuid
, sock
->last_uuid
, sizeof(uuid_t
));
2190 socket_unlock(sock
, 1);
2192 goto return_already_unlocked
;
2194 error
= ENOPROTOOPT
;
2198 cfil_rw_unlock_shared(&cfil_lck_rw
);
2202 return_already_unlocked
:
2208 cfil_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
2209 int opt
, void *data
, size_t len
)
2211 #pragma unused(kctlref, opt)
2213 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
2215 CFIL_LOG(LOG_NOTICE
, "");
2217 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2219 if (content_filters
== NULL
) {
2220 CFIL_LOG(LOG_ERR
, "no content filter");
2224 if (kcunit
> MAX_CONTENT_FILTER
) {
2225 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2226 kcunit
, MAX_CONTENT_FILTER
);
2230 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
2231 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
2237 case CFIL_OPT_NECP_CONTROL_UNIT
:
2238 if (len
< sizeof(uint32_t)) {
2239 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
2240 "len too small %lu", len
);
2244 if (cfc
->cf_necp_control_unit
!= 0) {
2245 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
2247 cfc
->cf_necp_control_unit
);
2251 cfc
->cf_necp_control_unit
= *(uint32_t *)data
;
2254 error
= ENOPROTOOPT
;
2258 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2265 cfil_ctl_rcvd(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, int flags
)
2267 #pragma unused(kctlref, flags)
2268 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
2269 struct socket
*so
= NULL
;
2271 struct cfil_entry
*entry
;
2272 struct cfil_info
*cfil_info
= NULL
;
2274 CFIL_LOG(LOG_INFO
, "");
2276 if (content_filters
== NULL
) {
2277 CFIL_LOG(LOG_ERR
, "no content filter");
2278 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
2281 if (kcunit
> MAX_CONTENT_FILTER
) {
2282 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2283 kcunit
, MAX_CONTENT_FILTER
);
2284 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
2287 cfil_rw_lock_shared(&cfil_lck_rw
);
2288 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
2289 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
2291 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
2294 /* Let's assume the flow control is lifted */
2295 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2296 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
2297 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2300 cfc
->cf_flags
&= ~CFF_FLOW_CONTROLLED
;
2302 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw
);
2303 LCK_RW_ASSERT(&cfil_lck_rw
, LCK_RW_ASSERT_SHARED
);
2306 * Flow control will be raised again as soon as an entry cannot enqueue
2307 * to the kernel control socket
2309 while ((cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) == 0) {
2310 verify_content_filter(cfc
);
2312 cfil_rw_lock_assert_held(&cfil_lck_rw
, 0);
2314 /* Find an entry that is flow controlled */
2315 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
2316 if (entry
->cfe_cfil_info
== NULL
||
2317 entry
->cfe_cfil_info
->cfi_so
== NULL
) {
2320 if ((entry
->cfe_flags
& CFEF_FLOW_CONTROLLED
) == 0) {
2324 if (entry
== NULL
) {
2328 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_flow_lift
);
2330 cfil_info
= entry
->cfe_cfil_info
;
2331 so
= cfil_info
->cfi_so
;
2333 cfil_rw_unlock_shared(&cfil_lck_rw
);
2337 error
= cfil_acquire_sockbuf(so
, cfil_info
, 1);
2339 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, 1);
2341 cfil_release_sockbuf(so
, 1);
2346 error
= cfil_acquire_sockbuf(so
, cfil_info
, 0);
2348 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, 0);
2350 cfil_release_sockbuf(so
, 0);
2353 socket_lock_assert_owned(so
);
2354 socket_unlock(so
, 1);
2356 cfil_rw_lock_shared(&cfil_lck_rw
);
2359 cfil_rw_unlock_shared(&cfil_lck_rw
);
2365 struct kern_ctl_reg kern_ctl
;
2367 vm_size_t content_filter_size
= 0; /* size of content_filter */
2368 vm_size_t cfil_info_size
= 0; /* size of cfil_info */
2369 vm_size_t cfil_hash_entry_size
= 0; /* size of cfil_hash_entry */
2370 vm_size_t cfil_db_size
= 0; /* size of cfil_db */
2371 unsigned int mbuf_limit
= 0;
2373 CFIL_LOG(LOG_NOTICE
, "");
2376 * Compile time verifications
2378 _CASSERT(CFIL_MAX_FILTER_COUNT
== MAX_CONTENT_FILTER
);
2379 _CASSERT(sizeof(struct cfil_filter_stat
) % sizeof(uint32_t) == 0);
2380 _CASSERT(sizeof(struct cfil_entry_stat
) % sizeof(uint32_t) == 0);
2381 _CASSERT(sizeof(struct cfil_sock_stat
) % sizeof(uint32_t) == 0);
2384 * Runtime time verifications
2386 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_enqueued
,
2388 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_enqueued
,
2390 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_peeked
,
2392 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_peeked
,
2395 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_in_enqueued
,
2397 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_out_enqueued
,
2400 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_enqueued
,
2402 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_enqueued
,
2404 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_passed
,
2406 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_passed
,
2410 * Zone for content filters kernel control sockets
2412 content_filter_size
= sizeof(struct content_filter
);
2413 content_filter_zone
= zinit(content_filter_size
,
2414 CONTENT_FILTER_ZONE_MAX
* content_filter_size
,
2416 CONTENT_FILTER_ZONE_NAME
);
2417 if (content_filter_zone
== NULL
) {
2418 panic("%s: zinit(%s) failed", __func__
,
2419 CONTENT_FILTER_ZONE_NAME
);
2422 zone_change(content_filter_zone
, Z_CALLERACCT
, FALSE
);
2423 zone_change(content_filter_zone
, Z_EXPAND
, TRUE
);
2426 * Zone for per socket content filters
2428 cfil_info_size
= sizeof(struct cfil_info
);
2429 cfil_info_zone
= zinit(cfil_info_size
,
2430 CFIL_INFO_ZONE_MAX
* cfil_info_size
,
2432 CFIL_INFO_ZONE_NAME
);
2433 if (cfil_info_zone
== NULL
) {
2434 panic("%s: zinit(%s) failed", __func__
, CFIL_INFO_ZONE_NAME
);
2437 zone_change(cfil_info_zone
, Z_CALLERACCT
, FALSE
);
2438 zone_change(cfil_info_zone
, Z_EXPAND
, TRUE
);
2441 * Zone for content filters cfil hash entries and db
2443 cfil_hash_entry_size
= sizeof(struct cfil_hash_entry
);
2444 cfil_hash_entry_zone
= zinit(cfil_hash_entry_size
,
2445 CFIL_HASH_ENTRY_ZONE_MAX
* cfil_hash_entry_size
,
2447 CFIL_HASH_ENTRY_ZONE_NAME
);
2448 if (cfil_hash_entry_zone
== NULL
) {
2449 panic("%s: zinit(%s) failed", __func__
, CFIL_HASH_ENTRY_ZONE_NAME
);
2452 zone_change(cfil_hash_entry_zone
, Z_CALLERACCT
, FALSE
);
2453 zone_change(cfil_hash_entry_zone
, Z_EXPAND
, TRUE
);
2455 cfil_db_size
= sizeof(struct cfil_db
);
2456 cfil_db_zone
= zinit(cfil_db_size
,
2457 CFIL_DB_ZONE_MAX
* cfil_db_size
,
2460 if (cfil_db_zone
== NULL
) {
2461 panic("%s: zinit(%s) failed", __func__
, CFIL_DB_ZONE_NAME
);
2464 zone_change(cfil_db_zone
, Z_CALLERACCT
, FALSE
);
2465 zone_change(cfil_db_zone
, Z_EXPAND
, TRUE
);
2470 cfil_lck_grp_attr
= lck_grp_attr_alloc_init();
2471 if (cfil_lck_grp_attr
== NULL
) {
2472 panic("%s: lck_grp_attr_alloc_init failed", __func__
);
2475 cfil_lck_grp
= lck_grp_alloc_init("content filter",
2477 if (cfil_lck_grp
== NULL
) {
2478 panic("%s: lck_grp_alloc_init failed", __func__
);
2481 cfil_lck_attr
= lck_attr_alloc_init();
2482 if (cfil_lck_attr
== NULL
) {
2483 panic("%s: lck_attr_alloc_init failed", __func__
);
2486 lck_rw_init(&cfil_lck_rw
, cfil_lck_grp
, cfil_lck_attr
);
2488 TAILQ_INIT(&cfil_sock_head
);
2489 TAILQ_INIT(&cfil_sock_head_stats
);
2492 * Register kernel control
2494 bzero(&kern_ctl
, sizeof(kern_ctl
));
2495 strlcpy(kern_ctl
.ctl_name
, CONTENT_FILTER_CONTROL_NAME
,
2496 sizeof(kern_ctl
.ctl_name
));
2497 kern_ctl
.ctl_flags
= CTL_FLAG_PRIVILEGED
| CTL_FLAG_REG_EXTENDED
;
2498 kern_ctl
.ctl_sendsize
= 512 * 1024; /* enough? */
2499 kern_ctl
.ctl_recvsize
= 512 * 1024; /* enough? */
2500 kern_ctl
.ctl_connect
= cfil_ctl_connect
;
2501 kern_ctl
.ctl_disconnect
= cfil_ctl_disconnect
;
2502 kern_ctl
.ctl_send
= cfil_ctl_send
;
2503 kern_ctl
.ctl_getopt
= cfil_ctl_getopt
;
2504 kern_ctl
.ctl_setopt
= cfil_ctl_setopt
;
2505 kern_ctl
.ctl_rcvd
= cfil_ctl_rcvd
;
2506 error
= ctl_register(&kern_ctl
, &cfil_kctlref
);
2508 CFIL_LOG(LOG_ERR
, "ctl_register failed: %d", error
);
2512 // Spawn thread for gargage collection
2513 if (kernel_thread_start(cfil_udp_gc_thread_func
, NULL
,
2514 &cfil_udp_gc_thread
) != KERN_SUCCESS
) {
2515 panic_plain("%s: Can't create UDP GC thread", __func__
);
2518 /* this must not fail */
2519 VERIFY(cfil_udp_gc_thread
!= NULL
);
2521 // Spawn thread for statistics reporting
2522 if (kernel_thread_start(cfil_stats_report_thread_func
, NULL
,
2523 &cfil_stats_report_thread
) != KERN_SUCCESS
) {
2524 panic_plain("%s: Can't create statistics report thread", __func__
);
2527 /* this must not fail */
2528 VERIFY(cfil_stats_report_thread
!= NULL
);
2530 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2531 mbuf_limit
= MAX(UDP_FLOW_GC_MBUF_CNT_MAX
, (nmbclusters
<< MCLSHIFT
) >> UDP_FLOW_GC_MBUF_SHIFT
);
2532 cfil_udp_gc_mbuf_num_max
= (mbuf_limit
>> MCLSHIFT
);
2533 cfil_udp_gc_mbuf_cnt_max
= mbuf_limit
;
2535 memset(&global_cfil_stats_report_buffers
, 0, sizeof(global_cfil_stats_report_buffers
));
2539 cfil_info_alloc(struct socket
*so
, struct cfil_hash_entry
*hash_entry
)
2542 struct cfil_info
*cfil_info
= NULL
;
2543 struct inpcb
*inp
= sotoinpcb(so
);
2545 CFIL_LOG(LOG_INFO
, "");
2547 socket_lock_assert_owned(so
);
2549 cfil_info
= zalloc(cfil_info_zone
);
2550 if (cfil_info
== NULL
) {
2553 bzero(cfil_info
, sizeof(struct cfil_info
));
2555 cfil_queue_init(&cfil_info
->cfi_snd
.cfi_inject_q
);
2556 cfil_queue_init(&cfil_info
->cfi_rcv
.cfi_inject_q
);
2558 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2559 struct cfil_entry
*entry
;
2561 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2562 entry
->cfe_cfil_info
= cfil_info
;
2564 /* Initialize the filter entry */
2565 entry
->cfe_filter
= NULL
;
2566 entry
->cfe_flags
= 0;
2567 entry
->cfe_necp_control_unit
= 0;
2568 entry
->cfe_snd
.cfe_pass_offset
= 0;
2569 entry
->cfe_snd
.cfe_peek_offset
= 0;
2570 entry
->cfe_snd
.cfe_peeked
= 0;
2571 entry
->cfe_rcv
.cfe_pass_offset
= 0;
2572 entry
->cfe_rcv
.cfe_peek_offset
= 0;
2573 entry
->cfe_rcv
.cfe_peeked
= 0;
2575 * Timestamp the last action to avoid pre-maturely
2576 * triggering garbage collection
2578 microuptime(&entry
->cfe_last_action
);
2580 cfil_queue_init(&entry
->cfe_snd
.cfe_pending_q
);
2581 cfil_queue_init(&entry
->cfe_rcv
.cfe_pending_q
);
2582 cfil_queue_init(&entry
->cfe_snd
.cfe_ctl_q
);
2583 cfil_queue_init(&entry
->cfe_rcv
.cfe_ctl_q
);
2586 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2589 * Create a cfi_sock_id that's not the socket pointer!
2592 if (hash_entry
== NULL
) {
2593 // This is the TCP case, cfil_info is tracked per socket
2594 if (inp
->inp_flowhash
== 0) {
2595 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
2598 so
->so_cfil
= cfil_info
;
2599 cfil_info
->cfi_so
= so
;
2600 cfil_info
->cfi_sock_id
=
2601 ((so
->so_gencnt
<< 32) | inp
->inp_flowhash
);
2603 // This is the UDP case, cfil_info is tracked in per-socket hash
2604 cfil_info
->cfi_so
= so
;
2605 hash_entry
->cfentry_cfil
= cfil_info
;
2606 cfil_info
->cfi_hash_entry
= hash_entry
;
2607 cfil_info
->cfi_sock_id
= ((so
->so_gencnt
<< 32) | (hash_entry
->cfentry_flowhash
& 0xffffffff));
2608 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2609 inp
->inp_flowhash
, so
->so_gencnt
, hash_entry
->cfentry_flowhash
, cfil_info
->cfi_sock_id
);
2611 // Wake up gc thread if this is first flow added
2612 if (cfil_sock_udp_attached_count
== 0) {
2613 thread_wakeup((caddr_t
)&cfil_sock_udp_attached_count
);
2616 cfil_sock_udp_attached_count
++;
2619 TAILQ_INSERT_TAIL(&cfil_sock_head
, cfil_info
, cfi_link
);
2620 SLIST_INIT(&cfil_info
->cfi_ordered_entries
);
2622 cfil_sock_attached_count
++;
2624 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2627 if (cfil_info
!= NULL
) {
2628 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_ok
);
2630 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_fail
);
2637 cfil_info_attach_unit(struct socket
*so
, uint32_t filter_control_unit
, struct cfil_info
*cfil_info
)
2642 CFIL_LOG(LOG_INFO
, "");
2644 socket_lock_assert_owned(so
);
2646 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2649 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
2651 struct content_filter
*cfc
= content_filters
[kcunit
- 1];
2652 struct cfil_entry
*entry
;
2653 struct cfil_entry
*iter_entry
;
2654 struct cfil_entry
*iter_prev
;
2659 if (!(cfc
->cf_necp_control_unit
& filter_control_unit
)) {
2663 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2665 entry
->cfe_filter
= cfc
;
2666 entry
->cfe_necp_control_unit
= cfc
->cf_necp_control_unit
;
2667 TAILQ_INSERT_TAIL(&cfc
->cf_sock_entries
, entry
, cfe_link
);
2668 cfc
->cf_sock_count
++;
2670 /* Insert the entry into the list ordered by control unit */
2672 SLIST_FOREACH(iter_entry
, &cfil_info
->cfi_ordered_entries
, cfe_order_link
) {
2673 if (entry
->cfe_necp_control_unit
< iter_entry
->cfe_necp_control_unit
) {
2676 iter_prev
= iter_entry
;
2679 if (iter_prev
== NULL
) {
2680 SLIST_INSERT_HEAD(&cfil_info
->cfi_ordered_entries
, entry
, cfe_order_link
);
2682 SLIST_INSERT_AFTER(iter_prev
, entry
, cfe_order_link
);
2685 verify_content_filter(cfc
);
2687 entry
->cfe_flags
|= CFEF_CFIL_ATTACHED
;
2690 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2696 cfil_info_free(struct cfil_info
*cfil_info
)
2699 uint64_t in_drain
= 0;
2700 uint64_t out_drained
= 0;
2702 if (cfil_info
== NULL
) {
2706 CFIL_LOG(LOG_INFO
, "");
2708 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2711 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
2713 struct cfil_entry
*entry
;
2714 struct content_filter
*cfc
;
2716 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2718 /* Don't be silly and try to detach twice */
2719 if (entry
->cfe_filter
== NULL
) {
2723 cfc
= content_filters
[kcunit
- 1];
2725 VERIFY(cfc
== entry
->cfe_filter
);
2727 entry
->cfe_filter
= NULL
;
2728 entry
->cfe_necp_control_unit
= 0;
2729 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
2730 cfc
->cf_sock_count
--;
2732 verify_content_filter(cfc
);
2734 if (cfil_info
->cfi_hash_entry
!= NULL
) {
2735 cfil_sock_udp_attached_count
--;
2737 cfil_sock_attached_count
--;
2738 TAILQ_REMOVE(&cfil_sock_head
, cfil_info
, cfi_link
);
2740 // Turn off stats reporting for cfil_info.
2741 cfil_info_stats_toggle(cfil_info
, NULL
, 0);
2743 out_drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
2744 in_drain
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
2746 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2747 struct cfil_entry
*entry
;
2749 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2750 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
2751 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_pending_q
);
2752 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
2753 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_ctl_q
);
2755 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2758 OSIncrementAtomic(&cfil_stats
.cfs_flush_out_free
);
2761 OSIncrementAtomic(&cfil_stats
.cfs_flush_in_free
);
2764 zfree(cfil_info_zone
, cfil_info
);
2768 * Received a verdict from userspace for a socket.
2769 * Perform any delayed operation if needed.
2772 cfil_sock_received_verdict(struct socket
*so
)
2774 if (so
== NULL
|| so
->so_cfil
== NULL
) {
2778 so
->so_cfil
->cfi_flags
|= CFIF_INITIAL_VERDICT
;
2781 * If socket has already been connected, trigger
2782 * soisconnected now.
2784 if (so
->so_cfil
->cfi_flags
& CFIF_SOCKET_CONNECTED
) {
2785 so
->so_cfil
->cfi_flags
&= ~CFIF_SOCKET_CONNECTED
;
2792 * Entry point from Sockets layer
2793 * The socket is locked.
2795 * Checks if a connected socket is subject to filter and
2796 * pending the initial verdict.
2799 cfil_sock_connected_pending_verdict(struct socket
*so
)
2801 if (so
== NULL
|| so
->so_cfil
== NULL
) {
2805 if (so
->so_cfil
->cfi_flags
& CFIF_INITIAL_VERDICT
) {
2809 * Remember that this protocol is already connected, so
2810 * we will trigger soisconnected() upon receipt of
2811 * initial verdict later.
2813 so
->so_cfil
->cfi_flags
|= CFIF_SOCKET_CONNECTED
;
2819 cfil_filter_present(void)
2821 return cfil_active_count
> 0;
2825 * Entry point from Sockets layer
2826 * The socket is locked.
2829 cfil_sock_attach(struct socket
*so
, struct sockaddr
*local
, struct sockaddr
*remote
, int dir
)
2832 uint32_t filter_control_unit
;
2834 socket_lock_assert_owned(so
);
2836 /* Limit ourselves to TCP that are not MPTCP subflows */
2837 if ((so
->so_proto
->pr_domain
->dom_family
!= PF_INET
&&
2838 so
->so_proto
->pr_domain
->dom_family
!= PF_INET6
) ||
2839 so
->so_proto
->pr_type
!= SOCK_STREAM
||
2840 so
->so_proto
->pr_protocol
!= IPPROTO_TCP
||
2841 (so
->so_flags
& SOF_MP_SUBFLOW
) != 0 ||
2842 (so
->so_flags1
& SOF1_CONTENT_FILTER_SKIP
) != 0) {
2846 filter_control_unit
= necp_socket_get_content_filter_control_unit(so
);
2847 if (filter_control_unit
== 0) {
2851 if (filter_control_unit
== NECP_FILTER_UNIT_NO_FILTER
) {
2854 if ((filter_control_unit
& NECP_MASK_USERSPACE_ONLY
) != 0) {
2855 OSIncrementAtomic(&cfil_stats
.cfs_sock_userspace_only
);
2858 if (cfil_active_count
== 0) {
2859 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_in_vain
);
2862 if (so
->so_cfil
!= NULL
) {
2863 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_already
);
2864 CFIL_LOG(LOG_ERR
, "already attached");
2866 cfil_info_alloc(so
, NULL
);
2867 if (so
->so_cfil
== NULL
) {
2869 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_no_mem
);
2872 so
->so_cfil
->cfi_dir
= dir
;
2874 if (cfil_info_attach_unit(so
, filter_control_unit
, so
->so_cfil
) == 0) {
2875 CFIL_LOG(LOG_ERR
, "cfil_info_attach_unit(%u) failed",
2876 filter_control_unit
);
2877 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_failed
);
2880 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u sockID %llx",
2881 (uint64_t)VM_KERNEL_ADDRPERM(so
),
2882 filter_control_unit
, so
->so_cfil
->cfi_sock_id
);
2884 so
->so_flags
|= SOF_CONTENT_FILTER
;
2885 OSIncrementAtomic(&cfil_stats
.cfs_sock_attached
);
2887 /* Hold a reference on the socket */
2891 * Save passed addresses for attach event msg (in case resend
2894 if (remote
!= NULL
) {
2895 memcpy(&so
->so_cfil
->cfi_so_attach_faddr
, remote
, remote
->sa_len
);
2897 if (local
!= NULL
) {
2898 memcpy(&so
->so_cfil
->cfi_so_attach_laddr
, local
, local
->sa_len
);
2901 error
= cfil_dispatch_attach_event(so
, so
->so_cfil
, 0, dir
);
2902 /* We can recover from flow control or out of memory errors */
2903 if (error
== ENOBUFS
|| error
== ENOMEM
) {
2905 } else if (error
!= 0) {
2909 CFIL_INFO_VERIFY(so
->so_cfil
);
2915 * Entry point from Sockets layer
2916 * The socket is locked.
2919 cfil_sock_detach(struct socket
*so
)
2921 if (IS_IP_DGRAM(so
)) {
2927 if (so
->so_flags
& SOF_CONTENT_FILTER
) {
2928 so
->so_flags
&= ~SOF_CONTENT_FILTER
;
2929 VERIFY(so
->so_usecount
> 0);
2932 cfil_info_free(so
->so_cfil
);
2934 OSIncrementAtomic(&cfil_stats
.cfs_sock_detached
);
2940 * Fill in the address info of an event message from either
2941 * the socket or passed in address info.
2944 cfil_fill_event_msg_addresses(struct cfil_hash_entry
*entry
, struct inpcb
*inp
,
2945 union sockaddr_in_4_6
*sin_src
, union sockaddr_in_4_6
*sin_dst
,
2946 boolean_t isIPv4
, boolean_t outgoing
)
2949 struct in_addr laddr
= {0}, faddr
= {0};
2950 u_int16_t lport
= 0, fport
= 0;
2952 cfil_get_flow_address(entry
, inp
, &laddr
, &faddr
, &lport
, &fport
);
2955 fill_ip_sockaddr_4_6(sin_src
, laddr
, lport
);
2956 fill_ip_sockaddr_4_6(sin_dst
, faddr
, fport
);
2958 fill_ip_sockaddr_4_6(sin_src
, faddr
, fport
);
2959 fill_ip_sockaddr_4_6(sin_dst
, laddr
, lport
);
2962 struct in6_addr
*laddr
= NULL
, *faddr
= NULL
;
2963 u_int16_t lport
= 0, fport
= 0;
2965 cfil_get_flow_address_v6(entry
, inp
, &laddr
, &faddr
, &lport
, &fport
);
2967 fill_ip6_sockaddr_4_6(sin_src
, laddr
, lport
);
2968 fill_ip6_sockaddr_4_6(sin_dst
, faddr
, fport
);
2970 fill_ip6_sockaddr_4_6(sin_src
, faddr
, fport
);
2971 fill_ip6_sockaddr_4_6(sin_dst
, laddr
, lport
);
2977 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state
,
2978 struct cfil_info
*cfil_info
,
2979 struct cfil_msg_sock_attached
*msg
)
2981 struct cfil_crypto_data data
= {};
2983 if (crypto_state
== NULL
|| msg
== NULL
|| cfil_info
== NULL
) {
2987 data
.sock_id
= msg
->cfs_msghdr
.cfm_sock_id
;
2988 data
.direction
= msg
->cfs_conn_dir
;
2990 data
.pid
= msg
->cfs_pid
;
2991 data
.effective_pid
= msg
->cfs_e_pid
;
2992 uuid_copy(data
.uuid
, msg
->cfs_uuid
);
2993 uuid_copy(data
.effective_uuid
, msg
->cfs_e_uuid
);
2994 data
.socketProtocol
= msg
->cfs_sock_protocol
;
2995 if (data
.direction
== CFS_CONNECTION_DIR_OUT
) {
2996 data
.remote
.sin6
= msg
->cfs_dst
.sin6
;
2997 data
.local
.sin6
= msg
->cfs_src
.sin6
;
2999 data
.remote
.sin6
= msg
->cfs_src
.sin6
;
3000 data
.local
.sin6
= msg
->cfs_dst
.sin6
;
3003 // At attach, if local address is already present, no need to re-sign subsequent data messages.
3004 if (!NULLADDRESS(data
.local
)) {
3005 cfil_info
->cfi_isSignatureLatest
= true;
3008 msg
->cfs_signature_length
= sizeof(cfil_crypto_signature
);
3009 if (cfil_crypto_sign_data(crypto_state
, &data
, msg
->cfs_signature
, &msg
->cfs_signature_length
) != 0) {
3010 msg
->cfs_signature_length
= 0;
3011 CFIL_LOG(LOG_ERR
, "CFIL: Failed to sign attached msg <sockID %llu>",
3012 msg
->cfs_msghdr
.cfm_sock_id
);
3020 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state
,
3021 struct socket
*so
, struct cfil_info
*cfil_info
,
3022 struct cfil_msg_data_event
*msg
)
3024 struct cfil_crypto_data data
= {};
3026 if (crypto_state
== NULL
|| msg
== NULL
||
3027 so
== NULL
|| cfil_info
== NULL
) {
3031 data
.sock_id
= cfil_info
->cfi_sock_id
;
3032 data
.direction
= cfil_info
->cfi_dir
;
3033 data
.pid
= so
->last_pid
;
3034 memcpy(data
.uuid
, so
->last_uuid
, sizeof(uuid_t
));
3035 if (so
->so_flags
& SOF_DELEGATED
) {
3036 data
.effective_pid
= so
->e_pid
;
3037 memcpy(data
.effective_uuid
, so
->e_uuid
, sizeof(uuid_t
));
3039 data
.effective_pid
= so
->last_pid
;
3040 memcpy(data
.effective_uuid
, so
->last_uuid
, sizeof(uuid_t
));
3042 data
.socketProtocol
= so
->so_proto
->pr_protocol
;
3044 if (data
.direction
== CFS_CONNECTION_DIR_OUT
) {
3045 data
.remote
.sin6
= msg
->cfc_dst
.sin6
;
3046 data
.local
.sin6
= msg
->cfc_src
.sin6
;
3048 data
.remote
.sin6
= msg
->cfc_src
.sin6
;
3049 data
.local
.sin6
= msg
->cfc_dst
.sin6
;
3052 // At first data, local address may show up for the first time, update address cache and
3053 // no need to re-sign subsequent data messages anymore.
3054 if (!NULLADDRESS(data
.local
)) {
3055 memcpy(&cfil_info
->cfi_so_attach_laddr
, &data
.local
, data
.local
.sa
.sa_len
);
3056 cfil_info
->cfi_isSignatureLatest
= true;
3059 msg
->cfd_signature_length
= sizeof(cfil_crypto_signature
);
3060 if (cfil_crypto_sign_data(crypto_state
, &data
, msg
->cfd_signature
, &msg
->cfd_signature_length
) != 0) {
3061 msg
->cfd_signature_length
= 0;
3062 CFIL_LOG(LOG_ERR
, "CFIL: Failed to sign data msg <sockID %llu>",
3063 msg
->cfd_msghdr
.cfm_sock_id
);
3071 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state
,
3072 struct socket
*so
, struct cfil_info
*cfil_info
,
3073 struct cfil_msg_sock_closed
*msg
)
3075 struct cfil_crypto_data data
= {};
3076 struct cfil_hash_entry hash_entry
= {};
3077 struct cfil_hash_entry
*hash_entry_ptr
= NULL
;
3078 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
3080 if (crypto_state
== NULL
|| msg
== NULL
||
3081 so
== NULL
|| inp
== NULL
|| cfil_info
== NULL
) {
3085 data
.sock_id
= cfil_info
->cfi_sock_id
;
3086 data
.direction
= cfil_info
->cfi_dir
;
3088 data
.pid
= so
->last_pid
;
3089 memcpy(data
.uuid
, so
->last_uuid
, sizeof(uuid_t
));
3090 if (so
->so_flags
& SOF_DELEGATED
) {
3091 data
.effective_pid
= so
->e_pid
;
3092 memcpy(data
.effective_uuid
, so
->e_uuid
, sizeof(uuid_t
));
3094 data
.effective_pid
= so
->last_pid
;
3095 memcpy(data
.effective_uuid
, so
->last_uuid
, sizeof(uuid_t
));
3097 data
.socketProtocol
= so
->so_proto
->pr_protocol
;
3100 * Fill in address info:
3101 * For UDP, use the cfil_info hash entry directly.
3102 * For TCP, compose an hash entry with the saved addresses.
3104 if (cfil_info
->cfi_hash_entry
!= NULL
) {
3105 hash_entry_ptr
= cfil_info
->cfi_hash_entry
;
3106 } else if (cfil_info
->cfi_so_attach_faddr
.sa
.sa_len
> 0 ||
3107 cfil_info
->cfi_so_attach_laddr
.sa
.sa_len
> 0) {
3108 fill_cfil_hash_entry_from_address(&hash_entry
, TRUE
, &cfil_info
->cfi_so_attach_laddr
.sa
);
3109 fill_cfil_hash_entry_from_address(&hash_entry
, FALSE
, &cfil_info
->cfi_so_attach_faddr
.sa
);
3110 hash_entry_ptr
= &hash_entry
;
3112 if (hash_entry_ptr
!= NULL
) {
3113 boolean_t outgoing
= (cfil_info
->cfi_dir
== CFS_CONNECTION_DIR_OUT
);
3114 union sockaddr_in_4_6
*src
= outgoing
? &data
.local
: &data
.remote
;
3115 union sockaddr_in_4_6
*dst
= outgoing
? &data
.remote
: &data
.local
;
3116 cfil_fill_event_msg_addresses(hash_entry_ptr
, inp
, src
, dst
, !IS_INP_V6(inp
), outgoing
);
3119 data
.byte_count_in
= cfil_info
->cfi_byte_inbound_count
;
3120 data
.byte_count_out
= cfil_info
->cfi_byte_outbound_count
;
3122 msg
->cfc_signature_length
= sizeof(cfil_crypto_signature
);
3123 if (cfil_crypto_sign_data(crypto_state
, &data
, msg
->cfc_signature
, &msg
->cfc_signature_length
) != 0) {
3124 msg
->cfc_signature_length
= 0;
3125 CFIL_LOG(LOG_ERR
, "CFIL: Failed to sign closed msg <sockID %llu>",
3126 msg
->cfc_msghdr
.cfm_sock_id
);
3134 cfil_dispatch_attach_event(struct socket
*so
, struct cfil_info
*cfil_info
,
3135 uint32_t kcunit
, int conn_dir
)
3138 struct cfil_entry
*entry
= NULL
;
3139 struct cfil_msg_sock_attached msg_attached
;
3140 struct content_filter
*cfc
= NULL
;
3141 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
3142 struct cfil_hash_entry
*hash_entry_ptr
= NULL
;
3143 struct cfil_hash_entry hash_entry
;
3145 memset(&hash_entry
, 0, sizeof(struct cfil_hash_entry
));
3146 proc_t p
= PROC_NULL
;
3147 task_t t
= TASK_NULL
;
3149 socket_lock_assert_owned(so
);
3151 cfil_rw_lock_shared(&cfil_lck_rw
);
3153 if (so
->so_proto
== NULL
|| so
->so_proto
->pr_domain
== NULL
) {
3159 entry
= SLIST_FIRST(&cfil_info
->cfi_ordered_entries
);
3161 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3164 if (entry
== NULL
) {
3168 cfc
= entry
->cfe_filter
;
3173 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
)) {
3178 kcunit
= CFI_ENTRY_KCUNIT(cfil_info
, entry
);
3181 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u kcunit %u",
3182 (uint64_t)VM_KERNEL_ADDRPERM(so
), entry
->cfe_necp_control_unit
, kcunit
);
3184 /* Would be wasteful to try when flow controlled */
3185 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
3190 bzero(&msg_attached
, sizeof(struct cfil_msg_sock_attached
));
3191 msg_attached
.cfs_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_attached
);
3192 msg_attached
.cfs_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
3193 msg_attached
.cfs_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
3194 msg_attached
.cfs_msghdr
.cfm_op
= CFM_OP_SOCKET_ATTACHED
;
3195 msg_attached
.cfs_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
3197 msg_attached
.cfs_sock_family
= so
->so_proto
->pr_domain
->dom_family
;
3198 msg_attached
.cfs_sock_type
= so
->so_proto
->pr_type
;
3199 msg_attached
.cfs_sock_protocol
= so
->so_proto
->pr_protocol
;
3200 msg_attached
.cfs_pid
= so
->last_pid
;
3201 memcpy(msg_attached
.cfs_uuid
, so
->last_uuid
, sizeof(uuid_t
));
3202 if (so
->so_flags
& SOF_DELEGATED
) {
3203 msg_attached
.cfs_e_pid
= so
->e_pid
;
3204 memcpy(msg_attached
.cfs_e_uuid
, so
->e_uuid
, sizeof(uuid_t
));
3206 msg_attached
.cfs_e_pid
= so
->last_pid
;
3207 memcpy(msg_attached
.cfs_e_uuid
, so
->last_uuid
, sizeof(uuid_t
));
3211 * Fill in address info:
3212 * For UDP, use the cfil_info hash entry directly.
3213 * For TCP, compose an hash entry with the saved addresses.
3215 if (cfil_info
->cfi_hash_entry
!= NULL
) {
3216 hash_entry_ptr
= cfil_info
->cfi_hash_entry
;
3217 } else if (cfil_info
->cfi_so_attach_faddr
.sa
.sa_len
> 0 ||
3218 cfil_info
->cfi_so_attach_laddr
.sa
.sa_len
> 0) {
3219 fill_cfil_hash_entry_from_address(&hash_entry
, TRUE
, &cfil_info
->cfi_so_attach_laddr
.sa
);
3220 fill_cfil_hash_entry_from_address(&hash_entry
, FALSE
, &cfil_info
->cfi_so_attach_faddr
.sa
);
3221 hash_entry_ptr
= &hash_entry
;
3223 if (hash_entry_ptr
!= NULL
) {
3224 cfil_fill_event_msg_addresses(hash_entry_ptr
, inp
,
3225 &msg_attached
.cfs_src
, &msg_attached
.cfs_dst
,
3226 !IS_INP_V6(inp
), conn_dir
== CFS_CONNECTION_DIR_OUT
);
3228 msg_attached
.cfs_conn_dir
= conn_dir
;
3230 if (msg_attached
.cfs_e_pid
!= 0) {
3231 p
= proc_find(msg_attached
.cfs_e_pid
);
3232 if (p
!= PROC_NULL
) {
3234 if (t
!= TASK_NULL
) {
3235 audit_token_t audit_token
;
3236 mach_msg_type_number_t count
= TASK_AUDIT_TOKEN_COUNT
;
3237 if (task_info(t
, TASK_AUDIT_TOKEN
, (task_info_t
)&audit_token
, &count
) == KERN_SUCCESS
) {
3238 memcpy(&msg_attached
.cfs_audit_token
, &audit_token
, sizeof(msg_attached
.cfs_audit_token
));
3240 CFIL_LOG(LOG_ERR
, "CFIL: Failed to get process audit token <sockID %llu> ",
3241 entry
->cfe_cfil_info
->cfi_sock_id
);
3248 if (cfil_info
->cfi_debug
) {
3249 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: SENDING ATTACH UP");
3252 cfil_dispatch_attach_event_sign(entry
->cfe_filter
->cf_crypto_state
, cfil_info
, &msg_attached
);
3255 CFIL_LOG(LOG_DEBUG
, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3256 entry
->cfe_cfil_info
->cfi_sock_id
);
3259 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
3260 entry
->cfe_filter
->cf_kcunit
,
3262 sizeof(struct cfil_msg_sock_attached
),
3265 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d", error
);
3268 microuptime(&entry
->cfe_last_event
);
3269 cfil_info
->cfi_first_event
.tv_sec
= entry
->cfe_last_event
.tv_sec
;
3270 cfil_info
->cfi_first_event
.tv_usec
= entry
->cfe_last_event
.tv_usec
;
3272 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
;
3273 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_ok
);
3276 /* We can recover from flow control */
3277 if (error
== ENOBUFS
) {
3278 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
3279 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_flow_control
);
3281 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
3282 cfil_rw_lock_exclusive(&cfil_lck_rw
);
3285 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
3287 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
3290 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_fail
);
3293 cfil_rw_unlock_shared(&cfil_lck_rw
);
3299 cfil_dispatch_disconnect_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3302 struct mbuf
*msg
= NULL
;
3303 struct cfil_entry
*entry
;
3304 struct cfe_buf
*entrybuf
;
3305 struct cfil_msg_hdr msg_disconnected
;
3306 struct content_filter
*cfc
;
3308 socket_lock_assert_owned(so
);
3310 cfil_rw_lock_shared(&cfil_lck_rw
);
3312 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3314 entrybuf
= &entry
->cfe_snd
;
3316 entrybuf
= &entry
->cfe_rcv
;
3319 cfc
= entry
->cfe_filter
;
3324 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3325 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3328 * Send the disconnection event once
3330 if ((outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
)) ||
3331 (!outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
))) {
3332 CFIL_LOG(LOG_INFO
, "so %llx disconnect already sent",
3333 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3338 * We're not disconnected as long as some data is waiting
3339 * to be delivered to the filter
3341 if (outgoing
&& cfil_queue_empty(&entrybuf
->cfe_ctl_q
) == 0) {
3342 CFIL_LOG(LOG_INFO
, "so %llx control queue not empty",
3343 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3347 /* Would be wasteful to try when flow controlled */
3348 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
3353 if (cfil_info
->cfi_debug
) {
3354 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: SENDING DISCONNECT UP");
3358 cfil_info_log(LOG_ERR
, cfil_info
, outgoing
?
3359 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3360 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3363 bzero(&msg_disconnected
, sizeof(struct cfil_msg_hdr
));
3364 msg_disconnected
.cfm_len
= sizeof(struct cfil_msg_hdr
);
3365 msg_disconnected
.cfm_version
= CFM_VERSION_CURRENT
;
3366 msg_disconnected
.cfm_type
= CFM_TYPE_EVENT
;
3367 msg_disconnected
.cfm_op
= outgoing
? CFM_OP_DISCONNECT_OUT
:
3368 CFM_OP_DISCONNECT_IN
;
3369 msg_disconnected
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
3370 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
3371 entry
->cfe_filter
->cf_kcunit
,
3373 sizeof(struct cfil_msg_hdr
),
3376 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
3380 microuptime(&entry
->cfe_last_event
);
3381 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_event
, &cfil_info
->cfi_first_event
, msg_disconnected
.cfm_op
);
3383 /* Remember we have sent the disconnection message */
3385 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_OUT
;
3386 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_out_event_ok
);
3388 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_IN
;
3389 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_in_event_ok
);
3392 if (error
== ENOBUFS
) {
3393 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
3395 &cfil_stats
.cfs_disconnect_event_flow_control
);
3397 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
3398 cfil_rw_lock_exclusive(&cfil_lck_rw
);
3401 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
3403 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
3407 &cfil_stats
.cfs_disconnect_event_fail
);
3410 cfil_rw_unlock_shared(&cfil_lck_rw
);
3416 cfil_dispatch_closed_event(struct socket
*so
, struct cfil_info
*cfil_info
, int kcunit
)
3418 struct cfil_entry
*entry
;
3419 struct cfil_msg_sock_closed msg_closed
;
3421 struct content_filter
*cfc
;
3423 socket_lock_assert_owned(so
);
3425 cfil_rw_lock_shared(&cfil_lck_rw
);
3427 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3428 cfc
= entry
->cfe_filter
;
3433 CFIL_LOG(LOG_INFO
, "so %llx kcunit %d",
3434 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3436 /* Would be wasteful to try when flow controlled */
3437 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
3442 * Send a single closed message per filter
3444 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_CLOSED
) != 0) {
3447 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
3451 microuptime(&entry
->cfe_last_event
);
3452 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_event
, &cfil_info
->cfi_first_event
, CFM_OP_SOCKET_CLOSED
);
3454 bzero(&msg_closed
, sizeof(struct cfil_msg_sock_closed
));
3455 msg_closed
.cfc_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_closed
);
3456 msg_closed
.cfc_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
3457 msg_closed
.cfc_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
3458 msg_closed
.cfc_msghdr
.cfm_op
= CFM_OP_SOCKET_CLOSED
;
3459 msg_closed
.cfc_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
3460 msg_closed
.cfc_first_event
.tv_sec
= cfil_info
->cfi_first_event
.tv_sec
;
3461 msg_closed
.cfc_first_event
.tv_usec
= cfil_info
->cfi_first_event
.tv_usec
;
3462 memcpy(msg_closed
.cfc_op_time
, cfil_info
->cfi_op_time
, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY
);
3463 memcpy(msg_closed
.cfc_op_list
, cfil_info
->cfi_op_list
, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY
);
3464 msg_closed
.cfc_op_list_ctr
= cfil_info
->cfi_op_list_ctr
;
3465 msg_closed
.cfc_byte_inbound_count
= cfil_info
->cfi_byte_inbound_count
;
3466 msg_closed
.cfc_byte_outbound_count
= cfil_info
->cfi_byte_outbound_count
;
3468 cfil_dispatch_closed_event_sign(entry
->cfe_filter
->cf_crypto_state
, so
, cfil_info
, &msg_closed
);
3470 if (cfil_info
->cfi_debug
) {
3471 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: SENDING CLOSED UP");
3475 CFIL_LOG(LOG_ERR
, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed
.cfc_msghdr
.cfm_sock_id
, cfil_info
->cfi_op_list_ctr
, cfil_info
->cfi_first_event
.tv_sec
, cfil_info
->cfi_first_event
.tv_usec
);
3478 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3479 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3481 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3482 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3486 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
3487 entry
->cfe_filter
->cf_kcunit
,
3489 sizeof(struct cfil_msg_sock_closed
),
3492 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d",
3497 entry
->cfe_flags
|= CFEF_SENT_SOCK_CLOSED
;
3498 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_ok
);
3500 /* We can recover from flow control */
3501 if (error
== ENOBUFS
) {
3502 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
3503 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_flow_control
);
3505 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
3506 cfil_rw_lock_exclusive(&cfil_lck_rw
);
3509 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
3511 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
3514 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_fail
);
3517 cfil_rw_unlock_shared(&cfil_lck_rw
);
3524 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6
*sin46
,
3525 struct in6_addr
*ip6
, u_int16_t port
)
3527 if (sin46
== NULL
) {
3531 struct sockaddr_in6
*sin6
= &sin46
->sin6
;
3533 sin6
->sin6_family
= AF_INET6
;
3534 sin6
->sin6_len
= sizeof(*sin6
);
3535 sin6
->sin6_port
= port
;
3536 sin6
->sin6_addr
= *ip6
;
3537 if (IN6_IS_SCOPE_EMBED(&sin6
->sin6_addr
)) {
3538 sin6
->sin6_scope_id
= ntohs(sin6
->sin6_addr
.s6_addr16
[1]);
3539 sin6
->sin6_addr
.s6_addr16
[1] = 0;
3544 fill_ip_sockaddr_4_6(union sockaddr_in_4_6
*sin46
,
3545 struct in_addr ip
, u_int16_t port
)
3547 if (sin46
== NULL
) {
3551 struct sockaddr_in
*sin
= &sin46
->sin
;
3553 sin
->sin_family
= AF_INET
;
3554 sin
->sin_len
= sizeof(*sin
);
3555 sin
->sin_port
= port
;
3556 sin
->sin_addr
.s_addr
= ip
.s_addr
;
3560 cfil_get_flow_address_v6(struct cfil_hash_entry
*entry
, struct inpcb
*inp
,
3561 struct in6_addr
**laddr
, struct in6_addr
**faddr
,
3562 u_int16_t
*lport
, u_int16_t
*fport
)
3564 if (entry
!= NULL
) {
3565 *laddr
= &entry
->cfentry_laddr
.addr6
;
3566 *faddr
= &entry
->cfentry_faddr
.addr6
;
3567 *lport
= entry
->cfentry_lport
;
3568 *fport
= entry
->cfentry_fport
;
3570 *laddr
= &inp
->in6p_laddr
;
3571 *faddr
= &inp
->in6p_faddr
;
3572 *lport
= inp
->inp_lport
;
3573 *fport
= inp
->inp_fport
;
3578 cfil_get_flow_address(struct cfil_hash_entry
*entry
, struct inpcb
*inp
,
3579 struct in_addr
*laddr
, struct in_addr
*faddr
,
3580 u_int16_t
*lport
, u_int16_t
*fport
)
3582 if (entry
!= NULL
) {
3583 *laddr
= entry
->cfentry_laddr
.addr46
.ia46_addr4
;
3584 *faddr
= entry
->cfentry_faddr
.addr46
.ia46_addr4
;
3585 *lport
= entry
->cfentry_lport
;
3586 *fport
= entry
->cfentry_fport
;
3588 *laddr
= inp
->inp_laddr
;
3589 *faddr
= inp
->inp_faddr
;
3590 *lport
= inp
->inp_lport
;
3591 *fport
= inp
->inp_fport
;
3596 cfil_dispatch_data_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
3597 struct mbuf
*data
, unsigned int copyoffset
, unsigned int copylen
)
3600 struct mbuf
*copy
= NULL
;
3601 struct mbuf
*msg
= NULL
;
3602 unsigned int one
= 1;
3603 struct cfil_msg_data_event
*data_req
;
3605 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
3606 struct cfil_entry
*entry
;
3607 struct cfe_buf
*entrybuf
;
3608 struct content_filter
*cfc
;
3612 cfil_rw_lock_shared(&cfil_lck_rw
);
3614 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3616 entrybuf
= &entry
->cfe_snd
;
3618 entrybuf
= &entry
->cfe_rcv
;
3621 cfc
= entry
->cfe_filter
;
3626 data
= cfil_data_start(data
);
3627 if (data
== NULL
|| (data
->m_flags
& M_PKTHDR
) == 0) {
3628 CFIL_LOG(LOG_ERR
, "NOT PKTHDR");
3632 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3633 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3635 socket_lock_assert_owned(so
);
3637 /* Would be wasteful to try */
3638 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
3643 /* Make a copy of the data to pass to kernel control socket */
3644 copy
= m_copym_mode(data
, copyoffset
, copylen
, M_DONTWAIT
,
3647 CFIL_LOG(LOG_ERR
, "m_copym_mode() failed");
3652 /* We need an mbuf packet for the message header */
3653 hdrsize
= sizeof(struct cfil_msg_data_event
);
3654 error
= mbuf_allocpacket(MBUF_DONTWAIT
, hdrsize
, &one
, &msg
);
3656 CFIL_LOG(LOG_ERR
, "mbuf_allocpacket() failed");
3659 * ENOBUFS is to indicate flow control
3664 mbuf_setlen(msg
, hdrsize
);
3665 mbuf_pkthdr_setlen(msg
, hdrsize
+ copylen
);
3667 data_req
= (struct cfil_msg_data_event
*)mbuf_data(msg
);
3668 bzero(data_req
, hdrsize
);
3669 data_req
->cfd_msghdr
.cfm_len
= hdrsize
+ copylen
;
3670 data_req
->cfd_msghdr
.cfm_version
= 1;
3671 data_req
->cfd_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
3672 data_req
->cfd_msghdr
.cfm_op
=
3673 outgoing
? CFM_OP_DATA_OUT
: CFM_OP_DATA_IN
;
3674 data_req
->cfd_msghdr
.cfm_sock_id
=
3675 entry
->cfe_cfil_info
->cfi_sock_id
;
3676 data_req
->cfd_start_offset
= entrybuf
->cfe_peeked
;
3677 data_req
->cfd_end_offset
= entrybuf
->cfe_peeked
+ copylen
;
3679 data_req
->cfd_flags
= 0;
3680 if (OPTIONAL_IP_HEADER(so
)) {
3682 * For non-UDP/TCP traffic, indicate to filters if optional
3683 * IP header is present:
3684 * outgoing - indicate according to INP_HDRINCL flag
3685 * incoming - For IPv4 only, stripping of IP header is
3686 * optional. But for CFIL, we delay stripping
3687 * at rip_input. So CFIL always expects IP
3688 * frames. IP header will be stripped according
3689 * to INP_STRIPHDR flag later at reinjection.
3691 if ((!outgoing
&& !IS_INP_V6(inp
)) ||
3692 (outgoing
&& cfil_dgram_peek_socket_state(data
, &inp_flags
) && (inp_flags
& INP_HDRINCL
))) {
3693 data_req
->cfd_flags
|= CFD_DATA_FLAG_IP_HEADER
;
3698 * Copy address/port into event msg.
3699 * For non connected sockets need to copy addresses from passed
3702 cfil_fill_event_msg_addresses(cfil_info
->cfi_hash_entry
, inp
,
3703 &data_req
->cfc_src
, &data_req
->cfc_dst
,
3704 !IS_INP_V6(inp
), outgoing
);
3706 if (cfil_info
->cfi_debug
) {
3707 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: SENDING DATA UP");
3710 if (cfil_info
->cfi_isSignatureLatest
== false) {
3711 cfil_dispatch_data_event_sign(entry
->cfe_filter
->cf_crypto_state
, so
, cfil_info
, data_req
);
3715 CFI_ADD_TIME_LOG(cfil_info
, &tv
, &cfil_info
->cfi_first_event
, data_req
->cfd_msghdr
.cfm_op
);
3717 /* Pass the message to the content filter */
3718 error
= ctl_enqueuembuf(entry
->cfe_filter
->cf_kcref
,
3719 entry
->cfe_filter
->cf_kcunit
,
3722 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
3726 entry
->cfe_flags
&= ~CFEF_FLOW_CONTROLLED
;
3727 OSIncrementAtomic(&cfil_stats
.cfs_data_event_ok
);
3730 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3731 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
, (uint64_t)VM_KERNEL_ADDRPERM(data
), copyoffset
, copylen
);
3734 if (cfil_info
->cfi_debug
) {
3735 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3736 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
, (uint64_t)VM_KERNEL_ADDRPERM(data
), copyoffset
, copylen
,
3737 data_req
->cfd_flags
& CFD_DATA_FLAG_IP_HEADER
? "IP HDR" : "NO IP HDR");
3741 if (error
== ENOBUFS
) {
3742 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
3744 &cfil_stats
.cfs_data_event_flow_control
);
3746 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
3747 cfil_rw_lock_exclusive(&cfil_lck_rw
);
3750 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
3752 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
3755 OSIncrementAtomic(&cfil_stats
.cfs_data_event_fail
);
3758 cfil_rw_unlock_shared(&cfil_lck_rw
);
3764 * Process the queue of data waiting to be delivered to content filter
3767 cfil_data_service_ctl_q(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3770 struct mbuf
*data
, *tmp
= NULL
;
3771 unsigned int datalen
= 0, copylen
= 0, copyoffset
= 0;
3772 struct cfil_entry
*entry
;
3773 struct cfe_buf
*entrybuf
;
3774 uint64_t currentoffset
= 0;
3776 if (cfil_info
== NULL
) {
3780 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3781 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3783 socket_lock_assert_owned(so
);
3785 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3787 entrybuf
= &entry
->cfe_snd
;
3789 entrybuf
= &entry
->cfe_rcv
;
3792 /* Send attached message if not yet done */
3793 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
3794 error
= cfil_dispatch_attach_event(so
, cfil_info
, CFI_ENTRY_KCUNIT(cfil_info
, entry
),
3795 outgoing
? CFS_CONNECTION_DIR_OUT
: CFS_CONNECTION_DIR_IN
);
3797 /* We can recover from flow control */
3798 if (error
== ENOBUFS
|| error
== ENOMEM
) {
3803 } else if ((entry
->cfe_flags
& CFEF_DATA_START
) == 0) {
3804 OSIncrementAtomic(&cfil_stats
.cfs_ctl_q_not_started
);
3809 CFIL_LOG(LOG_DEBUG
, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3810 entrybuf
->cfe_pass_offset
,
3811 entrybuf
->cfe_peeked
,
3812 entrybuf
->cfe_peek_offset
);
3815 /* Move all data that can pass */
3816 while ((data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
)) != NULL
&&
3817 entrybuf
->cfe_ctl_q
.q_start
< entrybuf
->cfe_pass_offset
) {
3818 datalen
= cfil_data_length(data
, NULL
, NULL
);
3821 if (entrybuf
->cfe_ctl_q
.q_start
+ datalen
<=
3822 entrybuf
->cfe_pass_offset
) {
3824 * The first mbuf can fully pass
3829 * The first mbuf can partially pass
3831 copylen
= entrybuf
->cfe_pass_offset
-
3832 entrybuf
->cfe_ctl_q
.q_start
;
3834 VERIFY(copylen
<= datalen
);
3838 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3839 "datalen %u copylen %u",
3840 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3841 entrybuf
->cfe_ctl_q
.q_start
,
3842 entrybuf
->cfe_peeked
,
3843 entrybuf
->cfe_pass_offset
,
3844 entrybuf
->cfe_peek_offset
,
3849 * Data that passes has been peeked at explicitly or
3852 if (entrybuf
->cfe_ctl_q
.q_start
+ copylen
>
3853 entrybuf
->cfe_peeked
) {
3854 entrybuf
->cfe_peeked
=
3855 entrybuf
->cfe_ctl_q
.q_start
+ copylen
;
3858 * Stop on partial pass
3860 if (copylen
< datalen
) {
3864 /* All good, move full data from ctl queue to pending queue */
3865 cfil_queue_remove(&entrybuf
->cfe_ctl_q
, data
, datalen
);
3867 cfil_queue_enqueue(&entrybuf
->cfe_pending_q
, data
, datalen
);
3869 OSAddAtomic64(datalen
,
3870 &cfil_stats
.cfs_pending_q_out_enqueued
);
3872 OSAddAtomic64(datalen
,
3873 &cfil_stats
.cfs_pending_q_in_enqueued
);
3876 CFIL_INFO_VERIFY(cfil_info
);
3879 "%llx first %llu peeked %llu pass %llu peek %llu"
3880 "datalen %u copylen %u",
3881 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3882 entrybuf
->cfe_ctl_q
.q_start
,
3883 entrybuf
->cfe_peeked
,
3884 entrybuf
->cfe_pass_offset
,
3885 entrybuf
->cfe_peek_offset
,
3890 /* Now deal with remaining data the filter wants to peek at */
3891 for (data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
),
3892 currentoffset
= entrybuf
->cfe_ctl_q
.q_start
;
3893 data
!= NULL
&& currentoffset
< entrybuf
->cfe_peek_offset
;
3894 data
= cfil_queue_next(&entrybuf
->cfe_ctl_q
, data
),
3895 currentoffset
+= datalen
) {
3896 datalen
= cfil_data_length(data
, NULL
, NULL
);
3899 /* We've already peeked at this mbuf */
3900 if (currentoffset
+ datalen
<= entrybuf
->cfe_peeked
) {
3904 * The data in the first mbuf may have been
3905 * partially peeked at
3907 copyoffset
= entrybuf
->cfe_peeked
- currentoffset
;
3908 VERIFY(copyoffset
< datalen
);
3909 copylen
= datalen
- copyoffset
;
3910 VERIFY(copylen
<= datalen
);
3912 * Do not copy more than needed
3914 if (currentoffset
+ copyoffset
+ copylen
>
3915 entrybuf
->cfe_peek_offset
) {
3916 copylen
= entrybuf
->cfe_peek_offset
-
3917 (currentoffset
+ copyoffset
);
3922 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3923 "datalen %u copylen %u copyoffset %u",
3924 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3926 entrybuf
->cfe_peeked
,
3927 entrybuf
->cfe_pass_offset
,
3928 entrybuf
->cfe_peek_offset
,
3929 datalen
, copylen
, copyoffset
);
3933 * Stop if there is nothing more to peek at
3939 * Let the filter get a peek at this span of data
3941 error
= cfil_dispatch_data_event(so
, cfil_info
, kcunit
,
3942 outgoing
, data
, copyoffset
, copylen
);
3944 /* On error, leave data in ctl_q */
3947 entrybuf
->cfe_peeked
+= copylen
;
3949 OSAddAtomic64(copylen
,
3950 &cfil_stats
.cfs_ctl_q_out_peeked
);
3952 OSAddAtomic64(copylen
,
3953 &cfil_stats
.cfs_ctl_q_in_peeked
);
3956 /* Stop when data could not be fully peeked at */
3957 if (copylen
+ copyoffset
< datalen
) {
3961 CFIL_INFO_VERIFY(cfil_info
);
3964 "%llx first %llu peeked %llu pass %llu peek %llu"
3965 "datalen %u copylen %u copyoffset %u",
3966 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3968 entrybuf
->cfe_peeked
,
3969 entrybuf
->cfe_pass_offset
,
3970 entrybuf
->cfe_peek_offset
,
3971 datalen
, copylen
, copyoffset
);
3975 * Process data that has passed the filter
3977 error
= cfil_service_pending_queue(so
, cfil_info
, kcunit
, outgoing
);
3979 CFIL_LOG(LOG_ERR
, "cfil_service_pending_queue() error %d",
3985 * Dispatch disconnect events that could not be sent
3987 if (cfil_info
== NULL
) {
3989 } else if (outgoing
) {
3990 if ((cfil_info
->cfi_flags
& CFIF_SHUT_WR
) &&
3991 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
)) {
3992 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 1);
3995 if ((cfil_info
->cfi_flags
& CFIF_SHUT_RD
) &&
3996 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
)) {
3997 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 0);
4003 "first %llu peeked %llu pass %llu peek %llu",
4004 entrybuf
->cfe_ctl_q
.q_start
,
4005 entrybuf
->cfe_peeked
,
4006 entrybuf
->cfe_pass_offset
,
4007 entrybuf
->cfe_peek_offset
);
4009 CFIL_INFO_VERIFY(cfil_info
);
4014 * cfil_data_filter()
4016 * Process data for a content filter installed on a socket
4019 cfil_data_filter(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
4020 struct mbuf
*data
, uint64_t datalen
)
4023 struct cfil_entry
*entry
;
4024 struct cfe_buf
*entrybuf
;
4026 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
4027 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
4029 socket_lock_assert_owned(so
);
4031 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
4033 entrybuf
= &entry
->cfe_snd
;
4035 entrybuf
= &entry
->cfe_rcv
;
4038 /* Are we attached to the filter? */
4039 if (entry
->cfe_filter
== NULL
) {
4044 /* Dispatch to filters */
4045 cfil_queue_enqueue(&entrybuf
->cfe_ctl_q
, data
, datalen
);
4047 OSAddAtomic64(datalen
,
4048 &cfil_stats
.cfs_ctl_q_out_enqueued
);
4050 OSAddAtomic64(datalen
,
4051 &cfil_stats
.cfs_ctl_q_in_enqueued
);
4054 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, outgoing
);
4056 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
4060 * We have to return EJUSTRETURN in all cases to avoid double free
4063 error
= EJUSTRETURN
;
4065 CFIL_INFO_VERIFY(cfil_info
);
4067 CFIL_LOG(LOG_INFO
, "return %d", error
);
4072 * cfil_service_inject_queue() re-inject data that passed the
4076 cfil_service_inject_queue(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
4079 unsigned int datalen
;
4083 struct cfi_buf
*cfi_buf
;
4084 struct cfil_queue
*inject_q
;
4085 int need_rwakeup
= 0;
4087 struct inpcb
*inp
= NULL
;
4088 struct ip
*ip
= NULL
;
4091 if (cfil_info
== NULL
) {
4095 socket_lock_assert_owned(so
);
4098 cfi_buf
= &cfil_info
->cfi_snd
;
4099 cfil_info
->cfi_flags
&= ~CFIF_RETRY_INJECT_OUT
;
4101 cfi_buf
= &cfil_info
->cfi_rcv
;
4102 cfil_info
->cfi_flags
&= ~CFIF_RETRY_INJECT_IN
;
4104 inject_q
= &cfi_buf
->cfi_inject_q
;
4106 if (cfil_queue_empty(inject_q
)) {
4110 #if DATA_DEBUG | VERDICT_DEBUG
4111 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4112 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
, cfil_queue_len(inject_q
));
4115 while ((data
= cfil_queue_first(inject_q
)) != NULL
) {
4116 datalen
= cfil_data_length(data
, &mbcnt
, &mbnum
);
4119 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4120 (uint64_t)VM_KERNEL_ADDRPERM(so
), (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, mbcnt
);
4122 if (cfil_info
->cfi_debug
) {
4123 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4124 (uint64_t)VM_KERNEL_ADDRPERM(so
), (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, mbcnt
);
4127 /* Remove data from queue and adjust stats */
4128 cfil_queue_remove(inject_q
, data
, datalen
);
4129 cfi_buf
->cfi_pending_first
+= datalen
;
4130 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
4131 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
4132 cfil_info_buf_verify(cfi_buf
);
4135 error
= sosend_reinject(so
, NULL
, data
, NULL
, 0);
4138 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: Error: sosend_reinject() failed");
4139 CFIL_LOG(LOG_ERR
, "### sosend() failed %d", error
);
4143 // At least one injection succeeded, need to wake up pending threads.
4146 data
->m_flags
|= M_SKIPCFIL
;
4149 * NOTE: We currently only support TCP, UDP, ICMP,
4150 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4151 * need to call the appropriate sbappendxxx()
4152 * of fix sock_inject_data_in()
4154 if (IS_IP_DGRAM(so
)) {
4155 if (OPTIONAL_IP_HEADER(so
)) {
4156 inp
= sotoinpcb(so
);
4157 if (inp
&& (inp
->inp_flags
& INP_STRIPHDR
)) {
4158 mbuf_t data_start
= cfil_data_start(data
);
4159 if (data_start
!= NULL
&& (data_start
->m_flags
& M_PKTHDR
)) {
4160 ip
= mtod(data_start
, struct ip
*);
4161 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
4162 data_start
->m_len
-= hlen
;
4163 data_start
->m_pkthdr
.len
-= hlen
;
4164 data_start
->m_data
+= hlen
;
4169 if (sbappendchain(&so
->so_rcv
, data
, 0)) {
4173 if (sbappendstream(&so
->so_rcv
, data
)) {
4180 OSAddAtomic64(datalen
,
4181 &cfil_stats
.cfs_inject_q_out_passed
);
4183 OSAddAtomic64(datalen
,
4184 &cfil_stats
.cfs_inject_q_in_passed
);
4190 #if DATA_DEBUG | VERDICT_DEBUG
4191 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4192 (uint64_t)VM_KERNEL_ADDRPERM(so
), count
);
4194 if (cfil_info
->cfi_debug
) {
4195 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4196 (uint64_t)VM_KERNEL_ADDRPERM(so
), count
);
4199 /* A single wakeup is for several packets is more efficient */
4201 if (outgoing
== TRUE
) {
4208 if (error
!= 0 && cfil_info
) {
4209 if (error
== ENOBUFS
) {
4210 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nobufs
);
4212 if (error
== ENOMEM
) {
4213 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nomem
);
4217 cfil_info
->cfi_flags
|= CFIF_RETRY_INJECT_OUT
;
4218 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_out_fail
);
4220 cfil_info
->cfi_flags
|= CFIF_RETRY_INJECT_IN
;
4221 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_in_fail
);
4228 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_SHUT_WR
)) {
4229 cfil_sock_notify_shutdown(so
, SHUT_WR
);
4230 if (cfil_sock_data_pending(&so
->so_snd
) == 0) {
4231 soshutdownlock_final(so
, SHUT_WR
);
4234 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
4235 if (cfil_filters_attached(so
) == 0) {
4236 CFIL_LOG(LOG_INFO
, "so %llx waking",
4237 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4238 wakeup((caddr_t
)cfil_info
);
4242 CFIL_INFO_VERIFY(cfil_info
);
4248 cfil_service_pending_queue(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
4250 uint64_t passlen
, curlen
;
4252 unsigned int datalen
;
4254 struct cfil_entry
*entry
;
4255 struct cfe_buf
*entrybuf
;
4256 struct cfil_queue
*pending_q
;
4258 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
4259 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
4261 socket_lock_assert_owned(so
);
4263 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
4265 entrybuf
= &entry
->cfe_snd
;
4267 entrybuf
= &entry
->cfe_rcv
;
4270 pending_q
= &entrybuf
->cfe_pending_q
;
4272 passlen
= entrybuf
->cfe_pass_offset
- pending_q
->q_start
;
4275 * Locate the chunks of data that we can pass to the next filter
4276 * A data chunk must be on mbuf boundaries
4279 while ((data
= cfil_queue_first(pending_q
)) != NULL
) {
4280 struct cfil_entry
*iter_entry
;
4281 datalen
= cfil_data_length(data
, NULL
, NULL
);
4285 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4286 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
,
4290 if (curlen
+ datalen
> passlen
) {
4294 cfil_queue_remove(pending_q
, data
, datalen
);
4298 for (iter_entry
= SLIST_NEXT(entry
, cfe_order_link
);
4300 iter_entry
= SLIST_NEXT(iter_entry
, cfe_order_link
)) {
4301 error
= cfil_data_filter(so
, cfil_info
, CFI_ENTRY_KCUNIT(cfil_info
, iter_entry
), outgoing
,
4303 /* 0 means passed so we can continue */
4308 /* When data has passed all filters, re-inject */
4312 &cfil_info
->cfi_snd
.cfi_inject_q
,
4314 OSAddAtomic64(datalen
,
4315 &cfil_stats
.cfs_inject_q_out_enqueued
);
4318 &cfil_info
->cfi_rcv
.cfi_inject_q
,
4320 OSAddAtomic64(datalen
,
4321 &cfil_stats
.cfs_inject_q_in_enqueued
);
4326 CFIL_INFO_VERIFY(cfil_info
);
4332 cfil_update_data_offsets(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
4333 uint64_t pass_offset
, uint64_t peek_offset
)
4336 struct cfil_entry
*entry
= NULL
;
4337 struct cfe_buf
*entrybuf
;
4340 CFIL_LOG(LOG_INFO
, "pass %llu peek %llu", pass_offset
, peek_offset
);
4342 socket_lock_assert_owned(so
);
4344 if (cfil_info
== NULL
) {
4345 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
4346 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4349 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
4350 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4351 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4356 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
4358 entrybuf
= &entry
->cfe_snd
;
4360 entrybuf
= &entry
->cfe_rcv
;
4363 /* Record updated offsets for this content filter */
4364 if (pass_offset
> entrybuf
->cfe_pass_offset
) {
4365 entrybuf
->cfe_pass_offset
= pass_offset
;
4367 if (entrybuf
->cfe_peek_offset
< entrybuf
->cfe_pass_offset
) {
4368 entrybuf
->cfe_peek_offset
= entrybuf
->cfe_pass_offset
;
4372 CFIL_LOG(LOG_INFO
, "pass_offset %llu <= cfe_pass_offset %llu",
4373 pass_offset
, entrybuf
->cfe_pass_offset
);
4375 /* Filter does not want or need to see data that's allowed to pass */
4376 if (peek_offset
> entrybuf
->cfe_pass_offset
&&
4377 peek_offset
> entrybuf
->cfe_peek_offset
) {
4378 entrybuf
->cfe_peek_offset
= peek_offset
;
4386 /* Move data held in control queue to pending queue if needed */
4387 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, outgoing
);
4389 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
4393 error
= EJUSTRETURN
;
4397 * The filter is effectively detached when pass all from both sides
4398 * or when the socket is closed and no more data is waiting
4399 * to be delivered to the filter
4401 if (entry
!= NULL
&&
4402 ((entry
->cfe_snd
.cfe_pass_offset
== CFM_MAX_OFFSET
&&
4403 entry
->cfe_rcv
.cfe_pass_offset
== CFM_MAX_OFFSET
) ||
4404 ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
4405 cfil_queue_empty(&entry
->cfe_snd
.cfe_ctl_q
) &&
4406 cfil_queue_empty(&entry
->cfe_rcv
.cfe_ctl_q
)))) {
4407 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
4409 cfil_info_log(LOG_ERR
, cfil_info
, outgoing
?
4410 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4411 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4413 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
4414 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
4415 if ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
4416 cfil_filters_attached(so
) == 0) {
4418 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: WAKING");
4420 CFIL_LOG(LOG_INFO
, "so %llx waking",
4421 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4422 wakeup((caddr_t
)cfil_info
);
4425 CFIL_INFO_VERIFY(cfil_info
);
4426 CFIL_LOG(LOG_INFO
, "return %d", error
);
4431 * Update pass offset for socket when no data is pending
4434 cfil_set_socket_pass_offset(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
4436 struct cfi_buf
*cfi_buf
;
4437 struct cfil_entry
*entry
;
4438 struct cfe_buf
*entrybuf
;
4440 uint64_t pass_offset
= 0;
4442 if (cfil_info
== NULL
) {
4446 CFIL_LOG(LOG_INFO
, "so %llx outgoing %d",
4447 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
);
4449 socket_lock_assert_owned(so
);
4452 cfi_buf
= &cfil_info
->cfi_snd
;
4454 cfi_buf
= &cfil_info
->cfi_rcv
;
4457 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4458 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
,
4459 cfi_buf
->cfi_pending_first
, cfi_buf
->cfi_pending_last
);
4461 if (cfi_buf
->cfi_pending_last
- cfi_buf
->cfi_pending_first
== 0) {
4462 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
4463 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
4465 /* Are we attached to a filter? */
4466 if (entry
->cfe_filter
== NULL
) {
4471 entrybuf
= &entry
->cfe_snd
;
4473 entrybuf
= &entry
->cfe_rcv
;
4476 if (pass_offset
== 0 ||
4477 entrybuf
->cfe_pass_offset
< pass_offset
) {
4478 pass_offset
= entrybuf
->cfe_pass_offset
;
4481 cfi_buf
->cfi_pass_offset
= pass_offset
;
4484 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4485 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, cfi_buf
->cfi_pass_offset
);
4491 cfil_action_data_pass(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
4492 uint64_t pass_offset
, uint64_t peek_offset
)
4496 CFIL_LOG(LOG_INFO
, "");
4498 socket_lock_assert_owned(so
);
4500 error
= cfil_acquire_sockbuf(so
, cfil_info
, outgoing
);
4502 CFIL_LOG(LOG_INFO
, "so %llx %s dropped",
4503 (uint64_t)VM_KERNEL_ADDRPERM(so
),
4504 outgoing
? "out" : "in");
4508 error
= cfil_update_data_offsets(so
, cfil_info
, kcunit
, outgoing
,
4509 pass_offset
, peek_offset
);
4511 cfil_service_inject_queue(so
, cfil_info
, outgoing
);
4513 cfil_set_socket_pass_offset(so
, cfil_info
, outgoing
);
4515 CFIL_INFO_VERIFY(cfil_info
);
4516 cfil_release_sockbuf(so
, outgoing
);
4523 cfil_flush_queues(struct socket
*so
, struct cfil_info
*cfil_info
)
4525 struct cfil_entry
*entry
;
4529 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || cfil_info
== NULL
) {
4533 socket_lock_assert_owned(so
);
4536 * Flush the output queues and ignore errors as long as
4539 (void) cfil_acquire_sockbuf(so
, cfil_info
, 1);
4540 if (cfil_info
!= NULL
) {
4542 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
4543 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
4545 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
4546 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
4548 drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
4551 if (cfil_info
->cfi_flags
& CFIF_DROP
) {
4553 &cfil_stats
.cfs_flush_out_drop
);
4556 &cfil_stats
.cfs_flush_out_close
);
4560 cfil_release_sockbuf(so
, 1);
4563 * Flush the input queues
4565 (void) cfil_acquire_sockbuf(so
, cfil_info
, 0);
4566 if (cfil_info
!= NULL
) {
4568 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
4569 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
4571 drained
+= cfil_queue_drain(
4572 &entry
->cfe_rcv
.cfe_ctl_q
);
4573 drained
+= cfil_queue_drain(
4574 &entry
->cfe_rcv
.cfe_pending_q
);
4576 drained
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
4579 if (cfil_info
->cfi_flags
& CFIF_DROP
) {
4581 &cfil_stats
.cfs_flush_in_drop
);
4584 &cfil_stats
.cfs_flush_in_close
);
4588 cfil_release_sockbuf(so
, 0);
4590 CFIL_INFO_VERIFY(cfil_info
);
4594 cfil_action_drop(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
)
4597 struct cfil_entry
*entry
;
4600 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || cfil_info
== NULL
) {
4604 socket_lock_assert_owned(so
);
4606 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
4608 /* Are we attached to the filter? */
4609 if (entry
->cfe_filter
== NULL
) {
4613 cfil_info
->cfi_flags
|= CFIF_DROP
;
4618 * Force the socket to be marked defunct
4619 * (forcing fixed along with rdar://19391339)
4621 if (so
->so_cfil_db
== NULL
) {
4622 error
= sosetdefunct(p
, so
,
4623 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
,
4626 /* Flush the socket buffer and disconnect */
4628 error
= sodefunct(p
, so
,
4629 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
);
4633 /* The filter is done, mark as detached */
4634 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
4636 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: DROP - DETACH");
4638 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
4639 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
4641 /* Pending data needs to go */
4642 cfil_flush_queues(so
, cfil_info
);
4644 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
4645 if (cfil_filters_attached(so
) == 0) {
4646 CFIL_LOG(LOG_INFO
, "so %llx waking",
4647 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4648 wakeup((caddr_t
)cfil_info
);
4656 cfil_action_bless_client(uint32_t kcunit
, struct cfil_msg_hdr
*msghdr
)
4659 struct cfil_info
*cfil_info
= NULL
;
4661 bool cfil_attached
= false;
4662 struct cfil_msg_bless_client
*blessmsg
= (struct cfil_msg_bless_client
*)msghdr
;
4664 // Search and lock socket
4665 struct socket
*so
= cfil_socket_from_client_uuid(blessmsg
->cfb_client_uuid
, &cfil_attached
);
4669 // The client gets a pass automatically
4670 cfil_info
= (so
->so_cfil_db
!= NULL
) ?
4671 cfil_db_get_cfil_info(so
->so_cfil_db
, msghdr
->cfm_sock_id
) : so
->so_cfil
;
4673 if (cfil_attached
) {
4675 if (cfil_info
!= NULL
) {
4676 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4677 cfil_info
->cfi_hash_entry
? "UDP" : "TCP",
4678 (uint64_t)VM_KERNEL_ADDRPERM(so
),
4679 cfil_info
->cfi_sock_id
);
4682 cfil_sock_received_verdict(so
);
4683 (void)cfil_action_data_pass(so
, cfil_info
, kcunit
, 1, CFM_MAX_OFFSET
, CFM_MAX_OFFSET
);
4684 (void)cfil_action_data_pass(so
, cfil_info
, kcunit
, 0, CFM_MAX_OFFSET
, CFM_MAX_OFFSET
);
4686 so
->so_flags1
|= SOF1_CONTENT_FILTER_SKIP
;
4688 socket_unlock(so
, 1);
4695 cfil_action_set_crypto_key(uint32_t kcunit
, struct cfil_msg_hdr
*msghdr
)
4697 struct content_filter
*cfc
= NULL
;
4698 cfil_crypto_state_t crypto_state
= NULL
;
4699 struct cfil_msg_set_crypto_key
*keymsg
= (struct cfil_msg_set_crypto_key
*)msghdr
;
4701 CFIL_LOG(LOG_NOTICE
, "");
4703 if (content_filters
== NULL
) {
4704 CFIL_LOG(LOG_ERR
, "no content filter");
4707 if (kcunit
> MAX_CONTENT_FILTER
) {
4708 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4709 kcunit
, MAX_CONTENT_FILTER
);
4712 crypto_state
= cfil_crypto_init_client((uint8_t *)keymsg
->crypto_key
);
4713 if (crypto_state
== NULL
) {
4714 CFIL_LOG(LOG_ERR
, "failed to initialize crypto state for unit %u)",
4719 cfil_rw_lock_exclusive(&cfil_lck_rw
);
4721 cfc
= content_filters
[kcunit
- 1];
4722 if (cfc
->cf_kcunit
!= kcunit
) {
4723 CFIL_LOG(LOG_ERR
, "bad unit info %u)",
4725 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
4726 cfil_crypto_cleanup_state(crypto_state
);
4729 if (cfc
->cf_crypto_state
!= NULL
) {
4730 cfil_crypto_cleanup_state(cfc
->cf_crypto_state
);
4731 cfc
->cf_crypto_state
= NULL
;
4733 cfc
->cf_crypto_state
= crypto_state
;
4735 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
4740 cfil_update_entry_offsets(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
, unsigned int datalen
)
4742 struct cfil_entry
*entry
;
4743 struct cfe_buf
*entrybuf
;
4746 CFIL_LOG(LOG_INFO
, "so %llx outgoing %d datalen %u",
4747 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
, datalen
);
4749 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
4750 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
4752 /* Are we attached to the filter? */
4753 if (entry
->cfe_filter
== NULL
) {
4758 entrybuf
= &entry
->cfe_snd
;
4760 entrybuf
= &entry
->cfe_rcv
;
4763 entrybuf
->cfe_ctl_q
.q_start
+= datalen
;
4764 entrybuf
->cfe_pass_offset
= entrybuf
->cfe_ctl_q
.q_start
;
4765 entrybuf
->cfe_peeked
= entrybuf
->cfe_ctl_q
.q_start
;
4766 if (entrybuf
->cfe_peek_offset
< entrybuf
->cfe_pass_offset
) {
4767 entrybuf
->cfe_peek_offset
= entrybuf
->cfe_pass_offset
;
4770 entrybuf
->cfe_ctl_q
.q_end
+= datalen
;
4772 entrybuf
->cfe_pending_q
.q_start
+= datalen
;
4773 entrybuf
->cfe_pending_q
.q_end
+= datalen
;
4775 CFIL_INFO_VERIFY(cfil_info
);
4780 cfil_data_common(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
, struct sockaddr
*to
,
4781 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4783 #pragma unused(to, control, flags)
4785 unsigned int datalen
;
4789 struct cfi_buf
*cfi_buf
;
4790 struct mbuf
*chain
= NULL
;
4792 if (cfil_info
== NULL
) {
4793 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
4794 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4797 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
4798 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4799 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4804 datalen
= cfil_data_length(data
, &mbcnt
, &mbnum
);
4807 cfi_buf
= &cfil_info
->cfi_snd
;
4808 cfil_info
->cfi_byte_outbound_count
+= datalen
;
4810 cfi_buf
= &cfil_info
->cfi_rcv
;
4811 cfil_info
->cfi_byte_inbound_count
+= datalen
;
4814 cfi_buf
->cfi_pending_last
+= datalen
;
4815 cfi_buf
->cfi_pending_mbcnt
+= mbcnt
;
4816 cfi_buf
->cfi_pending_mbnum
+= mbnum
;
4818 if (IS_IP_DGRAM(so
)) {
4819 if (cfi_buf
->cfi_pending_mbnum
> cfil_udp_gc_mbuf_num_max
||
4820 cfi_buf
->cfi_pending_mbcnt
> cfil_udp_gc_mbuf_cnt_max
) {
4821 cfi_buf
->cfi_tail_drop_cnt
++;
4822 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
4823 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
4828 cfil_info_buf_verify(cfi_buf
);
4831 CFIL_LOG(LOG_DEBUG
, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4832 (uint64_t)VM_KERNEL_ADDRPERM(so
),
4833 outgoing
? "OUT" : "IN",
4834 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, data
->m_flags
,
4835 (uint64_t)VM_KERNEL_ADDRPERM(data
->m_nextpkt
),
4836 cfi_buf
->cfi_pending_last
,
4837 cfi_buf
->cfi_pending_mbcnt
,
4838 cfi_buf
->cfi_pass_offset
);
4841 /* Fast path when below pass offset */
4842 if (cfi_buf
->cfi_pending_last
<= cfi_buf
->cfi_pass_offset
) {
4843 cfil_update_entry_offsets(so
, cfil_info
, outgoing
, datalen
);
4845 CFIL_LOG(LOG_DEBUG
, "CFIL: QUEUEING DATA: FAST PATH");
4848 struct cfil_entry
*iter_entry
;
4849 SLIST_FOREACH(iter_entry
, &cfil_info
->cfi_ordered_entries
, cfe_order_link
) {
4850 // Is cfil attached to this filter?
4851 kcunit
= CFI_ENTRY_KCUNIT(cfil_info
, iter_entry
);
4852 if (IS_ENTRY_ATTACHED(cfil_info
, kcunit
)) {
4853 if (IS_IP_DGRAM(so
) && chain
== NULL
) {
4855 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4856 * This full chain will be reinjected into socket after recieving verdict.
4858 (void) cfil_dgram_save_socket_state(cfil_info
, data
);
4859 chain
= sbconcat_mbufs(NULL
, outgoing
? NULL
: to
, data
, control
);
4860 if (chain
== NULL
) {
4865 error
= cfil_data_filter(so
, cfil_info
, kcunit
, outgoing
, data
,
4868 /* 0 means passed so continue with next filter */
4875 /* Move cursor if no filter claimed the data */
4877 cfi_buf
->cfi_pending_first
+= datalen
;
4878 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
4879 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
4880 cfil_info_buf_verify(cfi_buf
);
4883 CFIL_INFO_VERIFY(cfil_info
);
4889 * Callback from socket layer sosendxxx()
4892 cfil_sock_data_out(struct socket
*so
, struct sockaddr
*to
,
4893 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4897 if (IS_IP_DGRAM(so
)) {
4898 return cfil_sock_udp_handle_data(TRUE
, so
, NULL
, to
, data
, control
, flags
);
4901 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
4906 * Pass initial data for TFO.
4908 if (IS_INITIAL_TFO_DATA(so
)) {
4912 socket_lock_assert_owned(so
);
4914 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
4915 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4916 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4919 if (control
!= NULL
) {
4920 CFIL_LOG(LOG_ERR
, "so %llx control",
4921 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4922 OSIncrementAtomic(&cfil_stats
.cfs_data_out_control
);
4924 if ((flags
& MSG_OOB
)) {
4925 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
4926 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4927 OSIncrementAtomic(&cfil_stats
.cfs_data_out_oob
);
4929 if ((so
->so_snd
.sb_flags
& SB_LOCK
) == 0) {
4930 panic("so %p SB_LOCK not set", so
);
4933 if (so
->so_snd
.sb_cfil_thread
!= NULL
) {
4934 panic("%s sb_cfil_thread %p not NULL", __func__
,
4935 so
->so_snd
.sb_cfil_thread
);
4938 error
= cfil_data_common(so
, so
->so_cfil
, 1, to
, data
, control
, flags
);
4944 * Callback from socket layer sbappendxxx()
4947 cfil_sock_data_in(struct socket
*so
, struct sockaddr
*from
,
4948 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4952 if (IS_IP_DGRAM(so
)) {
4953 return cfil_sock_udp_handle_data(FALSE
, so
, NULL
, from
, data
, control
, flags
);
4956 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
4961 * Pass initial data for TFO.
4963 if (IS_INITIAL_TFO_DATA(so
)) {
4967 socket_lock_assert_owned(so
);
4969 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
4970 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4971 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4974 if (control
!= NULL
) {
4975 CFIL_LOG(LOG_ERR
, "so %llx control",
4976 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4977 OSIncrementAtomic(&cfil_stats
.cfs_data_in_control
);
4979 if (data
->m_type
== MT_OOBDATA
) {
4980 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
4981 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4982 OSIncrementAtomic(&cfil_stats
.cfs_data_in_oob
);
4984 error
= cfil_data_common(so
, so
->so_cfil
, 0, from
, data
, control
, flags
);
4990 * Callback from socket layer soshutdownxxx()
4992 * We may delay the shutdown write if there's outgoing data in process.
4994 * There is no point in delaying the shutdown read because the process
4995 * indicated that it does not want to read anymore data.
4998 cfil_sock_shutdown(struct socket
*so
, int *how
)
5002 if (IS_IP_DGRAM(so
)) {
5003 return cfil_sock_udp_shutdown(so
, how
);
5006 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
5010 socket_lock_assert_owned(so
);
5012 CFIL_LOG(LOG_INFO
, "so %llx how %d",
5013 (uint64_t)VM_KERNEL_ADDRPERM(so
), *how
);
5016 * Check the state of the socket before the content filter
5018 if (*how
!= SHUT_WR
&& (so
->so_state
& SS_CANTRCVMORE
) != 0) {
5019 /* read already shut down */
5023 if (*how
!= SHUT_RD
&& (so
->so_state
& SS_CANTSENDMORE
) != 0) {
5024 /* write already shut down */
5029 if ((so
->so_cfil
->cfi_flags
& CFIF_DROP
) != 0) {
5030 CFIL_LOG(LOG_ERR
, "so %llx drop set",
5031 (uint64_t)VM_KERNEL_ADDRPERM(so
));
5036 * shutdown read: SHUT_RD or SHUT_RDWR
5038 if (*how
!= SHUT_WR
) {
5039 if (so
->so_cfil
->cfi_flags
& CFIF_SHUT_RD
) {
5043 so
->so_cfil
->cfi_flags
|= CFIF_SHUT_RD
;
5044 cfil_sock_notify_shutdown(so
, SHUT_RD
);
5047 * shutdown write: SHUT_WR or SHUT_RDWR
5049 if (*how
!= SHUT_RD
) {
5050 if (so
->so_cfil
->cfi_flags
& CFIF_SHUT_WR
) {
5054 so
->so_cfil
->cfi_flags
|= CFIF_SHUT_WR
;
5055 cfil_sock_notify_shutdown(so
, SHUT_WR
);
5057 * When outgoing data is pending, we delay the shutdown at the
5058 * protocol level until the content filters give the final
5059 * verdict on the pending data.
5061 if (cfil_sock_data_pending(&so
->so_snd
) != 0) {
5063 * When shutting down the read and write sides at once
5064 * we can proceed to the final shutdown of the read
5065 * side. Otherwise, we just return.
5067 if (*how
== SHUT_WR
) {
5068 error
= EJUSTRETURN
;
5069 } else if (*how
== SHUT_RDWR
) {
5079 * This is called when the socket is closed and there is no more
5080 * opportunity for filtering
5083 cfil_sock_is_closed(struct socket
*so
)
5088 if (IS_IP_DGRAM(so
)) {
5089 cfil_sock_udp_is_closed(so
);
5093 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
5097 CFIL_LOG(LOG_INFO
, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so
));
5099 socket_lock_assert_owned(so
);
5101 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
5102 /* Let the filters know of the closing */
5103 error
= cfil_dispatch_closed_event(so
, so
->so_cfil
, kcunit
);
5106 /* Last chance to push passed data out */
5107 error
= cfil_acquire_sockbuf(so
, so
->so_cfil
, 1);
5109 cfil_service_inject_queue(so
, so
->so_cfil
, 1);
5111 cfil_release_sockbuf(so
, 1);
5113 so
->so_cfil
->cfi_flags
|= CFIF_SOCK_CLOSED
;
5115 /* Pending data needs to go */
5116 cfil_flush_queues(so
, so
->so_cfil
);
5118 CFIL_INFO_VERIFY(so
->so_cfil
);
5122 * This is called when the socket is disconnected so let the filters
5123 * know about the disconnection and that no more data will come
5125 * The how parameter has the same values as soshutown()
5128 cfil_sock_notify_shutdown(struct socket
*so
, int how
)
5133 if (IS_IP_DGRAM(so
)) {
5134 cfil_sock_udp_notify_shutdown(so
, how
, 0, 0);
5138 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
5142 CFIL_LOG(LOG_INFO
, "so %llx how %d",
5143 (uint64_t)VM_KERNEL_ADDRPERM(so
), how
);
5145 socket_lock_assert_owned(so
);
5147 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
5148 /* Disconnect incoming side */
5149 if (how
!= SHUT_WR
) {
5150 error
= cfil_dispatch_disconnect_event(so
, so
->so_cfil
, kcunit
, 0);
5152 /* Disconnect outgoing side */
5153 if (how
!= SHUT_RD
) {
5154 error
= cfil_dispatch_disconnect_event(so
, so
->so_cfil
, kcunit
, 1);
5160 cfil_filters_attached(struct socket
*so
)
5162 struct cfil_entry
*entry
;
5166 if (IS_IP_DGRAM(so
)) {
5167 return cfil_filters_udp_attached(so
, FALSE
);
5170 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
5174 socket_lock_assert_owned(so
);
5176 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
5177 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
5179 /* Are we attached to the filter? */
5180 if (entry
->cfe_filter
== NULL
) {
5183 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
5186 if ((entry
->cfe_flags
& CFEF_CFIL_DETACHED
) != 0) {
5197 * This is called when the socket is closed and we are waiting for
5198 * the filters to gives the final pass or drop
5201 cfil_sock_close_wait(struct socket
*so
)
5203 lck_mtx_t
*mutex_held
;
5207 if (IS_IP_DGRAM(so
)) {
5208 cfil_sock_udp_close_wait(so
);
5212 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
5216 CFIL_LOG(LOG_INFO
, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so
));
5218 if (so
->so_proto
->pr_getlock
!= NULL
) {
5219 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
5221 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
5223 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
5225 while (cfil_filters_attached(so
)) {
5227 * Notify the filters we are going away so they can detach
5229 cfil_sock_notify_shutdown(so
, SHUT_RDWR
);
5232 * Make sure we need to wait after the filter are notified
5233 * of the disconnection
5235 if (cfil_filters_attached(so
) == 0) {
5239 CFIL_LOG(LOG_INFO
, "so %llx waiting",
5240 (uint64_t)VM_KERNEL_ADDRPERM(so
));
5242 ts
.tv_sec
= cfil_close_wait_timeout
/ 1000;
5243 ts
.tv_nsec
= (cfil_close_wait_timeout
% 1000) *
5244 NSEC_PER_USEC
* 1000;
5246 OSIncrementAtomic(&cfil_stats
.cfs_close_wait
);
5247 so
->so_cfil
->cfi_flags
|= CFIF_CLOSE_WAIT
;
5248 error
= msleep((caddr_t
)so
->so_cfil
, mutex_held
,
5249 PSOCK
| PCATCH
, "cfil_sock_close_wait", &ts
);
5250 so
->so_cfil
->cfi_flags
&= ~CFIF_CLOSE_WAIT
;
5252 CFIL_LOG(LOG_NOTICE
, "so %llx timed out %d",
5253 (uint64_t)VM_KERNEL_ADDRPERM(so
), (error
!= 0));
5256 * Force close in case of timeout
5259 OSIncrementAtomic(&cfil_stats
.cfs_close_wait_timeout
);
5266 * Returns the size of the data held by the content filter by using
5269 cfil_sock_data_pending(struct sockbuf
*sb
)
5271 struct socket
*so
= sb
->sb_so
;
5272 uint64_t pending
= 0;
5274 if (IS_IP_DGRAM(so
)) {
5275 return cfil_sock_udp_data_pending(sb
, FALSE
);
5278 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0 && so
->so_cfil
!= NULL
) {
5279 struct cfi_buf
*cfi_buf
;
5281 socket_lock_assert_owned(so
);
5283 if ((sb
->sb_flags
& SB_RECV
) == 0) {
5284 cfi_buf
= &so
->so_cfil
->cfi_snd
;
5286 cfi_buf
= &so
->so_cfil
->cfi_rcv
;
5289 pending
= cfi_buf
->cfi_pending_last
-
5290 cfi_buf
->cfi_pending_first
;
5293 * If we are limited by the "chars of mbufs used" roughly
5294 * adjust so we won't overcommit
5296 if (pending
> (uint64_t)cfi_buf
->cfi_pending_mbcnt
) {
5297 pending
= cfi_buf
->cfi_pending_mbcnt
;
5301 VERIFY(pending
< INT32_MAX
);
5303 return (int32_t)(pending
);
5307 * Return the socket buffer space used by data being held by content filters
5308 * so processes won't clog the socket buffer
5311 cfil_sock_data_space(struct sockbuf
*sb
)
5313 struct socket
*so
= sb
->sb_so
;
5314 uint64_t pending
= 0;
5316 if (IS_IP_DGRAM(so
)) {
5317 return cfil_sock_udp_data_pending(sb
, TRUE
);
5320 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0 && so
->so_cfil
!= NULL
&&
5321 so
->so_snd
.sb_cfil_thread
!= current_thread()) {
5322 struct cfi_buf
*cfi_buf
;
5324 socket_lock_assert_owned(so
);
5326 if ((sb
->sb_flags
& SB_RECV
) == 0) {
5327 cfi_buf
= &so
->so_cfil
->cfi_snd
;
5329 cfi_buf
= &so
->so_cfil
->cfi_rcv
;
5332 pending
= cfi_buf
->cfi_pending_last
-
5333 cfi_buf
->cfi_pending_first
;
5336 * If we are limited by the "chars of mbufs used" roughly
5337 * adjust so we won't overcommit
5339 if ((uint64_t)cfi_buf
->cfi_pending_mbcnt
> pending
) {
5340 pending
= cfi_buf
->cfi_pending_mbcnt
;
5344 VERIFY(pending
< INT32_MAX
);
5346 return (int32_t)(pending
);
5350 * A callback from the socket and protocol layer when data becomes
5351 * available in the socket buffer to give a chance for the content filter
5352 * to re-inject data that was held back
5355 cfil_sock_buf_update(struct sockbuf
*sb
)
5359 struct socket
*so
= sb
->sb_so
;
5361 if (IS_IP_DGRAM(so
)) {
5362 cfil_sock_udp_buf_update(sb
);
5366 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
5374 socket_lock_assert_owned(so
);
5376 if ((sb
->sb_flags
& SB_RECV
) == 0) {
5377 if ((so
->so_cfil
->cfi_flags
& CFIF_RETRY_INJECT_OUT
) == 0) {
5381 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_out_retry
);
5383 if ((so
->so_cfil
->cfi_flags
& CFIF_RETRY_INJECT_IN
) == 0) {
5387 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_in_retry
);
5390 CFIL_LOG(LOG_NOTICE
, "so %llx outgoing %d",
5391 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
);
5393 error
= cfil_acquire_sockbuf(so
, so
->so_cfil
, outgoing
);
5395 cfil_service_inject_queue(so
, so
->so_cfil
, outgoing
);
5397 cfil_release_sockbuf(so
, outgoing
);
5401 sysctl_cfil_filter_list(struct sysctl_oid
*oidp
, void *arg1
, int arg2
,
5402 struct sysctl_req
*req
)
5404 #pragma unused(oidp, arg1, arg2)
5410 if (req
->newptr
!= USER_ADDR_NULL
) {
5414 cfil_rw_lock_shared(&cfil_lck_rw
);
5416 for (i
= 0; content_filters
!= NULL
&& i
< MAX_CONTENT_FILTER
; i
++) {
5417 struct cfil_filter_stat filter_stat
;
5418 struct content_filter
*cfc
= content_filters
[i
];
5424 /* If just asking for the size */
5425 if (req
->oldptr
== USER_ADDR_NULL
) {
5426 len
+= sizeof(struct cfil_filter_stat
);
5430 bzero(&filter_stat
, sizeof(struct cfil_filter_stat
));
5431 filter_stat
.cfs_len
= sizeof(struct cfil_filter_stat
);
5432 filter_stat
.cfs_filter_id
= cfc
->cf_kcunit
;
5433 filter_stat
.cfs_flags
= cfc
->cf_flags
;
5434 filter_stat
.cfs_sock_count
= cfc
->cf_sock_count
;
5435 filter_stat
.cfs_necp_control_unit
= cfc
->cf_necp_control_unit
;
5437 error
= SYSCTL_OUT(req
, &filter_stat
,
5438 sizeof(struct cfil_filter_stat
));
5443 /* If just asking for the size */
5444 if (req
->oldptr
== USER_ADDR_NULL
) {
5448 cfil_rw_unlock_shared(&cfil_lck_rw
);
5451 if (req
->oldptr
!= USER_ADDR_NULL
) {
5452 for (i
= 1; content_filters
!= NULL
&& i
<= MAX_CONTENT_FILTER
; i
++) {
5453 cfil_filter_show(i
);
5462 sysctl_cfil_sock_list(struct sysctl_oid
*oidp
, void *arg1
, int arg2
,
5463 struct sysctl_req
*req
)
5465 #pragma unused(oidp, arg1, arg2)
5468 struct cfil_info
*cfi
;
5471 if (req
->newptr
!= USER_ADDR_NULL
) {
5475 cfil_rw_lock_shared(&cfil_lck_rw
);
5478 * If just asking for the size,
5480 if (req
->oldptr
== USER_ADDR_NULL
) {
5481 req
->oldidx
= cfil_sock_attached_count
*
5482 sizeof(struct cfil_sock_stat
);
5483 /* Bump the length in case new sockets gets attached */
5484 req
->oldidx
+= req
->oldidx
>> 3;
5488 TAILQ_FOREACH(cfi
, &cfil_sock_head
, cfi_link
) {
5489 struct cfil_entry
*entry
;
5490 struct cfil_sock_stat stat
;
5491 struct socket
*so
= cfi
->cfi_so
;
5493 bzero(&stat
, sizeof(struct cfil_sock_stat
));
5494 stat
.cfs_len
= sizeof(struct cfil_sock_stat
);
5495 stat
.cfs_sock_id
= cfi
->cfi_sock_id
;
5496 stat
.cfs_flags
= cfi
->cfi_flags
;
5499 stat
.cfs_pid
= so
->last_pid
;
5500 memcpy(stat
.cfs_uuid
, so
->last_uuid
,
5502 if (so
->so_flags
& SOF_DELEGATED
) {
5503 stat
.cfs_e_pid
= so
->e_pid
;
5504 memcpy(stat
.cfs_e_uuid
, so
->e_uuid
,
5507 stat
.cfs_e_pid
= so
->last_pid
;
5508 memcpy(stat
.cfs_e_uuid
, so
->last_uuid
,
5512 stat
.cfs_sock_family
= so
->so_proto
->pr_domain
->dom_family
;
5513 stat
.cfs_sock_type
= so
->so_proto
->pr_type
;
5514 stat
.cfs_sock_protocol
= so
->so_proto
->pr_protocol
;
5517 stat
.cfs_snd
.cbs_pending_first
=
5518 cfi
->cfi_snd
.cfi_pending_first
;
5519 stat
.cfs_snd
.cbs_pending_last
=
5520 cfi
->cfi_snd
.cfi_pending_last
;
5521 stat
.cfs_snd
.cbs_inject_q_len
=
5522 cfil_queue_len(&cfi
->cfi_snd
.cfi_inject_q
);
5523 stat
.cfs_snd
.cbs_pass_offset
=
5524 cfi
->cfi_snd
.cfi_pass_offset
;
5526 stat
.cfs_rcv
.cbs_pending_first
=
5527 cfi
->cfi_rcv
.cfi_pending_first
;
5528 stat
.cfs_rcv
.cbs_pending_last
=
5529 cfi
->cfi_rcv
.cfi_pending_last
;
5530 stat
.cfs_rcv
.cbs_inject_q_len
=
5531 cfil_queue_len(&cfi
->cfi_rcv
.cfi_inject_q
);
5532 stat
.cfs_rcv
.cbs_pass_offset
=
5533 cfi
->cfi_rcv
.cfi_pass_offset
;
5535 for (i
= 0; i
< MAX_CONTENT_FILTER
; i
++) {
5536 struct cfil_entry_stat
*estat
;
5537 struct cfe_buf
*ebuf
;
5538 struct cfe_buf_stat
*sbuf
;
5540 entry
= &cfi
->cfi_entries
[i
];
5542 estat
= &stat
.ces_entries
[i
];
5544 estat
->ces_len
= sizeof(struct cfil_entry_stat
);
5545 estat
->ces_filter_id
= entry
->cfe_filter
?
5546 entry
->cfe_filter
->cf_kcunit
: 0;
5547 estat
->ces_flags
= entry
->cfe_flags
;
5548 estat
->ces_necp_control_unit
=
5549 entry
->cfe_necp_control_unit
;
5551 estat
->ces_last_event
.tv_sec
=
5552 (int64_t)entry
->cfe_last_event
.tv_sec
;
5553 estat
->ces_last_event
.tv_usec
=
5554 (int64_t)entry
->cfe_last_event
.tv_usec
;
5556 estat
->ces_last_action
.tv_sec
=
5557 (int64_t)entry
->cfe_last_action
.tv_sec
;
5558 estat
->ces_last_action
.tv_usec
=
5559 (int64_t)entry
->cfe_last_action
.tv_usec
;
5561 ebuf
= &entry
->cfe_snd
;
5562 sbuf
= &estat
->ces_snd
;
5563 sbuf
->cbs_pending_first
=
5564 cfil_queue_offset_first(&ebuf
->cfe_pending_q
);
5565 sbuf
->cbs_pending_last
=
5566 cfil_queue_offset_last(&ebuf
->cfe_pending_q
);
5567 sbuf
->cbs_ctl_first
=
5568 cfil_queue_offset_first(&ebuf
->cfe_ctl_q
);
5569 sbuf
->cbs_ctl_last
=
5570 cfil_queue_offset_last(&ebuf
->cfe_ctl_q
);
5571 sbuf
->cbs_pass_offset
= ebuf
->cfe_pass_offset
;
5572 sbuf
->cbs_peek_offset
= ebuf
->cfe_peek_offset
;
5573 sbuf
->cbs_peeked
= ebuf
->cfe_peeked
;
5575 ebuf
= &entry
->cfe_rcv
;
5576 sbuf
= &estat
->ces_rcv
;
5577 sbuf
->cbs_pending_first
=
5578 cfil_queue_offset_first(&ebuf
->cfe_pending_q
);
5579 sbuf
->cbs_pending_last
=
5580 cfil_queue_offset_last(&ebuf
->cfe_pending_q
);
5581 sbuf
->cbs_ctl_first
=
5582 cfil_queue_offset_first(&ebuf
->cfe_ctl_q
);
5583 sbuf
->cbs_ctl_last
=
5584 cfil_queue_offset_last(&ebuf
->cfe_ctl_q
);
5585 sbuf
->cbs_pass_offset
= ebuf
->cfe_pass_offset
;
5586 sbuf
->cbs_peek_offset
= ebuf
->cfe_peek_offset
;
5587 sbuf
->cbs_peeked
= ebuf
->cfe_peeked
;
5589 error
= SYSCTL_OUT(req
, &stat
,
5590 sizeof(struct cfil_sock_stat
));
5596 cfil_rw_unlock_shared(&cfil_lck_rw
);
5599 if (req
->oldptr
!= USER_ADDR_NULL
) {
5608 * UDP Socket Support
5611 cfil_hash_entry_log(int level
, struct socket
*so
, struct cfil_hash_entry
*entry
, uint64_t sockId
, const char* msg
)
5613 char local
[MAX_IPv6_STR_LEN
+ 6];
5614 char remote
[MAX_IPv6_STR_LEN
+ 6];
5617 // No sock or not UDP, no-op
5618 if (so
== NULL
|| entry
== NULL
) {
5622 local
[0] = remote
[0] = 0x0;
5624 switch (entry
->cfentry_family
) {
5626 addr
= &entry
->cfentry_laddr
.addr6
;
5627 inet_ntop(AF_INET6
, addr
, local
, sizeof(local
));
5628 addr
= &entry
->cfentry_faddr
.addr6
;
5629 inet_ntop(AF_INET6
, addr
, remote
, sizeof(local
));
5632 addr
= &entry
->cfentry_laddr
.addr46
.ia46_addr4
.s_addr
;
5633 inet_ntop(AF_INET
, addr
, local
, sizeof(local
));
5634 addr
= &entry
->cfentry_faddr
.addr46
.ia46_addr4
.s_addr
;
5635 inet_ntop(AF_INET
, addr
, remote
, sizeof(local
));
5641 CFIL_LOG(level
, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
5643 IS_UDP(so
) ? "UDP" : "proto", GET_SO_PROTO(so
),
5644 (uint64_t)VM_KERNEL_ADDRPERM(so
), entry
, sockId
,
5645 ntohs(entry
->cfentry_lport
), ntohs(entry
->cfentry_fport
), local
, remote
);
5649 cfil_inp_log(int level
, struct socket
*so
, const char* msg
)
5651 struct inpcb
*inp
= NULL
;
5652 char local
[MAX_IPv6_STR_LEN
+ 6];
5653 char remote
[MAX_IPv6_STR_LEN
+ 6];
5660 inp
= sotoinpcb(so
);
5665 local
[0] = remote
[0] = 0x0;
5668 if (inp
->inp_vflag
& INP_IPV6
) {
5669 addr
= &inp
->in6p_laddr
.s6_addr32
;
5670 inet_ntop(AF_INET6
, addr
, local
, sizeof(local
));
5671 addr
= &inp
->in6p_faddr
.s6_addr32
;
5672 inet_ntop(AF_INET6
, addr
, remote
, sizeof(local
));
5676 addr
= &inp
->inp_laddr
.s_addr
;
5677 inet_ntop(AF_INET
, addr
, local
, sizeof(local
));
5678 addr
= &inp
->inp_faddr
.s_addr
;
5679 inet_ntop(AF_INET
, addr
, remote
, sizeof(local
));
5682 if (so
->so_cfil
!= NULL
) {
5683 CFIL_LOG(level
, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5684 msg
, IS_UDP(so
) ? "UDP" : "TCP",
5685 (uint64_t)VM_KERNEL_ADDRPERM(so
), inp
->inp_flags
, inp
->inp_socket
->so_flags
, so
->so_cfil
->cfi_sock_id
,
5686 ntohs(inp
->inp_lport
), ntohs(inp
->inp_fport
), local
, remote
);
5688 CFIL_LOG(level
, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5689 msg
, IS_UDP(so
) ? "UDP" : "TCP",
5690 (uint64_t)VM_KERNEL_ADDRPERM(so
), inp
->inp_flags
, inp
->inp_socket
->so_flags
,
5691 ntohs(inp
->inp_lport
), ntohs(inp
->inp_fport
), local
, remote
);
5696 cfil_info_log(int level
, struct cfil_info
*cfil_info
, const char* msg
)
5698 if (cfil_info
== NULL
) {
5702 if (cfil_info
->cfi_hash_entry
!= NULL
) {
5703 cfil_hash_entry_log(level
, cfil_info
->cfi_so
, cfil_info
->cfi_hash_entry
, cfil_info
->cfi_sock_id
, msg
);
5705 cfil_inp_log(level
, cfil_info
->cfi_so
, msg
);
5710 cfil_db_init(struct socket
*so
)
5713 struct cfil_db
*db
= NULL
;
5715 CFIL_LOG(LOG_INFO
, "");
5717 db
= zalloc(cfil_db_zone
);
5722 bzero(db
, sizeof(struct cfil_db
));
5724 db
->cfdb_hashbase
= hashinit(CFILHASHSIZE
, M_CFIL
, &db
->cfdb_hashmask
);
5725 if (db
->cfdb_hashbase
== NULL
) {
5726 zfree(cfil_db_zone
, db
);
5732 so
->so_cfil_db
= db
;
5739 cfil_db_free(struct socket
*so
)
5741 struct cfil_hash_entry
*entry
= NULL
;
5742 struct cfil_hash_entry
*temp_entry
= NULL
;
5743 struct cfilhashhead
*cfilhash
= NULL
;
5744 struct cfil_db
*db
= NULL
;
5746 CFIL_LOG(LOG_INFO
, "");
5748 if (so
== NULL
|| so
->so_cfil_db
== NULL
) {
5751 db
= so
->so_cfil_db
;
5754 CFIL_LOG(LOG_ERR
, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5755 (uint64_t)VM_KERNEL_ADDRPERM(so
), db
, db
->cfdb_count
);
5758 for (int i
= 0; i
< CFILHASHSIZE
; i
++) {
5759 cfilhash
= &db
->cfdb_hashbase
[i
];
5760 LIST_FOREACH_SAFE(entry
, cfilhash
, cfentry_link
, temp_entry
) {
5761 if (entry
->cfentry_cfil
!= NULL
) {
5763 cfil_info_log(LOG_ERR
, entry
->cfentry_cfil
, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5765 cfil_info_free(entry
->cfentry_cfil
);
5766 OSIncrementAtomic(&cfil_stats
.cfs_sock_detached
);
5767 entry
->cfentry_cfil
= NULL
;
5770 cfil_db_delete_entry(db
, entry
);
5771 if (so
->so_flags
& SOF_CONTENT_FILTER
) {
5772 if (db
->cfdb_count
== 0) {
5773 so
->so_flags
&= ~SOF_CONTENT_FILTER
;
5775 VERIFY(so
->so_usecount
> 0);
5781 // Make sure all entries are cleaned up!
5782 VERIFY(db
->cfdb_count
== 0);
5784 CFIL_LOG(LOG_ERR
, "CFIL: LIFECYCLE: so usecount %d", so
->so_usecount
);
5787 FREE(db
->cfdb_hashbase
, M_CFIL
);
5788 zfree(cfil_db_zone
, db
);
5789 so
->so_cfil_db
= NULL
;
5793 fill_cfil_hash_entry_from_address(struct cfil_hash_entry
*entry
, bool isLocal
, struct sockaddr
*addr
)
5795 struct sockaddr_in
*sin
= NULL
;
5796 struct sockaddr_in6
*sin6
= NULL
;
5798 if (entry
== NULL
|| addr
== NULL
) {
5802 switch (addr
->sa_family
) {
5804 sin
= satosin(addr
);
5805 if (sin
->sin_len
!= sizeof(*sin
)) {
5808 if (isLocal
== TRUE
) {
5809 entry
->cfentry_lport
= sin
->sin_port
;
5810 entry
->cfentry_laddr
.addr46
.ia46_addr4
.s_addr
= sin
->sin_addr
.s_addr
;
5812 entry
->cfentry_fport
= sin
->sin_port
;
5813 entry
->cfentry_faddr
.addr46
.ia46_addr4
.s_addr
= sin
->sin_addr
.s_addr
;
5815 entry
->cfentry_family
= AF_INET
;
5818 sin6
= satosin6(addr
);
5819 if (sin6
->sin6_len
!= sizeof(*sin6
)) {
5822 if (isLocal
== TRUE
) {
5823 entry
->cfentry_lport
= sin6
->sin6_port
;
5824 entry
->cfentry_laddr
.addr6
= sin6
->sin6_addr
;
5826 entry
->cfentry_fport
= sin6
->sin6_port
;
5827 entry
->cfentry_faddr
.addr6
= sin6
->sin6_addr
;
5829 entry
->cfentry_family
= AF_INET6
;
5837 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry
*entry
, bool isLocal
, struct inpcb
*inp
)
5839 if (entry
== NULL
|| inp
== NULL
) {
5843 if (inp
->inp_vflag
& INP_IPV6
) {
5844 if (isLocal
== TRUE
) {
5845 entry
->cfentry_lport
= inp
->inp_lport
;
5846 entry
->cfentry_laddr
.addr6
= inp
->in6p_laddr
;
5848 entry
->cfentry_fport
= inp
->inp_fport
;
5849 entry
->cfentry_faddr
.addr6
= inp
->in6p_faddr
;
5851 entry
->cfentry_family
= AF_INET6
;
5853 } else if (inp
->inp_vflag
& INP_IPV4
) {
5854 if (isLocal
== TRUE
) {
5855 entry
->cfentry_lport
= inp
->inp_lport
;
5856 entry
->cfentry_laddr
.addr46
.ia46_addr4
.s_addr
= inp
->inp_laddr
.s_addr
;
5858 entry
->cfentry_fport
= inp
->inp_fport
;
5859 entry
->cfentry_faddr
.addr46
.ia46_addr4
.s_addr
= inp
->inp_faddr
.s_addr
;
5861 entry
->cfentry_family
= AF_INET
;
5868 check_port(struct sockaddr
*addr
, u_short port
)
5870 struct sockaddr_in
*sin
= NULL
;
5871 struct sockaddr_in6
*sin6
= NULL
;
5873 if (addr
== NULL
|| port
== 0) {
5877 switch (addr
->sa_family
) {
5879 sin
= satosin(addr
);
5880 if (sin
->sin_len
!= sizeof(*sin
)) {
5883 if (port
== ntohs(sin
->sin_port
)) {
5888 sin6
= satosin6(addr
);
5889 if (sin6
->sin6_len
!= sizeof(*sin6
)) {
5892 if (port
== ntohs(sin6
->sin6_port
)) {
5902 struct cfil_hash_entry
*
5903 cfil_db_lookup_entry_with_sockid(struct cfil_db
*db
, u_int64_t sock_id
)
5905 struct cfilhashhead
*cfilhash
= NULL
;
5906 u_int32_t flowhash
= (u_int32_t
)(sock_id
& 0x0ffffffff);
5907 struct cfil_hash_entry
*nextentry
;
5909 if (db
== NULL
|| db
->cfdb_hashbase
== NULL
|| sock_id
== 0) {
5913 flowhash
&= db
->cfdb_hashmask
;
5914 cfilhash
= &db
->cfdb_hashbase
[flowhash
];
5916 LIST_FOREACH(nextentry
, cfilhash
, cfentry_link
) {
5917 if (nextentry
->cfentry_cfil
!= NULL
&&
5918 nextentry
->cfentry_cfil
->cfi_sock_id
== sock_id
) {
5919 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5920 (uint64_t)VM_KERNEL_ADDRPERM(db
->cfdb_so
), nextentry
->cfentry_cfil
->cfi_sock_id
, flowhash
);
5921 cfil_hash_entry_log(LOG_DEBUG
, db
->cfdb_so
, nextentry
, 0, "CFIL: UDP found entry");
5926 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5927 (uint64_t)VM_KERNEL_ADDRPERM(db
->cfdb_so
), sock_id
, flowhash
);
5931 struct cfil_hash_entry
*
5932 cfil_db_lookup_entry(struct cfil_db
*db
, struct sockaddr
*local
, struct sockaddr
*remote
, boolean_t remoteOnly
)
5934 struct cfil_hash_entry matchentry
= { };
5935 struct cfil_hash_entry
*nextentry
= NULL
;
5936 struct inpcb
*inp
= sotoinpcb(db
->cfdb_so
);
5937 u_int32_t hashkey_faddr
= 0, hashkey_laddr
= 0;
5938 u_int16_t hashkey_fport
= 0, hashkey_lport
= 0;
5939 int inp_hash_element
= 0;
5940 struct cfilhashhead
*cfilhash
= NULL
;
5942 CFIL_LOG(LOG_INFO
, "");
5948 if (remoteOnly
== false) {
5949 if (local
!= NULL
) {
5950 fill_cfil_hash_entry_from_address(&matchentry
, TRUE
, local
);
5952 fill_cfil_hash_entry_from_inp(&matchentry
, TRUE
, inp
);
5955 if (remote
!= NULL
) {
5956 fill_cfil_hash_entry_from_address(&matchentry
, FALSE
, remote
);
5958 fill_cfil_hash_entry_from_inp(&matchentry
, FALSE
, inp
);
5962 if (inp
->inp_vflag
& INP_IPV6
) {
5963 hashkey_faddr
= matchentry
.cfentry_faddr
.addr6
.s6_addr32
[3];
5964 hashkey_laddr
= (remoteOnly
== false) ? matchentry
.cfentry_laddr
.addr6
.s6_addr32
[3] : 0;
5968 hashkey_faddr
= matchentry
.cfentry_faddr
.addr46
.ia46_addr4
.s_addr
;
5969 hashkey_laddr
= (remoteOnly
== false) ? matchentry
.cfentry_laddr
.addr46
.ia46_addr4
.s_addr
: 0;
5972 hashkey_fport
= matchentry
.cfentry_fport
;
5973 hashkey_lport
= (remoteOnly
== false) ? matchentry
.cfentry_lport
: 0;
5975 inp_hash_element
= CFIL_HASH(hashkey_laddr
, hashkey_faddr
, hashkey_lport
, hashkey_fport
);
5976 inp_hash_element
&= db
->cfdb_hashmask
;
5978 cfilhash
= &db
->cfdb_hashbase
[inp_hash_element
];
5980 LIST_FOREACH(nextentry
, cfilhash
, cfentry_link
) {
5982 if ((inp
->inp_vflag
& INP_IPV6
) &&
5983 (remoteOnly
|| nextentry
->cfentry_lport
== matchentry
.cfentry_lport
) &&
5984 nextentry
->cfentry_fport
== matchentry
.cfentry_fport
&&
5985 (remoteOnly
|| IN6_ARE_ADDR_EQUAL(&nextentry
->cfentry_laddr
.addr6
, &matchentry
.cfentry_laddr
.addr6
)) &&
5986 IN6_ARE_ADDR_EQUAL(&nextentry
->cfentry_faddr
.addr6
, &matchentry
.cfentry_faddr
.addr6
)) {
5988 cfil_hash_entry_log(LOG_DEBUG
, db
->cfdb_so
, &matchentry
, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5993 if ((remoteOnly
|| nextentry
->cfentry_lport
== matchentry
.cfentry_lport
) &&
5994 nextentry
->cfentry_fport
== matchentry
.cfentry_fport
&&
5995 (remoteOnly
|| nextentry
->cfentry_laddr
.addr46
.ia46_addr4
.s_addr
== matchentry
.cfentry_laddr
.addr46
.ia46_addr4
.s_addr
) &&
5996 nextentry
->cfentry_faddr
.addr46
.ia46_addr4
.s_addr
== matchentry
.cfentry_faddr
.addr46
.ia46_addr4
.s_addr
) {
5998 cfil_hash_entry_log(LOG_DEBUG
, db
->cfdb_so
, &matchentry
, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
6006 cfil_hash_entry_log(LOG_DEBUG
, db
->cfdb_so
, &matchentry
, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
6012 cfil_db_delete_entry(struct cfil_db
*db
, struct cfil_hash_entry
*hash_entry
)
6014 if (hash_entry
== NULL
) {
6017 if (db
== NULL
|| db
->cfdb_count
== 0) {
6021 if (db
->cfdb_only_entry
== hash_entry
) {
6022 db
->cfdb_only_entry
= NULL
;
6024 LIST_REMOVE(hash_entry
, cfentry_link
);
6025 zfree(cfil_hash_entry_zone
, hash_entry
);
6028 struct cfil_hash_entry
*
6029 cfil_db_add_entry(struct cfil_db
*db
, struct sockaddr
*local
, struct sockaddr
*remote
)
6031 struct cfil_hash_entry
*entry
= NULL
;
6032 struct inpcb
*inp
= sotoinpcb(db
->cfdb_so
);
6033 u_int32_t hashkey_faddr
= 0, hashkey_laddr
= 0;
6034 int inp_hash_element
= 0;
6035 struct cfilhashhead
*cfilhash
= NULL
;
6037 CFIL_LOG(LOG_INFO
, "");
6043 entry
= zalloc(cfil_hash_entry_zone
);
6044 if (entry
== NULL
) {
6047 bzero(entry
, sizeof(struct cfil_hash_entry
));
6049 if (local
!= NULL
) {
6050 fill_cfil_hash_entry_from_address(entry
, TRUE
, local
);
6052 fill_cfil_hash_entry_from_inp(entry
, TRUE
, inp
);
6054 if (remote
!= NULL
) {
6055 fill_cfil_hash_entry_from_address(entry
, FALSE
, remote
);
6057 fill_cfil_hash_entry_from_inp(entry
, FALSE
, inp
);
6059 entry
->cfentry_lastused
= net_uptime();
6062 if (inp
->inp_vflag
& INP_IPV6
) {
6063 hashkey_faddr
= entry
->cfentry_faddr
.addr6
.s6_addr32
[3];
6064 hashkey_laddr
= entry
->cfentry_laddr
.addr6
.s6_addr32
[3];
6068 hashkey_faddr
= entry
->cfentry_faddr
.addr46
.ia46_addr4
.s_addr
;
6069 hashkey_laddr
= entry
->cfentry_laddr
.addr46
.ia46_addr4
.s_addr
;
6071 entry
->cfentry_flowhash
= CFIL_HASH(hashkey_laddr
, hashkey_faddr
,
6072 entry
->cfentry_lport
, entry
->cfentry_fport
);
6073 inp_hash_element
= entry
->cfentry_flowhash
& db
->cfdb_hashmask
;
6075 cfilhash
= &db
->cfdb_hashbase
[inp_hash_element
];
6077 LIST_INSERT_HEAD(cfilhash
, entry
, cfentry_link
);
6079 db
->cfdb_only_entry
= entry
;
6080 cfil_hash_entry_log(LOG_DEBUG
, db
->cfdb_so
, entry
, 0, "CFIL: cfil_db_add_entry: ADDED");
6083 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db
->cfdb_so
), db
->cfdb_count
);
6088 cfil_db_update_entry_local(struct cfil_db
*db
, struct cfil_hash_entry
*entry
, struct sockaddr
*local
)
6090 struct inpcb
*inp
= sotoinpcb(db
->cfdb_so
);
6092 CFIL_LOG(LOG_INFO
, "");
6094 if (inp
== NULL
|| entry
== NULL
) {
6098 if (local
!= NULL
) {
6099 fill_cfil_hash_entry_from_address(entry
, TRUE
, local
);
6101 fill_cfil_hash_entry_from_inp(entry
, TRUE
, inp
);
6103 cfil_hash_entry_log(LOG_DEBUG
, db
->cfdb_so
, entry
, 0, "CFIL: cfil_db_add_entry: local updated");
6109 cfil_db_get_cfil_info(struct cfil_db
*db
, cfil_sock_id_t id
)
6111 struct cfil_hash_entry
*hash_entry
= NULL
;
6113 CFIL_LOG(LOG_INFO
, "");
6115 if (db
== NULL
|| id
== 0) {
6116 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP <so %llx> NULL DB <id %llu>",
6117 db
? (uint64_t)VM_KERNEL_ADDRPERM(db
->cfdb_so
) : 0, id
);
6121 // This is an optimization for connected UDP socket which only has one flow.
6122 // No need to do the hash lookup.
6123 if (db
->cfdb_count
== 1) {
6124 if (db
->cfdb_only_entry
&& db
->cfdb_only_entry
->cfentry_cfil
&&
6125 db
->cfdb_only_entry
->cfentry_cfil
->cfi_sock_id
== id
) {
6126 return db
->cfdb_only_entry
->cfentry_cfil
;
6130 hash_entry
= cfil_db_lookup_entry_with_sockid(db
, id
);
6131 return hash_entry
!= NULL
? hash_entry
->cfentry_cfil
: NULL
;
6134 struct cfil_hash_entry
*
6135 cfil_sock_udp_get_flow(struct socket
*so
, uint32_t filter_control_unit
, bool outgoing
, struct sockaddr
*local
, struct sockaddr
*remote
, int debug
)
6137 struct cfil_hash_entry
*hash_entry
= NULL
;
6140 socket_lock_assert_owned(so
);
6142 // If new socket, allocate cfil db
6143 if (so
->so_cfil_db
== NULL
) {
6144 if (cfil_db_init(so
) != 0) {
6149 // See if flow already exists.
6150 hash_entry
= cfil_db_lookup_entry(so
->so_cfil_db
, local
, remote
, false);
6151 if (hash_entry
== NULL
) {
6152 // No match with both local and remote, try match with remote only
6153 hash_entry
= cfil_db_lookup_entry(so
->so_cfil_db
, local
, remote
, true);
6154 if (hash_entry
!= NULL
) {
6155 // Simply update the local address into the original flow, keeping
6156 // its sockId and flow_hash unchanged.
6157 cfil_db_update_entry_local(so
->so_cfil_db
, hash_entry
, local
);
6160 if (hash_entry
!= NULL
) {
6164 hash_entry
= cfil_db_add_entry(so
->so_cfil_db
, local
, remote
);
6165 if (hash_entry
== NULL
) {
6166 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_no_mem
);
6167 CFIL_LOG(LOG_ERR
, "CFIL: UDP failed to add entry");
6171 if (cfil_info_alloc(so
, hash_entry
) == NULL
||
6172 hash_entry
->cfentry_cfil
== NULL
) {
6173 cfil_db_delete_entry(so
->so_cfil_db
, hash_entry
);
6174 CFIL_LOG(LOG_ERR
, "CFIL: UDP failed to alloc cfil_info");
6175 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_no_mem
);
6178 hash_entry
->cfentry_cfil
->cfi_dir
= outgoing
? CFS_CONNECTION_DIR_OUT
: CFS_CONNECTION_DIR_IN
;
6179 hash_entry
->cfentry_cfil
->cfi_debug
= debug
;
6182 cfil_info_log(LOG_ERR
, hash_entry
->cfentry_cfil
, "CFIL: LIFECYCLE: ADDED");
6185 if (cfil_info_attach_unit(so
, filter_control_unit
, hash_entry
->cfentry_cfil
) == 0) {
6186 cfil_info_free(hash_entry
->cfentry_cfil
);
6187 cfil_db_delete_entry(so
->so_cfil_db
, hash_entry
);
6188 CFIL_LOG(LOG_ERR
, "CFIL: UDP cfil_info_attach_unit(%u) failed",
6189 filter_control_unit
);
6190 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_failed
);
6193 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6194 (uint64_t)VM_KERNEL_ADDRPERM(so
),
6195 filter_control_unit
, hash_entry
->cfentry_cfil
->cfi_sock_id
);
6197 so
->so_flags
|= SOF_CONTENT_FILTER
;
6198 OSIncrementAtomic(&cfil_stats
.cfs_sock_attached
);
6200 /* Hold a reference on the socket for each flow */
6204 cfil_info_log(LOG_ERR
, hash_entry
->cfentry_cfil
, "CFIL: LIFECYCLE: ADDED");
6207 error
= cfil_dispatch_attach_event(so
, hash_entry
->cfentry_cfil
, 0,
6208 outgoing
? CFS_CONNECTION_DIR_OUT
: CFS_CONNECTION_DIR_IN
);
6209 /* We can recover from flow control or out of memory errors */
6210 if (error
!= 0 && error
!= ENOBUFS
&& error
!= ENOMEM
) {
6214 CFIL_INFO_VERIFY(hash_entry
->cfentry_cfil
);
6219 cfil_sock_udp_handle_data(bool outgoing
, struct socket
*so
,
6220 struct sockaddr
*local
, struct sockaddr
*remote
,
6221 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
6223 #pragma unused(outgoing, so, local, remote, data, control, flags)
6225 uint32_t filter_control_unit
;
6226 struct cfil_hash_entry
*hash_entry
= NULL
;
6227 struct cfil_info
*cfil_info
= NULL
;
6230 socket_lock_assert_owned(so
);
6232 if (cfil_active_count
== 0) {
6233 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP no active filter");
6234 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_in_vain
);
6238 // Socket has been blessed
6239 if ((so
->so_flags1
& SOF1_CONTENT_FILTER_SKIP
) != 0) {
6243 filter_control_unit
= necp_socket_get_content_filter_control_unit(so
);
6244 if (filter_control_unit
== 0) {
6245 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP failed to get control unit");
6249 if (filter_control_unit
== NECP_FILTER_UNIT_NO_FILTER
) {
6253 if ((filter_control_unit
& NECP_MASK_USERSPACE_ONLY
) != 0) {
6254 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP user space only");
6255 OSIncrementAtomic(&cfil_stats
.cfs_sock_userspace_only
);
6259 hash_entry
= cfil_sock_udp_get_flow(so
, filter_control_unit
, outgoing
, local
, remote
, debug
);
6260 if (hash_entry
== NULL
|| hash_entry
->cfentry_cfil
== NULL
) {
6261 CFIL_LOG(LOG_ERR
, "CFIL: Falied to create UDP flow");
6264 // Update last used timestamp, this is for flow Idle TO
6265 hash_entry
->cfentry_lastused
= net_uptime();
6266 cfil_info
= hash_entry
->cfentry_cfil
;
6268 if (cfil_info
->cfi_flags
& CFIF_DROP
) {
6270 cfil_hash_entry_log(LOG_DEBUG
, so
, hash_entry
, 0, "CFIL: UDP DROP");
6274 if (control
!= NULL
) {
6275 OSIncrementAtomic(&cfil_stats
.cfs_data_in_control
);
6277 if (data
->m_type
== MT_OOBDATA
) {
6278 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
6279 (uint64_t)VM_KERNEL_ADDRPERM(so
));
6280 OSIncrementAtomic(&cfil_stats
.cfs_data_in_oob
);
6283 error
= cfil_data_common(so
, cfil_info
, outgoing
, remote
, data
, control
, flags
);
6289 * Go through all UDP flows for specified socket and returns TRUE if
6290 * any flow is still attached. If need_wait is TRUE, wait on first
6294 cfil_filters_udp_attached(struct socket
*so
, bool need_wait
)
6297 lck_mtx_t
*mutex_held
;
6298 struct cfilhashhead
*cfilhash
= NULL
;
6299 struct cfil_db
*db
= NULL
;
6300 struct cfil_hash_entry
*hash_entry
= NULL
;
6301 struct cfil_hash_entry
*temp_hash_entry
= NULL
;
6302 struct cfil_info
*cfil_info
= NULL
;
6303 struct cfil_entry
*entry
= NULL
;
6307 uint64_t sock_flow_id
= 0;
6309 socket_lock_assert_owned(so
);
6311 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0 && so
->so_cfil_db
!= NULL
) {
6312 if (so
->so_proto
->pr_getlock
!= NULL
) {
6313 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
6315 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
6317 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
6319 db
= so
->so_cfil_db
;
6321 for (int i
= 0; i
< CFILHASHSIZE
; i
++) {
6322 cfilhash
= &db
->cfdb_hashbase
[i
];
6324 LIST_FOREACH_SAFE(hash_entry
, cfilhash
, cfentry_link
, temp_hash_entry
) {
6325 if (hash_entry
->cfentry_cfil
!= NULL
) {
6326 cfil_info
= hash_entry
->cfentry_cfil
;
6327 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
6328 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
6330 /* Are we attached to the filter? */
6331 if (entry
->cfe_filter
== NULL
) {
6335 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
6338 if ((entry
->cfe_flags
& CFEF_CFIL_DETACHED
) != 0) {
6344 if (need_wait
== TRUE
) {
6346 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6349 ts
.tv_sec
= cfil_close_wait_timeout
/ 1000;
6350 ts
.tv_nsec
= (cfil_close_wait_timeout
% 1000) *
6351 NSEC_PER_USEC
* 1000;
6353 OSIncrementAtomic(&cfil_stats
.cfs_close_wait
);
6354 cfil_info
->cfi_flags
|= CFIF_CLOSE_WAIT
;
6355 sock_flow_id
= cfil_info
->cfi_sock_id
;
6357 error
= msleep((caddr_t
)cfil_info
, mutex_held
,
6358 PSOCK
| PCATCH
, "cfil_filters_udp_attached", &ts
);
6360 // Woke up from sleep, validate if cfil_info is still valid
6361 if (so
->so_cfil_db
== NULL
||
6362 (cfil_info
!= cfil_db_get_cfil_info(so
->so_cfil_db
, sock_flow_id
))) {
6363 // cfil_info is not valid, do not continue
6367 cfil_info
->cfi_flags
&= ~CFIF_CLOSE_WAIT
;
6370 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6374 * Force close in case of timeout
6377 OSIncrementAtomic(&cfil_stats
.cfs_close_wait_timeout
);
6379 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6381 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
6396 cfil_sock_udp_data_pending(struct sockbuf
*sb
, bool check_thread
)
6398 struct socket
*so
= sb
->sb_so
;
6399 struct cfi_buf
*cfi_buf
;
6400 uint64_t pending
= 0;
6401 uint64_t total_pending
= 0;
6402 struct cfilhashhead
*cfilhash
= NULL
;
6403 struct cfil_db
*db
= NULL
;
6404 struct cfil_hash_entry
*hash_entry
= NULL
;
6405 struct cfil_hash_entry
*temp_hash_entry
= NULL
;
6407 socket_lock_assert_owned(so
);
6409 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0 && so
->so_cfil_db
!= NULL
&&
6410 (check_thread
== FALSE
|| so
->so_snd
.sb_cfil_thread
!= current_thread())) {
6411 db
= so
->so_cfil_db
;
6413 for (int i
= 0; i
< CFILHASHSIZE
; i
++) {
6414 cfilhash
= &db
->cfdb_hashbase
[i
];
6416 LIST_FOREACH_SAFE(hash_entry
, cfilhash
, cfentry_link
, temp_hash_entry
) {
6417 if (hash_entry
->cfentry_cfil
!= NULL
) {
6418 if ((sb
->sb_flags
& SB_RECV
) == 0) {
6419 cfi_buf
= &hash_entry
->cfentry_cfil
->cfi_snd
;
6421 cfi_buf
= &hash_entry
->cfentry_cfil
->cfi_rcv
;
6424 pending
= cfi_buf
->cfi_pending_last
- cfi_buf
->cfi_pending_first
;
6426 * If we are limited by the "chars of mbufs used" roughly
6427 * adjust so we won't overcommit
6429 if ((uint64_t)cfi_buf
->cfi_pending_mbcnt
> pending
) {
6430 pending
= cfi_buf
->cfi_pending_mbcnt
;
6433 total_pending
+= pending
;
6438 VERIFY(total_pending
< INT32_MAX
);
6440 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6441 (uint64_t)VM_KERNEL_ADDRPERM(so
),
6442 total_pending
, check_thread
);
6446 return (int32_t)(total_pending
);
6450 cfil_sock_udp_notify_shutdown(struct socket
*so
, int how
, int drop_flag
, int shut_flag
)
6452 struct cfil_info
*cfil_info
= NULL
;
6453 struct cfilhashhead
*cfilhash
= NULL
;
6454 struct cfil_db
*db
= NULL
;
6455 struct cfil_hash_entry
*hash_entry
= NULL
;
6456 struct cfil_hash_entry
*temp_hash_entry
= NULL
;
6461 socket_lock_assert_owned(so
);
6463 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0 && so
->so_cfil_db
!= NULL
) {
6464 db
= so
->so_cfil_db
;
6466 for (int i
= 0; i
< CFILHASHSIZE
; i
++) {
6467 cfilhash
= &db
->cfdb_hashbase
[i
];
6469 LIST_FOREACH_SAFE(hash_entry
, cfilhash
, cfentry_link
, temp_hash_entry
) {
6470 if (hash_entry
->cfentry_cfil
!= NULL
) {
6471 cfil_info
= hash_entry
->cfentry_cfil
;
6473 // This flow is marked as DROP
6474 if (cfil_info
->cfi_flags
& drop_flag
) {
6479 // This flow has been shut already, skip
6480 if (cfil_info
->cfi_flags
& shut_flag
) {
6483 // Mark flow as shut
6484 cfil_info
->cfi_flags
|= shut_flag
;
6487 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
6488 /* Disconnect incoming side */
6489 if (how
!= SHUT_WR
) {
6490 error
= cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 0);
6492 /* Disconnect outgoing side */
6493 if (how
!= SHUT_RD
) {
6494 error
= cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 1);
6502 if (done_count
== 0) {
6509 cfil_sock_udp_shutdown(struct socket
*so
, int *how
)
6513 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || (so
->so_cfil_db
== NULL
)) {
6517 socket_lock_assert_owned(so
);
6519 CFIL_LOG(LOG_INFO
, "so %llx how %d",
6520 (uint64_t)VM_KERNEL_ADDRPERM(so
), *how
);
6523 * Check the state of the socket before the content filter
6525 if (*how
!= SHUT_WR
&& (so
->so_state
& SS_CANTRCVMORE
) != 0) {
6526 /* read already shut down */
6530 if (*how
!= SHUT_RD
&& (so
->so_state
& SS_CANTSENDMORE
) != 0) {
6531 /* write already shut down */
6537 * shutdown read: SHUT_RD or SHUT_RDWR
6539 if (*how
!= SHUT_WR
) {
6540 error
= cfil_sock_udp_notify_shutdown(so
, SHUT_RD
, CFIF_DROP
, CFIF_SHUT_RD
);
6546 * shutdown write: SHUT_WR or SHUT_RDWR
6548 if (*how
!= SHUT_RD
) {
6549 error
= cfil_sock_udp_notify_shutdown(so
, SHUT_WR
, CFIF_DROP
, CFIF_SHUT_WR
);
6555 * When outgoing data is pending, we delay the shutdown at the
6556 * protocol level until the content filters give the final
6557 * verdict on the pending data.
6559 if (cfil_sock_data_pending(&so
->so_snd
) != 0) {
6561 * When shutting down the read and write sides at once
6562 * we can proceed to the final shutdown of the read
6563 * side. Otherwise, we just return.
6565 if (*how
== SHUT_WR
) {
6566 error
= EJUSTRETURN
;
6567 } else if (*how
== SHUT_RDWR
) {
6577 cfil_sock_udp_close_wait(struct socket
*so
)
6579 socket_lock_assert_owned(so
);
6581 while (cfil_filters_udp_attached(so
, FALSE
)) {
6583 * Notify the filters we are going away so they can detach
6585 cfil_sock_udp_notify_shutdown(so
, SHUT_RDWR
, 0, 0);
6588 * Make sure we need to wait after the filter are notified
6589 * of the disconnection
6591 if (cfil_filters_udp_attached(so
, TRUE
) == 0) {
6598 cfil_sock_udp_is_closed(struct socket
*so
)
6600 struct cfil_info
*cfil_info
= NULL
;
6601 struct cfilhashhead
*cfilhash
= NULL
;
6602 struct cfil_db
*db
= NULL
;
6603 struct cfil_hash_entry
*hash_entry
= NULL
;
6604 struct cfil_hash_entry
*temp_hash_entry
= NULL
;
6608 socket_lock_assert_owned(so
);
6610 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0 && so
->so_cfil_db
!= NULL
) {
6611 db
= so
->so_cfil_db
;
6613 for (int i
= 0; i
< CFILHASHSIZE
; i
++) {
6614 cfilhash
= &db
->cfdb_hashbase
[i
];
6616 LIST_FOREACH_SAFE(hash_entry
, cfilhash
, cfentry_link
, temp_hash_entry
) {
6617 if (hash_entry
->cfentry_cfil
!= NULL
) {
6618 cfil_info
= hash_entry
->cfentry_cfil
;
6620 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
6621 /* Let the filters know of the closing */
6622 error
= cfil_dispatch_closed_event(so
, cfil_info
, kcunit
);
6625 /* Last chance to push passed data out */
6626 error
= cfil_acquire_sockbuf(so
, cfil_info
, 1);
6628 cfil_service_inject_queue(so
, cfil_info
, 1);
6630 cfil_release_sockbuf(so
, 1);
6632 cfil_info
->cfi_flags
|= CFIF_SOCK_CLOSED
;
6634 /* Pending data needs to go */
6635 cfil_flush_queues(so
, cfil_info
);
6637 CFIL_INFO_VERIFY(cfil_info
);
6645 cfil_sock_udp_buf_update(struct sockbuf
*sb
)
6647 struct cfil_info
*cfil_info
= NULL
;
6648 struct cfilhashhead
*cfilhash
= NULL
;
6649 struct cfil_db
*db
= NULL
;
6650 struct cfil_hash_entry
*hash_entry
= NULL
;
6651 struct cfil_hash_entry
*temp_hash_entry
= NULL
;
6654 struct socket
*so
= sb
->sb_so
;
6656 socket_lock_assert_owned(so
);
6658 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0 && so
->so_cfil_db
!= NULL
) {
6663 db
= so
->so_cfil_db
;
6665 for (int i
= 0; i
< CFILHASHSIZE
; i
++) {
6666 cfilhash
= &db
->cfdb_hashbase
[i
];
6668 LIST_FOREACH_SAFE(hash_entry
, cfilhash
, cfentry_link
, temp_hash_entry
) {
6669 if (hash_entry
->cfentry_cfil
!= NULL
) {
6670 cfil_info
= hash_entry
->cfentry_cfil
;
6672 if ((sb
->sb_flags
& SB_RECV
) == 0) {
6673 if ((cfil_info
->cfi_flags
& CFIF_RETRY_INJECT_OUT
) == 0) {
6677 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_out_retry
);
6679 if ((cfil_info
->cfi_flags
& CFIF_RETRY_INJECT_IN
) == 0) {
6683 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_in_retry
);
6686 CFIL_LOG(LOG_NOTICE
, "so %llx outgoing %d",
6687 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
);
6689 error
= cfil_acquire_sockbuf(so
, cfil_info
, outgoing
);
6691 cfil_service_inject_queue(so
, cfil_info
, outgoing
);
6693 cfil_release_sockbuf(so
, outgoing
);
6701 cfil_filter_show(u_int32_t kcunit
)
6703 struct content_filter
*cfc
= NULL
;
6704 struct cfil_entry
*entry
;
6707 if (content_filters
== NULL
) {
6710 if (kcunit
> MAX_CONTENT_FILTER
) {
6714 cfil_rw_lock_shared(&cfil_lck_rw
);
6716 if (content_filters
[kcunit
- 1] == NULL
) {
6717 cfil_rw_unlock_shared(&cfil_lck_rw
);
6720 cfc
= content_filters
[kcunit
- 1];
6722 CFIL_LOG(LOG_ERR
, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6723 kcunit
, cfc
->cf_sock_count
, (unsigned long)cfc
->cf_flags
);
6724 if (cfc
->cf_flags
& CFF_DETACHING
) {
6725 CFIL_LOG(LOG_ERR
, "CFIL: FILTER SHOW: - DETACHING");
6727 if (cfc
->cf_flags
& CFF_ACTIVE
) {
6728 CFIL_LOG(LOG_ERR
, "CFIL: FILTER SHOW: - ACTIVE");
6730 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
6731 CFIL_LOG(LOG_ERR
, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6734 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
6735 if (entry
->cfe_cfil_info
&& entry
->cfe_cfil_info
->cfi_so
) {
6736 struct cfil_info
*cfil_info
= entry
->cfe_cfil_info
;
6740 if (entry
->cfe_flags
& CFEF_CFIL_DETACHED
) {
6741 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: FILTER SHOW: - DETACHED");
6743 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: FILTER SHOW: - ATTACHED");
6748 CFIL_LOG(LOG_ERR
, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count
);
6750 cfil_rw_unlock_shared(&cfil_lck_rw
);
6754 cfil_info_show(void)
6756 struct cfil_info
*cfil_info
;
6759 cfil_rw_lock_shared(&cfil_lck_rw
);
6761 CFIL_LOG(LOG_ERR
, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count
);
6763 TAILQ_FOREACH(cfil_info
, &cfil_sock_head
, cfi_link
) {
6766 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: INFO SHOW");
6768 if (cfil_info
->cfi_flags
& CFIF_DROP
) {
6769 CFIL_LOG(LOG_ERR
, "CFIL: INFO FLAG - DROP");
6771 if (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) {
6772 CFIL_LOG(LOG_ERR
, "CFIL: INFO FLAG - CLOSE_WAIT");
6774 if (cfil_info
->cfi_flags
& CFIF_SOCK_CLOSED
) {
6775 CFIL_LOG(LOG_ERR
, "CFIL: INFO FLAG - SOCK_CLOSED");
6777 if (cfil_info
->cfi_flags
& CFIF_RETRY_INJECT_IN
) {
6778 CFIL_LOG(LOG_ERR
, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6780 if (cfil_info
->cfi_flags
& CFIF_RETRY_INJECT_OUT
) {
6781 CFIL_LOG(LOG_ERR
, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6783 if (cfil_info
->cfi_flags
& CFIF_SHUT_WR
) {
6784 CFIL_LOG(LOG_ERR
, "CFIL: INFO FLAG - SHUT_WR");
6786 if (cfil_info
->cfi_flags
& CFIF_SHUT_RD
) {
6787 CFIL_LOG(LOG_ERR
, "CFIL: INFO FLAG - SHUT_RD");
6791 CFIL_LOG(LOG_ERR
, "CFIL: INFO SHOW: total cfil_info shown: %d", count
);
6793 cfil_rw_unlock_shared(&cfil_lck_rw
);
6797 cfil_info_idle_timed_out(struct cfil_info
*cfil_info
, int timeout
, u_int32_t current_time
)
6799 if (cfil_info
&& cfil_info
->cfi_hash_entry
&&
6800 (current_time
- cfil_info
->cfi_hash_entry
->cfentry_lastused
>= (u_int32_t
)timeout
)) {
6802 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: flow IDLE timeout expired");
6810 cfil_info_action_timed_out(struct cfil_info
*cfil_info
, int timeout
)
6812 struct cfil_entry
*entry
;
6813 struct timeval current_tv
;
6814 struct timeval diff_time
;
6816 if (cfil_info
== NULL
) {
6821 * If we have queued up more data than passed offset and we haven't received
6822 * an action from user space for a while (the user space filter might have crashed),
6823 * return action timed out.
6825 if (cfil_info
->cfi_snd
.cfi_pending_last
> cfil_info
->cfi_snd
.cfi_pass_offset
||
6826 cfil_info
->cfi_rcv
.cfi_pending_last
> cfil_info
->cfi_rcv
.cfi_pass_offset
) {
6827 microuptime(¤t_tv
);
6829 for (int kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
6830 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
6832 if (entry
->cfe_filter
== NULL
) {
6836 if (cfil_info
->cfi_snd
.cfi_pending_last
> entry
->cfe_snd
.cfe_pass_offset
||
6837 cfil_info
->cfi_rcv
.cfi_pending_last
> entry
->cfe_rcv
.cfe_pass_offset
) {
6838 // haven't gotten an action from this filter, check timeout
6839 timersub(¤t_tv
, &entry
->cfe_last_action
, &diff_time
);
6840 if (diff_time
.tv_sec
>= timeout
) {
6842 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: flow ACTION timeout expired");
6853 cfil_info_buffer_threshold_exceeded(struct cfil_info
*cfil_info
)
6855 if (cfil_info
== NULL
) {
6860 * Clean up flow if it exceeded queue thresholds
6862 if (cfil_info
->cfi_snd
.cfi_tail_drop_cnt
||
6863 cfil_info
->cfi_rcv
.cfi_tail_drop_cnt
) {
6865 CFIL_LOG(LOG_ERR
, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
6866 cfil_udp_gc_mbuf_num_max
,
6867 cfil_udp_gc_mbuf_cnt_max
,
6868 cfil_info
->cfi_snd
.cfi_tail_drop_cnt
,
6869 cfil_info
->cfi_rcv
.cfi_tail_drop_cnt
);
6870 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: queue threshold exceeded");
6879 cfil_udp_gc_thread_sleep(bool forever
)
6882 (void) assert_wait((event_t
) &cfil_sock_udp_attached_count
,
6883 THREAD_INTERRUPTIBLE
);
6885 uint64_t deadline
= 0;
6886 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC
, &deadline
);
6887 clock_absolutetime_interval_to_deadline(deadline
, &deadline
);
6889 (void) assert_wait_deadline(&cfil_sock_udp_attached_count
,
6890 THREAD_INTERRUPTIBLE
, deadline
);
6895 cfil_udp_gc_thread_func(void *v
, wait_result_t w
)
6897 #pragma unused(v, w)
6899 ASSERT(cfil_udp_gc_thread
== current_thread());
6900 thread_set_thread_name(current_thread(), "CFIL_UPD_GC");
6902 // Kick off gc shortly
6903 cfil_udp_gc_thread_sleep(false);
6904 thread_block_parameter((thread_continue_t
) cfil_info_udp_expire
, NULL
);
6909 cfil_info_udp_expire(void *v
, wait_result_t w
)
6911 #pragma unused(v, w)
6913 static uint64_t expired_array
[UDP_FLOW_GC_MAX_COUNT
];
6914 static uint32_t expired_count
= 0;
6916 struct cfil_info
*cfil_info
;
6917 struct cfil_hash_entry
*hash_entry
;
6920 u_int64_t current_time
= 0;
6922 current_time
= net_uptime();
6924 // Get all expired UDP flow ids
6925 cfil_rw_lock_shared(&cfil_lck_rw
);
6927 if (cfil_sock_udp_attached_count
== 0) {
6928 cfil_rw_unlock_shared(&cfil_lck_rw
);
6932 TAILQ_FOREACH(cfil_info
, &cfil_sock_head
, cfi_link
) {
6933 if (expired_count
>= UDP_FLOW_GC_MAX_COUNT
) {
6937 if (IS_IP_DGRAM(cfil_info
->cfi_so
)) {
6938 if (cfil_info_idle_timed_out(cfil_info
, UDP_FLOW_GC_IDLE_TO
, current_time
) ||
6939 cfil_info_action_timed_out(cfil_info
, UDP_FLOW_GC_ACTION_TO
) ||
6940 cfil_info_buffer_threshold_exceeded(cfil_info
)) {
6941 expired_array
[expired_count
] = cfil_info
->cfi_sock_id
;
6946 cfil_rw_unlock_shared(&cfil_lck_rw
);
6948 if (expired_count
== 0) {
6952 for (uint32_t i
= 0; i
< expired_count
; i
++) {
6953 // Search for socket (UDP only and lock so)
6954 so
= cfil_socket_from_sock_id(expired_array
[i
], true);
6959 cfil_info
= cfil_db_get_cfil_info(so
->so_cfil_db
, expired_array
[i
]);
6960 if (cfil_info
== NULL
) {
6964 db
= so
->so_cfil_db
;
6965 hash_entry
= cfil_info
->cfi_hash_entry
;
6967 if (db
== NULL
|| hash_entry
== NULL
) {
6971 #if GC_DEBUG || LIFECYCLE_DEBUG
6972 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: GC CLEAN UP");
6975 cfil_db_delete_entry(db
, hash_entry
);
6976 cfil_info_free(cfil_info
);
6977 OSIncrementAtomic(&cfil_stats
.cfs_sock_detached
);
6979 if (so
->so_flags
& SOF_CONTENT_FILTER
) {
6980 if (db
->cfdb_count
== 0) {
6981 so
->so_flags
&= ~SOF_CONTENT_FILTER
;
6983 VERIFY(so
->so_usecount
> 0);
6987 socket_unlock(so
, 1);
6991 CFIL_LOG(LOG_ERR
, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count
);
6997 // Sleep forever (until waken up) if no more UDP flow to clean
6998 cfil_rw_lock_shared(&cfil_lck_rw
);
6999 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count
== 0 ? true : false);
7000 cfil_rw_unlock_shared(&cfil_lck_rw
);
7001 thread_block_parameter((thread_continue_t
)cfil_info_udp_expire
, NULL
);
7006 cfil_dgram_save_socket_state(struct cfil_info
*cfil_info
, struct mbuf
*m
)
7008 struct m_tag
*tag
= NULL
;
7009 struct cfil_tag
*ctag
= NULL
;
7010 struct cfil_hash_entry
*hash_entry
= NULL
;
7011 struct inpcb
*inp
= NULL
;
7013 if (cfil_info
== NULL
|| cfil_info
->cfi_so
== NULL
||
7014 cfil_info
->cfi_hash_entry
== NULL
|| m
== NULL
|| !(m
->m_flags
& M_PKTHDR
)) {
7018 inp
= sotoinpcb(cfil_info
->cfi_so
);
7020 /* Allocate a tag */
7021 tag
= m_tag_create(KERNEL_MODULE_TAG_ID
, KERNEL_TAG_TYPE_CFIL_UDP
,
7022 sizeof(struct cfil_tag
), M_DONTWAIT
, m
);
7025 ctag
= (struct cfil_tag
*)(tag
+ 1);
7026 ctag
->cfil_so_state_change_cnt
= cfil_info
->cfi_so
->so_state_change_cnt
;
7027 ctag
->cfil_so_options
= cfil_info
->cfi_so
->so_options
;
7028 ctag
->cfil_inp_flags
= inp
? inp
->inp_flags
: 0;
7030 hash_entry
= cfil_info
->cfi_hash_entry
;
7031 if (hash_entry
->cfentry_family
== AF_INET6
) {
7032 fill_ip6_sockaddr_4_6(&ctag
->cfil_faddr
,
7033 &hash_entry
->cfentry_faddr
.addr6
,
7034 hash_entry
->cfentry_fport
);
7035 } else if (hash_entry
->cfentry_family
== AF_INET
) {
7036 fill_ip_sockaddr_4_6(&ctag
->cfil_faddr
,
7037 hash_entry
->cfentry_faddr
.addr46
.ia46_addr4
,
7038 hash_entry
->cfentry_fport
);
7040 m_tag_prepend(m
, tag
);
7047 cfil_dgram_get_socket_state(struct mbuf
*m
, uint32_t *state_change_cnt
, short *options
,
7048 struct sockaddr
**faddr
, int *inp_flags
)
7050 struct m_tag
*tag
= NULL
;
7051 struct cfil_tag
*ctag
= NULL
;
7053 tag
= m_tag_locate(m
, KERNEL_MODULE_TAG_ID
, KERNEL_TAG_TYPE_CFIL_UDP
, NULL
);
7055 ctag
= (struct cfil_tag
*)(tag
+ 1);
7056 if (state_change_cnt
) {
7057 *state_change_cnt
= ctag
->cfil_so_state_change_cnt
;
7060 *options
= ctag
->cfil_so_options
;
7063 *faddr
= (struct sockaddr
*) &ctag
->cfil_faddr
;
7066 *inp_flags
= ctag
->cfil_inp_flags
;
7070 * Unlink tag and hand it over to caller.
7071 * Note that caller will be responsible to free it.
7073 m_tag_unlink(m
, tag
);
7080 cfil_dgram_peek_socket_state(struct mbuf
*m
, int *inp_flags
)
7082 struct m_tag
*tag
= NULL
;
7083 struct cfil_tag
*ctag
= NULL
;
7085 tag
= m_tag_locate(m
, KERNEL_MODULE_TAG_ID
, KERNEL_TAG_TYPE_CFIL_UDP
, NULL
);
7087 ctag
= (struct cfil_tag
*)(tag
+ 1);
7089 *inp_flags
= ctag
->cfil_inp_flags
;
7097 cfil_dispatch_stats_event_locked(int kcunit
, struct cfil_stats_report_buffer
*buffer
, uint32_t stats_count
)
7099 struct content_filter
*cfc
= NULL
;
7103 if (buffer
== NULL
|| stats_count
== 0) {
7107 if (content_filters
== NULL
|| kcunit
> MAX_CONTENT_FILTER
) {
7111 cfc
= content_filters
[kcunit
- 1];
7116 /* Would be wasteful to try */
7117 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
7122 msgsize
= sizeof(struct cfil_msg_stats_report
) + (sizeof(struct cfil_msg_sock_stats
) * stats_count
);
7123 buffer
->msghdr
.cfm_len
= msgsize
;
7124 buffer
->msghdr
.cfm_version
= 1;
7125 buffer
->msghdr
.cfm_type
= CFM_TYPE_EVENT
;
7126 buffer
->msghdr
.cfm_op
= CFM_OP_STATS
;
7127 buffer
->msghdr
.cfm_sock_id
= 0;
7128 buffer
->count
= stats_count
;
7131 CFIL_LOG(LOG_ERR
, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7133 (unsigned long)msgsize
,
7134 (unsigned long)sizeof(struct cfil_msg_stats_report
),
7135 (unsigned long)sizeof(struct cfil_msg_sock_stats
),
7136 (unsigned long)stats_count
);
7139 error
= ctl_enqueuedata(cfc
->cf_kcref
, cfc
->cf_kcunit
,
7144 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d", error
);
7147 OSIncrementAtomic(&cfil_stats
.cfs_stats_event_ok
);
7150 CFIL_LOG(LOG_ERR
, "CFIL: STATS REPORT: send msg to %d", kcunit
);
7155 if (error
== ENOBUFS
) {
7157 &cfil_stats
.cfs_stats_event_flow_control
);
7159 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
7160 cfil_rw_lock_exclusive(&cfil_lck_rw
);
7163 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
7165 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
7166 } else if (error
!= 0) {
7167 OSIncrementAtomic(&cfil_stats
.cfs_stats_event_fail
);
7174 cfil_stats_report_thread_sleep(bool forever
)
7177 CFIL_LOG(LOG_ERR
, "CFIL: STATS COLLECTION SLEEP");
7181 (void) assert_wait((event_t
) &cfil_sock_attached_stats_count
,
7182 THREAD_INTERRUPTIBLE
);
7184 uint64_t deadline
= 0;
7185 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC
, &deadline
);
7186 clock_absolutetime_interval_to_deadline(deadline
, &deadline
);
7188 (void) assert_wait_deadline(&cfil_sock_attached_stats_count
,
7189 THREAD_INTERRUPTIBLE
, deadline
);
7194 cfil_stats_report_thread_func(void *v
, wait_result_t w
)
7196 #pragma unused(v, w)
7198 ASSERT(cfil_stats_report_thread
== current_thread());
7199 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7201 // Kick off gc shortly
7202 cfil_stats_report_thread_sleep(false);
7203 thread_block_parameter((thread_continue_t
) cfil_stats_report
, NULL
);
7208 cfil_stats_collect_flow_stats_for_filter(int kcunit
,
7209 struct cfil_info
*cfil_info
,
7210 struct cfil_entry
*entry
,
7211 struct timeval current_tv
)
7213 struct cfil_stats_report_buffer
*buffer
= NULL
;
7214 struct cfil_msg_sock_stats
*flow_array
= NULL
;
7215 struct cfil_msg_sock_stats
*stats
= NULL
;
7216 struct inpcb
*inp
= NULL
;
7217 struct timeval diff_time
;
7218 uint64_t diff_time_usecs
;
7221 if (entry
->cfe_stats_report_frequency
== 0) {
7225 buffer
= global_cfil_stats_report_buffers
[kcunit
- 1];
7226 if (buffer
== NULL
) {
7228 CFIL_LOG(LOG_ERR
, "CFIL: STATS: no buffer");
7233 timersub(¤t_tv
, &entry
->cfe_stats_report_ts
, &diff_time
);
7234 diff_time_usecs
= (diff_time
.tv_sec
* USEC_PER_SEC
) + diff_time
.tv_usec
;
7237 CFIL_LOG(LOG_ERR
, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7238 (unsigned long long)entry
->cfe_stats_report_ts
.tv_sec
,
7239 (unsigned long long)entry
->cfe_stats_report_ts
.tv_usec
,
7240 (unsigned long long)current_tv
.tv_sec
,
7241 (unsigned long long)current_tv
.tv_usec
,
7242 (unsigned long long)diff_time
.tv_sec
,
7243 (unsigned long long)diff_time
.tv_usec
,
7244 (unsigned long long)diff_time_usecs
,
7245 (unsigned long long)((entry
->cfe_stats_report_frequency
* NSEC_PER_MSEC
) / NSEC_PER_USEC
),
7246 cfil_info
->cfi_sock_id
);
7249 // Compare elapsed time in usecs
7250 if (diff_time_usecs
>= (entry
->cfe_stats_report_frequency
* NSEC_PER_MSEC
) / NSEC_PER_USEC
) {
7252 CFIL_LOG(LOG_ERR
, "CFIL: STATS REPORT - in %llu reported %llu",
7253 cfil_info
->cfi_byte_inbound_count
,
7254 entry
->cfe_byte_inbound_count_reported
);
7255 CFIL_LOG(LOG_ERR
, "CFIL: STATS REPORT - out %llu reported %llu",
7256 cfil_info
->cfi_byte_outbound_count
,
7257 entry
->cfe_byte_outbound_count_reported
);
7259 // Check if flow has new bytes that have not been reported
7260 if (entry
->cfe_byte_inbound_count_reported
< cfil_info
->cfi_byte_inbound_count
||
7261 entry
->cfe_byte_outbound_count_reported
< cfil_info
->cfi_byte_outbound_count
) {
7262 flow_array
= (struct cfil_msg_sock_stats
*)&buffer
->stats
;
7263 index
= global_cfil_stats_counts
[kcunit
- 1];
7265 stats
= &flow_array
[index
];
7266 stats
->cfs_sock_id
= cfil_info
->cfi_sock_id
;
7267 stats
->cfs_byte_inbound_count
= cfil_info
->cfi_byte_inbound_count
;
7268 stats
->cfs_byte_outbound_count
= cfil_info
->cfi_byte_outbound_count
;
7270 if (entry
->cfe_laddr_sent
== false) {
7271 /* cache it if necessary */
7272 if (cfil_info
->cfi_so_attach_laddr
.sa
.sa_len
== 0) {
7273 inp
= cfil_info
->cfi_so
? sotoinpcb(cfil_info
->cfi_so
) : NULL
;
7275 boolean_t outgoing
= (cfil_info
->cfi_dir
== CFS_CONNECTION_DIR_OUT
);
7276 union sockaddr_in_4_6
*src
= outgoing
? &cfil_info
->cfi_so_attach_laddr
: NULL
;
7277 union sockaddr_in_4_6
*dst
= outgoing
? NULL
: &cfil_info
->cfi_so_attach_laddr
;
7278 cfil_fill_event_msg_addresses(cfil_info
->cfi_hash_entry
, inp
,
7279 src
, dst
, !IS_INP_V6(inp
), outgoing
);
7283 if (cfil_info
->cfi_so_attach_laddr
.sa
.sa_len
!= 0) {
7284 stats
->cfs_laddr
.sin6
= cfil_info
->cfi_so_attach_laddr
.sin6
;
7285 entry
->cfe_laddr_sent
= true;
7289 global_cfil_stats_counts
[kcunit
- 1]++;
7291 entry
->cfe_stats_report_ts
= current_tv
;
7292 entry
->cfe_byte_inbound_count_reported
= cfil_info
->cfi_byte_inbound_count
;
7293 entry
->cfe_byte_outbound_count_reported
= cfil_info
->cfi_byte_outbound_count
;
7295 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: STATS COLLECTED");
7297 CFI_ADD_TIME_LOG(cfil_info
, ¤t_tv
, &cfil_info
->cfi_first_event
, CFM_OP_STATS
);
7305 cfil_stats_report(void *v
, wait_result_t w
)
7307 #pragma unused(v, w)
7309 struct cfil_info
*cfil_info
= NULL
;
7310 struct cfil_entry
*entry
= NULL
;
7311 struct timeval current_tv
;
7312 uint32_t flow_count
= 0;
7313 uint64_t saved_next_sock_id
= 0; // Next sock id to be reported for next loop
7314 bool flow_reported
= false;
7317 CFIL_LOG(LOG_ERR
, "CFIL: STATS COLLECTION RUNNING");
7321 // Collect all sock ids of flows that has new stats
7322 cfil_rw_lock_shared(&cfil_lck_rw
);
7324 if (cfil_sock_attached_stats_count
== 0) {
7326 CFIL_LOG(LOG_ERR
, "CFIL: STATS: no flow");
7328 cfil_rw_unlock_shared(&cfil_lck_rw
);
7332 for (int kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
7333 if (global_cfil_stats_report_buffers
[kcunit
- 1] != NULL
) {
7334 memset(global_cfil_stats_report_buffers
[kcunit
- 1], 0, sizeof(struct cfil_stats_report_buffer
));
7336 global_cfil_stats_counts
[kcunit
- 1] = 0;
7339 microuptime(¤t_tv
);
7342 TAILQ_FOREACH(cfil_info
, &cfil_sock_head_stats
, cfi_link_stats
) {
7343 if (saved_next_sock_id
!= 0 &&
7344 saved_next_sock_id
== cfil_info
->cfi_sock_id
) {
7345 // Here is where we left off previously, start accumulating
7346 saved_next_sock_id
= 0;
7349 if (saved_next_sock_id
== 0) {
7350 if (flow_count
>= CFIL_STATS_REPORT_MAX_COUNT
) {
7351 // Examine a fixed number of flows each round. Remember the current flow
7352 // so we can start from here for next loop
7353 saved_next_sock_id
= cfil_info
->cfi_sock_id
;
7357 flow_reported
= false;
7358 for (int kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
7359 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
7360 if (entry
->cfe_filter
== NULL
) {
7362 CFIL_LOG(LOG_NOTICE
, "CFIL: STATS REPORT - so %llx no filter",
7363 cfil_info
->cfi_so
? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info
->cfi_so
) : 0);
7368 if ((entry
->cfe_stats_report_frequency
> 0) &&
7369 cfil_stats_collect_flow_stats_for_filter(kcunit
, cfil_info
, entry
, current_tv
) == true) {
7370 flow_reported
= true;
7373 if (flow_reported
== true) {
7379 if (flow_count
> 0) {
7381 CFIL_LOG(LOG_ERR
, "CFIL: STATS reporting for %d flows", flow_count
);
7383 for (int kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
7384 if (global_cfil_stats_report_buffers
[kcunit
- 1] != NULL
&&
7385 global_cfil_stats_counts
[kcunit
- 1] > 0) {
7386 cfil_dispatch_stats_event_locked(kcunit
,
7387 global_cfil_stats_report_buffers
[kcunit
- 1],
7388 global_cfil_stats_counts
[kcunit
- 1]);
7392 cfil_rw_unlock_shared(&cfil_lck_rw
);
7396 cfil_rw_unlock_shared(&cfil_lck_rw
);
7398 // Loop again if we haven't finished the whole cfil_info list
7399 } while (saved_next_sock_id
!= 0);
7403 // Sleep forever (until waken up) if no more flow to report
7404 cfil_rw_lock_shared(&cfil_lck_rw
);
7405 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count
== 0 ? true : false);
7406 cfil_rw_unlock_shared(&cfil_lck_rw
);
7407 thread_block_parameter((thread_continue_t
) cfil_stats_report
, NULL
);