/*
 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by INET/INET6 sockets.
 *
 * A content filter user space agent gets a copy of the data, and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * Note that messages about many INET/INET6 sockets can be multiplexed
 * over a single kernel control socket.
 *
 * - The current implementation supports all INET/INET6 sockets (i.e. TCP,
 *   UDP, ICMP, etc.).
 * - The current implementation supports up to two simultaneous content filters
 *   for iOS devices and eight simultaneous content filters for OSX.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which INET/INET6 sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant INET/INET6 content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT.
 *
 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the INET/INET6 socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per INET/INET6 socket,
 * but this restriction may soon be lifted.
 *
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (it is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the INET/INET6 sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells whether
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
 * For TCP, flows are per-socket. For UDP and other datagram protocols, there
 * can be multiple flows per socket.
 *
 * Note that the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
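 *
 * For orientation, the common header declared in <net/content_filter.h>
 * looks roughly like this (the field names match those used later in this
 * file; treat the exact layout as illustrative, the header file is
 * authoritative):
 *
 *	struct cfil_msg_hdr {
 *		uint32_t       cfm_len;      // total message length, including padding
 *		uint32_t       cfm_version;  // CFM_VERSION_CURRENT
 *		uint32_t       cfm_type;     // CFM_TYPE_EVENT or CFM_TYPE_ACTION
 *		uint32_t       cfm_op;       // CFM_OP_* operation code
 *		cfil_sock_id_t cfm_sock_id;  // identifies the INET/INET6 flow
 *	};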
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: an INET/INET6 socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on an INET/INET6 socket
 * - CFM_OP_DATA_IN: a span of data is being received on an INET/INET6 socket
 *
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a pair of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 * 2^64 / ((10^9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
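 *
 * As a worked example (illustrative numbers only): a CFM_OP_DATA_OUT event
 * describing the 500 bytes of outgoing data at stream positions 1000..1499
 * carries a start offset of 1000 and an end offset of 1500; the span length
 * is 1500 - 1000 = 500 bytes, and the next span starts at offset 1500.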
 *
 * There are two kinds of primary content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow.
 *
 * There is also an action to mark a given client flow as already filtered
 * at a higher level, CFM_OP_BLESS_CLIENT.
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new INET/INET6 socket is attached to the content filter and a flow is
 * created, each pass offset is initially set to 0, so no data is allowed to pass
 * by default. When a pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE,
 * the data flow in that direction becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see, by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new INET/INET6 flow is created, each peek offset is initially set to 0,
 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
 * until a CFM_OP_DATA_UPDATE action message with a peek offset greater than 0 is sent
 * by the user space filter agent. When a peek offset is set to CFM_MAX_OFFSET via
 * a CFM_OP_DATA_UPDATE, the flow of data update events becomes unrestricted.
 *
 * Note that a peek offset cannot be smaller than the corresponding pass offset.
 * Also, a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Attempts
 * to set a smaller peek value are silently ignored.
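 *
 * As a concrete sketch of what a user space agent might send after inspecting
 * the first 1024 outgoing bytes of a flow (the cfa_* names follow
 * "struct cfil_msg_action" in <net/content_filter.h> and are shown here for
 * illustration only; "flow_id" is a placeholder for the cfm_sock_id received
 * in the CFM_OP_SOCKET_ATTACHED event):
 *
 *	struct cfil_msg_action action = {
 *		.cfa_msghdr.cfm_len = sizeof(struct cfil_msg_action),
 *		.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT,
 *		.cfa_msghdr.cfm_type = CFM_TYPE_ACTION,
 *		.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE,
 *		.cfa_msghdr.cfm_sock_id = flow_id,
 *		.cfa_out_pass_offset = 1024,           // allow the first 1024 outgoing bytes
 *		.cfa_out_peek_offset = 2048,           // ask to see the next 1024 bytes
 *		.cfa_in_pass_offset = 0,
 *		.cfa_in_peek_offset = CFM_MAX_OFFSET,  // deliver all incoming data events
 *	};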
 *
 *
 * PER FLOW "struct cfil_info"
 *
 * As soon as an INET/INET6 socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
 * each new flow identified by its 4-tuple of source address/port and destination
 * address/port, a "struct cfil_info" is created. Each datagram socket may
 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected;
 * - A content filter specific state in a set of "struct cfil_entry"
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit of the kernel
 * control socket it corresponds to and also has a pointer
 * to the corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and which is pending a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the INET/INET6 socket buffer
 * and instead sits in one of three content filter queues until the data
 * can be re-injected into the INET/INET6 socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue", which contains
 * a list of mbufs and the start and end offsets of the span of data held
 * by the queue.
 *
 * The data moves through the three content filter queues in the following
 * order (an illustrative walkthrough follows below):
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: the sequence (a),(b) may be repeated several times if there is more
 * than one content filter attached to the INET/INET6 socket.
 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN events have been successfully dispatched to the kernel control
 * socket and which is waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * INET/INET6 socket buffer.
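 *
 * To make the sequence concrete, here is an illustrative walkthrough (offsets
 * are invented for the example): an outgoing mbuf covering offsets 0..499 is
 * first placed on the filter's "cfe_ctl_q". Once the filter's peek offset
 * reaches at least 500 and the kernel control socket is not flow controlled,
 * a CFM_OP_DATA_OUT event is sent and the mbuf moves to "cfe_pending_q".
 * Once a CFM_OP_DATA_UPDATE raises the pass offset to 500 or more (and every
 * other attached filter has done the same), the mbuf moves to "cfi_inject_q"
 * and is finally re-injected into the socket buffer.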
 *
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the INET/INET6 sockets being filtered.
 *
 * The processing overhead of the content filtering may have an effect on
 * flow control by adding noticeable delays and cannot be eliminated --
 * care must be taken by the user space filter agent to minimize the
 * processing overhead.
 *
 * The amount of data being filtered is kept in buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding INET/INET6 socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
 *
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per INET/INET6 socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * An INET/INET6 socket lock cannot be taken while the cfil read-write lock
 * is held. That's why we have some sequences where we drop the cfil read-write
 * lock before taking the INET/INET6 socket lock.
 *
 * It is also important to lock the INET/INET6 socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" lock protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually, "cfe_link" and "cfe_filter" are protected both by
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusively and the socket is locked.
 *
 * To read the other fields of "struct content_filter" we have to take
 * "cfil_lck_rw" in shared mode.
 *
 *
 * DATAGRAM SPECIFICS:
 *
 * The socket content filter supports all INET/INET6 protocols. However,
 * the treatments of TCP sockets and of datagram (UDP, ICMP, etc.) sockets
 * are slightly different.
 *
 * Each datagram socket may have multiple flows. Each flow is identified
 * by the flow's source address/port and destination address/port tuple
 * and is represented as a "struct cfil_info" entry. For each socket,
 * a hash table is used to maintain the collection of flows under that socket.
 *
 * Each datagram flow is uniquely identified by its "struct cfil_info" cfi_sock_id.
 * The highest 32 bits of the cfi_sock_id contain the socket's so_gencnt. This portion
 * of the cfi_sock_id is used to locate the socket during socket lookup. The lowest 32 bits
 * of the cfi_sock_id contain a hash of the flow's 4-tuple. This portion of the cfi_sock_id
 * is used as the hash value for the flow hash table lookup within the parent socket.
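 *
 * For example (made-up values): a socket with so_gencnt 0x1c2 and a flow whose
 * 4-tuple hashes to 0x9f3a77d1 yields
 *
 *	cfi_sock_id = ((uint64_t)0x1c2 << 32) | 0x9f3a77d1 = 0x000001c29f3a77d1
 *
 * so a lookup first finds the owning socket from the upper 32 bits and then
 * finds the flow within that socket's hash table from the lower 32 bits
 * (see the CFI_MASK_GENCNT / CFI_MASK_FLOWHASH definitions below).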
 *
 * Since datagram sockets may not be connected, flow states may not be maintained in the
 * socket structures and thus have to be saved for each packet. These saved states will be
 * used for both outgoing and incoming reinjections. For outgoing packets, the destination
 * address/port as well as the current socket state will be saved. During reinjection,
 * these saved states will be used instead. For incoming packets, control and address
 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
 * onto the incoming socket buffer.
 *
 * - Supports all INET/INET6 sockets, such as TCP, UDP, ICMP, etc.
 * - Does not support TCP unordered messages.
 */
#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/mbuf.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>
#include <net/content_filter_crypto.h>

#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#include <libkern/libkern.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <mach/task_info.h>
#if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
#define MAX_CONTENT_FILTER 2
#else
#define MAX_CONTENT_FILTER 8
#endif

extern struct inpcbinfo ripcbinfo;
/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref            cf_kcref;
	u_int32_t               cf_kcunit;
	u_int32_t               cf_flags;

	uint32_t                cf_necp_control_unit;

	uint32_t                cf_sock_count;
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;

	cfil_crypto_state_t     cf_crypto_state;
};
#define CFF_ACTIVE              0x01
#define CFF_DETACHING           0x02
#define CFF_FLOW_CONTROLLED     0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;                 /* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;          /* Number of socket attachments */
uint32_t cfil_sock_udp_attached_count = 0;      /* Number of UDP socket attachments */
uint32_t cfil_sock_attached_stats_count = 0;    /* Number of sockets that requested periodic stats reports */
uint32_t cfil_close_wait_timeout = 1000;        /* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;
static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);
#define CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];

static ZONE_DECLARE(content_filter_zone, "content_filter",
    sizeof(struct content_filter), ZC_NONE);
MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
	uint64_t                q_start; /* offset of first byte in queue */
	uint64_t                q_end;   /* offset of last byte in queue */
	struct cfil_mqhead      q_mq;
};
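
/*
 * A small sketch of the invariant the queue offsets encode (illustrative
 * values only): if 700 bytes have already left the queue and mbufs totalling
 * 300 bytes are still queued, then q_start == 700, q_end == 1000 and the
 * number of bytes held is q_end - q_start == 300; an empty queue always has
 * q_start == q_end. This is what cfil_queue_verify() checks further below.
 */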
/*
 * struct cfil_entry
 *
 * There is one entry per content filter per socket.
 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry) cfe_link;
	SLIST_ENTRY(cfil_entry) cfe_order_link;
	struct content_filter   *cfe_filter;

	struct cfil_info        *cfe_cfil_info;
	uint32_t                cfe_flags;
	uint32_t                cfe_necp_control_unit;
	struct timeval          cfe_last_event;                 /* To user space */
	struct timeval          cfe_last_action;                /* From user space */
	uint64_t                cfe_byte_inbound_count_reported;  /* stats already reported */
	uint64_t                cfe_byte_outbound_count_reported; /* stats already reported */
	struct timeval          cfe_stats_report_ts;            /* Timestamp of last stats report */
	uint32_t                cfe_stats_report_frequency;     /* Interval for stats report in msecs */
	boolean_t               cfe_laddr_sent;

	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action.
		 */
		struct cfil_queue       cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control).
		 */
		struct cfil_queue       cfe_ctl_q;

		uint64_t                cfe_pass_offset;
		uint64_t                cfe_peek_offset;
		uint64_t                cfe_peeked;
	} cfe_snd, cfe_rcv;
};
#define CFEF_CFIL_ATTACHED              0x0001  /* was attached to filter */
#define CFEF_SENT_SOCK_ATTACHED         0x0002  /* sock attach event was sent */
#define CFEF_DATA_START                 0x0004  /* can send data event */
#define CFEF_FLOW_CONTROLLED            0x0008  /* wait for flow control lift */
#define CFEF_SENT_DISCONNECT_IN         0x0010  /* event was sent */
#define CFEF_SENT_DISCONNECT_OUT        0x0020  /* event was sent */
#define CFEF_SENT_SOCK_CLOSED           0x0040  /* closed event was sent */
#define CFEF_CFIL_DETACHED              0x0080  /* filter was detached */
#define CFI_ADD_TIME_LOG(cfil, t1, t0, op)                                                                      \
	struct timeval64 _tdiff;                                                                                \
	if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) {                                                 \
	        timersub(t1, t0, &_tdiff);                                                                      \
	        (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000); \
	        (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op;                               \
	        (cfil)->cfi_op_list_ctr++;                                                                      \
	}
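
/*
 * Illustrative use of the macro above (a sketch, not copied from a specific
 * call site): record how long after the flow's first event an operation was
 * handled, keyed by its CFM_OP_* code.
 *
 *	struct timeval now;
 *	microuptime(&now);
 *	CFI_ADD_TIME_LOG(cfil_info, &now, &cfil_info->cfi_first_event, CFM_OP_DATA_OUT);
 */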
struct cfil_hash_entry;

/*
 * struct cfil_info
 *
 * There is a struct cfil_info per socket.
 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info)  cfi_link;
	TAILQ_ENTRY(cfil_info)  cfi_link_stats;
	struct socket           *cfi_so;
	uint32_t                cfi_flags;
	uint64_t                cfi_sock_id;
	struct timeval64        cfi_first_event;
	uint32_t                cfi_op_list_ctr;
	uint32_t                cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
	unsigned char           cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
	union sockaddr_in_4_6   cfi_so_attach_faddr;            /* faddr at the time of attach */
	union sockaddr_in_4_6   cfi_so_attach_laddr;            /* laddr at the time of attach */

	uint64_t                cfi_byte_inbound_count;
	uint64_t                cfi_byte_outbound_count;

	boolean_t               cfi_isSignatureLatest;          /* Indicates if signature covers latest flow attributes */
	u_int32_t               cfi_filter_control_unit;

	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue.
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used".
		 */
		uint64_t        cfi_pending_first;
		uint64_t        cfi_pending_last;
		uint32_t        cfi_pending_mbcnt;
		uint32_t        cfi_pending_mbnum;
		uint32_t        cfi_tail_drop_cnt;
		/*
		 * cfi_pass_offset is the minimum of all the filters.
		 */
		uint64_t        cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control.
		 */
		struct cfil_queue       cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry       cfi_entries[MAX_CONTENT_FILTER];
	struct cfil_hash_entry  *cfi_hash_entry;
	SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
	os_refcnt_t             cfi_ref_count;
} __attribute__((aligned(8)));
#define CFIF_DROP               0x0001  /* drop action applied */
#define CFIF_CLOSE_WAIT         0x0002  /* waiting for filter to close */
#define CFIF_SOCK_CLOSED        0x0004  /* socket is closed */
#define CFIF_RETRY_INJECT_IN    0x0010  /* inject in failed */
#define CFIF_RETRY_INJECT_OUT   0x0020  /* inject out failed */
#define CFIF_SHUT_WR            0x0040  /* shutdown write */
#define CFIF_SHUT_RD            0x0080  /* shutdown read */
#define CFIF_SOCKET_CONNECTED   0x0100  /* socket is connected */
#define CFIF_INITIAL_VERDICT    0x0200  /* received initial verdict */

#define CFI_MASK_GENCNT         0xFFFFFFFF00000000      /* upper 32 bits */
#define CFI_SHIFT_GENCNT        32
#define CFI_MASK_FLOWHASH       0x00000000FFFFFFFF      /* lower 32 bits */
#define CFI_SHIFT_FLOWHASH      0

#define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
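/*
 * For example, CFI_ENTRY_KCUNIT(cfil_info, &cfil_info->cfi_entries[0]) is 1:
 * the index of an entry within cfi_entries[] plus one is the kernel control
 * unit (kcunit) of the content filter the entry belongs to, which matches the
 * "kcunit - 1" indexing used throughout this file.
 */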
static ZONE_DECLARE(cfil_info_zone, "cfil_info",
    sizeof(struct cfil_info), ZC_NONE);

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)

LIST_HEAD(cfilhashhead, cfil_hash_entry);
#define CFILHASHSIZE 16
#define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))

#define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
#define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
#define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
#define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
    (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
#define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
#if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
#define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
#else
#define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
#endif

#define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
#define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
#define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))

#define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
    ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
#define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
#define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
    cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
#define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
#define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
#define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
    (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
    (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
#define LOCAL_ADDRESS_NEEDS_UPDATE(entry) \
    ((entry->cfentry_family == AF_INET && entry->cfentry_laddr.addr46.ia46_addr4.s_addr == 0) || \
    (entry->cfentry_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&entry->cfentry_laddr.addr6)))
#define LOCAL_PORT_NEEDS_UPDATE(entry, so) (entry->cfentry_lport == 0 && IS_UDP(so))
#define SKIP_FILTER_FOR_TCP_SOCKET(so) \
    (so == NULL || so->so_proto == NULL || so->so_proto->pr_domain == NULL || \
    (so->so_proto->pr_domain->dom_family != PF_INET && so->so_proto->pr_domain->dom_family != PF_INET6) || \
    so->so_proto->pr_type != SOCK_STREAM || \
    so->so_proto->pr_protocol != IPPROTO_TCP || \
    (so->so_flags & SOF_MP_SUBFLOW) != 0 || \
    (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);

#define CFIL_INFO_FREE(cfil_info) \
    if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
	    cfil_info_free(cfil_info); \
    }
/*
 * Periodic Statistics Report:
 */
static struct thread *cfil_stats_report_thread;
#define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC  500        // Highest report frequency
#define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC  (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
#define CFIL_STATS_REPORT_MAX_COUNT          50         // Max stats to be reported per run
/* This buffer must have same layout as struct cfil_msg_stats_report */
struct cfil_stats_report_buffer {
	struct cfil_msg_hdr        msghdr;
	struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
};
static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
/*
 * UDP Garbage Collection:
 */
static struct thread *cfil_udp_gc_thread;
#define UDP_FLOW_GC_IDLE_TO          30  // Flow Idle Timeout in seconds
#define UDP_FLOW_GC_ACTION_TO        10  // Flow Action Timeout (no action from user space) in seconds
#define UDP_FLOW_GC_MAX_COUNT        100 // Max UDP flows to be handled per run
#define UDP_FLOW_GC_RUN_INTERVAL_NSEC  (10 * NSEC_PER_SEC)  // GC wakes up every 10 seconds

/*
 * UDP flow queue thresholds
 */
#define UDP_FLOW_GC_MBUF_CNT_MAX  (2 << MBSHIFT)                        // Max mbuf byte count in flow queue (2MB)
#define UDP_FLOW_GC_MBUF_NUM_MAX  (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
#define UDP_FLOW_GC_MBUF_SHIFT    5                                     // Shift to get 1/32 of platform limits

/*
 * UDP flow queue threshold globals:
 */
static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
/*
 * struct cfil_hash_entry
 *
 * Hash entry for cfil_info
 */
struct cfil_hash_entry {
	LIST_ENTRY(cfil_hash_entry)    cfentry_link;
	struct cfil_info               *cfentry_cfil;
	u_short                        cfentry_fport;
	u_short                        cfentry_lport;
	sa_family_t                    cfentry_family;
	u_int32_t                      cfentry_flowhash;
	u_int64_t                      cfentry_lastused;
	union {
		/* foreign host table entry */
		struct in_addr_4in6    addr46;
		struct in6_addr        addr6;
	} cfentry_faddr;
	union {
		/* local host table entry */
		struct in_addr_4in6    addr46;
		struct in6_addr        addr6;
	} cfentry_laddr;
	uint8_t                        cfentry_laddr_updated: 1;
	uint8_t                        cfentry_lport_updated: 1;
	uint8_t                        cfentry_reserved: 6;
};
/*
 * struct cfil_db
 *
 * For each UDP socket, this is a hash table maintaining all the cfil_info structs
 * keyed by the flow 4-tuple <lport, fport, laddr, faddr>.
 */
struct cfil_db {
	struct socket            *cfdb_so;
	uint32_t                 cfdb_count;        /* Number of total content filters */
	struct cfilhashhead      *cfdb_hashbase;
	u_long                   cfdb_hashmask;
	struct cfil_hash_entry   *cfdb_only_entry;  /* Optimization for connected UDP */
};
/*
 * CFIL specific mbuf tag:
 * Save state of socket at the point of data entry into cfil.
 * Use saved state for reinjection at protocol layer.
 */
struct cfil_tag {
	union sockaddr_in_4_6   cfil_faddr;
	uint32_t                cfil_so_state_change_cnt;
	uint32_t                cfil_so_options;
};
static ZONE_DECLARE(cfil_hash_entry_zone, "cfil_entry_hash",
    sizeof(struct cfil_hash_entry), ZC_NONE);

static ZONE_DECLARE(cfil_db_zone, "cfil_db",
    sizeof(struct cfil_db), ZC_NONE);

struct cfil_stats cfil_stats;
/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;
int cfil_debug = 1;

// Debug controls added for selective debugging.
// Disabled for production. If enabled,
// these will have a performance impact.
#define LIFECYCLE_DEBUG 0
#define VERDICT_DEBUG 0
#define STATS_DEBUG 0
/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
    struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
    struct sysctl_req *);

SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_stats, cfil_stats, "");
/*
 * Forward declaration to appease the compiler
 */
static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
    uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
    struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
    struct mbuf *, uint32_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
    struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
    struct in6_addr *, u_int16_t);

static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
static void cfil_info_free(struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
    uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

static unsigned int cfil_data_length(struct mbuf *, int *, int *);
static errno_t cfil_db_init(struct socket *);
static void cfil_db_free(struct socket *so);
struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t);
struct cfil_hash_entry *cfil_db_lookup_entry_internal(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t, boolean_t);
struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
void cfil_db_update_entry_local(struct cfil_db *, struct cfil_hash_entry *, struct sockaddr *, struct mbuf *);
void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *, struct mbuf *, int);
struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
    struct mbuf *, struct mbuf *, uint32_t);
static int cfil_sock_udp_get_address_from_control(sa_family_t, struct mbuf *, uint8_t **);
static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
static void cfil_sock_udp_is_closed(struct socket *);
static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
static int cfil_sock_udp_shutdown(struct socket *, int *);
static void cfil_sock_udp_close_wait(struct socket *);
static void cfil_sock_udp_buf_update(struct sockbuf *);
static int cfil_filters_udp_attached(struct socket *, bool);
static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
    struct in6_addr **, struct in6_addr **,
    u_int16_t *, u_int16_t *);
static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
    struct in_addr *, struct in_addr *,
    u_int16_t *, u_int16_t *);
static void cfil_info_log(int, struct cfil_info *, const char *);
void cfil_filter_show(u_int32_t);
void cfil_info_show(void);
bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int64_t);
bool cfil_info_action_timed_out(struct cfil_info *, int);
bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
static void cfil_udp_gc_thread_func(void *, wait_result_t);
static void cfil_info_udp_expire(void *, wait_result_t);
static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *, bool);
static void cfil_sock_received_verdict(struct socket *so);
static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
    union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
    boolean_t, boolean_t);
static void cfil_stats_report_thread_func(void *, wait_result_t);
static void cfil_stats_report(void *v, wait_result_t w);

bool check_port(struct sockaddr *, u_short);
/*
 * Content filter global read write lock
 */

static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_exclusive(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_shared(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
	void *lr_saved;
	boolean_t upgraded;

	lr_saved = __builtin_return_address(0);

	upgraded = lck_rw_lock_shared_to_exclusive(lck);
	if (upgraded) {
		cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
		cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
	}
	return upgraded;
}

static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive_to_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
#pragma unused(lck, exclusive)
	LCK_RW_ASSERT(lck,
	    exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}
955 * Return the number of bytes in the mbuf chain using the same
956 * method as m_length() or sballoc()
958 * Returns data len - starting from PKT start
959 * - retmbcnt - optional param to get total mbuf bytes in chain
960 * - retmbnum - optional param to get number of mbufs in chain
963 cfil_data_length(struct mbuf
*m
, int *retmbcnt
, int *retmbnum
)
966 unsigned int pktlen
= 0;
970 // Locate the start of data
971 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
972 if (m0
->m_flags
& M_PKTHDR
) {
977 CFIL_LOG(LOG_ERR
, "cfil_data_length: no M_PKTHDR");
982 if (retmbcnt
== NULL
&& retmbnum
== NULL
) {
989 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
993 if (m0
->m_flags
& M_EXT
) {
994 mbcnt
+= m0
->m_ext
.ext_size
;
1006 static struct mbuf
*
1007 cfil_data_start(struct mbuf
*m
)
1011 // Locate the start of data
1012 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
1013 if (m0
->m_flags
& M_PKTHDR
) {
1021 * Common mbuf queue utilities
1025 cfil_queue_init(struct cfil_queue
*cfq
)
1029 MBUFQ_INIT(&cfq
->q_mq
);
1032 static inline uint64_t
1033 cfil_queue_drain(struct cfil_queue
*cfq
)
1035 uint64_t drained
= cfq
->q_start
- cfq
->q_end
;
1038 MBUFQ_DRAIN(&cfq
->q_mq
);
1043 /* Return 1 when empty, 0 otherwise */
1045 cfil_queue_empty(struct cfil_queue
*cfq
)
1047 return MBUFQ_EMPTY(&cfq
->q_mq
);
1050 static inline uint64_t
1051 cfil_queue_offset_first(struct cfil_queue
*cfq
)
1053 return cfq
->q_start
;
1056 static inline uint64_t
1057 cfil_queue_offset_last(struct cfil_queue
*cfq
)
1062 static inline uint64_t
1063 cfil_queue_len(struct cfil_queue
*cfq
)
1065 return cfq
->q_end
- cfq
->q_start
;
1069 * Routines to verify some fundamental assumptions
1073 cfil_queue_verify(struct cfil_queue
*cfq
)
1078 uint64_t queuesize
= 0;
1080 /* Verify offset are ordered */
1081 VERIFY(cfq
->q_start
<= cfq
->q_end
);
1084 * When queue is empty, the offsets are equal otherwise the offsets
1087 VERIFY((MBUFQ_EMPTY(&cfq
->q_mq
) && cfq
->q_start
== cfq
->q_end
) ||
1088 (!MBUFQ_EMPTY(&cfq
->q_mq
) &&
1089 cfq
->q_start
!= cfq
->q_end
));
1091 MBUFQ_FOREACH(chain
, &cfq
->q_mq
) {
1092 size_t chainsize
= 0;
1094 unsigned int mlen
= cfil_data_length(m
, NULL
, NULL
);
1095 // skip the addr and control stuff if present
1096 m
= cfil_data_start(m
);
1099 m
== (void *)M_TAG_FREE_PATTERN
||
1100 m
->m_next
== (void *)M_TAG_FREE_PATTERN
||
1101 m
->m_nextpkt
== (void *)M_TAG_FREE_PATTERN
) {
1102 panic("%s - mq %p is free at %p", __func__
,
1105 for (n
= m
; n
!= NULL
; n
= n
->m_next
) {
1106 if (n
->m_type
!= MT_DATA
&&
1107 n
->m_type
!= MT_HEADER
&&
1108 n
->m_type
!= MT_OOBDATA
) {
1109 panic("%s - %p unsupported type %u", __func__
,
1112 chainsize
+= n
->m_len
;
1114 if (mlen
!= chainsize
) {
1115 panic("%s - %p m_length() %u != chainsize %lu",
1116 __func__
, m
, mlen
, chainsize
);
1118 queuesize
+= chainsize
;
1120 if (queuesize
!= cfq
->q_end
- cfq
->q_start
) {
1121 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__
,
1122 m
, queuesize
, cfq
->q_end
- cfq
->q_start
);
1127 cfil_queue_enqueue(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
1129 CFIL_QUEUE_VERIFY(cfq
);
1131 MBUFQ_ENQUEUE(&cfq
->q_mq
, m
);
1134 CFIL_QUEUE_VERIFY(cfq
);
1138 cfil_queue_remove(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
1140 CFIL_QUEUE_VERIFY(cfq
);
1142 VERIFY(cfil_data_length(m
, NULL
, NULL
) == len
);
1144 MBUFQ_REMOVE(&cfq
->q_mq
, m
);
1145 MBUFQ_NEXT(m
) = NULL
;
1146 cfq
->q_start
+= len
;
1148 CFIL_QUEUE_VERIFY(cfq
);
1152 cfil_queue_first(struct cfil_queue
*cfq
)
1154 return MBUFQ_FIRST(&cfq
->q_mq
);
1158 cfil_queue_next(struct cfil_queue
*cfq
, mbuf_t m
)
1161 return MBUFQ_NEXT(m
);
1165 cfil_entry_buf_verify(struct cfe_buf
*cfe_buf
)
1167 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_ctl_q
);
1168 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_pending_q
);
1170 /* Verify the queues are ordered so that pending is before ctl */
1171 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
>= cfe_buf
->cfe_pending_q
.q_end
);
1173 /* The peek offset cannot be less than the pass offset */
1174 VERIFY(cfe_buf
->cfe_peek_offset
>= cfe_buf
->cfe_pass_offset
);
1176 /* Make sure we've updated the offset we peeked at */
1177 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
<= cfe_buf
->cfe_peeked
);
1181 cfil_entry_verify(struct cfil_entry
*entry
)
1183 cfil_entry_buf_verify(&entry
->cfe_snd
);
1184 cfil_entry_buf_verify(&entry
->cfe_rcv
);
1188 cfil_info_buf_verify(struct cfi_buf
*cfi_buf
)
1190 CFIL_QUEUE_VERIFY(&cfi_buf
->cfi_inject_q
);
1192 VERIFY(cfi_buf
->cfi_pending_first
<= cfi_buf
->cfi_pending_last
);
1196 cfil_info_verify(struct cfil_info
*cfil_info
)
1200 if (cfil_info
== NULL
) {
1204 cfil_info_buf_verify(&cfil_info
->cfi_snd
);
1205 cfil_info_buf_verify(&cfil_info
->cfi_rcv
);
1207 for (i
= 0; i
< MAX_CONTENT_FILTER
; i
++) {
1208 cfil_entry_verify(&cfil_info
->cfi_entries
[i
]);
1213 verify_content_filter(struct content_filter
*cfc
)
1215 struct cfil_entry
*entry
;
1218 VERIFY(cfc
->cf_sock_count
>= 0);
1220 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1222 VERIFY(cfc
== entry
->cfe_filter
);
1224 VERIFY(count
== cfc
->cf_sock_count
);
1228 * Kernel control socket callbacks
1231 cfil_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
1235 struct content_filter
*cfc
= NULL
;
1237 CFIL_LOG(LOG_NOTICE
, "");
1239 cfc
= zalloc(content_filter_zone
);
1241 CFIL_LOG(LOG_ERR
, "zalloc failed");
1245 bzero(cfc
, sizeof(struct content_filter
));
1247 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1248 if (content_filters
== NULL
) {
1249 struct content_filter
**tmp
;
1251 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1254 struct content_filter
**,
1255 MAX_CONTENT_FILTER
* sizeof(struct content_filter
*),
1259 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1261 if (tmp
== NULL
&& content_filters
== NULL
) {
1263 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1266 /* Another thread may have won the race */
1267 if (content_filters
!= NULL
) {
1270 content_filters
= tmp
;
1274 if (sac
->sc_unit
== 0 || sac
->sc_unit
> MAX_CONTENT_FILTER
) {
1275 CFIL_LOG(LOG_ERR
, "bad sc_unit %u", sac
->sc_unit
);
1277 } else if (content_filters
[sac
->sc_unit
- 1] != NULL
) {
1278 CFIL_LOG(LOG_ERR
, "sc_unit %u in use", sac
->sc_unit
);
1282 * kernel control socket kcunit numbers start at 1
1284 content_filters
[sac
->sc_unit
- 1] = cfc
;
1286 cfc
->cf_kcref
= kctlref
;
1287 cfc
->cf_kcunit
= sac
->sc_unit
;
1288 TAILQ_INIT(&cfc
->cf_sock_entries
);
1291 cfil_active_count
++;
1293 // Allocate periodic stats buffer for this filter
1294 if (global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] == NULL
) {
1295 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1297 struct cfil_stats_report_buffer
*buf
;
1300 struct cfil_stats_report_buffer
*,
1301 sizeof(struct cfil_stats_report_buffer
),
1305 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1309 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1313 /* Another thread may have won the race */
1314 if (global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] != NULL
) {
1317 global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] = buf
;
1321 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1323 if (error
!= 0 && cfc
!= NULL
) {
1324 zfree(content_filter_zone
, cfc
);
1328 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_ok
);
1330 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_fail
);
1333 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1334 error
, cfil_active_count
, sac
->sc_unit
);
1340 cfil_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
)
1342 #pragma unused(kctlref)
1344 struct content_filter
*cfc
;
1345 struct cfil_entry
*entry
;
1346 uint64_t sock_flow_id
= 0;
1348 CFIL_LOG(LOG_NOTICE
, "");
1350 if (content_filters
== NULL
) {
1351 CFIL_LOG(LOG_ERR
, "no content filter");
1355 if (kcunit
> MAX_CONTENT_FILTER
) {
1356 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1357 kcunit
, MAX_CONTENT_FILTER
);
1362 cfc
= (struct content_filter
*)unitinfo
;
1367 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1368 if (content_filters
[kcunit
- 1] != cfc
|| cfc
->cf_kcunit
!= kcunit
) {
1369 CFIL_LOG(LOG_ERR
, "bad unit info %u)",
1371 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1374 cfc
->cf_flags
|= CFF_DETACHING
;
1376 * Remove all sockets from the filter
1378 while ((entry
= TAILQ_FIRST(&cfc
->cf_sock_entries
)) != NULL
) {
1379 cfil_rw_lock_assert_held(&cfil_lck_rw
, 1);
1381 verify_content_filter(cfc
);
1383 * Accept all outstanding data by pushing to next filter
1386 * TBD: Actually we should make sure all data has been pushed
1389 if (entry
->cfe_cfil_info
&& entry
->cfe_cfil_info
->cfi_so
) {
1390 struct cfil_info
*cfil_info
= entry
->cfe_cfil_info
;
1391 struct socket
*so
= cfil_info
->cfi_so
;
1392 sock_flow_id
= cfil_info
->cfi_sock_id
;
1394 /* Need to let data flow immediately */
1395 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
|
1399 * Respect locking hierarchy
1401 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1406 * When cfe_filter is NULL the filter is detached
1407 * and the entry has been removed from cf_sock_entries
1409 if ((so
->so_cfil
== NULL
&& so
->so_cfil_db
== NULL
) || entry
->cfe_filter
== NULL
) {
1410 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1414 (void) cfil_action_data_pass(so
, cfil_info
, kcunit
, 1,
1418 (void) cfil_action_data_pass(so
, cfil_info
, kcunit
, 0,
1422 cfil_rw_lock_exclusive(&cfil_lck_rw
);
	 * Check again to make sure the cfil_info is still valid,
	 * as the socket may have been unlocked when calling
	 * cfil_acquire_sockbuf()
1429 if (entry
->cfe_filter
== NULL
||
1430 (so
->so_cfil
== NULL
&& cfil_db_get_cfil_info(so
->so_cfil_db
, sock_flow_id
) == NULL
)) {
1434 /* The filter is now detached */
1435 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
1437 cfil_info_log(LOG_DEBUG
, cfil_info
, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1439 CFIL_LOG(LOG_NOTICE
, "so %llx detached %u",
1440 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1441 if ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
1442 cfil_filters_attached(so
) == 0) {
1443 CFIL_LOG(LOG_NOTICE
, "so %llx waking",
1444 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1445 wakeup((caddr_t
)cfil_info
);
1449 * Remove the filter entry from the content filter
1450 * but leave the rest of the state intact as the queues
1451 * may not be empty yet
1453 entry
->cfe_filter
= NULL
;
1454 entry
->cfe_necp_control_unit
= 0;
1456 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1457 cfc
->cf_sock_count
--;
1459 socket_unlock(so
, 1);
1462 verify_content_filter(cfc
);
1464 /* Free the stats buffer for this filter */
1465 if (global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] != NULL
) {
1466 FREE(global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1], M_TEMP
);
1467 global_cfil_stats_report_buffers
[cfc
->cf_kcunit
- 1] = NULL
;
1469 VERIFY(cfc
->cf_sock_count
== 0);
1472 * Make filter inactive
1474 content_filters
[kcunit
- 1] = NULL
;
1475 cfil_active_count
--;
1476 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1478 if (cfc
->cf_crypto_state
!= NULL
) {
1479 cfil_crypto_cleanup_state(cfc
->cf_crypto_state
);
1480 cfc
->cf_crypto_state
= NULL
;
1483 zfree(content_filter_zone
, cfc
);
1486 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_ok
);
1488 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_fail
);
1491 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1492 error
, cfil_active_count
, kcunit
);
1498 * cfil_acquire_sockbuf()
1500 * Prevent any other thread from acquiring the sockbuf
1501 * We use sb_cfil_thread as a semaphore to prevent other threads from
1502 * messing with the sockbuf -- see sblock()
1503 * Note: We do not set SB_LOCK here because the thread may check or modify
1504 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1505 * sblock(), sbunlock() or sodefunct()
1508 cfil_acquire_sockbuf(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
1510 thread_t tp
= current_thread();
1511 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1512 lck_mtx_t
*mutex_held
;
1516 * Wait until no thread is holding the sockbuf and other content
1517 * filter threads have released the sockbuf
1519 while ((sb
->sb_flags
& SB_LOCK
) ||
1520 (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)) {
1521 if (so
->so_proto
->pr_getlock
!= NULL
) {
1522 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1524 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1527 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1530 VERIFY(sb
->sb_wantlock
!= 0);
1532 msleep(&sb
->sb_flags
, mutex_held
, PSOCK
, "cfil_acquire_sockbuf",
1535 VERIFY(sb
->sb_wantlock
!= 0);
1539 * Use reference count for repetitive calls on same thread
1541 if (sb
->sb_cfil_refs
== 0) {
1542 VERIFY(sb
->sb_cfil_thread
== NULL
);
1543 VERIFY((sb
->sb_flags
& SB_LOCK
) == 0);
1545 sb
->sb_cfil_thread
= tp
;
1546 sb
->sb_flags
|= SB_LOCK
;
1550 /* We acquire the socket buffer when we need to cleanup */
1551 if (cfil_info
== NULL
) {
1552 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
1553 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1555 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1556 CFIL_LOG(LOG_ERR
, "so %llx drop set",
1557 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1565 cfil_release_sockbuf(struct socket
*so
, int outgoing
)
1567 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1568 thread_t tp
= current_thread();
1570 socket_lock_assert_owned(so
);
1572 if (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
) {
1573 panic("%s sb_cfil_thread %p not current %p", __func__
,
1574 sb
->sb_cfil_thread
, tp
);
1577 * Don't panic if we are defunct because SB_LOCK has
1578 * been cleared by sodefunct()
1580 if (!(so
->so_flags
& SOF_DEFUNCT
) && !(sb
->sb_flags
& SB_LOCK
)) {
1581 panic("%s SB_LOCK not set on %p", __func__
,
1585 * We can unlock when the thread unwinds to the last reference
1588 if (sb
->sb_cfil_refs
== 0) {
1589 sb
->sb_cfil_thread
= NULL
;
1590 sb
->sb_flags
&= ~SB_LOCK
;
1592 if (sb
->sb_wantlock
> 0) {
1593 wakeup(&sb
->sb_flags
);
1599 cfil_sock_id_from_socket(struct socket
*so
)
1601 if ((so
->so_flags
& SOF_CONTENT_FILTER
) && so
->so_cfil
) {
1602 return so
->so_cfil
->cfi_sock_id
;
1604 return CFIL_SOCK_ID_NONE
;
1609 cfil_socket_safe_lock(struct inpcb
*inp
)
1611 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1612 socket_lock(inp
->inp_socket
, 1);
1613 if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) != WNT_STOPUSING
) {
1616 socket_unlock(inp
->inp_socket
, 1);
1621 static struct socket
*
1622 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id
, bool udp_only
)
1624 struct socket
*so
= NULL
;
1625 u_int64_t gencnt
= cfil_sock_id
>> 32;
1626 u_int32_t flowhash
= (u_int32_t
)(cfil_sock_id
& 0x0ffffffff);
1627 struct inpcb
*inp
= NULL
;
1628 struct inpcbinfo
*pcbinfo
= NULL
;
1631 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id
, gencnt
, flowhash
);
1639 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1640 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1641 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1642 inp
->inp_socket
!= NULL
&&
1643 inp
->inp_flowhash
== flowhash
&&
1644 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
&&
1645 inp
->inp_socket
->so_cfil
!= NULL
) {
1646 if (cfil_socket_safe_lock(inp
)) {
1647 so
= inp
->inp_socket
;
1652 lck_rw_done(pcbinfo
->ipi_lock
);
1660 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1661 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1662 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1663 inp
->inp_socket
!= NULL
&&
1664 inp
->inp_socket
->so_cfil_db
!= NULL
&&
1665 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
) {
1666 if (cfil_socket_safe_lock(inp
)) {
1667 so
= inp
->inp_socket
;
1672 lck_rw_done(pcbinfo
->ipi_lock
);
1674 pcbinfo
= &ripcbinfo
;
1675 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1676 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1677 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1678 inp
->inp_socket
!= NULL
&&
1679 inp
->inp_socket
->so_cfil_db
!= NULL
&&
1680 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
) {
1681 if (cfil_socket_safe_lock(inp
)) {
1682 so
= inp
->inp_socket
;
1687 lck_rw_done(pcbinfo
->ipi_lock
);
1691 OSIncrementAtomic(&cfil_stats
.cfs_sock_id_not_found
);
1693 "no socket for sock_id %llx gencnt %llx flowhash %x",
1694 cfil_sock_id
, gencnt
, flowhash
);
1700 static struct socket
*
1701 cfil_socket_from_client_uuid(uuid_t necp_client_uuid
, bool *cfil_attached
)
1703 struct socket
*so
= NULL
;
1704 struct inpcb
*inp
= NULL
;
1705 struct inpcbinfo
*pcbinfo
= &tcbinfo
;
1707 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1708 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1709 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1710 inp
->inp_socket
!= NULL
&&
1711 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1712 *cfil_attached
= (inp
->inp_socket
->so_cfil
!= NULL
);
1713 if (cfil_socket_safe_lock(inp
)) {
1714 so
= inp
->inp_socket
;
1719 lck_rw_done(pcbinfo
->ipi_lock
);
1725 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1726 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1727 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1728 inp
->inp_socket
!= NULL
&&
1729 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1730 *cfil_attached
= (inp
->inp_socket
->so_cfil_db
!= NULL
);
1731 if (cfil_socket_safe_lock(inp
)) {
1732 so
= inp
->inp_socket
;
1737 lck_rw_done(pcbinfo
->ipi_lock
);
cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
{
	struct cfil_info *cfil = NULL;
	Boolean found = FALSE;

	if (cfil_info == NULL) {
		return;
	}

	if (report_frequency) {
		if (entry == NULL) {
			return;
		}

		// Update stats reporting frequency.
		if (entry->cfe_stats_report_frequency != report_frequency) {
			entry->cfe_stats_report_frequency = report_frequency;
			if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
				entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
			}
			microuptime(&entry->cfe_stats_report_ts);

			// Insert cfil_info into list only if it is not in yet.
			TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
				if (cfil == cfil_info) {
					return;
				}
			}

			TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);

			// Wake up stats thread if this is first flow added
			if (cfil_sock_attached_stats_count == 0) {
				thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
			}
			cfil_sock_attached_stats_count++;

			CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
			    cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
			    cfil_info->cfi_sock_id,
			    entry->cfe_stats_report_frequency);
		}
	} else {
		// Turn off stats reporting for this filter.
		if (entry != NULL) {
			// Already off, no change.
			if (entry->cfe_stats_report_frequency == 0) {
				return;
			}

			entry->cfe_stats_report_frequency = 0;
			// If cfil_info still has filter(s) asking for stats, no need to remove from list.
			for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
				if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
					return;
				}
			}
		}

		// No more filter asking for stats for this cfil_info, remove from list.
		if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
			found = FALSE;
			TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
				if (cfil == cfil_info) {
					found = TRUE;
					break;
				}
			}
			if (found) {
				cfil_sock_attached_stats_count--;
				TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);

				CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
				    cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
				    cfil_info->cfi_sock_id);
			}
		}
	}
}
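
/*
 * cfil_ctl_send()
 *
 * Handles an action message sent by the user space filter agent on its
 * kernel control socket: the message header is validated, the target socket
 * is looked up by cfm_sock_id (TCP or UDP) and locked, and the verdict is
 * applied -- either updated pass/peek offsets for each direction
 * (CFM_OP_DATA_UPDATE) or a drop of the flow (CFM_OP_DROP).
 */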
cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
    int flags)
{
#pragma unused(kctlref, flags)
	errno_t error = 0;
	struct cfil_msg_hdr *msghdr;
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so;
	struct cfil_msg_action *action_msg;
	struct cfil_entry *entry;
	struct cfil_info *cfil_info = NULL;
	unsigned int data_len = 0;

	CFIL_LOG(LOG_INFO, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
	}
	if (m == NULL) {
		CFIL_LOG(LOG_ERR, "null mbuf");
	}
	data_len = m_length(m);

	if (data_len < sizeof(struct cfil_msg_hdr)) {
		CFIL_LOG(LOG_ERR, "too short %u", data_len);
	}
	msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
	if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
		CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
	}
	if (msghdr->cfm_type != CFM_TYPE_ACTION) {
		CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
	}
	if (msghdr->cfm_len > data_len) {
		CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
	}

	/* Validate action operation */
	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_data_update);
		break;
	case CFM_OP_DROP:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
		break;
	case CFM_OP_BLESS_CLIENT:
		if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
			OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
			CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
			    msghdr->cfm_len, msghdr->cfm_op);
		}
		error = cfil_action_bless_client(kcunit, msghdr);
		goto done;
	case CFM_OP_SET_CRYPTO_KEY:
		if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
			OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
			CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
			    msghdr->cfm_len, msghdr->cfm_op);
		}
		error = cfil_action_set_crypto_key(kcunit, msghdr);
		goto done;
	default:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
		CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
	}
	if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
		CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
		    msghdr->cfm_len, msghdr->cfm_op);
	}
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	cfil_rw_unlock_shared(&cfil_lck_rw);

	// Search for socket (TCP+UDP and lock so)
	so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
	if (so == NULL) {
		CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
		    msghdr->cfm_sock_id);
	}

	cfil_info = so->so_cfil_db != NULL ?
	    cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;

	// We should not obtain global lock here in order to avoid deadlock down the path.
	// But we attempt to retain a valid cfil_info to prevent any deallocation until
	// we are done.  Abort retain if cfil_info has already entered the free code path.
	if (cfil_info && os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
		socket_unlock(so, 1);
	}

	if (cfil_info == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
	} else if (cfil_info->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_NOTICE, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
	}

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
	}

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (entry->cfe_filter == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx no filter",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
	}

	if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
		entry->cfe_flags |= CFEF_DATA_START;
	} else {
		CFIL_LOG(LOG_ERR,
		    "so %llx attached not sent for %u",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
	}

	microuptime(&entry->cfe_last_action);
	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);

	action_msg = (struct cfil_msg_action *)msghdr;

	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		if (cfil_info->cfi_debug) {
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
			CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    cfil_info->cfi_sock_id,
			    action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
			    action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
		}

		CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
		    (uint64_t)VM_KERNEL_ADDRPERM(so),
		    cfil_info->cfi_sock_id,
		    action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
		    action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);

		/*
		 * Received verdict, at this point we know this
		 * socket connection is allowed. Unblock thread
		 * immediately before proceeding to process the verdict.
		 */
		cfil_sock_received_verdict(so);

		if (action_msg->cfa_out_peek_offset != 0 ||
		    action_msg->cfa_out_pass_offset != 0) {
			error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
			    action_msg->cfa_out_pass_offset,
			    action_msg->cfa_out_peek_offset);
		}
		if (error == EJUSTRETURN) {
			error = 0;
		}
		if (action_msg->cfa_in_peek_offset != 0 ||
		    action_msg->cfa_in_pass_offset != 0) {
			error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
			    action_msg->cfa_in_pass_offset,
			    action_msg->cfa_in_peek_offset);
		}
		if (error == EJUSTRETURN) {
			error = 0;
		}

		// Toggle stats reporting according to received verdict.
		cfil_rw_lock_exclusive(&cfil_lck_rw);
		cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
		cfil_rw_unlock_exclusive(&cfil_lck_rw);
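
		/*
		 * Illustrative sketch (not part of this file, member names for
		 * the embedded header assumed from cfil.h): a filter agent that
		 * wants to allow traffic seen so far and keep peeking would
		 * send a CFM_OP_DATA_UPDATE action roughly like:
		 *
		 *   struct cfil_msg_action action = {
		 *       .cfa_msghdr.cfm_len = sizeof(action),
		 *       .cfa_msghdr.cfm_version = CFM_VERSION_CURRENT,
		 *       .cfa_msghdr.cfm_type = CFM_TYPE_ACTION,
		 *       .cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE,
		 *       .cfa_msghdr.cfm_sock_id = sock_id,  // from the attach event
		 *       .cfa_out_pass_offset = out_pass,    // bytes allowed out
		 *       .cfa_out_peek_offset = out_peek,    // bytes it still wants to see
		 *       .cfa_in_pass_offset = in_pass,
		 *       .cfa_in_peek_offset = in_peek,
		 *   };
		 *   // written to the kernel control socket and handled right here.
		 */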
		break;

	case CFM_OP_DROP:
		if (cfil_info->cfi_debug) {
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
			CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    cfil_info->cfi_sock_id,
			    action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
			    action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
		}

		CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
		    (uint64_t)VM_KERNEL_ADDRPERM(so),
		    cfil_info->cfi_sock_id,
		    action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
		    action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);

		error = cfil_action_drop(so, cfil_info, kcunit);
		cfil_sock_received_verdict(so);
		break;
	}

	CFIL_INFO_FREE(cfil_info)
	socket_unlock(so, 1);

done:
	if (error == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
	} else {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
	}

	return error;
}
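
/*
 * cfil_ctl_getopt()
 *
 * Services getsockopt() calls issued by the filter agent on its kernel
 * control socket: CFIL_OPT_NECP_CONTROL_UNIT returns the control unit bound
 * to this filter instance, and CFIL_OPT_GET_SOCKET_INFO fills a
 * struct cfil_opt_sock_info (family/type/protocol, addresses, pid/uuid) for
 * the flow identified by cfs_sock_id.
 *
 * Rough user space usage (a sketch, not part of this file; assumes `fd` is
 * the connected control socket and `sock_id` came from an earlier event):
 *
 *   struct cfil_opt_sock_info info = { .cfs_sock_id = sock_id };
 *   socklen_t len = sizeof(info);
 *   if (getsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO,
 *       &info, &len) == 0) {
 *       // info.cfs_local / info.cfs_remote now hold the flow addresses
 *   }
 */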
cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    int opt, void *data, size_t *len)
{
#pragma unused(kctlref, opt)
	struct cfil_info *cfil_info = NULL;
	errno_t error = 0;
	struct content_filter *cfc = (struct content_filter *)unitinfo;

	CFIL_LOG(LOG_NOTICE, "");

	cfil_rw_lock_shared(&cfil_lck_rw);

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		error = EINVAL;
		goto done;
	}
	switch (opt) {
	case CFIL_OPT_NECP_CONTROL_UNIT:
		if (*len < sizeof(uint32_t)) {
			CFIL_LOG(LOG_ERR, "len too small %lu", *len);
			error = EINVAL;
			goto done;
		}
		*(uint32_t *)data = cfc->cf_necp_control_unit;
		break;
	case CFIL_OPT_GET_SOCKET_INFO:
		if (*len != sizeof(struct cfil_opt_sock_info)) {
			CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
			error = EINVAL;
			goto done;
		}
		if (data == NULL) {
			CFIL_LOG(LOG_ERR, "data not passed");
			error = EINVAL;
			goto done;
		}

		struct cfil_opt_sock_info *sock_info =
		    (struct cfil_opt_sock_info *) data;

		// Unlock here so that we never hold both cfil_lck_rw and the
		// socket_lock at the same time. Otherwise, this can deadlock
		// because soclose() takes the socket_lock and then exclusive
		// cfil_lck_rw and we require the opposite order.

		// WARNING: Be sure to never use anything protected
		//     by cfil_lck_rw beyond this point.
		// WARNING: Be sure to avoid fallthrough and
		//     goto return_already_unlocked from this branch.
		cfil_rw_unlock_shared(&cfil_lck_rw);

		// Search (TCP+UDP) and lock socket
		struct socket *sock =
		    cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
		if (sock == NULL) {
			CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
			    sock_info->cfs_sock_id);
			error = ENOENT;
			goto return_already_unlocked;
		}

		cfil_info = (sock->so_cfil_db != NULL) ?
		    cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;

		if (cfil_info == NULL) {
			CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
			    (uint64_t)VM_KERNEL_ADDRPERM(sock));
			error = EINVAL;
			socket_unlock(sock, 1);
			goto return_already_unlocked;
		}

		// Fill out family, type, and protocol
		sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
		sock_info->cfs_sock_type = sock->so_proto->pr_type;
		sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;

		// Source and destination addresses
		struct inpcb *inp = sotoinpcb(sock);
		if (inp->inp_vflag & INP_IPV6) {
			struct in6_addr *laddr = NULL, *faddr = NULL;
			u_int16_t lport = 0, fport = 0;

			cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
			    &laddr, &faddr, &lport, &fport);
			fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
			fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
		} else if (inp->inp_vflag & INP_IPV4) {
			struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
			u_int16_t lport = 0, fport = 0;

			cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
			    &laddr, &faddr, &lport, &fport);
			fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
			fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
		}

		sock_info->cfs_pid = sock->last_pid;
		memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));

		if (sock->so_flags & SOF_DELEGATED) {
			sock_info->cfs_e_pid = sock->e_pid;
			memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
		} else {
			sock_info->cfs_e_pid = sock->last_pid;
			memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
		}

		socket_unlock(sock, 1);

		goto return_already_unlocked;
	default:
		error = ENOPROTOOPT;
		break;
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);

	return error;

return_already_unlocked:
	return error;
}
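
/*
 * cfil_ctl_setopt()
 *
 * Services setsockopt() on the kernel control socket. The only option
 * handled here is CFIL_OPT_NECP_CONTROL_UNIT, and it may be set only once
 * per filter instance: a non-zero existing value is rejected.
 *
 * Rough user space usage (a sketch, not part of this file):
 *
 *   uint32_t unit = filter_control_unit;
 *   setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *       &unit, sizeof(unit));
 */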
cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    int opt, void *data, size_t len)
{
#pragma unused(kctlref, opt)
	errno_t error = 0;
	struct content_filter *cfc = (struct content_filter *)unitinfo;

	CFIL_LOG(LOG_NOTICE, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		error = EINVAL;
		goto done;
	}
	switch (opt) {
	case CFIL_OPT_NECP_CONTROL_UNIT:
		if (len < sizeof(uint32_t)) {
			CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
			    "len too small %lu", len);
			error = EINVAL;
			goto done;
		}
		if (cfc->cf_necp_control_unit != 0) {
			CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
			    "already set %u",
			    cfc->cf_necp_control_unit);
			error = EINVAL;
			goto done;
		}
		cfc->cf_necp_control_unit = *(uint32_t *)data;
		break;
	default:
		error = ENOPROTOOPT;
		break;
	}
done:
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	return error;
}
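
/*
 * cfil_ctl_rcvd()
 *
 * Called when the filter agent has read data off its kernel control socket
 * and enqueue space is available again: the filter's CFF_FLOW_CONTROLLED
 * flag is cleared and the socket queues of entries that were flow controlled
 * are serviced until flow control is raised again.
 */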
cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
{
#pragma unused(kctlref, flags)
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so = NULL;
	int error;
	struct cfil_entry *entry;
	struct cfil_info *cfil_info = NULL;

	CFIL_LOG(LOG_INFO, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		return;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		return;
	}
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		goto done;
	}
	/* Let's assume the flow control is lifted */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;

		cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
		LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
	}
	/*
	 * Flow control will be raised again as soon as an entry cannot enqueue
	 * to the kernel control socket
	 */
	while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
		verify_content_filter(cfc);

		cfil_rw_lock_assert_held(&cfil_lck_rw, 0);

		/* Find an entry that is flow controlled */
		TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
			if (entry->cfe_cfil_info == NULL ||
			    entry->cfe_cfil_info->cfi_so == NULL) {
				continue;
			}
			if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
				continue;
			}
			break;
		}
		if (entry == NULL) {
			break;
		}

		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);

		cfil_info = entry->cfe_cfil_info;
		so = cfil_info->cfi_so;

		cfil_rw_unlock_shared(&cfil_lck_rw);

		error = cfil_acquire_sockbuf(so, cfil_info, 1);
		if (error == 0) {
			error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
		}
		cfil_release_sockbuf(so, 1);

		error = cfil_acquire_sockbuf(so, cfil_info, 0);
		if (error == 0) {
			error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
		}
		cfil_release_sockbuf(so, 0);

		socket_lock_assert_owned(so);
		socket_unlock(so, 1);

		cfil_rw_lock_shared(&cfil_lck_rw);
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);
}
	struct kern_ctl_reg kern_ctl;
	errno_t error = 0;
	unsigned int mbuf_limit = 0;

	CFIL_LOG(LOG_NOTICE, "");

	/*
	 * Compile time verifications
	 */
	_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
	_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);

	/*
	 * Runtime verifications
	 */
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
	    sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
	    sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
	    sizeof(uint32_t)));

	cfil_lck_grp_attr = lck_grp_attr_alloc_init();
	if (cfil_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed", __func__);
	}
	cfil_lck_grp = lck_grp_alloc_init("content filter",
	    cfil_lck_grp_attr);
	if (cfil_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed", __func__);
	}
	cfil_lck_attr = lck_attr_alloc_init();
	if (cfil_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed", __func__);
	}
	lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);

	TAILQ_INIT(&cfil_sock_head);
	TAILQ_INIT(&cfil_sock_head_stats);

	/*
	 * Register kernel control
	 */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
	    sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
	kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_connect = cfil_ctl_connect;
	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
	kern_ctl.ctl_send = cfil_ctl_send;
	kern_ctl.ctl_getopt = cfil_ctl_getopt;
	kern_ctl.ctl_setopt = cfil_ctl_setopt;
	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
	error = ctl_register(&kern_ctl, &cfil_kctlref);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
		return;
	}

	// Spawn thread for garbage collection
	if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
	    &cfil_udp_gc_thread) != KERN_SUCCESS) {
		panic_plain("%s: Can't create UDP GC thread", __func__);
	}
	/* this must not fail */
	VERIFY(cfil_udp_gc_thread != NULL);

	// Spawn thread for statistics reporting
	if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
	    &cfil_stats_report_thread) != KERN_SUCCESS) {
		panic_plain("%s: Can't create statistics report thread", __func__);
	}
	/* this must not fail */
	VERIFY(cfil_stats_report_thread != NULL);

	// Set UDP per-flow mbuf thresholds to 1/32 of platform max
	mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
	cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
	cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
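
	/*
	 * Worked example (illustrative values, not a guarantee): with
	 * UDP_FLOW_GC_MBUF_SHIFT == 5 the cap is 1/32 of the cluster pool.
	 * If the platform has nmbclusters = 65536 clusters of 2 KB
	 * (MCLSHIFT == 11), that is 128 MB of buffering, so mbuf_limit comes
	 * out to 4 MB per flow and cfil_udp_gc_mbuf_num_max to roughly 2048
	 * clusters.
	 */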
	memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
}
cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
{
	struct cfil_info *cfil_info = NULL;
	struct inpcb *inp = sotoinpcb(so);

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_info = zalloc(cfil_info_zone);
	if (cfil_info == NULL) {
		goto done;
	}
	bzero(cfil_info, sizeof(struct cfil_info));
	os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);

	cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
	cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		entry->cfe_cfil_info = cfil_info;

		/* Initialize the filter entry */
		entry->cfe_filter = NULL;
		entry->cfe_flags = 0;
		entry->cfe_necp_control_unit = 0;
		entry->cfe_snd.cfe_pass_offset = 0;
		entry->cfe_snd.cfe_peek_offset = 0;
		entry->cfe_snd.cfe_peeked = 0;
		entry->cfe_rcv.cfe_pass_offset = 0;
		entry->cfe_rcv.cfe_peek_offset = 0;
		entry->cfe_rcv.cfe_peeked = 0;
		/*
		 * Timestamp the last action to avoid pre-maturely
		 * triggering garbage collection
		 */
		microuptime(&entry->cfe_last_action);

		cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
		cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
	}

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	/*
	 * Create a cfi_sock_id that's not the socket pointer!
	 */
	if (hash_entry == NULL) {
		// This is the TCP case, cfil_info is tracked per socket
		if (inp->inp_flowhash == 0) {
			inp->inp_flowhash = inp_calc_flowhash(inp);
		}

		so->so_cfil = cfil_info;
		cfil_info->cfi_so = so;
		cfil_info->cfi_sock_id =
		    ((so->so_gencnt << 32) | inp->inp_flowhash);
	} else {
		// This is the UDP case, cfil_info is tracked in per-socket hash
		cfil_info->cfi_so = so;
		hash_entry->cfentry_cfil = cfil_info;
		cfil_info->cfi_hash_entry = hash_entry;
		cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
		    inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);

		// Wake up gc thread if this is first flow added
		if (cfil_sock_udp_attached_count == 0) {
			thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
		}

		cfil_sock_udp_attached_count++;
	}

	TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
	SLIST_INIT(&cfil_info->cfi_ordered_entries);

	cfil_sock_attached_count++;

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

done:
	if (cfil_info != NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
	} else {
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
	}
cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
{
	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
	    content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
	    kcunit++) {
		struct content_filter *cfc = content_filters[kcunit - 1];
		struct cfil_entry *entry;
		struct cfil_entry *iter_entry;
		struct cfil_entry *iter_prev;

		if (cfc == NULL) {
			continue;
		}
		if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
			continue;
		}

		entry = &cfil_info->cfi_entries[kcunit - 1];

		entry->cfe_filter = cfc;
		entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
		TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count++;

		/* Insert the entry into the list ordered by control unit */
		iter_prev = NULL;
		SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
			if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
				break;
			}
			iter_prev = iter_entry;
		}

		if (iter_prev == NULL) {
			SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
		} else {
			SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
		}
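
		/*
		 * Keeping cfi_ordered_entries sorted by ascending NECP control
		 * unit gives a deterministic delivery order when more than one
		 * content filter is attached to the same flow.
		 */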
		verify_content_filter(cfc);

		entry->cfe_flags |= CFEF_CFIL_ATTACHED;
	}

	cfil_rw_unlock_exclusive(&cfil_lck_rw);
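
/*
 * cfil_info_free()
 *
 * Detaches the cfil_info from every content filter it is registered with,
 * removes it from the global attached list, turns off stats reporting,
 * drains the pending/control/inject queues, and returns the structure to
 * its zone.
 */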
cfil_info_free(struct cfil_info *cfil_info)
{
	uint64_t in_drain = 0;
	uint64_t out_drained = 0;

	if (cfil_info == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
	    content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
	    kcunit++) {
		struct cfil_entry *entry;
		struct content_filter *cfc;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Don't be silly and try to detach twice */
		if (entry->cfe_filter == NULL) {
			continue;
		}

		cfc = content_filters[kcunit - 1];

		VERIFY(cfc == entry->cfe_filter);

		entry->cfe_filter = NULL;
		entry->cfe_necp_control_unit = 0;
		TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count--;

		verify_content_filter(cfc);
	}
	if (cfil_info->cfi_hash_entry != NULL) {
		cfil_sock_udp_attached_count--;
	}
	cfil_sock_attached_count--;
	TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);

	// Turn off stats reporting for cfil_info.
	cfil_info_stats_toggle(cfil_info, NULL, 0);

	out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
	in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	if (out_drained) {
		OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
	}
	if (in_drain) {
		OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
	}

	zfree(cfil_info_zone, cfil_info);
}
/*
 * Received a verdict from userspace for a socket.
 * Perform any delayed operation if needed.
 */
cfil_sock_received_verdict(struct socket *so)
{
	if (so == NULL || so->so_cfil == NULL) {
		return;
	}

	so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;

	/*
	 * If socket has already been connected, trigger
	 * soisconnected now.
	 */
	if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
		so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
		soisconnected(so);
	}
}

/*
 * Entry point from Sockets layer
 * The socket is locked.
 *
 * Checks if a connected socket is subject to filter and
 * pending the initial verdict.
 */
cfil_sock_connected_pending_verdict(struct socket *so)
{
	if (so == NULL || so->so_cfil == NULL) {
		return false;
	}

	if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
		return false;
	} else {
		/*
		 * Remember that this protocol is already connected, so
		 * we will trigger soisconnected() upon receipt of
		 * initial verdict later.
		 */
		so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
		return true;
	}
}

cfil_filter_present(void)
{
	return cfil_active_count > 0;
}
/*
 * Entry point from Sockets layer
 * The socket is locked.
 */
cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
{
	errno_t error = 0;
	uint32_t filter_control_unit;

	socket_lock_assert_owned(so);

	if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
		/*
		 * This socket has already been evaluated (and ultimately skipped) by
		 * flow divert, so it has also already been through content filter if there
		 * is any.
		 */
		goto done;
	}

	/* Limit ourselves to TCP that are not MPTCP subflows */
	if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
		goto done;
	}

	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
	if (filter_control_unit == 0) {
		goto done;
	}

	if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
		goto done;
	}
	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
		goto done;
	}
	if (cfil_active_count == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
		goto done;
	}
	if (so->so_cfil != NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
		CFIL_LOG(LOG_ERR, "already attached");
		goto done;
	}
	cfil_info_alloc(so, NULL);
	if (so->so_cfil == NULL) {
		error = ENOMEM;
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
		goto done;
	}
	so->so_cfil->cfi_dir = dir;
	so->so_cfil->cfi_filter_control_unit = filter_control_unit;

	if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
		CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
		    filter_control_unit);
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
		goto done;
	}
	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
	    (uint64_t)VM_KERNEL_ADDRPERM(so),
	    filter_control_unit, so->so_cfil->cfi_sock_id);

	so->so_flags |= SOF_CONTENT_FILTER;
	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

	/* Hold a reference on the socket */
	so->so_usecount++;

	/*
	 * Save passed addresses for attach event msg (in case resend
	 * is needed).
	 */
	if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
		memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
	}
	if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
		memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
	}

	error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
	/* We can recover from flow control or out of memory errors */
	if (error == ENOBUFS || error == ENOMEM) {
		error = 0;
	} else if (error != 0) {
		goto done;
	}

	CFIL_INFO_VERIFY(so->so_cfil);
done:
	return error;
}
/*
 * Entry point from Sockets layer
 * The socket is locked.
 */
cfil_sock_detach(struct socket *so)
{
	if (IS_IP_DGRAM(so)) {
		cfil_db_free(so);
		return 0;
	}

	if (so->so_cfil) {
		if (so->so_flags & SOF_CONTENT_FILTER) {
			so->so_flags &= ~SOF_CONTENT_FILTER;
			VERIFY(so->so_usecount > 0);
			so->so_usecount--;
		}
		CFIL_INFO_FREE(so->so_cfil);
		so->so_cfil = NULL;
		OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
	}
	return 0;
}
/*
 * Fill in the address info of an event message from either
 * the socket or passed in address info.
 */
cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
    union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
    boolean_t isIPv4, boolean_t outgoing)
{
	if (isIPv4) {
		struct in_addr laddr = {0}, faddr = {0};
		u_int16_t lport = 0, fport = 0;

		cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);

		if (outgoing) {
			fill_ip_sockaddr_4_6(sin_src, laddr, lport);
			fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
		} else {
			fill_ip_sockaddr_4_6(sin_src, faddr, fport);
			fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
		}
	} else {
		struct in6_addr *laddr = NULL, *faddr = NULL;
		u_int16_t lport = 0, fport = 0;

		cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
		if (outgoing) {
			fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
			fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
		} else {
			fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
			fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
		}
	}
}
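
/*
 * Note on cfil_fill_event_msg_addresses() above: events always describe the
 * flow from the local endpoint's point of view -- for outgoing traffic
 * sin_src is the local address and sin_dst the remote one; for incoming
 * traffic the two are swapped.
 */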
cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
    struct cfil_info *cfil_info,
    struct cfil_msg_sock_attached *msg)
{
	struct cfil_crypto_data data = {};

	if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
		return;
	}

	data.sock_id = msg->cfs_msghdr.cfm_sock_id;
	data.direction = msg->cfs_conn_dir;

	data.pid = msg->cfs_pid;
	data.effective_pid = msg->cfs_e_pid;
	uuid_copy(data.uuid, msg->cfs_uuid);
	uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
	data.socketProtocol = msg->cfs_sock_protocol;
	if (data.direction == CFS_CONNECTION_DIR_OUT) {
		data.remote.sin6 = msg->cfs_dst.sin6;
		data.local.sin6 = msg->cfs_src.sin6;
	} else {
		data.remote.sin6 = msg->cfs_src.sin6;
		data.local.sin6 = msg->cfs_dst.sin6;
	}

	// At attach, if local address is already present, no need to re-sign subsequent data messages.
	if (!NULLADDRESS(data.local)) {
		cfil_info->cfi_isSignatureLatest = true;
	}

	msg->cfs_signature_length = sizeof(cfil_crypto_signature);
	if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
		msg->cfs_signature_length = 0;
		CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
		    msg->cfs_msghdr.cfm_sock_id);
	}
}
cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
    struct socket *so, struct cfil_info *cfil_info,
    struct cfil_msg_data_event *msg)
{
	struct cfil_crypto_data data = {};

	if (crypto_state == NULL || msg == NULL ||
	    so == NULL || cfil_info == NULL) {
		return;
	}

	data.sock_id = cfil_info->cfi_sock_id;
	data.direction = cfil_info->cfi_dir;
	data.pid = so->last_pid;
	memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
	if (so->so_flags & SOF_DELEGATED) {
		data.effective_pid = so->e_pid;
		memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
	} else {
		data.effective_pid = so->last_pid;
		memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
	}
	data.socketProtocol = so->so_proto->pr_protocol;

	if (data.direction == CFS_CONNECTION_DIR_OUT) {
		data.remote.sin6 = msg->cfc_dst.sin6;
		data.local.sin6 = msg->cfc_src.sin6;
	} else {
		data.remote.sin6 = msg->cfc_src.sin6;
		data.local.sin6 = msg->cfc_dst.sin6;
	}

	// At first data, local address may show up for the first time, update address cache and
	// no need to re-sign subsequent data messages anymore.
	if (!NULLADDRESS(data.local)) {
		memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
		cfil_info->cfi_isSignatureLatest = true;
	}

	msg->cfd_signature_length = sizeof(cfil_crypto_signature);
	if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
		msg->cfd_signature_length = 0;
		CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
		    msg->cfd_msghdr.cfm_sock_id);
	}
}
cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
    struct socket *so, struct cfil_info *cfil_info,
    struct cfil_msg_sock_closed *msg)
{
	struct cfil_crypto_data data = {};
	struct cfil_hash_entry hash_entry = {};
	struct cfil_hash_entry *hash_entry_ptr = NULL;
	struct inpcb *inp = (struct inpcb *)so->so_pcb;

	if (crypto_state == NULL || msg == NULL ||
	    so == NULL || inp == NULL || cfil_info == NULL) {
		return;
	}

	data.sock_id = cfil_info->cfi_sock_id;
	data.direction = cfil_info->cfi_dir;

	data.pid = so->last_pid;
	memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
	if (so->so_flags & SOF_DELEGATED) {
		data.effective_pid = so->e_pid;
		memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
	} else {
		data.effective_pid = so->last_pid;
		memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
	}
	data.socketProtocol = so->so_proto->pr_protocol;

	/*
	 * Fill in address info:
	 * For UDP, use the cfil_info hash entry directly.
	 * For TCP, compose an hash entry with the saved addresses.
	 */
	if (cfil_info->cfi_hash_entry != NULL) {
		hash_entry_ptr = cfil_info->cfi_hash_entry;
	} else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
	    cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
		fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
		fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
		hash_entry_ptr = &hash_entry;
	}
	if (hash_entry_ptr != NULL) {
		boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
		union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
		union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
		cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
	}

	data.byte_count_in = cfil_info->cfi_byte_inbound_count;
	data.byte_count_out = cfil_info->cfi_byte_outbound_count;

	msg->cfc_signature_length = sizeof(cfil_crypto_signature);
	if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
		msg->cfc_signature_length = 0;
		CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
		    msg->cfc_msghdr.cfm_sock_id);
	}
}
cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
    uint32_t kcunit, int conn_dir)
{
	errno_t error = 0;
	struct cfil_entry *entry = NULL;
	struct cfil_msg_sock_attached msg_attached;
	struct content_filter *cfc = NULL;
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct cfil_hash_entry *hash_entry_ptr = NULL;
	struct cfil_hash_entry hash_entry;

	memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
	proc_t p = PROC_NULL;
	task_t t = TASK_NULL;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
		goto done;
	}

	if (kcunit == 0) {
		entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
	} else {
		entry = &cfil_info->cfi_entries[kcunit - 1];
	}

	if (entry == NULL) {
		goto done;
	}

	cfc = entry->cfe_filter;
	if (cfc == NULL) {
		goto done;
	}

	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
		goto done;
	}

	if (kcunit == 0) {
		kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
	}

	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
	msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
	msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
	msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
	msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
	msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;

	msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
	msg_attached.cfs_sock_type = so->so_proto->pr_type;
	msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
	msg_attached.cfs_pid = so->last_pid;
	memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
	if (so->so_flags & SOF_DELEGATED) {
		msg_attached.cfs_e_pid = so->e_pid;
		memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
	} else {
		msg_attached.cfs_e_pid = so->last_pid;
		memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
	}

	/*
	 * Fill in address info:
	 * For UDP, use the cfil_info hash entry directly.
	 * For TCP, compose an hash entry with the saved addresses.
	 */
	if (cfil_info->cfi_hash_entry != NULL) {
		hash_entry_ptr = cfil_info->cfi_hash_entry;
	} else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
	    cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
		fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
		fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
		hash_entry_ptr = &hash_entry;
	}
	if (hash_entry_ptr != NULL) {
		cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
		    &msg_attached.cfs_src, &msg_attached.cfs_dst,
		    !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
	}
	msg_attached.cfs_conn_dir = conn_dir;

	if (msg_attached.cfs_e_pid != 0) {
		p = proc_find(msg_attached.cfs_e_pid);
		if (p != PROC_NULL) {
			t = proc_task(p);
			if (t != TASK_NULL) {
				audit_token_t audit_token;
				mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
				if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
					memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
				} else {
					CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
					    entry->cfe_cfil_info->cfi_sock_id);
				}
			}
			proc_rele(p);
		}
	}

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
	}

	cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);

	CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
	    entry->cfe_cfil_info->cfi_sock_id);

	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
	    entry->cfe_filter->cf_kcunit,
	    &msg_attached,
	    sizeof(struct cfil_msg_sock_attached),
	    CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);
	cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
	cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;

	entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
	OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
done:

	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
		}

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return error;
}
cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
{
	errno_t error = 0;
	struct mbuf *msg = NULL;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct cfil_msg_hdr msg_disconnected;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (outgoing) {
		entrybuf = &entry->cfe_snd;
	} else {
		entrybuf = &entry->cfe_rcv;
	}

	cfc = entry->cfe_filter;
	if (cfc == NULL) {
		goto done;
	}

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	/*
	 * Send the disconnection event once
	 */
	if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
	    (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
		CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * We're not disconnected as long as some data is waiting
	 * to be delivered to the filter
	 */
	if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
		CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EBUSY;
		goto done;
	}
	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DISCONNECT UP");
	}

	cfil_info_log(LOG_ERR, cfil_info, outgoing ?
	    "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP" :
	    "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");

	bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
	msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
	msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
	msg_disconnected.cfm_type = CFM_TYPE_EVENT;
	msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
	    CFM_OP_DISCONNECT_IN;
	msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
	    entry->cfe_filter->cf_kcunit,
	    &msg_disconnected,
	    sizeof(struct cfil_msg_hdr),
	    CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);
	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);

	/* Remember we have sent the disconnection message */
	if (outgoing) {
		entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
		OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
	} else {
		entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
		OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
	}
done:
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(
			&cfil_stats.cfs_disconnect_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0) {
			OSIncrementAtomic(
				&cfil_stats.cfs_disconnect_event_fail);
		}

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return error;
}
cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
{
	struct cfil_entry *entry;
	struct cfil_msg_sock_closed msg_closed;
	errno_t error = 0;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &cfil_info->cfi_entries[kcunit - 1];
	cfc = entry->cfe_filter;
	if (cfc == NULL) {
		goto done;
	}

	CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}
	/*
	 * Send a single closed message per filter
	 */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
		goto done;
	}
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
		goto done;
	}

	microuptime(&entry->cfe_last_event);
	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);

	bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
	msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
	msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
	msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
	msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
	msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
	msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
	memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
	memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
	msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
	msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
	msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
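
	/*
	 * Besides the header, the closed event reports the flow's first-event
	 * timestamp, the per-operation time log (up to CFI_MAX_TIME_LOG_ENTRY
	 * entries) and the inbound/outbound byte counts gathered for the flow.
	 */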
	cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
	}

	CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
	/*
	 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
	 *      msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY;       // just in case
	 * }
	 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
	 *      CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
	 * }
	 */

	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
	    entry->cfe_filter->cf_kcunit,
	    &msg_closed,
	    sizeof(struct cfil_msg_sock_closed),
	    CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
		    error);
		goto done;
	}

	entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
	OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
done:
	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
		}

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return error;
}
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
    struct in6_addr *ip6, u_int16_t port)
{
	if (sin46 == NULL) {
		return;
	}

	struct sockaddr_in6 *sin6 = &sin46->sin6;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_len = sizeof(*sin6);
	sin6->sin6_port = port;
	sin6->sin6_addr = *ip6;
	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
		sin6->sin6_addr.s6_addr16[1] = 0;
	}
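	/*
	 * The embedded scope (stored by the stack in the second 16-bit word
	 * of a scoped address) is lifted into sin6_scope_id and cleared
	 * before the address is reported to user space.
	 */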
}

fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
    struct in_addr ip, u_int16_t port)
{
	if (sin46 == NULL) {
		return;
	}

	struct sockaddr_in *sin = &sin46->sin;

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_port = port;
	sin->sin_addr.s_addr = ip.s_addr;
}
cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
    struct in6_addr **laddr, struct in6_addr **faddr,
    u_int16_t *lport, u_int16_t *fport)
{
	if (entry != NULL) {
		*laddr = &entry->cfentry_laddr.addr6;
		*faddr = &entry->cfentry_faddr.addr6;
		*lport = entry->cfentry_lport;
		*fport = entry->cfentry_fport;
	} else {
		*laddr = &inp->in6p_laddr;
		*faddr = &inp->in6p_faddr;
		*lport = inp->inp_lport;
		*fport = inp->inp_fport;
	}
}

cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
    struct in_addr *laddr, struct in_addr *faddr,
    u_int16_t *lport, u_int16_t *fport)
{
	if (entry != NULL) {
		*laddr = entry->cfentry_laddr.addr46.ia46_addr4;
		*faddr = entry->cfentry_faddr.addr46.ia46_addr4;
		*lport = entry->cfentry_lport;
		*fport = entry->cfentry_fport;
	} else {
		*laddr = inp->inp_laddr;
		*faddr = inp->inp_faddr;
		*lport = inp->inp_lport;
		*fport = inp->inp_fport;
	}
}
cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
{
	errno_t error = 0;
	struct mbuf *copy = NULL;
	struct mbuf *msg = NULL;
	unsigned int one = 1;
	struct cfil_msg_data_event *data_req;
	size_t hdrsize;
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct content_filter *cfc;
	struct timeval tv;
	int inp_flags = 0;

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (outgoing) {
		entrybuf = &entry->cfe_snd;
	} else {
		entrybuf = &entry->cfe_rcv;
	}

	cfc = entry->cfe_filter;
	if (cfc == NULL) {
		goto done;
	}

	data = cfil_data_start(data);
	if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
		CFIL_LOG(LOG_ERR, "NOT PKTHDR");
		goto done;
	}

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	/* Would be wasteful to try */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	/* Make a copy of the data to pass to kernel control socket */
	copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
	    M_COPYM_NOOP_HDR);
	if (copy == NULL) {
		CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
		error = ENOMEM;
		goto done;
	}

	/* We need an mbuf packet for the message header */
	hdrsize = sizeof(struct cfil_msg_data_event);
	error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
		m_freem(copy);
		/*
		 * ENOBUFS is to indicate flow control
		 */
		error = ENOMEM;
		goto done;
	}
	mbuf_setlen(msg, hdrsize);
	mbuf_pkthdr_setlen(msg, hdrsize + copylen);
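
	/*
	 * The event is framed as a cfil_msg_data_event header in its own mbuf
	 * packet, with the copied payload chained behind it; cfm_len below
	 * therefore covers the header plus copylen bytes of data.
	 */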
	data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
	bzero(data_req, hdrsize);
	data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
	data_req->cfd_msghdr.cfm_version = 1;
	data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
	data_req->cfd_msghdr.cfm_op =
	    outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
	data_req->cfd_msghdr.cfm_sock_id =
	    entry->cfe_cfil_info->cfi_sock_id;
	data_req->cfd_start_offset = entrybuf->cfe_peeked;
	data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;

	data_req->cfd_flags = 0;
	if (OPTIONAL_IP_HEADER(so)) {
		/*
		 * For non-UDP/TCP traffic, indicate to filters if optional
		 * IP header is present:
		 *      outgoing - indicate according to INP_HDRINCL flag
		 *      incoming - For IPv4 only, stripping of IP header is
		 *                 optional. But for CFIL, we delay stripping
		 *                 at rip_input. So CFIL always expects IP
		 *                 frames. IP header will be stripped according
		 *                 to INP_STRIPHDR flag later at reinjection.
		 */
		if ((!outgoing && !IS_INP_V6(inp)) ||
		    (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
			data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
		}
	}

	/*
	 * Copy address/port into event msg.
	 * For non connected sockets need to copy addresses from passed
	 * parameters.
	 */
	cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
	    &data_req->cfc_src, &data_req->cfc_dst,
	    !IS_INP_V6(inp), outgoing);

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
	}

	if (cfil_info->cfi_isSignatureLatest == false) {
		cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
	}

	microuptime(&tv);
	CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);

	/* Pass the message to the content filter */
	error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
	    entry->cfe_filter->cf_kcunit,
	    msg, CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
		mbuf_freem(msg);
		goto done;
	}
	entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
	OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);

	CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);

	if (cfil_info->cfi_debug) {
		CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
		    data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
	}

done:
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(
			&cfil_stats.cfs_data_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
		}

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return error;
}
/*
 * Process the queue of data waiting to be delivered to content filter
 */
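/*
 * Flow of a chunk of data through this function: it first sits on the
 * entry's cfe_ctl_q; spans the filter may still look at are delivered as
 * data events (peeking), and anything below the filter's pass offset is
 * moved to the cfe_pending_q and then handled by
 * cfil_service_pending_queue(). Servicing stops early on a partial pass or
 * when an event cannot be enqueued to the filter.
 */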
3742 cfil_data_service_ctl_q(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3745 struct mbuf
*data
, *tmp
= NULL
;
3746 unsigned int datalen
= 0, copylen
= 0, copyoffset
= 0;
3747 struct cfil_entry
*entry
;
3748 struct cfe_buf
*entrybuf
;
3749 uint64_t currentoffset
= 0;
3751 if (cfil_info
== NULL
) {
3755 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3756 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3758 socket_lock_assert_owned(so
);
3760 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3762 entrybuf
= &entry
->cfe_snd
;
3764 entrybuf
= &entry
->cfe_rcv
;
3767 /* Send attached message if not yet done */
3768 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
3769 error
= cfil_dispatch_attach_event(so
, cfil_info
, CFI_ENTRY_KCUNIT(cfil_info
, entry
),
3770 outgoing
? CFS_CONNECTION_DIR_OUT
: CFS_CONNECTION_DIR_IN
);
3772 /* We can recover from flow control */
3773 if (error
== ENOBUFS
|| error
== ENOMEM
) {
3778 } else if ((entry
->cfe_flags
& CFEF_DATA_START
) == 0) {
3779 OSIncrementAtomic(&cfil_stats
.cfs_ctl_q_not_started
);
3784 CFIL_LOG(LOG_DEBUG
, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3785 entrybuf
->cfe_pass_offset
,
3786 entrybuf
->cfe_peeked
,
3787 entrybuf
->cfe_peek_offset
);
3790 /* Move all data that can pass */
3791 while ((data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
)) != NULL
&&
3792 entrybuf
->cfe_ctl_q
.q_start
< entrybuf
->cfe_pass_offset
) {
3793 datalen
= cfil_data_length(data
, NULL
, NULL
);
3796 if (entrybuf
->cfe_ctl_q
.q_start
+ datalen
<=
3797 entrybuf
->cfe_pass_offset
) {
3799 * The first mbuf can fully pass
3804 * The first mbuf can partially pass
3806 copylen
= (unsigned int)(entrybuf
->cfe_pass_offset
- entrybuf
->cfe_ctl_q
.q_start
);
3808 VERIFY(copylen
<= datalen
);
3812 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3813 "datalen %u copylen %u",
3814 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3815 entrybuf
->cfe_ctl_q
.q_start
,
3816 entrybuf
->cfe_peeked
,
3817 entrybuf
->cfe_pass_offset
,
3818 entrybuf
->cfe_peek_offset
,
3823 * Data that passes has been peeked at explicitly or
3826 if (entrybuf
->cfe_ctl_q
.q_start
+ copylen
>
3827 entrybuf
->cfe_peeked
) {
3828 entrybuf
->cfe_peeked
=
3829 entrybuf
->cfe_ctl_q
.q_start
+ copylen
;
3832 * Stop on partial pass
3834 if (copylen
< datalen
) {
3838 /* All good, move full data from ctl queue to pending queue */
3839 cfil_queue_remove(&entrybuf
->cfe_ctl_q
, data
, datalen
);
3841 cfil_queue_enqueue(&entrybuf
->cfe_pending_q
, data
, datalen
);
3843 OSAddAtomic64(datalen
,
3844 &cfil_stats
.cfs_pending_q_out_enqueued
);
3846 OSAddAtomic64(datalen
,
3847 &cfil_stats
.cfs_pending_q_in_enqueued
);
3850 CFIL_INFO_VERIFY(cfil_info
);
3853 "%llx first %llu peeked %llu pass %llu peek %llu"
3854 "datalen %u copylen %u",
3855 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3856 entrybuf
->cfe_ctl_q
.q_start
,
3857 entrybuf
->cfe_peeked
,
3858 entrybuf
->cfe_pass_offset
,
3859 entrybuf
->cfe_peek_offset
,
3864 /* Now deal with remaining data the filter wants to peek at */
3865 for (data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
),
3866 currentoffset
= entrybuf
->cfe_ctl_q
.q_start
;
3867 data
!= NULL
&& currentoffset
< entrybuf
->cfe_peek_offset
;
3868 data
= cfil_queue_next(&entrybuf
->cfe_ctl_q
, data
),
3869 currentoffset
+= datalen
) {
3870 datalen
= cfil_data_length(data
, NULL
, NULL
);
3873 /* We've already peeked at this mbuf */
3874 if (currentoffset
+ datalen
<= entrybuf
->cfe_peeked
) {
3878 * The data in the first mbuf may have been
3879 * partially peeked at
3881 copyoffset
= (unsigned int)(entrybuf
->cfe_peeked
- currentoffset
);
3882 VERIFY(copyoffset
< datalen
);
3883 copylen
= datalen
- copyoffset
;
3884 VERIFY(copylen
<= datalen
);
3886 * Do not copy more than needed
3888 if (currentoffset
+ copyoffset
+ copylen
>
3889 entrybuf
->cfe_peek_offset
) {
3890 copylen
= (unsigned int)(entrybuf
->cfe_peek_offset
-
3891 (currentoffset
+ copyoffset
));
3896 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3897 "datalen %u copylen %u copyoffset %u",
3898 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3900 entrybuf
->cfe_peeked
,
3901 entrybuf
->cfe_pass_offset
,
3902 entrybuf
->cfe_peek_offset
,
3903 datalen
, copylen
, copyoffset
);
3907 * Stop if there is nothing more to peek at
3913 * Let the filter get a peek at this span of data
3915 error
= cfil_dispatch_data_event(so
, cfil_info
, kcunit
,
3916 outgoing
, data
, copyoffset
, copylen
);
3918 /* On error, leave data in ctl_q */
3921 entrybuf
->cfe_peeked
+= copylen
;
3923 OSAddAtomic64(copylen
,
3924 &cfil_stats
.cfs_ctl_q_out_peeked
);
3926 OSAddAtomic64(copylen
,
3927 &cfil_stats
.cfs_ctl_q_in_peeked
);
3930 /* Stop when data could not be fully peeked at */
3931 if (copylen
+ copyoffset
< datalen
) {
3935 CFIL_INFO_VERIFY(cfil_info
);
3938 "%llx first %llu peeked %llu pass %llu peek %llu"
3939 "datalen %u copylen %u copyoffset %u",
3940 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3942 entrybuf
->cfe_peeked
,
3943 entrybuf
->cfe_pass_offset
,
3944 entrybuf
->cfe_peek_offset
,
3945 datalen
, copylen
, copyoffset
);
3949 * Process data that has passed the filter
3951 error
= cfil_service_pending_queue(so
, cfil_info
, kcunit
, outgoing
);
3953 CFIL_LOG(LOG_ERR
, "cfil_service_pending_queue() error %d",
3959 * Dispatch disconnect events that could not be sent
3961 if (cfil_info
== NULL
) {
3963 } else if (outgoing
) {
3964 if ((cfil_info
->cfi_flags
& CFIF_SHUT_WR
) &&
3965 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
)) {
3966 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 1);
3969 if ((cfil_info
->cfi_flags
& CFIF_SHUT_RD
) &&
3970 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
)) {
3971 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 0);
3977 "first %llu peeked %llu pass %llu peek %llu",
3978 entrybuf
->cfe_ctl_q
.q_start
,
3979 entrybuf
->cfe_peeked
,
3980 entrybuf
->cfe_pass_offset
,
3981 entrybuf
->cfe_peek_offset
);
3983 CFIL_INFO_VERIFY(cfil_info
);
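/*
 * Illustrative walk-through of the control queue offsets above (hypothetical
 * numbers, not taken from a real trace): assume cfe_ctl_q.q_start = 0,
 * cfe_pass_offset = 500 and cfe_peek_offset = 1500, with two queued mbufs of
 * 400 and 800 bytes.  The first loop moves the 400-byte mbuf to the pending
 * queue (0 + 400 <= 500) and stops at the 800-byte mbuf, which can only
 * partially pass (copylen = 500 - 400 = 100 < 800).  The second loop then
 * peeks at the remainder: copyoffset = 500 - 400 = 100 and copylen is clamped
 * so that no byte beyond offset 1500 is dispatched to the filter agent.
 */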
/*
 * cfil_data_filter()
 *
 * Process data for a content filter installed on a socket
 */
static int
cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    struct mbuf *data, uint32_t datalen)
{
    errno_t error = 0;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;

    CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

    socket_lock_assert_owned(so);

    entry = &cfil_info->cfi_entries[kcunit - 1];
    if (outgoing) {
        entrybuf = &entry->cfe_snd;
    } else {
        entrybuf = &entry->cfe_rcv;
    }

    /* Are we attached to the filter? */
    if (entry->cfe_filter == NULL) {
        error = 0;
        goto done;
    }

    /* Dispatch to filters */
    cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
    if (outgoing) {
        OSAddAtomic64(datalen,
            &cfil_stats.cfs_ctl_q_out_enqueued);
    } else {
        OSAddAtomic64(datalen,
            &cfil_stats.cfs_ctl_q_in_enqueued);
    }

    error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
            error);
    }
    /*
     * We have to return EJUSTRETURN in all cases to avoid double free
     * by the socket layer
     */
    error = EJUSTRETURN;
done:
    CFIL_INFO_VERIFY(cfil_info);

    CFIL_LOG(LOG_INFO, "return %d", error);
    return error;
}
/*
 * cfil_service_inject_queue() re-injects data that passed the
 * content filters back into the socket
 */
static int
cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
{
    mbuf_t data;
    unsigned int datalen;
    int mbcnt = 0;
    int mbnum = 0;
    errno_t error = 0;
    struct cfi_buf *cfi_buf;
    struct cfil_queue *inject_q;
    int need_rwakeup = 0;
    int count = 0;
    struct inpcb *inp = NULL;
    struct ip *ip = NULL;
    unsigned int hlen;

    if (cfil_info == NULL) {
        return 0;
    }

    socket_lock_assert_owned(so);

    if (outgoing) {
        cfi_buf = &cfil_info->cfi_snd;
        cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
    } else {
        cfi_buf = &cfil_info->cfi_rcv;
        cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
    }
    inject_q = &cfi_buf->cfi_inject_q;

    if (cfil_queue_empty(inject_q)) {
        return 0;
    }

#if DATA_DEBUG | VERDICT_DEBUG
    CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
#endif

    while ((data = cfil_queue_first(inject_q)) != NULL) {
        datalen = cfil_data_length(data, &mbcnt, &mbnum);

        CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
            (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);

        if (cfil_info->cfi_debug) {
            CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
                (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
        }

        /* Remove data from queue and adjust stats */
        cfil_queue_remove(inject_q, data, datalen);
        cfi_buf->cfi_pending_first += datalen;
        cfi_buf->cfi_pending_mbcnt -= mbcnt;
        cfi_buf->cfi_pending_mbnum -= mbnum;
        cfil_info_buf_verify(cfi_buf);

        if (outgoing) {
            error = sosend_reinject(so, NULL, data, NULL, 0);
            if (error != 0) {
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
                CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
                break;
            }
            // At least one injection succeeded, need to wake up pending threads.
            need_rwakeup = 1;
        } else {
            data->m_flags |= M_SKIPCFIL;

            /*
             * NOTE: We currently only support TCP, UDP, ICMP,
             * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
             * need to call the appropriate sbappendxxx()
             * or fix sock_inject_data_in()
             */
            if (IS_IP_DGRAM(so)) {
                if (OPTIONAL_IP_HEADER(so)) {
                    inp = sotoinpcb(so);
                    if (inp && (inp->inp_flags & INP_STRIPHDR)) {
                        mbuf_t data_start = cfil_data_start(data);
                        if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
                            ip = mtod(data_start, struct ip *);
                            hlen = IP_VHL_HL(ip->ip_vhl) << 2;
                            data_start->m_len -= hlen;
                            data_start->m_pkthdr.len -= hlen;
                            data_start->m_data += hlen;
                        }
                    }
                }
                if (sbappendchain(&so->so_rcv, data, 0)) {
                    need_rwakeup = 1;
                }
            } else {
                if (sbappendstream(&so->so_rcv, data)) {
                    need_rwakeup = 1;
                }
            }
        }

        if (outgoing) {
            OSAddAtomic64(datalen,
                &cfil_stats.cfs_inject_q_out_passed);
        } else {
            OSAddAtomic64(datalen,
                &cfil_stats.cfs_inject_q_in_passed);
        }
        count++;
    }

#if DATA_DEBUG | VERDICT_DEBUG
    CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), count);
#endif
    if (cfil_info->cfi_debug) {
        CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
            (uint64_t)VM_KERNEL_ADDRPERM(so), count);
    }

    /* A single wakeup for several packets is more efficient */
    if (need_rwakeup) {
        if (outgoing == TRUE) {
            sowwakeup(so);
        } else {
            sorwakeup(so);
        }
    }

    if (error != 0 && cfil_info) {
        if (error == ENOBUFS) {
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
        }
        if (error == ENOMEM) {
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
        }
        if (outgoing) {
            cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
        } else {
            cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
        }
    }

    /*
     * Notify
     */
    if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
        cfil_sock_notify_shutdown(so, SHUT_WR);
        if (cfil_sock_data_pending(&so->so_snd) == 0) {
            soshutdownlock_final(so, SHUT_WR);
        }
    }
    if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
        if (cfil_filters_attached(so) == 0) {
            CFIL_LOG(LOG_INFO, "so %llx waking",
                (uint64_t)VM_KERNEL_ADDRPERM(so));
            wakeup((caddr_t)cfil_info);
        }
    }

    CFIL_INFO_VERIFY(cfil_info);

    return error;
}
static int
cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
{
    errno_t error = 0;
    uint64_t passlen, curlen;
    mbuf_t data;
    unsigned int datalen;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    struct cfil_queue *pending_q;

    CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

    socket_lock_assert_owned(so);

    entry = &cfil_info->cfi_entries[kcunit - 1];
    if (outgoing) {
        entrybuf = &entry->cfe_snd;
    } else {
        entrybuf = &entry->cfe_rcv;
    }

    pending_q = &entrybuf->cfe_pending_q;

    passlen = entrybuf->cfe_pass_offset - pending_q->q_start;

    /*
     * Locate the chunks of data that we can pass to the next filter
     * A data chunk must be on mbuf boundaries
     */
    curlen = 0;
    while ((data = cfil_queue_first(pending_q)) != NULL) {
        struct cfil_entry *iter_entry;
        datalen = cfil_data_length(data, NULL, NULL);

        CFIL_LOG(LOG_DEBUG,
            "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
            (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
            passlen, curlen);

        if (curlen + datalen > passlen) {
            break;
        }

        cfil_queue_remove(pending_q, data, datalen);

        curlen += datalen;

        for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
            iter_entry != NULL;
            iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
            error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
                data, datalen);
            /* 0 means passed so we can continue */
            if (error != 0) {
                break;
            }
        }
        /* When data has passed all filters, re-inject */
        if (error == 0) {
            if (outgoing) {
                cfil_queue_enqueue(
                    &cfil_info->cfi_snd.cfi_inject_q,
                    data, datalen);
                OSAddAtomic64(datalen,
                    &cfil_stats.cfs_inject_q_out_enqueued);
            } else {
                cfil_queue_enqueue(
                    &cfil_info->cfi_rcv.cfi_inject_q,
                    data, datalen);
                OSAddAtomic64(datalen,
                    &cfil_stats.cfs_inject_q_in_enqueued);
            }
        }
    }

    CFIL_INFO_VERIFY(cfil_info);

    return error;
}
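/*
 * Note on filter ordering (sketch, assuming the usual multi-filter setup): the
 * pending queue loop above only forwards a chunk to the entries that follow
 * "entry" on cfi_ordered_entries (SLIST_NEXT(entry, cfe_order_link)), so a
 * verdict from the first filter hands the data to the next one before it is
 * finally placed on cfi_inject_q for re-injection.  A chunk reaches the inject
 * queue only once every remaining filter has returned 0 for it.
 */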
static int
cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    uint64_t pass_offset, uint64_t peek_offset)
{
    errno_t error = 0;
    struct cfil_entry *entry = NULL;
    struct cfe_buf *entrybuf;
    int updated = 0;

    CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);

    socket_lock_assert_owned(so);

    if (cfil_info == NULL) {
        CFIL_LOG(LOG_ERR, "so %llx cfil detached",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = 0;
        goto done;
    } else if (cfil_info->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = EPIPE;
        goto done;
    }

    entry = &cfil_info->cfi_entries[kcunit - 1];
    if (outgoing) {
        entrybuf = &entry->cfe_snd;
    } else {
        entrybuf = &entry->cfe_rcv;
    }

    /* Record updated offsets for this content filter */
    if (pass_offset > entrybuf->cfe_pass_offset) {
        entrybuf->cfe_pass_offset = pass_offset;

        if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
            entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
        }
        updated = 1;
    } else {
        CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
            pass_offset, entrybuf->cfe_pass_offset);
    }
    /* Filter does not want or need to see data that's allowed to pass */
    if (peek_offset > entrybuf->cfe_pass_offset &&
        peek_offset > entrybuf->cfe_peek_offset) {
        entrybuf->cfe_peek_offset = peek_offset;
        updated = 1;
    }
    if (updated == 0) {
        goto done;
    }

    /* Move data held in control queue to pending queue if needed */
    error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
            error);
        goto done;
    }
    error = EJUSTRETURN;

done:
    /*
     * The filter is effectively detached when pass all from both sides
     * or when the socket is closed and no more data is waiting
     * to be delivered to the filter
     */
    if (entry != NULL &&
        ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
        entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
        ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
        cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
        cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
        entry->cfe_flags |= CFEF_CFIL_DETACHED;

        cfil_info_log(LOG_ERR, cfil_info, outgoing ?
            "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
            "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");

        CFIL_LOG(LOG_INFO, "so %llx detached %u",
            (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
        if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
            cfil_filters_attached(so) == 0) {
            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
            CFIL_LOG(LOG_INFO, "so %llx waking",
                (uint64_t)VM_KERNEL_ADDRPERM(so));
            wakeup((caddr_t)cfil_info);
        }
    }
    CFIL_INFO_VERIFY(cfil_info);
    CFIL_LOG(LOG_INFO, "return %d", error);
    return error;
}
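/*
 * Hypothetical example of the detach condition above: a user space agent that
 * issues a "pass everything" verdict supplies pass_offset = peek_offset =
 * CFM_MAX_OFFSET for both directions.  Once cfe_snd.cfe_pass_offset and
 * cfe_rcv.cfe_pass_offset both equal CFM_MAX_OFFSET the entry is marked
 * CFEF_CFIL_DETACHED and, if the socket is in CFIF_CLOSE_WAIT, the thread
 * blocked in cfil_sock_close_wait() is woken up.
 */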
/*
 * Update pass offset for socket when no data is pending
 */
static int
cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
{
    struct cfi_buf *cfi_buf;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    uint32_t kcunit;
    uint64_t pass_offset = 0;
    boolean_t first = true;

    if (cfil_info == NULL) {
        return 0;
    }

    CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

    socket_lock_assert_owned(so);

    if (outgoing) {
        cfi_buf = &cfil_info->cfi_snd;
    } else {
        cfi_buf = &cfil_info->cfi_rcv;
    }

    CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
        (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
        cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);

    if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
        for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &cfil_info->cfi_entries[kcunit - 1];

            /* Are we attached to a filter? */
            if (entry->cfe_filter == NULL) {
                continue;
            }

            if (outgoing) {
                entrybuf = &entry->cfe_snd;
            } else {
                entrybuf = &entry->cfe_rcv;
            }

            // Keep track of the smallest pass_offset among filters.
            if (first == true ||
                entrybuf->cfe_pass_offset < pass_offset) {
                pass_offset = entrybuf->cfe_pass_offset;
                first = false;
            }
        }
        cfi_buf->cfi_pass_offset = pass_offset;
    }

    CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
        (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);

    return 0;
}
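/*
 * Worked example (hypothetical values): with two attached filters whose
 * cfe_pass_offset are 1000 and 4000, the loop above records the smallest
 * value, so cfi_pass_offset becomes 1000 -- data beyond offset 1000 must keep
 * going through the filters even though one of them already passed more.
 */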
int
cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    uint64_t pass_offset, uint64_t peek_offset)
{
    errno_t error = 0;

    CFIL_LOG(LOG_INFO, "");

    socket_lock_assert_owned(so);

    error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
    if (error != 0) {
        CFIL_LOG(LOG_INFO, "so %llx %s dropped",
            (uint64_t)VM_KERNEL_ADDRPERM(so),
            outgoing ? "out" : "in");
        goto release;
    }

    error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
        pass_offset, peek_offset);

    cfil_service_inject_queue(so, cfil_info, outgoing);

    cfil_set_socket_pass_offset(so, cfil_info, outgoing);
release:
    CFIL_INFO_VERIFY(cfil_info);
    cfil_release_sockbuf(so, outgoing);

    return error;
}
static int
cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
{
    struct cfil_entry *entry;
    int kcunit;
    uint64_t drained;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
        goto done;
    }

    socket_lock_assert_owned(so);

    /*
     * Flush the output queues and ignore errors as long as
     * we are attached
     */
    (void) cfil_acquire_sockbuf(so, cfil_info, 1);
    if (cfil_info != NULL) {
        drained = 0;
        for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &cfil_info->cfi_entries[kcunit - 1];

            drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
            drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
        }
        drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);

        if (drained) {
            if (cfil_info->cfi_flags & CFIF_DROP) {
                OSIncrementAtomic(
                    &cfil_stats.cfs_flush_out_drop);
            } else {
                OSIncrementAtomic(
                    &cfil_stats.cfs_flush_out_close);
            }
        }
    }
    cfil_release_sockbuf(so, 1);

    /*
     * Flush the input queues
     */
    (void) cfil_acquire_sockbuf(so, cfil_info, 0);
    if (cfil_info != NULL) {
        drained = 0;
        for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &cfil_info->cfi_entries[kcunit - 1];

            drained += cfil_queue_drain(
                &entry->cfe_rcv.cfe_ctl_q);
            drained += cfil_queue_drain(
                &entry->cfe_rcv.cfe_pending_q);
        }
        drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);

        if (drained) {
            if (cfil_info->cfi_flags & CFIF_DROP) {
                OSIncrementAtomic(
                    &cfil_stats.cfs_flush_in_drop);
            } else {
                OSIncrementAtomic(
                    &cfil_stats.cfs_flush_in_close);
            }
        }
    }
    cfil_release_sockbuf(so, 0);
done:
    CFIL_INFO_VERIFY(cfil_info);
    return 0;
}
int
cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
{
    errno_t error = 0;
    struct cfil_entry *entry;
    struct proc *p;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
        goto done;
    }

    socket_lock_assert_owned(so);

    entry = &cfil_info->cfi_entries[kcunit - 1];

    /* Are we attached to the filter? */
    if (entry->cfe_filter == NULL) {
        goto done;
    }

    cfil_info->cfi_flags |= CFIF_DROP;

    p = current_proc();

    /*
     * Force the socket to be marked defunct
     * (forcing fixed along with rdar://19391339)
     */
    if (so->so_cfil_db == NULL) {
        error = sosetdefunct(p, so,
            SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
            FALSE);

        /* Flush the socket buffer and disconnect */
        if (error == 0) {
            error = sodefunct(p, so,
                SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
        }
    }

    /* The filter is done, mark as detached */
    entry->cfe_flags |= CFEF_CFIL_DETACHED;

    cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");

    CFIL_LOG(LOG_INFO, "so %llx detached %u",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

    /* Pending data needs to go */
    cfil_flush_queues(so, cfil_info);

    if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
        if (cfil_filters_attached(so) == 0) {
            CFIL_LOG(LOG_INFO, "so %llx waking",
                (uint64_t)VM_KERNEL_ADDRPERM(so));
            wakeup((caddr_t)cfil_info);
        }
    }
done:
    return error;
}
int
cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
{
    errno_t error = 0;
    struct cfil_info *cfil_info = NULL;

    bool cfil_attached = false;
    struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;

    // Search and lock socket
    struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
    if (so == NULL) {
        error = ENOENT;
    } else {
        // The client gets a pass automatically
        cfil_info = (so->so_cfil_db != NULL) ?
            cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;

        if (cfil_attached) {
            if (cfil_info != NULL) {
                CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
                    cfil_info->cfi_hash_entry ? "UDP" : "TCP",
                    (uint64_t)VM_KERNEL_ADDRPERM(so),
                    cfil_info->cfi_sock_id);
            }
            cfil_sock_received_verdict(so);
            (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
            (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
        } else {
            so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
        }
        socket_unlock(so, 1);
    }

    return error;
}
int
cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
{
    struct content_filter *cfc = NULL;
    cfil_crypto_state_t crypto_state = NULL;
    struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;

    CFIL_LOG(LOG_NOTICE, "");

    if (content_filters == NULL) {
        CFIL_LOG(LOG_ERR, "no content filter");
        return EINVAL;
    }
    if (kcunit > MAX_CONTENT_FILTER) {
        CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
            kcunit, MAX_CONTENT_FILTER);
        return EINVAL;
    }
    crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
    if (crypto_state == NULL) {
        CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
            kcunit);
        return EINVAL;
    }

    cfil_rw_lock_exclusive(&cfil_lck_rw);

    cfc = content_filters[kcunit - 1];
    if (cfc->cf_kcunit != kcunit) {
        CFIL_LOG(LOG_ERR, "bad unit info %u)",
            kcunit);
        cfil_rw_unlock_exclusive(&cfil_lck_rw);
        cfil_crypto_cleanup_state(crypto_state);
        return EINVAL;
    }
    if (cfc->cf_crypto_state != NULL) {
        cfil_crypto_cleanup_state(cfc->cf_crypto_state);
        cfc->cf_crypto_state = NULL;
    }
    cfc->cf_crypto_state = crypto_state;

    cfil_rw_unlock_exclusive(&cfil_lck_rw);

    return 0;
}
static int
cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
{
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    uint32_t kcunit;

    CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        entry = &cfil_info->cfi_entries[kcunit - 1];

        /* Are we attached to the filter? */
        if (entry->cfe_filter == NULL) {
            continue;
        }

        if (outgoing) {
            entrybuf = &entry->cfe_snd;
        } else {
            entrybuf = &entry->cfe_rcv;
        }

        entrybuf->cfe_ctl_q.q_start += datalen;
        entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
        entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
        if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
            entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
        }

        entrybuf->cfe_ctl_q.q_end += datalen;

        entrybuf->cfe_pending_q.q_start += datalen;
        entrybuf->cfe_pending_q.q_end += datalen;
    }
    CFIL_INFO_VERIFY(cfil_info);
    return 0;
}
static int
cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(to, control, flags)
    errno_t error = 0;
    unsigned int datalen;
    int mbcnt = 0;
    int mbnum = 0;
    uint32_t kcunit;
    struct cfi_buf *cfi_buf;
    struct mbuf *chain = NULL;

    if (cfil_info == NULL) {
        CFIL_LOG(LOG_ERR, "so %llx cfil detached",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = 0;
        goto done;
    } else if (cfil_info->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = EPIPE;
        goto done;
    }

    datalen = cfil_data_length(data, &mbcnt, &mbnum);

    if (outgoing) {
        cfi_buf = &cfil_info->cfi_snd;
        cfil_info->cfi_byte_outbound_count += datalen;
    } else {
        cfi_buf = &cfil_info->cfi_rcv;
        cfil_info->cfi_byte_inbound_count += datalen;
    }

    cfi_buf->cfi_pending_last += datalen;
    cfi_buf->cfi_pending_mbcnt += mbcnt;
    cfi_buf->cfi_pending_mbnum += mbnum;

    if (IS_IP_DGRAM(so)) {
        if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
            cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
            cfi_buf->cfi_tail_drop_cnt++;
            cfi_buf->cfi_pending_mbcnt -= mbcnt;
            cfi_buf->cfi_pending_mbnum -= mbnum;
            return EPIPE;
        }
    }

    cfil_info_buf_verify(cfi_buf);

    CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
        (uint64_t)VM_KERNEL_ADDRPERM(so),
        outgoing ? "OUT" : "IN",
        (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
        (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
        cfi_buf->cfi_pending_last,
        cfi_buf->cfi_pending_mbcnt,
        cfi_buf->cfi_pass_offset);

    /* Fast path when below pass offset */
    if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
        cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
        CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
    } else {
        struct cfil_entry *iter_entry;
        SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
            // Is cfil attached to this filter?
            kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
            if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
                if (IS_IP_DGRAM(so) && chain == NULL) {
                    /*
                     * Chain addr (incoming only TDB), control (optional) and data into one chain.
                     * This full chain will be reinjected into socket after receiving verdict.
                     */
                    (void) cfil_dgram_save_socket_state(cfil_info, data);
                    chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
                    if (chain == NULL) {
                        return ENOBUFS;
                    }
                    data = chain;
                }
                error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
                    datalen);
            }
            /* 0 means passed so continue with next filter */
            if (error != 0) {
                break;
            }
        }
    }

    /* Move cursor if no filter claimed the data */
    if (error == 0) {
        cfi_buf->cfi_pending_first += datalen;
        cfi_buf->cfi_pending_mbcnt -= mbcnt;
        cfi_buf->cfi_pending_mbnum -= mbnum;
        cfil_info_buf_verify(cfi_buf);
    }
done:
    CFIL_INFO_VERIFY(cfil_info);

    return error;
}
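/*
 * Fast-path example (hypothetical numbers): if cfi_pass_offset is 10000 and
 * cfi_pending_last is 8000 after queueing the new data, every attached filter
 * has already passed this range, so cfil_update_entry_offsets() simply slides
 * the per-entry queue offsets forward and no data event is dispatched to the
 * filter agents.
 */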
/*
 * Callback from socket layer sosendxxx()
 */
int
cfil_sock_data_out(struct socket *so, struct sockaddr *to,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
    int error = 0;
    int new_filter_control_unit = 0;

    if (IS_IP_DGRAM(so)) {
        return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
        /* Drop pre-existing TCP sockets if filter is enabled now */
        if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
            new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
            if (new_filter_control_unit > 0) {
                return EPIPE;
            }
        }
        return 0;
    }

    /* Drop pre-existing TCP sockets when filter state changed */
    new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
    if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
        return EPIPE;
    }

    /*
     * Pass initial data for TFO.
     */
    if (IS_INITIAL_TFO_DATA(so)) {
        return 0;
    }

    socket_lock_assert_owned(so);

    if (so->so_cfil->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        return EPIPE;
    }
    if (control != NULL) {
        CFIL_LOG(LOG_ERR, "so %llx control",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
    }
    if ((flags & MSG_OOB)) {
        CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
    }
    if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
        panic("so %p SB_LOCK not set", so);
    }

    if (so->so_snd.sb_cfil_thread != NULL) {
        panic("%s sb_cfil_thread %p not NULL", __func__,
            so->so_snd.sb_cfil_thread);
    }

    error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);

    return error;
}
/*
 * Callback from socket layer sbappendxxx()
 */
int
cfil_sock_data_in(struct socket *so, struct sockaddr *from,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
    int error = 0;
    int new_filter_control_unit = 0;

    if (IS_IP_DGRAM(so)) {
        return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
        /* Drop pre-existing TCP sockets if filter is enabled now */
        if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
            new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
            if (new_filter_control_unit > 0) {
                return EPIPE;
            }
        }
        return 0;
    }

    /* Drop pre-existing TCP sockets when filter state changed */
    new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
    if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
        return EPIPE;
    }

    /*
     * Pass initial data for TFO.
     */
    if (IS_INITIAL_TFO_DATA(so)) {
        return 0;
    }

    socket_lock_assert_owned(so);

    if (so->so_cfil->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        return EPIPE;
    }
    if (control != NULL) {
        CFIL_LOG(LOG_ERR, "so %llx control",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
    }
    if (data->m_type == MT_OOBDATA) {
        CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
    }
    error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);

    return error;
}
/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there's outgoing data in process.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read anymore data.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
    int error = 0;

    if (IS_IP_DGRAM(so)) {
        return cfil_sock_udp_shutdown(so, how);
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
        goto done;
    }

    socket_lock_assert_owned(so);

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

    /*
     * Check the state of the socket before the content filter
     */
    if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
        /* read already shut down */
        error = ENOTCONN;
        goto done;
    }
    if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
        /* write already shut down */
        error = ENOTCONN;
        goto done;
    }

    if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        goto done;
    }

    /*
     * shutdown read: SHUT_RD or SHUT_RDWR
     */
    if (*how != SHUT_WR) {
        if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
            goto done;
        }
        so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
        cfil_sock_notify_shutdown(so, SHUT_RD);
    }
    /*
     * shutdown write: SHUT_WR or SHUT_RDWR
     */
    if (*how != SHUT_RD) {
        if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
            goto done;
        }
        so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
        cfil_sock_notify_shutdown(so, SHUT_WR);
        /*
         * When outgoing data is pending, we delay the shutdown at the
         * protocol level until the content filters give the final
         * verdict on the pending data.
         */
        if (cfil_sock_data_pending(&so->so_snd) != 0) {
            /*
             * When shutting down the read and write sides at once
             * we can proceed to the final shutdown of the read
             * side. Otherwise, we just return.
             */
            if (*how == SHUT_WR) {
                error = EJUSTRETURN;
            } else if (*how == SHUT_RDWR) {
                *how = SHUT_RD;
            }
        }
    }
done:
    return error;
}
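/*
 * Example of the delayed shutdown above (hypothetical scenario): a process
 * calls shutdown(s, SHUT_WR) while 2 KB of outgoing data is still waiting for
 * a verdict.  cfil_sock_data_pending(&so->so_snd) is non-zero, so EJUSTRETURN
 * is returned and the protocol-level shutdown is deferred; it is completed
 * later by cfil_service_inject_queue() via soshutdownlock_final() once the
 * pending data has drained.
 */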
/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
    errno_t error = 0;
    int kcunit;

    if (IS_IP_DGRAM(so)) {
        cfil_sock_udp_is_closed(so);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
        return;
    }

    CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        /* Let the filters know of the closing */
        error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
    }

    /* Last chance to push passed data out */
    error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
    if (error == 0) {
        cfil_service_inject_queue(so, so->so_cfil, 1);
    }
    cfil_release_sockbuf(so, 1);

    so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

    /* Pending data needs to go */
    cfil_flush_queues(so, so->so_cfil);

    CFIL_INFO_VERIFY(so->so_cfil);
}
/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
    errno_t error = 0;
    int kcunit;

    if (IS_IP_DGRAM(so)) {
        cfil_sock_udp_notify_shutdown(so, how, 0, 0);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
        return;
    }

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), how);

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        /* Disconnect incoming side */
        if (how != SHUT_WR) {
            error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
        }
        /* Disconnect outgoing side */
        if (how != SHUT_RD) {
            error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
        }
    }
}
static int
cfil_filters_attached(struct socket *so)
{
    struct cfil_entry *entry;
    uint32_t kcunit;
    int attached = 0;

    if (IS_IP_DGRAM(so)) {
        return cfil_filters_udp_attached(so, FALSE);
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
        return 0;
    }

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        entry = &so->so_cfil->cfi_entries[kcunit - 1];

        /* Are we attached to the filter? */
        if (entry->cfe_filter == NULL) {
            continue;
        }
        if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
            continue;
        }
        if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
            continue;
        }
        attached = 1;
        break;
    }

    return attached;
}
/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop
 */
void
cfil_sock_close_wait(struct socket *so)
{
    lck_mtx_t *mutex_held;
    struct timespec ts;
    int error;

    if (IS_IP_DGRAM(so)) {
        cfil_sock_udp_close_wait(so);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
        return;
    }

    CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

    if (so->so_proto->pr_getlock != NULL) {
        mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
    } else {
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    }
    LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

    while (cfil_filters_attached(so)) {
        /*
         * Notify the filters we are going away so they can detach
         */
        cfil_sock_notify_shutdown(so, SHUT_RDWR);

        /*
         * Make sure we need to wait after the filter are notified
         * of the disconnection
         */
        if (cfil_filters_attached(so) == 0) {
            break;
        }

        CFIL_LOG(LOG_INFO, "so %llx waiting",
            (uint64_t)VM_KERNEL_ADDRPERM(so));

        ts.tv_sec = cfil_close_wait_timeout / 1000;
        ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
            NSEC_PER_USEC * 1000;

        OSIncrementAtomic(&cfil_stats.cfs_close_wait);
        so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
        error = msleep((caddr_t)so->so_cfil, mutex_held,
            PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
        so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

        CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
            (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

        /*
         * Force close in case of timeout
         */
        if (error != 0) {
            OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
            break;
        }
    }
}
/*
 * Returns the size of the data held by the content filter by using
 * the socket buffer passed in
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
    struct socket *so = sb->sb_so;
    uint64_t pending = 0;

    if (IS_IP_DGRAM(so)) {
        return cfil_sock_udp_data_pending(sb, FALSE);
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
        struct cfi_buf *cfi_buf;

        socket_lock_assert_owned(so);

        if ((sb->sb_flags & SB_RECV) == 0) {
            cfi_buf = &so->so_cfil->cfi_snd;
        } else {
            cfi_buf = &so->so_cfil->cfi_rcv;
        }

        pending = cfi_buf->cfi_pending_last -
            cfi_buf->cfi_pending_first;

        /*
         * If we are limited by the "chars of mbufs used" roughly
         * adjust so we won't overcommit
         */
        if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
            pending = cfi_buf->cfi_pending_mbcnt;
        }
    }

    VERIFY(pending < INT32_MAX);

    return (int32_t)(pending);
}
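/*
 * Worked example (hypothetical values): cfi_pending_last = 5000 and
 * cfi_pending_first = 2000 gives pending = 3000 bytes held by the filter; if
 * cfi_pending_mbcnt is only 2048 (mbuf storage actually used), the result is
 * clamped to 2048 so the caller does not overcommit socket buffer space.
 */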
/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
    struct socket *so = sb->sb_so;
    uint64_t pending = 0;

    if (IS_IP_DGRAM(so)) {
        return cfil_sock_udp_data_pending(sb, TRUE);
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
        so->so_snd.sb_cfil_thread != current_thread()) {
        struct cfi_buf *cfi_buf;

        socket_lock_assert_owned(so);

        if ((sb->sb_flags & SB_RECV) == 0) {
            cfi_buf = &so->so_cfil->cfi_snd;
        } else {
            cfi_buf = &so->so_cfil->cfi_rcv;
        }

        pending = cfi_buf->cfi_pending_last -
            cfi_buf->cfi_pending_first;

        /*
         * If we are limited by the "chars of mbufs used" roughly
         * adjust so we won't overcommit
         */
        if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
            pending = cfi_buf->cfi_pending_mbcnt;
        }
    }

    VERIFY(pending < INT32_MAX);

    return (int32_t)(pending);
}
/*
 * A callback from the socket and protocol layer when data becomes
 * available in the socket buffer to give a chance for the content filter
 * to re-inject data that was held back
 */
void
cfil_sock_buf_update(struct sockbuf *sb)
{
    int outgoing;
    int error;
    struct socket *so = sb->sb_so;

    if (IS_IP_DGRAM(so)) {
        cfil_sock_udp_buf_update(sb);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
        return;
    }

    socket_lock_assert_owned(so);

    if ((sb->sb_flags & SB_RECV) == 0) {
        if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
            return;
        }
        outgoing = 1;
        OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
    } else {
        if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
            return;
        }
        outgoing = 0;
        OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
    }

    CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

    error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
    if (error == 0) {
        cfil_service_inject_queue(so, so->so_cfil, outgoing);
    }
    cfil_release_sockbuf(so, outgoing);
}
static int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
    int error = 0;
    size_t len = 0;
    u_int32_t i;

    /* Read only */
    if (req->newptr != USER_ADDR_NULL) {
        return EPERM;
    }

    cfil_rw_lock_shared(&cfil_lck_rw);

    for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
        struct cfil_filter_stat filter_stat;
        struct content_filter *cfc = content_filters[i];

        if (cfc == NULL) {
            continue;
        }

        /* If just asking for the size */
        if (req->oldptr == USER_ADDR_NULL) {
            len += sizeof(struct cfil_filter_stat);
            continue;
        }

        bzero(&filter_stat, sizeof(struct cfil_filter_stat));
        filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
        filter_stat.cfs_filter_id = cfc->cf_kcunit;
        filter_stat.cfs_flags = cfc->cf_flags;
        filter_stat.cfs_sock_count = cfc->cf_sock_count;
        filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

        error = SYSCTL_OUT(req, &filter_stat,
            sizeof(struct cfil_filter_stat));
        if (error != 0) {
            break;
        }
    }
    /* If just asking for the size */
    if (req->oldptr == USER_ADDR_NULL) {
        req->oldidx = len;
    }

    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (req->oldptr != USER_ADDR_NULL) {
        for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
            cfil_filter_show(i);
        }
    }

    return error;
}
static int
sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
    int error = 0;
    u_int32_t i;
    struct cfil_info *cfi;

    /* Read only */
    if (req->newptr != USER_ADDR_NULL) {
        return EPERM;
    }

    cfil_rw_lock_shared(&cfil_lck_rw);

    /*
     * If just asking for the size,
     */
    if (req->oldptr == USER_ADDR_NULL) {
        req->oldidx = cfil_sock_attached_count *
            sizeof(struct cfil_sock_stat);
        /* Bump the length in case new sockets gets attached */
        req->oldidx += req->oldidx >> 3;
        goto done;
    }

    TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
        struct cfil_entry *entry;
        struct cfil_sock_stat stat;
        struct socket *so = cfi->cfi_so;

        bzero(&stat, sizeof(struct cfil_sock_stat));
        stat.cfs_len = sizeof(struct cfil_sock_stat);
        stat.cfs_sock_id = cfi->cfi_sock_id;
        stat.cfs_flags = cfi->cfi_flags;

        if (so != NULL) {
            stat.cfs_pid = so->last_pid;
            memcpy(stat.cfs_uuid, so->last_uuid,
                sizeof(uuid_t));
            if (so->so_flags & SOF_DELEGATED) {
                stat.cfs_e_pid = so->e_pid;
                memcpy(stat.cfs_e_uuid, so->e_uuid,
                    sizeof(uuid_t));
            } else {
                stat.cfs_e_pid = so->last_pid;
                memcpy(stat.cfs_e_uuid, so->last_uuid,
                    sizeof(uuid_t));
            }

            stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
            stat.cfs_sock_type = so->so_proto->pr_type;
            stat.cfs_sock_protocol = so->so_proto->pr_protocol;
        }

        stat.cfs_snd.cbs_pending_first =
            cfi->cfi_snd.cfi_pending_first;
        stat.cfs_snd.cbs_pending_last =
            cfi->cfi_snd.cfi_pending_last;
        stat.cfs_snd.cbs_inject_q_len =
            cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
        stat.cfs_snd.cbs_pass_offset =
            cfi->cfi_snd.cfi_pass_offset;

        stat.cfs_rcv.cbs_pending_first =
            cfi->cfi_rcv.cfi_pending_first;
        stat.cfs_rcv.cbs_pending_last =
            cfi->cfi_rcv.cfi_pending_last;
        stat.cfs_rcv.cbs_inject_q_len =
            cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
        stat.cfs_rcv.cbs_pass_offset =
            cfi->cfi_rcv.cfi_pass_offset;

        for (i = 0; i < MAX_CONTENT_FILTER; i++) {
            struct cfil_entry_stat *estat;
            struct cfe_buf *ebuf;
            struct cfe_buf_stat *sbuf;

            entry = &cfi->cfi_entries[i];

            estat = &stat.ces_entries[i];

            estat->ces_len = sizeof(struct cfil_entry_stat);
            estat->ces_filter_id = entry->cfe_filter ?
                entry->cfe_filter->cf_kcunit : 0;
            estat->ces_flags = entry->cfe_flags;
            estat->ces_necp_control_unit =
                entry->cfe_necp_control_unit;

            estat->ces_last_event.tv_sec =
                (int64_t)entry->cfe_last_event.tv_sec;
            estat->ces_last_event.tv_usec =
                (int64_t)entry->cfe_last_event.tv_usec;

            estat->ces_last_action.tv_sec =
                (int64_t)entry->cfe_last_action.tv_sec;
            estat->ces_last_action.tv_usec =
                (int64_t)entry->cfe_last_action.tv_usec;

            ebuf = &entry->cfe_snd;
            sbuf = &estat->ces_snd;
            sbuf->cbs_pending_first =
                cfil_queue_offset_first(&ebuf->cfe_pending_q);
            sbuf->cbs_pending_last =
                cfil_queue_offset_last(&ebuf->cfe_pending_q);
            sbuf->cbs_ctl_first =
                cfil_queue_offset_first(&ebuf->cfe_ctl_q);
            sbuf->cbs_ctl_last =
                cfil_queue_offset_last(&ebuf->cfe_ctl_q);
            sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
            sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
            sbuf->cbs_peeked = ebuf->cfe_peeked;

            ebuf = &entry->cfe_rcv;
            sbuf = &estat->ces_rcv;
            sbuf->cbs_pending_first =
                cfil_queue_offset_first(&ebuf->cfe_pending_q);
            sbuf->cbs_pending_last =
                cfil_queue_offset_last(&ebuf->cfe_pending_q);
            sbuf->cbs_ctl_first =
                cfil_queue_offset_first(&ebuf->cfe_ctl_q);
            sbuf->cbs_ctl_last =
                cfil_queue_offset_last(&ebuf->cfe_ctl_q);
            sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
            sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
            sbuf->cbs_peeked = ebuf->cfe_peeked;
        }
        error = SYSCTL_OUT(req, &stat,
            sizeof(struct cfil_sock_stat));
        if (error != 0) {
            break;
        }
    }
done:
    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (req->oldptr != USER_ADDR_NULL) {
        cfil_info_show();
    }

    return error;
}
/*
 * UDP Socket Support
 */
static void
cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
{
    char local[MAX_IPv6_STR_LEN + 6];
    char remote[MAX_IPv6_STR_LEN + 6];
    const void *addr;

    // No sock or not UDP, no-op
    if (so == NULL || entry == NULL) {
        return;
    }

    local[0] = remote[0] = 0x0;

    switch (entry->cfentry_family) {
    case AF_INET6:
        addr = &entry->cfentry_laddr.addr6;
        inet_ntop(AF_INET6, addr, local, sizeof(local));
        addr = &entry->cfentry_faddr.addr6;
        inet_ntop(AF_INET6, addr, remote, sizeof(local));
        break;
    case AF_INET:
        addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
        inet_ntop(AF_INET, addr, local, sizeof(local));
        addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
        inet_ntop(AF_INET, addr, remote, sizeof(local));
        break;
    default:
        return;
    }

    CFIL_LOG(level, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s hash %X",
        msg,
        IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
        (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
        ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote,
        entry->cfentry_flowhash);
}
static void
cfil_inp_log(int level, struct socket *so, const char* msg)
{
    struct inpcb *inp = NULL;
    char local[MAX_IPv6_STR_LEN + 6];
    char remote[MAX_IPv6_STR_LEN + 6];
    const void *addr;

    if (so == NULL) {
        return;
    }

    inp = sotoinpcb(so);
    if (inp == NULL) {
        return;
    }

    local[0] = remote[0] = 0x0;

    if (inp->inp_vflag & INP_IPV6) {
        addr = &inp->in6p_laddr.s6_addr32;
        inet_ntop(AF_INET6, addr, local, sizeof(local));
        addr = &inp->in6p_faddr.s6_addr32;
        inet_ntop(AF_INET6, addr, remote, sizeof(local));
    } else {
        addr = &inp->inp_laddr.s_addr;
        inet_ntop(AF_INET, addr, local, sizeof(local));
        addr = &inp->inp_faddr.s_addr;
        inet_ntop(AF_INET, addr, remote, sizeof(local));
    }

    if (so->so_cfil != NULL) {
        CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
            msg, IS_UDP(so) ? "UDP" : "TCP",
            (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
            ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
    } else {
        CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
            msg, IS_UDP(so) ? "UDP" : "TCP",
            (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
            ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
    }
}
void
cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
{
    if (cfil_info == NULL) {
        return;
    }

    if (cfil_info->cfi_hash_entry != NULL) {
        cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
    } else {
        cfil_inp_log(level, cfil_info->cfi_so, msg);
    }
}
int
cfil_db_init(struct socket *so)
{
    errno_t error = 0;
    struct cfil_db *db = NULL;

    CFIL_LOG(LOG_INFO, "");

    db = zalloc(cfil_db_zone);
    if (db == NULL) {
        error = ENOMEM;
        goto done;
    }
    bzero(db, sizeof(struct cfil_db));
    db->cfdb_so = so;
    db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
    if (db->cfdb_hashbase == NULL) {
        zfree(cfil_db_zone, db);
        db = NULL;
        error = ENOMEM;
        goto done;
    }

    so->so_cfil_db = db;

done:
    return error;
}
void
cfil_db_free(struct socket *so)
{
    struct cfil_hash_entry *entry = NULL;
    struct cfil_hash_entry *temp_entry = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (so == NULL || so->so_cfil_db == NULL) {
        return;
    }
    db = so->so_cfil_db;

    CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
        (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);

    for (int i = 0; i < CFILHASHSIZE; i++) {
        cfilhash = &db->cfdb_hashbase[i];
        LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
            if (entry->cfentry_cfil != NULL) {
                cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");

                CFIL_INFO_FREE(entry->cfentry_cfil);
                OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
                entry->cfentry_cfil = NULL;
            }

            cfil_db_delete_entry(db, entry);
            if (so->so_flags & SOF_CONTENT_FILTER) {
                if (db->cfdb_count == 0) {
                    so->so_flags &= ~SOF_CONTENT_FILTER;
                }
                VERIFY(so->so_usecount > 0);
                so->so_usecount--;
            }
        }
    }

    // Make sure all entries are cleaned up!
    VERIFY(db->cfdb_count == 0);

    CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);

    hashdestroy(db->cfdb_hashbase, M_CFIL, db->cfdb_hashmask);
    zfree(cfil_db_zone, db);
    so->so_cfil_db = NULL;
}
static bool
fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr, bool islocalUpdate)
{
    struct sockaddr_in *sin = NULL;
    struct sockaddr_in6 *sin6 = NULL;

    if (entry == NULL || addr == NULL) {
        return FALSE;
    }

    switch (addr->sa_family) {
    case AF_INET:
        sin = satosin(addr);
        if (sin->sin_len != sizeof(*sin)) {
            return FALSE;
        }
        if (isLocal == TRUE) {
            if (sin->sin_port) {
                entry->cfentry_lport = sin->sin_port;
                if (islocalUpdate) {
                    entry->cfentry_lport_updated = TRUE;
                }
            }
            if (sin->sin_addr.s_addr) {
                entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
                if (islocalUpdate) {
                    entry->cfentry_laddr_updated = TRUE;
                }
            }
        } else {
            if (sin->sin_port) {
                entry->cfentry_fport = sin->sin_port;
            }
            if (sin->sin_addr.s_addr) {
                entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
            }
        }
        entry->cfentry_family = AF_INET;
        return TRUE;
    case AF_INET6:
        sin6 = satosin6(addr);
        if (sin6->sin6_len != sizeof(*sin6)) {
            return FALSE;
        }
        if (isLocal == TRUE) {
            if (sin6->sin6_port) {
                entry->cfentry_lport = sin6->sin6_port;
                if (islocalUpdate) {
                    entry->cfentry_lport_updated = TRUE;
                }
            }
            if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
                entry->cfentry_laddr.addr6 = sin6->sin6_addr;
                if (islocalUpdate) {
                    entry->cfentry_laddr_updated = TRUE;
                }
            }
        } else {
            if (sin6->sin6_port) {
                entry->cfentry_fport = sin6->sin6_port;
            }
            if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
                entry->cfentry_faddr.addr6 = sin6->sin6_addr;
            }
        }
        entry->cfentry_family = AF_INET6;
        return TRUE;
    default:
        return FALSE;
    }
}
static bool
fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp, bool islocalUpdate)
{
    if (entry == NULL || inp == NULL) {
        return FALSE;
    }

    if (inp->inp_vflag & INP_IPV6) {
        if (isLocal == TRUE) {
            if (inp->inp_lport) {
                entry->cfentry_lport = inp->inp_lport;
                if (islocalUpdate) {
                    entry->cfentry_lport_updated = TRUE;
                }
            }
            if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
                entry->cfentry_laddr.addr6 = inp->in6p_laddr;
                if (islocalUpdate) {
                    entry->cfentry_laddr_updated = TRUE;
                }
            }
        } else {
            if (inp->inp_fport) {
                entry->cfentry_fport = inp->inp_fport;
            }
            if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
                entry->cfentry_faddr.addr6 = inp->in6p_faddr;
            }
        }
        entry->cfentry_family = AF_INET6;
        return TRUE;
    } else if (inp->inp_vflag & INP_IPV4) {
        if (isLocal == TRUE) {
            if (inp->inp_lport) {
                entry->cfentry_lport = inp->inp_lport;
                if (islocalUpdate) {
                    entry->cfentry_lport_updated = TRUE;
                }
            }
            if (inp->inp_laddr.s_addr) {
                entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
                if (islocalUpdate) {
                    entry->cfentry_laddr_updated = TRUE;
                }
            }
        } else {
            if (inp->inp_fport) {
                entry->cfentry_fport = inp->inp_fport;
            }
            if (inp->inp_faddr.s_addr) {
                entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
            }
        }
        entry->cfentry_family = AF_INET;
        return TRUE;
    }
    return FALSE;
}
static bool
check_port(struct sockaddr *addr, u_short port)
{
    struct sockaddr_in *sin = NULL;
    struct sockaddr_in6 *sin6 = NULL;

    if (addr == NULL || port == 0) {
        return FALSE;
    }

    switch (addr->sa_family) {
    case AF_INET:
        sin = satosin(addr);
        if (sin->sin_len != sizeof(*sin)) {
            return FALSE;
        }
        if (port == ntohs(sin->sin_port)) {
            return TRUE;
        }
        break;
    case AF_INET6:
        sin6 = satosin6(addr);
        if (sin6->sin6_len != sizeof(*sin6)) {
            return FALSE;
        }
        if (port == ntohs(sin6->sin6_port)) {
            return TRUE;
        }
        break;
    default:
        break;
    }
    return FALSE;
}
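/*
 * Usage sketch (hypothetical caller, not from this file): check_port() lets a
 * caller exclude a well-known port from filtering, e.g.
 *
 *	if (check_port(remote, 53) || check_port(local, 53)) {
 *		// skip content filtering for this flow
 *	}
 *
 * The port argument is in host byte order; the sockaddr ports are compared
 * with ntohs().
 */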
struct cfil_hash_entry *
cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
{
    struct cfilhashhead *cfilhash = NULL;
    u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
    struct cfil_hash_entry *nextentry;

    if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
        return NULL;
    }

    flowhash &= db->cfdb_hashmask;
    cfilhash = &db->cfdb_hashbase[flowhash];

    LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
        if (nextentry->cfentry_cfil != NULL &&
            nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
            CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
                (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
            return nextentry;
        }
    }

    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
        (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
    return NULL;
}
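/*
 * Note on the sock_id layout assumed above: the low 32 bits of a datagram
 * cfi_sock_id carry the flow hash, so masking with 0x0ffffffff and then with
 * cfdb_hashmask selects the same bucket the entry was inserted into by
 * cfil_db_add_entry().  Example (hypothetical): sock_id 0x0000001200ab34cd
 * with a hashmask of 0xff lands in bucket 0xcd.
 */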
struct cfil_hash_entry *
cfil_db_lookup_entry_internal(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly, boolean_t withLocalPort)
{
    struct cfil_hash_entry matchentry = { };
    struct cfil_hash_entry *nextentry = NULL;
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
    u_int16_t hashkey_fport = 0, hashkey_lport = 0;
    int inp_hash_element = 0;
    struct cfilhashhead *cfilhash = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (inp == NULL) {
        goto done;
    }

    if (local != NULL) {
        fill_cfil_hash_entry_from_address(&matchentry, TRUE, local, FALSE);
    } else {
        fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp, FALSE);
    }
    if (remote != NULL) {
        fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote, FALSE);
    } else {
        fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp, FALSE);
    }

    if (inp->inp_vflag & INP_IPV6) {
        hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
        hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr6.s6_addr32[3] : 0;
    } else {
        hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
        hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr : 0;
    }

    hashkey_fport = matchentry.cfentry_fport;
    hashkey_lport = (remoteOnly == false || withLocalPort == true) ? matchentry.cfentry_lport : 0;

    inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr, hashkey_lport, hashkey_fport);
    inp_hash_element &= db->cfdb_hashmask;
    cfilhash = &db->cfdb_hashbase[inp_hash_element];

    LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
        if ((inp->inp_vflag & INP_IPV6) &&
            (remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
            nextentry->cfentry_fport == matchentry.cfentry_fport &&
            (remoteOnly || nextentry->cfentry_laddr_updated || IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6)) &&
            IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
            return nextentry;
        } else if ((remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
            nextentry->cfentry_fport == matchentry.cfentry_fport &&
            (remoteOnly || nextentry->cfentry_laddr_updated || nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr) &&
            nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
            return nextentry;
        }
    }

done:
    cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
    return NULL;
}
struct cfil_hash_entry *
cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly)
{
    struct cfil_hash_entry *entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, false);
    if (entry == NULL && remoteOnly == true) {
        entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, true);
    }
    return entry;
}
cfil_sock_id_t
cfil_sock_id_from_datagram_socket(struct socket *so, struct sockaddr *local, struct sockaddr *remote)
{
    struct cfil_hash_entry *hash_entry = NULL;

    socket_lock_assert_owned(so);

    if (so->so_cfil_db == NULL) {
        return CFIL_SOCK_ID_NONE;
    }

    hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
    if (hash_entry == NULL) {
        // No match with both local and remote, try match with remote only
        hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
    }
    if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
        return CFIL_SOCK_ID_NONE;
    }

    return hash_entry->cfentry_cfil->cfi_sock_id;
}
void
cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
{
    if (hash_entry == NULL) {
        return;
    }
    if (db == NULL || db->cfdb_count == 0) {
        return;
    }
    db->cfdb_count--;
    if (db->cfdb_only_entry == hash_entry) {
        db->cfdb_only_entry = NULL;
    }
    LIST_REMOVE(hash_entry, cfentry_link);
    zfree(cfil_hash_entry_zone, hash_entry);
}
struct cfil_hash_entry *
cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
    struct cfil_hash_entry *entry = NULL;
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
    int inp_hash_element = 0;
    struct cfilhashhead *cfilhash = NULL;

    CFIL_LOG(LOG_INFO, "");

    entry = zalloc(cfil_hash_entry_zone);
    if (entry == NULL) {
        goto done;
    }
    bzero(entry, sizeof(struct cfil_hash_entry));

    if (local != NULL) {
        fill_cfil_hash_entry_from_address(entry, TRUE, local, FALSE);
    } else {
        fill_cfil_hash_entry_from_inp(entry, TRUE, inp, FALSE);
    }
    if (remote != NULL) {
        fill_cfil_hash_entry_from_address(entry, FALSE, remote, FALSE);
    } else {
        fill_cfil_hash_entry_from_inp(entry, FALSE, inp, FALSE);
    }
    entry->cfentry_lastused = net_uptime();

    if (inp->inp_vflag & INP_IPV6) {
        hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
        hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
    } else {
        hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
        hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
    }

    entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
        entry->cfentry_lport, entry->cfentry_fport);
    inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;

    cfilhash = &db->cfdb_hashbase[inp_hash_element];

    LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
    db->cfdb_count++;
    db->cfdb_only_entry = entry;
    cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");

done:
    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
    return entry;
}
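/*
 * Illustrative note (not in the original source): bucket selection in
 * cfil_db_add_entry() is a plain hash-and-mask scheme.  CFIL_HASH mixes the
 * local/foreign address words and ports into cfentry_flowhash, and masking
 * with cfdb_hashmask picks the chain; e.g. a flow hash of 0x5A3C91F7 with a
 * 16-bucket table (cfdb_hashmask == 0xF) lands in bucket 0x7.  Assumed
 * detail: cfdb_hashmask is table-size-minus-one, as set up when the per
 * socket cfil db is initialized.
 */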
void
cfil_db_update_entry_local(struct cfil_db *db, struct cfil_hash_entry *entry, struct sockaddr *local, struct mbuf *control)
{
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    union sockaddr_in_4_6 address_buf = { };

    CFIL_LOG(LOG_INFO, "");

    if (inp == NULL || entry == NULL) {
        return;
    }

    if (LOCAL_ADDRESS_NEEDS_UPDATE(entry)) {
        // Flow does not have a local address yet.  Retrieve local address
        // from control mbufs if present.
        if (local == NULL && control != NULL) {
            uint8_t *addr_ptr = NULL;
            int size = cfil_sock_udp_get_address_from_control(entry->cfentry_family, control, &addr_ptr);

            if (size && addr_ptr) {
                switch (entry->cfentry_family) {
                case AF_INET:
                    if (size == sizeof(struct in_addr)) {
                        address_buf.sin.sin_port = 0;
                        address_buf.sin.sin_family = AF_INET;
                        address_buf.sin.sin_len = sizeof(struct sockaddr_in);
                        (void) memcpy(&address_buf.sin.sin_addr, addr_ptr, sizeof(struct in_addr));
                        local = sintosa(&address_buf.sin);
                    }
                    break;
                case AF_INET6:
                    if (size == sizeof(struct in6_addr)) {
                        address_buf.sin6.sin6_port = 0;
                        address_buf.sin6.sin6_family = AF_INET6;
                        address_buf.sin6.sin6_len = sizeof(struct sockaddr_in6);
                        (void) memcpy(&address_buf.sin6.sin6_addr, addr_ptr, sizeof(struct in6_addr));
                        local = sin6tosa(&address_buf.sin6);
                    }
                    break;
                default:
                    break;
                }
            }
        }
        if (local != NULL) {
            fill_cfil_hash_entry_from_address(entry, TRUE, local, TRUE);
        } else {
            fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
        }
    }

    if (LOCAL_PORT_NEEDS_UPDATE(entry, db->cfdb_so)) {
        fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
    }
}
struct cfil_info *
cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
{
    struct cfil_hash_entry *hash_entry = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (db == NULL || id == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
            db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
        return NULL;
    }

    // This is an optimization for a connected UDP socket, which has only one flow.
    // No need to do the hash lookup.
    if (db->cfdb_count == 1) {
        if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
            db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
            return db->cfdb_only_entry->cfentry_cfil;
        }
    }

    hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
    return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
}
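/*
 * Illustrative note (not in the original source): the cfdb_count == 1 fast
 * path above matters because a connected UDP socket typically carries exactly
 * one flow; cfdb_only_entry caches that flow so the common case avoids the
 * hash walk done by cfil_db_lookup_entry_with_sockid().
 */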
struct cfil_hash_entry *
cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote, struct mbuf *control, int debug)
{
    struct cfil_hash_entry *hash_entry = NULL;
    int new_filter_control_unit = 0;
    errno_t error = 0;

    socket_lock_assert_owned(so);

    // If new socket, allocate cfil db
    if (so->so_cfil_db == NULL) {
        if (cfil_db_init(so) != 0) {
            return NULL;
        }
    }

    // See if flow already exists.
    hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
    if (hash_entry == NULL) {
        // No match with both local and remote, try match with remote only
        hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
    }
    if (hash_entry != NULL) {
        /* Drop pre-existing UDP flow if filter state changed */
        new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
        if (new_filter_control_unit > 0 &&
            new_filter_control_unit != hash_entry->cfentry_cfil->cfi_filter_control_unit) {
            return NULL;
        }

        // Try to update flow info from socket and/or control mbufs if necessary
        if (LOCAL_ADDRESS_NEEDS_UPDATE(hash_entry) || LOCAL_PORT_NEEDS_UPDATE(hash_entry, so)) {
            cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
        }
        return hash_entry;
    }

    hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
    if (hash_entry == NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
        CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
        return NULL;
    }

    if (cfil_info_alloc(so, hash_entry) == NULL ||
        hash_entry->cfentry_cfil == NULL) {
        cfil_db_delete_entry(so->so_cfil_db, hash_entry);
        CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
        return NULL;
    }
    hash_entry->cfentry_cfil->cfi_filter_control_unit = filter_control_unit;
    hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
    hash_entry->cfentry_cfil->cfi_debug = debug;

    cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");

    // Check if we can update the new flow's local address from control mbufs
    if (control != NULL) {
        cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
    }

    if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
        CFIL_INFO_FREE(hash_entry->cfentry_cfil);
        cfil_db_delete_entry(so->so_cfil_db, hash_entry);
        CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
            filter_control_unit);
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
        return NULL;
    }
    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
        (uint64_t)VM_KERNEL_ADDRPERM(so),
        filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);

    so->so_flags |= SOF_CONTENT_FILTER;
    OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

    /* Hold a reference on the socket for each flow */
    so->so_usecount++;

    cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");

    error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
        outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
    /* We can recover from flow control or out of memory errors */
    if (error != 0 && error != ENOBUFS && error != ENOMEM) {
        return NULL;
    }

    CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
    return hash_entry;
}
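/*
 * Illustrative summary (not in the original source): cfil_sock_udp_get_flow()
 * is the per-datagram "get or create" step: look up an existing hash entry
 * (full match first, then remote-only), refresh its local address/port if the
 * socket has since bound, and otherwise add a new entry, allocate its
 * cfil_info, attach the filter unit, take a socket reference and dispatch the
 * attach event.  Any failure after cfil_db_add_entry() unwinds by deleting
 * the entry so the database never holds a flow without a usable cfil_info.
 */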
int
cfil_sock_udp_get_address_from_control(sa_family_t family, struct mbuf *control, uint8_t **address_ptr)
{
    struct cmsghdr *cm;
    struct in6_pktinfo *pi6;

    if (control == NULL || address_ptr == NULL) {
        return 0;
    }

    while (control != NULL) {
        if (control->m_type != MT_CONTROL) {
            control = control->m_next;
            continue;
        }

        for (cm = M_FIRST_CMSGHDR(control);
            is_cmsg_valid(control, cm);
            cm = M_NXT_CMSGHDR(control, cm)) {
            switch (cm->cmsg_type) {
            case IP_RECVDSTADDR:
                if (family == AF_INET &&
                    cm->cmsg_level == IPPROTO_IP &&
                    cm->cmsg_len == CMSG_LEN(sizeof(struct in_addr))) {
                    *address_ptr = CMSG_DATA(cm);
                    return sizeof(struct in_addr);
                }
                break;
            case IPV6_2292PKTINFO:
                if (family == AF_INET6 &&
                    cm->cmsg_level == IPPROTO_IPV6 &&
                    cm->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo))) {
                    pi6 = (struct in6_pktinfo *)(void *)CMSG_DATA(cm);
                    *address_ptr = (uint8_t *)&pi6->ipi6_addr;
                    return sizeof(struct in6_addr);
                }
                break;
            default:
                break;
            }
        }

        control = control->m_next;
    }
    return 0;
}
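/*
 * Illustrative sketch (not part of the kernel source): the loop above mirrors
 * what a user-space receiver does with recvmsg(2) ancillary data.  Assuming a
 * UDP socket with the IP_RECVDSTADDR option enabled, the destination address
 * arrives as a cmsg and can be pulled out the same way:
 *
 *     struct msghdr msg;              // filled in by recvmsg()
 *     struct cmsghdr *cmsg;
 *     struct in_addr dst;
 *     for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
 *         cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *         if (cmsg->cmsg_level == IPPROTO_IP &&
 *             cmsg->cmsg_type == IP_RECVDSTADDR &&
 *             cmsg->cmsg_len == CMSG_LEN(sizeof(struct in_addr))) {
 *             memcpy(&dst, CMSG_DATA(cmsg), sizeof(dst));
 *         }
 *     }
 *
 * The kernel variant walks MT_CONTROL mbufs with M_FIRST_CMSGHDR()/
 * M_NXT_CMSGHDR() instead of a msghdr, but the cmsghdr layout is the same.
 */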
int
cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
    struct sockaddr *local, struct sockaddr *remote,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(outgoing, so, local, remote, data, control, flags)
    errno_t error = 0;
    uint32_t filter_control_unit;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_info *cfil_info = NULL;
    int debug = 0;

    socket_lock_assert_owned(so);

    if (cfil_active_count == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
        return error;
    }

    // Socket has been blessed
    if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
        return error;
    }

    filter_control_unit = necp_socket_get_content_filter_control_unit(so);
    if (filter_control_unit == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
        return error;
    }

    if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
        return error;
    }

    if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
        OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
        return error;
    }

    hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote, control, debug);
    if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
        CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
        return EPIPE;
    }
    // Update last used timestamp, this is for flow Idle TO
    hash_entry->cfentry_lastused = net_uptime();
    cfil_info = hash_entry->cfentry_cfil;

    if (cfil_info->cfi_flags & CFIF_DROP) {
        cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
        return EPIPE;
    }
    if (control != NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
    }
    if (data->m_type == MT_OOBDATA) {
        CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
    }

    error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);

    return error;
}
 * Go through all UDP flows for the specified socket and return TRUE if
 * any flow is still attached.  If need_wait is TRUE, wait on the first
 * attached flow.
 */
static int
cfil_filters_udp_attached(struct socket *so, bool need_wait)
{
    struct timespec ts;
    lck_mtx_t *mutex_held;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    struct cfil_info *cfil_info = NULL;
    struct cfil_entry *entry = NULL;
    errno_t error = 0;
    int kcunit;
    int attached = 0;
    uint64_t sock_flow_id = 0;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
        if (so->so_proto->pr_getlock != NULL) {
            mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        } else {
            mutex_held = so->so_proto->pr_domain->dom_mtx;
        }
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;
                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        entry = &cfil_info->cfi_entries[kcunit - 1];

                        /* Are we attached to the filter? */
                        if (entry->cfe_filter == NULL) {
                            continue;
                        }

                        if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
                            continue;
                        }
                        if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
                            continue;
                        }

                        attached = 1;

                        if (need_wait == TRUE) {
                            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");

                            ts.tv_sec = cfil_close_wait_timeout / 1000;
                            ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
                                NSEC_PER_USEC * 1000;

                            OSIncrementAtomic(&cfil_stats.cfs_close_wait);
                            cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
                            sock_flow_id = cfil_info->cfi_sock_id;

                            error = msleep((caddr_t)cfil_info, mutex_held,
                                PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);

                            // Woke up from sleep, validate if cfil_info is still valid
                            if (so->so_cfil_db == NULL ||
                                (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
                                // cfil_info is not valid, do not continue
                                goto done;
                            }

                            cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;

                            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");

                            /*
                             * Force close in case of timeout
                             */
                            if (error != 0) {
                                OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
                                cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
                                entry->cfe_flags |= CFEF_CFIL_DETACHED;
                            }
                        }
                        goto done;
                    }
                }
            }
        }
    }

done:
    return attached;
}
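/*
 * Illustrative note (not in the original source): cfil_close_wait_timeout is
 * kept in milliseconds, and the msleep() deadline above splits it into a
 * timespec as
 *
 *     ts.tv_sec  = timeout_ms / 1000;
 *     ts.tv_nsec = (timeout_ms % 1000) * NSEC_PER_USEC * 1000;
 *
 * i.e. the leftover milliseconds are scaled by 1,000,000 ns each ("timeout_ms"
 * here is just a stand-in for cfil_close_wait_timeout).  For a 1500 ms timeout
 * this yields tv_sec = 1 and tv_nsec = 500,000,000.
 */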
int32_t
cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
{
    struct socket *so = sb->sb_so;
    struct cfi_buf *cfi_buf;
    uint64_t pending = 0;
    uint64_t total_pending = 0;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
        (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    if ((sb->sb_flags & SB_RECV) == 0) {
                        cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
                    } else {
                        cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
                    }

                    pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
                    /*
                     * If we are limited by the "chars of mbufs used" roughly
                     * adjust so we won't overcommit
                     */
                    if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
                        pending = cfi_buf->cfi_pending_mbcnt;
                    }

                    total_pending += pending;
                }
            }
        }

        VERIFY(total_pending < INT32_MAX);
        CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
            (uint64_t)VM_KERNEL_ADDRPERM(so),
            total_pending, check_thread);
    }

    return (int32_t)(total_pending);
}
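/*
 * Illustrative note (not in the original source): for each flow the pending
 * byte count is the gap between what has been queued for the filter and what
 * has been released, cfi_pending_last - cfi_pending_first, bumped up to
 * cfi_pending_mbcnt when mbuf storage overhead dominates, so the caller does
 * not overcommit socket buffer space.  E.g. 3 datagrams of 100 bytes held in
 * 3 clusters of 2048 bytes report max(300, 6144) = 6144 pending (sizes here
 * are hypothetical).
 */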
int
cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int done_count = 0;
    int kcunit;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;

                    // This flow is marked as DROP
                    if (cfil_info->cfi_flags & drop_flag) {
                        done_count++;
                        continue;
                    }

                    // This flow has been shut already, skip
                    if (cfil_info->cfi_flags & shut_flag) {
                        continue;
                    }
                    // Mark flow as shut
                    cfil_info->cfi_flags |= shut_flag;
                    done_count++;

                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        /* Disconnect incoming side */
                        if (how != SHUT_WR) {
                            error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
                        }
                        /* Disconnect outgoing side */
                        if (how != SHUT_RD) {
                            error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
                        }
                    }
                }
            }
        }
    }

    if (done_count == 0) {
        error = ENOTCONN;
    }
    return error;
}
int
cfil_sock_udp_shutdown(struct socket *so, int *how)
{
    int error = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
        goto done;
    }

    socket_lock_assert_owned(so);

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

    /*
     * Check the state of the socket before the content filter
     */
    if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
        /* read already shut down */
        error = ENOTCONN;
        goto done;
    }
    if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
        /* write already shut down */
        error = ENOTCONN;
        goto done;
    }

    /*
     * shutdown read: SHUT_RD or SHUT_RDWR
     */
    if (*how != SHUT_WR) {
        error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
        if (error != 0) {
            goto done;
        }
    }
    /*
     * shutdown write: SHUT_WR or SHUT_RDWR
     */
    if (*how != SHUT_RD) {
        error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
        if (error != 0) {
            goto done;
        }

        /*
         * When outgoing data is pending, we delay the shutdown at the
         * protocol level until the content filters give the final
         * verdict on the pending data.
         */
        if (cfil_sock_data_pending(&so->so_snd) != 0) {
            /*
             * When shutting down the read and write sides at once
             * we can proceed to the final shutdown of the read
             * side. Otherwise, we just return.
             */
            if (*how == SHUT_WR) {
                error = EJUSTRETURN;
            } else if (*how == SHUT_RDWR) {
                *how = SHUT_RD;
            }
        }
    }
done:
    return error;
}
void
cfil_sock_udp_close_wait(struct socket *so)
{
    socket_lock_assert_owned(so);

    while (cfil_filters_udp_attached(so, FALSE)) {
        /*
         * Notify the filters we are going away so they can detach
         */
        cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);

        /*
         * Make sure we need to wait after the filters are notified
         * of the disconnection
         */
        if (cfil_filters_udp_attached(so, TRUE) == 0) {
            break;
        }
    }
}
void
cfil_sock_udp_is_closed(struct socket *so)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int kcunit;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;

                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        /* Let the filters know of the closing */
                        error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
                    }

                    /* Last chance to push passed data out */
                    error = cfil_acquire_sockbuf(so, cfil_info, 1);
                    if (error == 0) {
                        cfil_service_inject_queue(so, cfil_info, 1);
                    }
                    cfil_release_sockbuf(so, 1);

                    cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;

                    /* Pending data needs to go */
                    cfil_flush_queues(so, cfil_info);

                    CFIL_INFO_VERIFY(cfil_info);
                }
            }
        }
    }
}
void
cfil_sock_udp_buf_update(struct sockbuf *sb)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int outgoing;
    struct socket *so = sb->sb_so;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;

                    if ((sb->sb_flags & SB_RECV) == 0) {
                        if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
                            return;
                        }
                        outgoing = 1;
                        OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
                    } else {
                        if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
                            return;
                        }
                        outgoing = 0;
                        OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
                    }

                    CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
                        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

                    error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
                    if (error == 0) {
                        cfil_service_inject_queue(so, cfil_info, outgoing);
                    }
                    cfil_release_sockbuf(so, outgoing);
                }
            }
        }
    }
}
void
cfil_filter_show(u_int32_t kcunit)
{
    struct content_filter *cfc = NULL;
    struct cfil_entry *entry;
    int count = 0;

    if (content_filters == NULL) {
        return;
    }
    if (kcunit > MAX_CONTENT_FILTER) {
        return;
    }

    cfil_rw_lock_shared(&cfil_lck_rw);

    if (content_filters[kcunit - 1] == NULL) {
        cfil_rw_unlock_shared(&cfil_lck_rw);
        return;
    }
    cfc = content_filters[kcunit - 1];

    CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
        kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
    if (cfc->cf_flags & CFF_DETACHING) {
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
    }
    if (cfc->cf_flags & CFF_ACTIVE) {
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
    }
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
    }

    TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
        if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
            struct cfil_info *cfil_info = entry->cfe_cfil_info;

            count++;

            if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
            } else {
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
            }
        }
    }

    CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);

    cfil_rw_unlock_shared(&cfil_lck_rw);
}
void
cfil_info_show(void)
{
    struct cfil_info *cfil_info;
    int count = 0;

    cfil_rw_lock_shared(&cfil_lck_rw);

    CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);

    TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
        count++;

        cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");

        if (cfil_info->cfi_flags & CFIF_DROP) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
        }
        if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
        }
        if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
        }
        if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
        }
        if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
        }
        if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
        }
        if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
        }
    }

    CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);

    cfil_rw_unlock_shared(&cfil_lck_rw);
}
bool
cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int64_t current_time)
{
    if (cfil_info && cfil_info->cfi_hash_entry &&
        (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int64_t)timeout)) {
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
        return true;
    }
    return false;
}
bool
cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
{
    struct cfil_entry *entry;
    struct timeval current_tv;
    struct timeval diff_time;

    if (cfil_info == NULL) {
        return false;
    }

    /*
     * If we have queued up more data than the pass offset and we haven't received
     * an action from user space for a while (the user space filter might have crashed),
     * return action timed out.
     */
    if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
        cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
        microuptime(&current_tv);

        for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &cfil_info->cfi_entries[kcunit - 1];

            if (entry->cfe_filter == NULL) {
                continue;
            }

            if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
                cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
                // haven't gotten an action from this filter, check timeout
                timersub(&current_tv, &entry->cfe_last_action, &diff_time);
                if (diff_time.tv_sec >= timeout) {
                    cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
                    return true;
                }
            }
        }
    }
    return false;
}
bool
cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
{
    if (cfil_info == NULL) {
        return false;
    }

    /*
     * Clean up the flow if it exceeded queue thresholds
     */
    if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
        cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
        CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
            cfil_udp_gc_mbuf_num_max,
            cfil_udp_gc_mbuf_cnt_max,
            cfil_info->cfi_snd.cfi_tail_drop_cnt,
            cfil_info->cfi_rcv.cfi_tail_drop_cnt);
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
        return true;
    }
    return false;
}
static void
cfil_udp_gc_thread_sleep(bool forever)
{
    if (forever) {
        (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
            THREAD_INTERRUPTIBLE);
    } else {
        uint64_t deadline = 0;
        nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
        clock_absolutetime_interval_to_deadline(deadline, &deadline);

        (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
            THREAD_INTERRUPTIBLE, deadline);
    }
}
static void
cfil_udp_gc_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)

    ASSERT(cfil_udp_gc_thread == current_thread());
    thread_set_thread_name(current_thread(), "CFIL_UPD_GC");

    // Kick off gc shortly
    cfil_udp_gc_thread_sleep(false);
    thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
    /* NOTREACHED */
}
static void
cfil_info_udp_expire(void *v, wait_result_t w)
{
#pragma unused(v, w)

    static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
    static uint32_t expired_count = 0;

    struct cfil_info *cfil_info;
    struct cfil_hash_entry *hash_entry;
    struct cfil_db *db;
    struct socket *so;
    u_int64_t current_time = 0;

    current_time = net_uptime();

    // Get all expired UDP flow ids
    cfil_rw_lock_shared(&cfil_lck_rw);

    if (cfil_sock_udp_attached_count == 0) {
        cfil_rw_unlock_shared(&cfil_lck_rw);
        goto go_sleep;
    }

    TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
        if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
            break;
        }

        if (IS_IP_DGRAM(cfil_info->cfi_so)) {
            if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
                cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
                cfil_info_buffer_threshold_exceeded(cfil_info)) {
                expired_array[expired_count] = cfil_info->cfi_sock_id;
                expired_count++;
            }
        }
    }
    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (expired_count == 0) {
        goto go_sleep;
    }

    for (uint32_t i = 0; i < expired_count; i++) {
        // Search for socket (UDP only and lock so)
        so = cfil_socket_from_sock_id(expired_array[i], true);
        if (so == NULL) {
            continue;
        }

        cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
        if (cfil_info == NULL) {
            goto unlock;
        }

        db = so->so_cfil_db;
        hash_entry = cfil_info->cfi_hash_entry;

        if (db == NULL || hash_entry == NULL) {
            goto unlock;
        }

#if GC_DEBUG || LIFECYCLE_DEBUG
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
#endif

        cfil_db_delete_entry(db, hash_entry);
        CFIL_INFO_FREE(cfil_info);
        OSIncrementAtomic(&cfil_stats.cfs_sock_detached);

        if (so->so_flags & SOF_CONTENT_FILTER) {
            if (db->cfdb_count == 0) {
                so->so_flags &= ~SOF_CONTENT_FILTER;
            }
            VERIFY(so->so_usecount > 0);
            so->so_usecount--;
        }
unlock:
        socket_unlock(so, 1);
    }

    CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
    expired_count = 0;

go_sleep:

    // Sleep forever (until woken up) if there are no more UDP flows to clean
    cfil_rw_lock_shared(&cfil_lck_rw);
    cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
    cfil_rw_unlock_shared(&cfil_lck_rw);
    thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
    /* NOTREACHED */
}
void
cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct inpcb *inp = NULL;

    if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
        cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
        return;
    }

    inp = sotoinpcb(cfil_info->cfi_so);

    /* Allocate a tag */
    tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
        sizeof(struct cfil_tag), M_DONTWAIT, m);

    if (tag) {
        ctag = (struct cfil_tag *)(tag + 1);
        ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
        ctag->cfil_so_options = cfil_info->cfi_so->so_options;
        ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;

        hash_entry = cfil_info->cfi_hash_entry;
        if (hash_entry->cfentry_family == AF_INET6) {
            fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
                &hash_entry->cfentry_faddr.addr6,
                hash_entry->cfentry_fport);
        } else if (hash_entry->cfentry_family == AF_INET) {
            fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
                hash_entry->cfentry_faddr.addr46.ia46_addr4,
                hash_entry->cfentry_fport);
        }
        m_tag_prepend(m, tag);
    }
}
struct m_tag *
cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, uint32_t *options,
    struct sockaddr **faddr, int *inp_flags)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;

    tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
    if (tag) {
        ctag = (struct cfil_tag *)(tag + 1);
        if (state_change_cnt) {
            *state_change_cnt = ctag->cfil_so_state_change_cnt;
        }
        if (options) {
            *options = ctag->cfil_so_options;
        }
        if (faddr) {
            *faddr = (struct sockaddr *) &ctag->cfil_faddr;
        }
        if (inp_flags) {
            *inp_flags = ctag->cfil_inp_flags;
        }

        /*
         * Unlink the tag and hand it over to the caller.
         * Note that the caller is responsible for freeing it.
         */
        m_tag_unlink(m, tag);
        return tag;
    }
    return NULL;
}
boolean_t
cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;

    tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
    if (tag) {
        ctag = (struct cfil_tag *)(tag + 1);
        if (inp_flags) {
            *inp_flags = ctag->cfil_inp_flags;
        }
        return true;
    }
    return false;
}
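/*
 * Illustrative note (not in the original source): the three functions above
 * share one layout trick: m_tag_create() allocates the m_tag header plus
 * sizeof(struct cfil_tag) of payload in one block, so
 * "(struct cfil_tag *)(tag + 1)" points at the payload immediately following
 * the header.  Save prepends the tag to the packet, get unlinks it (ownership
 * moves to the caller, who must free it), and peek only reads inp_flags while
 * leaving the tag on the mbuf chain.
 */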
static int
cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
{
    struct content_filter *cfc = NULL;
    errno_t error = 0;
    size_t msgsize = 0;

    if (buffer == NULL || stats_count == 0) {
        return error;
    }

    if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
        return error;
    }

    cfc = content_filters[kcunit - 1];
    if (cfc == NULL) {
        return error;
    }

    /* Would be wasteful to try */
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        error = ENOBUFS;
        goto done;
    }

    msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
    buffer->msghdr.cfm_len = (uint32_t)msgsize;
    buffer->msghdr.cfm_version = 1;
    buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
    buffer->msghdr.cfm_op = CFM_OP_STATS;
    buffer->msghdr.cfm_sock_id = 0;
    buffer->count = stats_count;

    CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
        kcunit,
        (unsigned long)msgsize,
        (unsigned long)sizeof(struct cfil_msg_stats_report),
        (unsigned long)sizeof(struct cfil_msg_sock_stats),
        (unsigned long)stats_count);

    error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
        buffer, msgsize, CTL_DATA_EOR);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
        goto done;
    }
    OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);

    CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);

done:
    if (error == ENOBUFS) {
        OSIncrementAtomic(
            &cfil_stats.cfs_stats_event_flow_control);

        if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
            cfil_rw_lock_exclusive(&cfil_lck_rw);
        }

        cfc->cf_flags |= CFF_FLOW_CONTROLLED;

        cfil_rw_unlock_exclusive(&cfil_lck_rw);
    } else if (error != 0) {
        OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
    }

    return error;
}
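/*
 * Illustrative note (not in the original source): the stats event is a single
 * variable-length kernel-control message, sized as one cfil_msg_stats_report
 * header plus one cfil_msg_sock_stats record per reported flow:
 *
 *     msgsize = sizeof(struct cfil_msg_stats_report)
 *         + sizeof(struct cfil_msg_sock_stats) * stats_count;
 *
 * so a report covering 8 flows with, say, a 32-byte header and 48-byte
 * records (hypothetical sizes) would enqueue 32 + 8 * 48 = 416 bytes in one
 * datagram via ctl_enqueuedata() on the filter's kernel control socket.
 */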
static void
cfil_stats_report_thread_sleep(bool forever)
{
    CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");

    if (forever) {
        (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
            THREAD_INTERRUPTIBLE);
    } else {
        uint64_t deadline = 0;
        nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
        clock_absolutetime_interval_to_deadline(deadline, &deadline);

        (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
            THREAD_INTERRUPTIBLE, deadline);
    }
}
static void
cfil_stats_report_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)

    ASSERT(cfil_stats_report_thread == current_thread());
    thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");

    // Kick off the stats reporting loop shortly
    cfil_stats_report_thread_sleep(false);
    thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
    /* NOTREACHED */
}
static bool
cfil_stats_collect_flow_stats_for_filter(int kcunit,
    struct cfil_info *cfil_info,
    struct cfil_entry *entry,
    struct timeval current_tv)
{
    struct cfil_stats_report_buffer *buffer = NULL;
    struct cfil_msg_sock_stats *flow_array = NULL;
    struct cfil_msg_sock_stats *stats = NULL;
    struct inpcb *inp = NULL;
    struct timeval diff_time;
    uint64_t diff_time_usecs;
    int index = 0;

    if (entry->cfe_stats_report_frequency == 0) {
        return false;
    }

    buffer = global_cfil_stats_report_buffers[kcunit - 1];
    if (buffer == NULL) {
        CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
        return false;
    }

    timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
    diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;

    CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
        (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
        (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
        (unsigned long long)current_tv.tv_sec,
        (unsigned long long)current_tv.tv_usec,
        (unsigned long long)diff_time.tv_sec,
        (unsigned long long)diff_time.tv_usec,
        (unsigned long long)diff_time_usecs,
        (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
        cfil_info->cfi_sock_id);

    // Compare elapsed time in usecs
    if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
        CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
            cfil_info->cfi_byte_inbound_count,
            entry->cfe_byte_inbound_count_reported);
        CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
            cfil_info->cfi_byte_outbound_count,
            entry->cfe_byte_outbound_count_reported);

        // Check if the flow has new bytes that have not been reported
        if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
            entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
            flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
            index = global_cfil_stats_counts[kcunit - 1];

            stats = &flow_array[index];
            stats->cfs_sock_id = cfil_info->cfi_sock_id;
            stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
            stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;

            if (entry->cfe_laddr_sent == false) {
                /* cache it if necessary */
                if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
                    inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
                    if (inp != NULL) {
                        boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
                        union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
                        union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
                        cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
                            src, dst, !IS_INP_V6(inp), outgoing);
                    }
                }

                if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
                    stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
                    entry->cfe_laddr_sent = true;
                }
            }

            global_cfil_stats_counts[kcunit - 1]++;

            entry->cfe_stats_report_ts = current_tv;
            entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
            entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;

            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");

            CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
            return true;
        }
    }
    return false;
}
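/*
 * Illustrative note (not in the original source): cfe_stats_report_frequency
 * is expressed in milliseconds, while the elapsed time is accumulated in
 * microseconds, hence the comparison against
 * (frequency * NSEC_PER_MSEC) / NSEC_PER_USEC, i.e. frequency * 1000.
 * A filter asking for reports every 500 ms is therefore re-reported only once
 * diff_time_usecs reaches 500,000, and only if the flow's byte counters have
 * moved since the last report.
 */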
static void
cfil_stats_report(void *v, wait_result_t w)
{
#pragma unused(v, w)

    struct cfil_info *cfil_info = NULL;
    struct cfil_entry *entry = NULL;
    struct timeval current_tv;
    uint32_t flow_count = 0;
    uint64_t saved_next_sock_id = 0; // Next sock id to be reported for the next loop
    bool flow_reported = false;

    CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");

    do {
        // Collect the sock ids of all flows that have new stats
        cfil_rw_lock_shared(&cfil_lck_rw);

        if (cfil_sock_attached_stats_count == 0) {
            CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
            cfil_rw_unlock_shared(&cfil_lck_rw);
            goto go_sleep;
        }

        for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
                memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
            }
            global_cfil_stats_counts[kcunit - 1] = 0;
        }

        microuptime(&current_tv);
        flow_count = 0;

        TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
            if (saved_next_sock_id != 0 &&
                saved_next_sock_id == cfil_info->cfi_sock_id) {
                // Here is where we left off previously, start accumulating
                saved_next_sock_id = 0;
            }

            if (saved_next_sock_id == 0) {
                if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
                    // Examine a fixed number of flows each round.  Remember the current flow
                    // so we can start from here for the next loop
                    saved_next_sock_id = cfil_info->cfi_sock_id;
                    break;
                }

                flow_reported = false;
                for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                    entry = &cfil_info->cfi_entries[kcunit - 1];
                    if (entry->cfe_filter == NULL) {
                        CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
                            cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
                        continue;
                    }

                    if ((entry->cfe_stats_report_frequency > 0) &&
                        cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
                        flow_reported = true;
                    }
                }
                if (flow_reported == true) {
                    flow_count++;
                }
            }
        }

        if (flow_count > 0) {
            CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
            for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
                    global_cfil_stats_counts[kcunit - 1] > 0) {
                    cfil_dispatch_stats_event_locked(kcunit,
                        global_cfil_stats_report_buffers[kcunit - 1],
                        global_cfil_stats_counts[kcunit - 1]);
                }
            }
        } else {
            cfil_rw_unlock_shared(&cfil_lck_rw);
            goto go_sleep;
        }

        cfil_rw_unlock_shared(&cfil_lck_rw);

        // Loop again if we haven't finished the whole cfil_info list
    } while (saved_next_sock_id != 0);

go_sleep:

    // Sleep forever (until woken up) if there are no more flows to report
    cfil_rw_lock_shared(&cfil_lck_rw);
    cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
    cfil_rw_unlock_shared(&cfil_lck_rw);
    thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
    /* NOTREACHED */
}