/*
 * Copyright (c) 2013-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies
 * back to the kernel.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
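 *
 * As a rough user space sketch (error handling and the required entitlement
 * are omitted; CONTENT_FILTER_CONTROL_NAME comes from <net/content_filter.h>
 * and the rest are the standard kernel control socket calls from
 * <sys/kern_control.h> and <sys/sys_domain.h>):
 *
 *	#include <sys/socket.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/kern_control.h>
 *	#include <sys/sys_domain.h>
 *	#include <string.h>
 *
 *	int
 *	cfil_agent_attach(void)
 *	{
 *		struct ctl_info info;
 *		struct sockaddr_ctl addr;
 *		int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *		memset(&info, 0, sizeof(info));
 *		strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *		    sizeof(info.ctl_name));
 *		ioctl(fd, CTLIOCGINFO, &info);		// resolve name to ctl_id
 *
 *		memset(&addr, 0, sizeof(addr));
 *		addr.sc_len = sizeof(addr);
 *		addr.sc_family = AF_SYSTEM;
 *		addr.ss_sysaddr = AF_SYS_CONTROL;
 *		addr.sc_id = info.ctl_id;
 *		addr.sc_unit = 0;			// let the kernel pick the unit
 *		connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *		return (fd);
 *	}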
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * Note that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * Limitations:
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters
 *   for the sake of simplicity of the implementation.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT.
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
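 *
 * As a sketch, step 2) above is a single setsockopt() on the connected kernel
 * control socket; the control unit value is whatever the agent registered
 * with NECP, and CFIL_OPT_NECP_CONTROL_UNIT is defined in
 * <net/content_filter.h>:
 *
 *	uint32_t control_unit = necp_filter_control_unit;	// from the NECP rule
 *
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &control_unit, sizeof(control_unit));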
 *
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (it is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells whether
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
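 *
 * A sketch of the agent's receive loop, dispatching on the common header
 * ("struct cfil_msg_hdr" and the CFM_* constants come from
 * <net/content_filter.h>; handle_event() is a hypothetical helper, and a
 * real agent must also cope with short reads and messages larger than this
 * fixed buffer):
 *
 *	uint8_t buf[8192];
 *	ssize_t n;
 *
 *	while ((n = recv(fd, buf, sizeof(buf), 0)) > 0) {
 *		struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *
 *		if ((size_t)n < sizeof(*hdr) || hdr->cfm_type != CFM_TYPE_EVENT)
 *			continue;
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_SOCKET_ATTACHED:
 *		case CFM_OP_SOCKET_CLOSED:
 *		case CFM_OP_DATA_OUT:
 *		case CFM_OP_DATA_IN:
 *			// hdr->cfm_sock_id tells which flow the event is about
 *			handle_event(hdr);
 *			break;
 *		default:
 *			break;
 *		}
 *	}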
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 *	2^64 / ((10^9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
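 *
 * For example, a data event with start offset 1000 and end offset 1460
 * carries 460 bytes covering absolute stream offsets 1000 through 1459.
 * Assuming the cfd_start_offset/cfd_end_offset field names of
 * "struct cfil_msg_data_event" from <net/content_filter.h>:
 *
 *	struct cfil_msg_data_event *data = (struct cfil_msg_data_event *)hdr;
 *	uint64_t span_len = data->cfd_end_offset - data->cfd_start_offset;
 *	// the content bytes immediately follow the fixed part of the message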
 *
 * There are two kinds of primary content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow.
 *
 * There is also an action to mark a given client flow as already filtered
 * at a higher level, CFM_OP_BLESS_CLIENT.
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * message, the data flow becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see, by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a peek offset greater than 0 is sent by the user space filter agent.
 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * message, the flow of update data events becomes unrestricted.
 *
 * Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also, a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
 * to set too small a peek value is silently ignored.
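 *
 * A sketch of the corresponding action message as sent by the agent, here
 * updating the pass and peek offsets in both directions ("struct
 * cfil_msg_action" and its cfa_* fields come from <net/content_filter.h>;
 * the helper itself is illustrative only):
 *
 *	static void
 *	send_update(int fd, uint64_t sock_id,
 *	    uint64_t out_pass, uint64_t out_peek,
 *	    uint64_t in_pass, uint64_t in_peek)
 *	{
 *		struct cfil_msg_action msg;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		msg.cfa_msghdr.cfm_len = sizeof(msg);
 *		msg.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *		msg.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *		msg.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *		msg.cfa_msghdr.cfm_sock_id = sock_id;
 *		msg.cfa_out_pass_offset = out_pass;
 *		msg.cfa_out_peek_offset = out_peek;
 *		msg.cfa_in_pass_offset = in_pass;
 *		msg.cfa_in_peek_offset = in_peek;
 *		send(fd, &msg, sizeof(msg), 0);
 *	}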
 *
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected into the socket buffer;
 * - A content filter specific state in a set of "struct cfil_entry".
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit of the kernel
 * control socket it corresponds to and also has a pointer to the
 * corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and that is pending a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead will sit in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offsets of the data span of the
 * mbuf list.
 *
 * The data moves through the three content filter queues according to the
 * following sequence:
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: the sequence (a),(b) may be repeated several times if there is more
 * than one content filter attached to the TCP/IP socket.
 *
 * The "cfe_ctl_q" queue holds data that cannot yet be delivered to the
 * kernel control socket, for two possible reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and that is waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * TCP/IP socket buffer.
 *
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 *
 * The processing overhead of the content filtering may have an effect on
 * flow control by adding noticeable delays and cannot be eliminated --
 * care must be taken by the user space filter agent to keep that overhead
 * small.
 *
 * The amount of data being filtered is kept in buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
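 *
 * Conceptually (a simplified sketch; the real accounting lives in sbspace()
 * and tcp_sbspace() and uses the cfi_pending_* fields maintained below):
 *
 *	pending = cfi_pending_last - cfi_pending_first;	// bytes held by cfil
 *	space = sb_hiwat - (sb_cc + pending);		// what sbspace() reports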
 *
 *
 * LOCKING STRATEGY
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
 * is held. That's why we have some sequences where we drop the cfil read-write
 * lock before taking the TCP/IP lock.
 *
 * It is also important to lock the TCP/IP socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually, "cfe_link" and "cfe_filter" are protected by both
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusive and the socket is locked.
 *
 * To read the other fields of "struct content_filter" we have to take
 * "cfil_lck_rw" in shared mode.
 *
 *
 * LIMITATIONS
 *
 * - For TCP sockets only
 *
 * - Does not support TCP unordered messages
 *
 * TODO:
 *
 * - If datagram support is added, enqueue control and address mbufs as well
 */

#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/mbuf.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#include <libkern/libkern.h>
#include <kern/sched_prim.h>

#define	MAX_CONTENT_FILTER 2

/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref		cf_kcref;
	u_int32_t		cf_kcunit;
	uint32_t		cf_flags;

	uint32_t		cf_necp_control_unit;

	uint32_t		cf_sock_count;
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define	CFF_ACTIVE		0x01
#define	CFF_DETACHING		0x02
#define	CFF_FLOW_CONTROLLED	0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;	/* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;	/* Number of TCP/IP socket attachments */
uint32_t cfil_sock_udp_attached_count = 0;	/* Number of UDP socket attachments */
uint32_t cfil_close_wait_timeout = 1000;	/* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);

#define	CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];

#define	CONTENT_FILTER_ZONE_NAME	"content_filter"
#define	CONTENT_FILTER_ZONE_MAX		10
static struct zone *content_filter_zone = NULL;	/* zone for content_filter */

#define	CFIL_INFO_ZONE_NAME	"cfil_info"
#define	CFIL_INFO_ZONE_MAX	1024
static struct zone *cfil_info_zone = NULL;	/* zone for cfil_info */

MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
	uint64_t		q_start;	/* offset of first byte in queue */
	uint64_t		q_end;		/* offset of last byte in queue */
	struct cfil_mqhead	q_mq;
};

/*
 * struct cfil_entry
 *
 * There is one entry per content filter.
 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry)	cfe_link;
	struct content_filter	*cfe_filter;

	struct cfil_info	*cfe_cfil_info;
	uint32_t		cfe_flags;
	uint32_t		cfe_necp_control_unit;
	struct timeval		cfe_last_event;		/* To user space */
	struct timeval		cfe_last_action;	/* From user space */

	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action
		 */
		struct cfil_queue	cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control)
		 */
		struct cfil_queue	cfe_ctl_q;

		uint64_t		cfe_pass_offset;
		uint64_t		cfe_peek_offset;
		uint64_t		cfe_peeked;
	} cfe_snd, cfe_rcv;
};

#define	CFEF_CFIL_ATTACHED		0x0001	/* was attached to filter */
#define	CFEF_SENT_SOCK_ATTACHED		0x0002	/* sock attach event was sent */
#define	CFEF_DATA_START			0x0004	/* can send data event */
#define	CFEF_FLOW_CONTROLLED		0x0008	/* wait for flow control lift */
#define	CFEF_SENT_DISCONNECT_IN		0x0010	/* event was sent */
#define	CFEF_SENT_DISCONNECT_OUT	0x0020	/* event was sent */
#define	CFEF_SENT_SOCK_CLOSED		0x0040	/* closed event was sent */
#define	CFEF_CFIL_DETACHED		0x0080	/* filter was detached */

/*
 * Records the elapsed time (in milliseconds) between the first event on a
 * flow and each subsequent operation, for debugging purposes.
 */
#define	CFI_ADD_TIME_LOG(cfil, t1, t0, op)								\
	struct timeval _tdiff;										\
	if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) {						\
		timersub(t1, t0, &_tdiff);								\
		(cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
		(cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op;			\
		(cfil)->cfi_op_list_ctr ++;								\
	}

struct cfil_hash_entry;

/*
 * struct cfil_info
 *
 * There is a struct cfil_info per socket.
 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info)	cfi_link;
	struct socket		*cfi_so;
	uint64_t		cfi_flags;
	uint64_t		cfi_sock_id;
	struct timeval64	cfi_first_event;
	uint32_t		cfi_op_list_ctr;
	uint32_t		cfi_op_time[CFI_MAX_TIME_LOG_ENTRY];	/* time interval in microseconds since first event */
	unsigned char		cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];

	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue.
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
		 */
		uint64_t	cfi_pending_first;
		uint64_t	cfi_pending_last;
		uint32_t	cfi_pending_mbcnt;
		uint32_t	cfi_pending_mbnum;
		uint32_t	cfi_tail_drop_cnt;
		/*
		 * cfi_pass_offset is the minimum of all the filters
		 */
		uint64_t	cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control
		 */
		struct cfil_queue	cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry	cfi_entries[MAX_CONTENT_FILTER];
	struct cfil_hash_entry	*cfi_hash_entry;
} __attribute__((aligned(8)));

#define	CFIF_DROP		0x0001	/* drop action applied */
#define	CFIF_CLOSE_WAIT		0x0002	/* waiting for filter to close */
#define	CFIF_SOCK_CLOSED	0x0004	/* socket is closed */
#define	CFIF_RETRY_INJECT_IN	0x0010	/* inject in failed */
#define	CFIF_RETRY_INJECT_OUT	0x0020	/* inject out failed */
#define	CFIF_SHUT_WR		0x0040	/* shutdown write */
#define	CFIF_SHUT_RD		0x0080	/* shutdown read */

#define	CFI_MASK_GENCNT		0xFFFFFFFF00000000	/* upper 32 bits */
#define	CFI_SHIFT_GENCNT	32
#define	CFI_MASK_FLOWHASH	0x00000000FFFFFFFF	/* lower 32 bits */
#define	CFI_SHIFT_FLOWHASH	0
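
/*
 * A cfil_sock_id for a TCP socket is thus composed of the socket's generation
 * count in the upper 32 bits and its flow hash in the lower 32 bits; see
 * cfil_socket_from_sock_id() below, which splits the id back into those parts.
 */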

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

#define	CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define	CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)

LIST_HEAD(cfilhashhead, cfil_hash_entry);
#define	CFILHASHSIZE 16
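/*
 * CFIL_HASH folds the flow 4-tuple into the hash value used to index the
 * per-socket UDP flow table (CFILHASHSIZE buckets; see struct cfil_db below).
 */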
#define	CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
#define	IS_UDP(so) (so && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
#define	UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
				  ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
#define	IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
					      cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
#define	IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))

/*
 * UDP Garbage Collection:
 */
static struct thread *cfil_udp_gc_thread;
#define	UDP_FLOW_GC_IDLE_TO		30	// Flow Idle Timeout in seconds
#define	UDP_FLOW_GC_ACTION_TO		10	// Flow Action Timeout (no action from user space) in seconds
#define	UDP_FLOW_GC_MAX_COUNT		100	// Max UDP flows to be handled per run
#define	UDP_FLOW_GC_RUN_INTERVAL_NSEC	(10 * NSEC_PER_SEC)	// GC wakes up every 10 seconds

/*
 * UDP flow queue thresholds
 */
#define	UDP_FLOW_GC_MBUF_CNT_MAX	(2 << MBSHIFT)	// Max mbuf byte count in flow queue (2MB)
#define	UDP_FLOW_GC_MBUF_NUM_MAX	(UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT)	// Max mbuf count in flow queue (1K)
#define	UDP_FLOW_GC_MBUF_SHIFT		5	// Shift to get 1/32 of platform limits
/*
 * UDP flow queue threshold globals:
 */
static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;

/*
 * struct cfil_hash_entry
 *
 * Hash entry for cfil_info
 */
struct cfil_hash_entry {
	LIST_ENTRY(cfil_hash_entry)	cfentry_link;
	struct cfil_info		*cfentry_cfil;
	u_short				cfentry_fport;
	u_short				cfentry_lport;
	sa_family_t			cfentry_family;
	u_int32_t			cfentry_flowhash;
	u_int32_t			cfentry_lastused;
	union {
		/* foreign host table entry */
		struct in_addr_4in6	addr46;
		struct in6_addr		addr6;
	} cfentry_faddr;
	union {
		/* local host table entry */
		struct in_addr_4in6	addr46;
		struct in6_addr		addr6;
	} cfentry_laddr;
};

/*
 * struct cfil_db
 *
 * For each UDP socket, this is a hash table maintaining all cfil_info structs
 * keyed by the flow 4-tuple <lport,fport,laddr,faddr>.
 */
struct cfil_db {
	struct socket		*cfdb_so;
	uint32_t		cfdb_count;		/* Number of total content filters */
	struct cfilhashhead	*cfdb_hashbase;
	u_long			cfdb_hashmask;
	struct cfil_hash_entry	*cfdb_only_entry;	/* Optimization for connected UDP */
};

/*
 * CFIL specific mbuf tag:
 * Save state of socket at the point of data entry into cfil.
 * Use saved state for reinjection at protocol layer.
 */
struct cfil_tag {
	union sockaddr_in_4_6	cfil_faddr;
	uint32_t		cfil_so_state_change_cnt;
	short			cfil_so_options;
};

#define	CFIL_HASH_ENTRY_ZONE_NAME	"cfil_entry_hash"
#define	CFIL_HASH_ENTRY_ZONE_MAX	1024
static struct zone *cfil_hash_entry_zone = NULL;

#define	CFIL_DB_ZONE_NAME	"cfil_db"
#define	CFIL_DB_ZONE_MAX	1024
static struct zone *cfil_db_zone = NULL;

struct cfil_stats cfil_stats;

/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;
int cfil_debug = 1;

// Debug controls added for selective debugging.
// Disabled for production. If enabled,
// these will have a performance impact.
#define	LIFECYCLE_DEBUG 0
#define	VERDICT_DEBUG 0

/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);

SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_stats, cfil_stats, "");

/*
 * Forward declaration to appease the compiler
 */
static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
	struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
	struct mbuf *, uint64_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in6_addr *, u_int16_t);

static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t);
static void cfil_info_free(struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

static unsigned int cfil_data_length(struct mbuf *, int *, int *);
static errno_t cfil_db_init(struct socket *);
static void cfil_db_free(struct socket *so);
struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
	struct mbuf *, struct mbuf *, uint32_t);
static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
static void cfil_sock_udp_is_closed(struct socket *);
static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
static int cfil_sock_udp_shutdown(struct socket *, int *);
static void cfil_sock_udp_close_wait(struct socket *);
static void cfil_sock_udp_buf_update(struct sockbuf *);
static int cfil_filters_udp_attached(struct socket *, bool);
static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
	struct in6_addr **, struct in6_addr **,
	u_int16_t *, u_int16_t *);
static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
	struct in_addr *, struct in_addr *,
	u_int16_t *, u_int16_t *);
static void cfil_info_log(int, struct cfil_info *, const char *);
void cfil_filter_show(u_int32_t);
void cfil_info_show(void);
bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
bool cfil_info_action_timed_out(struct cfil_info *, int);
bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
static void cfil_udp_gc_thread_func(void *, wait_result_t);
static void cfil_info_udp_expire(void *, wait_result_t);

bool check_port(struct sockaddr *, u_short);

/*
 * Content filter global read write lock
 */

static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_exclusive(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_shared(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
	void *lr_saved;
	boolean_t upgraded;

	lr_saved = __builtin_return_address(0);

	upgraded = lck_rw_lock_shared_to_exclusive(lck);
	if (upgraded) {
		cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
		cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
	}
	return (upgraded);
}

static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive_to_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
#if !MACH_ASSERT
#pragma unused(lck, exclusive)
#endif
	LCK_RW_ASSERT(lck,
		exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}
824 * Return the number of bytes in the mbuf chain using the same
825 * method as m_length() or sballoc()
827 * Returns data len - starting from PKT start
828 * - retmbcnt - optional param to get total mbuf bytes in chain
829 * - retmbnum - optional param to get number of mbufs in chain
832 cfil_data_length(struct mbuf
*m
, int *retmbcnt
, int *retmbnum
)
835 unsigned int pktlen
= 0;
839 // Locate the start of data
840 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
841 if (m0
->m_flags
& M_PKTHDR
)
845 CFIL_LOG(LOG_ERR
, "cfil_data_length: no M_PKTHDR");
850 if (retmbcnt
== NULL
&& retmbnum
== NULL
)
851 return (m_length(m
));
856 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
860 if (m0
->m_flags
& M_EXT
)
861 mbcnt
+= m0
->m_ext
.ext_size
;
873 cfil_data_start(struct mbuf
*m
)
877 // Locate the start of data
878 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
879 if (m0
->m_flags
& M_PKTHDR
)
886 * Common mbuf queue utilities
890 cfil_queue_init(struct cfil_queue
*cfq
)
894 MBUFQ_INIT(&cfq
->q_mq
);
897 static inline uint64_t
898 cfil_queue_drain(struct cfil_queue
*cfq
)
900 uint64_t drained
= cfq
->q_start
- cfq
->q_end
;
903 MBUFQ_DRAIN(&cfq
->q_mq
);
908 /* Return 1 when empty, 0 otherwise */
910 cfil_queue_empty(struct cfil_queue
*cfq
)
912 return (MBUFQ_EMPTY(&cfq
->q_mq
));
915 static inline uint64_t
916 cfil_queue_offset_first(struct cfil_queue
*cfq
)
918 return (cfq
->q_start
);
921 static inline uint64_t
922 cfil_queue_offset_last(struct cfil_queue
*cfq
)
927 static inline uint64_t
928 cfil_queue_len(struct cfil_queue
*cfq
)
930 return (cfq
->q_end
- cfq
->q_start
);
934 * Routines to verify some fundamental assumptions
938 cfil_queue_verify(struct cfil_queue
*cfq
)
943 uint64_t queuesize
= 0;
945 /* Verify offset are ordered */
946 VERIFY(cfq
->q_start
<= cfq
->q_end
);
949 * When queue is empty, the offsets are equal otherwise the offsets
952 VERIFY((MBUFQ_EMPTY(&cfq
->q_mq
) && cfq
->q_start
== cfq
->q_end
) ||
953 (!MBUFQ_EMPTY(&cfq
->q_mq
) &&
954 cfq
->q_start
!= cfq
->q_end
));
956 MBUFQ_FOREACH(chain
, &cfq
->q_mq
) {
957 size_t chainsize
= 0;
959 unsigned int mlen
= cfil_data_length(m
, NULL
, NULL
);
960 // skip the addr and control stuff if present
961 m
= cfil_data_start(m
);
964 m
== (void *)M_TAG_FREE_PATTERN
||
965 m
->m_next
== (void *)M_TAG_FREE_PATTERN
||
966 m
->m_nextpkt
== (void *)M_TAG_FREE_PATTERN
)
967 panic("%s - mq %p is free at %p", __func__
,
969 for (n
= m
; n
!= NULL
; n
= n
->m_next
) {
970 if (n
->m_type
!= MT_DATA
&&
971 n
->m_type
!= MT_HEADER
&&
972 n
->m_type
!= MT_OOBDATA
)
973 panic("%s - %p unsupported type %u", __func__
,
975 chainsize
+= n
->m_len
;
977 if (mlen
!= chainsize
)
978 panic("%s - %p m_length() %u != chainsize %lu",
979 __func__
, m
, mlen
, chainsize
);
980 queuesize
+= chainsize
;
982 if (queuesize
!= cfq
->q_end
- cfq
->q_start
)
983 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__
,
984 m
, queuesize
, cfq
->q_end
- cfq
->q_start
);
988 cfil_queue_enqueue(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
990 CFIL_QUEUE_VERIFY(cfq
);
992 MBUFQ_ENQUEUE(&cfq
->q_mq
, m
);
995 CFIL_QUEUE_VERIFY(cfq
);
999 cfil_queue_remove(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
1001 CFIL_QUEUE_VERIFY(cfq
);
1003 VERIFY(cfil_data_length(m
, NULL
, NULL
) == len
);
1005 MBUFQ_REMOVE(&cfq
->q_mq
, m
);
1006 MBUFQ_NEXT(m
) = NULL
;
1007 cfq
->q_start
+= len
;
1009 CFIL_QUEUE_VERIFY(cfq
);
1013 cfil_queue_first(struct cfil_queue
*cfq
)
1015 return (MBUFQ_FIRST(&cfq
->q_mq
));
1019 cfil_queue_next(struct cfil_queue
*cfq
, mbuf_t m
)
1022 return (MBUFQ_NEXT(m
));
1026 cfil_entry_buf_verify(struct cfe_buf
*cfe_buf
)
1028 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_ctl_q
);
1029 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_pending_q
);
1031 /* Verify the queues are ordered so that pending is before ctl */
1032 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
>= cfe_buf
->cfe_pending_q
.q_end
);
1034 /* The peek offset cannot be less than the pass offset */
1035 VERIFY(cfe_buf
->cfe_peek_offset
>= cfe_buf
->cfe_pass_offset
);
1037 /* Make sure we've updated the offset we peeked at */
1038 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
<= cfe_buf
->cfe_peeked
);
1042 cfil_entry_verify(struct cfil_entry
*entry
)
1044 cfil_entry_buf_verify(&entry
->cfe_snd
);
1045 cfil_entry_buf_verify(&entry
->cfe_rcv
);
1049 cfil_info_buf_verify(struct cfi_buf
*cfi_buf
)
1051 CFIL_QUEUE_VERIFY(&cfi_buf
->cfi_inject_q
);
1053 VERIFY(cfi_buf
->cfi_pending_first
<= cfi_buf
->cfi_pending_last
);
1054 VERIFY(cfi_buf
->cfi_pending_mbcnt
>= 0);
1058 cfil_info_verify(struct cfil_info
*cfil_info
)
1062 if (cfil_info
== NULL
)
1065 cfil_info_buf_verify(&cfil_info
->cfi_snd
);
1066 cfil_info_buf_verify(&cfil_info
->cfi_rcv
);
1068 for (i
= 0; i
< MAX_CONTENT_FILTER
; i
++)
1069 cfil_entry_verify(&cfil_info
->cfi_entries
[i
]);
1073 verify_content_filter(struct content_filter
*cfc
)
1075 struct cfil_entry
*entry
;
1078 VERIFY(cfc
->cf_sock_count
>= 0);
1080 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1082 VERIFY(cfc
== entry
->cfe_filter
);
1084 VERIFY(count
== cfc
->cf_sock_count
);
1088 * Kernel control socket callbacks
1091 cfil_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
1095 struct content_filter
*cfc
= NULL
;
1097 CFIL_LOG(LOG_NOTICE
, "");
1099 cfc
= zalloc(content_filter_zone
);
1101 CFIL_LOG(LOG_ERR
, "zalloc failed");
1105 bzero(cfc
, sizeof(struct content_filter
));
1107 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1108 if (content_filters
== NULL
) {
1109 struct content_filter
**tmp
;
1111 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1114 struct content_filter
**,
1115 MAX_CONTENT_FILTER
* sizeof(struct content_filter
*),
1119 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1121 if (tmp
== NULL
&& content_filters
== NULL
) {
1123 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1126 /* Another thread may have won the race */
1127 if (content_filters
!= NULL
)
1130 content_filters
= tmp
;
1133 if (sac
->sc_unit
== 0 || sac
->sc_unit
> MAX_CONTENT_FILTER
) {
1134 CFIL_LOG(LOG_ERR
, "bad sc_unit %u", sac
->sc_unit
);
1136 } else if (content_filters
[sac
->sc_unit
- 1] != NULL
) {
1137 CFIL_LOG(LOG_ERR
, "sc_unit %u in use", sac
->sc_unit
);
1141 * kernel control socket kcunit numbers start at 1
1143 content_filters
[sac
->sc_unit
- 1] = cfc
;
1145 cfc
->cf_kcref
= kctlref
;
1146 cfc
->cf_kcunit
= sac
->sc_unit
;
1147 TAILQ_INIT(&cfc
->cf_sock_entries
);
1150 cfil_active_count
++;
1152 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1154 if (error
!= 0 && cfc
!= NULL
)
1155 zfree(content_filter_zone
, cfc
);
1158 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_ok
);
1160 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_fail
);
1162 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1163 error
, cfil_active_count
, sac
->sc_unit
);
1169 cfil_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
)
1171 #pragma unused(kctlref)
1173 struct content_filter
*cfc
;
1174 struct cfil_entry
*entry
;
1175 uint64_t sock_flow_id
= 0;
1177 CFIL_LOG(LOG_NOTICE
, "");
1179 if (content_filters
== NULL
) {
1180 CFIL_LOG(LOG_ERR
, "no content filter");
1184 if (kcunit
> MAX_CONTENT_FILTER
) {
1185 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1186 kcunit
, MAX_CONTENT_FILTER
);
1191 cfc
= (struct content_filter
*)unitinfo
;
1195 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1196 if (content_filters
[kcunit
- 1] != cfc
|| cfc
->cf_kcunit
!= kcunit
) {
1197 CFIL_LOG(LOG_ERR
, "bad unit info %u)",
1199 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1202 cfc
->cf_flags
|= CFF_DETACHING
;
1204 * Remove all sockets from the filter
1206 while ((entry
= TAILQ_FIRST(&cfc
->cf_sock_entries
)) != NULL
) {
1207 cfil_rw_lock_assert_held(&cfil_lck_rw
, 1);
1209 verify_content_filter(cfc
);
1211 * Accept all outstanding data by pushing to next filter
1214 * TBD: Actually we should make sure all data has been pushed
1217 if (entry
->cfe_cfil_info
&& entry
->cfe_cfil_info
->cfi_so
) {
1218 struct cfil_info
*cfil_info
= entry
->cfe_cfil_info
;
1219 struct socket
*so
= cfil_info
->cfi_so
;
1220 sock_flow_id
= cfil_info
->cfi_sock_id
;
1222 /* Need to let data flow immediately */
1223 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
|
1227 * Respect locking hierarchy
1229 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1234 * When cfe_filter is NULL the filter is detached
1235 * and the entry has been removed from cf_sock_entries
1237 if ((so
->so_cfil
== NULL
&& so
->so_cfil_db
== NULL
) || entry
->cfe_filter
== NULL
) {
1238 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1242 (void) cfil_action_data_pass(so
, cfil_info
, kcunit
, 1,
1246 (void) cfil_action_data_pass(so
, cfil_info
, kcunit
, 0,
1250 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1253 * Check again to make sure if the cfil_info is still valid
1254 * as the socket may have been unlocked when when calling
1255 * cfil_acquire_sockbuf()
1257 if (entry
->cfe_filter
== NULL
||
1258 (so
->so_cfil
== NULL
&& cfil_db_get_cfil_info(so
->so_cfil_db
, sock_flow_id
) == NULL
)) {
1262 /* The filter is now detached */
1263 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
1265 cfil_info_log(LOG_DEBUG
, cfil_info
, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1267 CFIL_LOG(LOG_NOTICE
, "so %llx detached %u",
1268 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1269 if ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
1270 cfil_filters_attached(so
) == 0) {
1271 CFIL_LOG(LOG_NOTICE
, "so %llx waking",
1272 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1273 wakeup((caddr_t
)cfil_info
);
1277 * Remove the filter entry from the content filter
1278 * but leave the rest of the state intact as the queues
1279 * may not be empty yet
1281 entry
->cfe_filter
= NULL
;
1282 entry
->cfe_necp_control_unit
= 0;
1284 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1285 cfc
->cf_sock_count
--;
1287 socket_unlock(so
, 1);
1290 verify_content_filter(cfc
);
1292 VERIFY(cfc
->cf_sock_count
== 0);
1295 * Make filter inactive
1297 content_filters
[kcunit
- 1] = NULL
;
1298 cfil_active_count
--;
1299 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1301 zfree(content_filter_zone
, cfc
);
1304 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_ok
);
1306 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_fail
);
1308 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1309 error
, cfil_active_count
, kcunit
);
1315 * cfil_acquire_sockbuf()
1317 * Prevent any other thread from acquiring the sockbuf
1318 * We use sb_cfil_thread as a semaphore to prevent other threads from
1319 * messing with the sockbuf -- see sblock()
1320 * Note: We do not set SB_LOCK here because the thread may check or modify
1321 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1322 * sblock(), sbunlock() or sodefunct()
1325 cfil_acquire_sockbuf(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
1327 thread_t tp
= current_thread();
1328 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1329 lck_mtx_t
*mutex_held
;
1333 * Wait until no thread is holding the sockbuf and other content
1334 * filter threads have released the sockbuf
1336 while ((sb
->sb_flags
& SB_LOCK
) ||
1337 (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)) {
1338 if (so
->so_proto
->pr_getlock
!= NULL
)
1339 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1341 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1343 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1346 VERIFY(sb
->sb_wantlock
!= 0);
1348 msleep(&sb
->sb_flags
, mutex_held
, PSOCK
, "cfil_acquire_sockbuf",
1351 VERIFY(sb
->sb_wantlock
!= 0);
1355 * Use reference count for repetitive calls on same thread
1357 if (sb
->sb_cfil_refs
== 0) {
1358 VERIFY(sb
->sb_cfil_thread
== NULL
);
1359 VERIFY((sb
->sb_flags
& SB_LOCK
) == 0);
1361 sb
->sb_cfil_thread
= tp
;
1362 sb
->sb_flags
|= SB_LOCK
;
1366 /* We acquire the socket buffer when we need to cleanup */
1367 if (cfil_info
== NULL
) {
1368 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
1369 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1371 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1372 CFIL_LOG(LOG_ERR
, "so %llx drop set",
1373 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1381 cfil_release_sockbuf(struct socket
*so
, int outgoing
)
1383 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1384 thread_t tp
= current_thread();
1386 socket_lock_assert_owned(so
);
1388 if (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)
1389 panic("%s sb_cfil_thread %p not current %p", __func__
,
1390 sb
->sb_cfil_thread
, tp
);
1392 * Don't panic if we are defunct because SB_LOCK has
1393 * been cleared by sodefunct()
1395 if (!(so
->so_flags
& SOF_DEFUNCT
) && !(sb
->sb_flags
& SB_LOCK
))
1396 panic("%s SB_LOCK not set on %p", __func__
,
1399 * We can unlock when the thread unwinds to the last reference
1402 if (sb
->sb_cfil_refs
== 0) {
1403 sb
->sb_cfil_thread
= NULL
;
1404 sb
->sb_flags
&= ~SB_LOCK
;
1406 if (sb
->sb_wantlock
> 0)
1407 wakeup(&sb
->sb_flags
);
1412 cfil_sock_id_from_socket(struct socket
*so
)
1414 if ((so
->so_flags
& SOF_CONTENT_FILTER
) && so
->so_cfil
)
1415 return (so
->so_cfil
->cfi_sock_id
);
1417 return (CFIL_SOCK_ID_NONE
);
1421 cfil_socket_safe_lock(struct inpcb
*inp
)
1423 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1424 socket_lock(inp
->inp_socket
, 1);
1425 if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) != WNT_STOPUSING
) {
1428 socket_unlock(inp
->inp_socket
, 1);
1433 static struct socket
*
1434 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id
, bool udp_only
)
1436 struct socket
*so
= NULL
;
1437 u_int64_t gencnt
= cfil_sock_id
>> 32;
1438 u_int32_t flowhash
= (u_int32_t
)(cfil_sock_id
& 0x0ffffffff);
1439 struct inpcb
*inp
= NULL
;
1440 struct inpcbinfo
*pcbinfo
= NULL
;
1443 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id
, gencnt
, flowhash
);
1450 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1451 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1452 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1453 inp
->inp_socket
!= NULL
&&
1454 inp
->inp_flowhash
== flowhash
&&
1455 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
&&
1456 inp
->inp_socket
->so_cfil
!= NULL
) {
1457 if (cfil_socket_safe_lock(inp
))
1458 so
= inp
->inp_socket
;
1462 lck_rw_done(pcbinfo
->ipi_lock
);
1470 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1471 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1472 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1473 inp
->inp_socket
!= NULL
&&
1474 inp
->inp_socket
->so_cfil_db
!= NULL
&&
1475 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
) {
1476 if (cfil_socket_safe_lock(inp
))
1477 so
= inp
->inp_socket
;
1481 lck_rw_done(pcbinfo
->ipi_lock
);
1485 OSIncrementAtomic(&cfil_stats
.cfs_sock_id_not_found
);
1487 "no socket for sock_id %llx gencnt %llx flowhash %x",
1488 cfil_sock_id
, gencnt
, flowhash
);
1494 static struct socket
*
1495 cfil_socket_from_client_uuid(uuid_t necp_client_uuid
, bool *cfil_attached
)
1497 struct socket
*so
= NULL
;
1498 struct inpcb
*inp
= NULL
;
1499 struct inpcbinfo
*pcbinfo
= &tcbinfo
;
1501 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1502 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1503 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1504 inp
->inp_socket
!= NULL
&&
1505 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1506 *cfil_attached
= (inp
->inp_socket
->so_cfil
!= NULL
);
1507 if (cfil_socket_safe_lock(inp
))
1508 so
= inp
->inp_socket
;
1512 lck_rw_done(pcbinfo
->ipi_lock
);
1518 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1519 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1520 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1521 inp
->inp_socket
!= NULL
&&
1522 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1523 *cfil_attached
= (inp
->inp_socket
->so_cfil_db
!= NULL
);
1524 if (cfil_socket_safe_lock(inp
))
1525 so
= inp
->inp_socket
;
1529 lck_rw_done(pcbinfo
->ipi_lock
);
1536 cfil_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, mbuf_t m
,
1539 #pragma unused(kctlref, flags)
1541 struct cfil_msg_hdr
*msghdr
;
1542 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1544 struct cfil_msg_action
*action_msg
;
1545 struct cfil_entry
*entry
;
1546 struct cfil_info
*cfil_info
= NULL
;
1548 CFIL_LOG(LOG_INFO
, "");
1550 if (content_filters
== NULL
) {
1551 CFIL_LOG(LOG_ERR
, "no content filter");
1555 if (kcunit
> MAX_CONTENT_FILTER
) {
1556 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1557 kcunit
, MAX_CONTENT_FILTER
);
1562 if (m_length(m
) < sizeof(struct cfil_msg_hdr
)) {
1563 CFIL_LOG(LOG_ERR
, "too short %u", m_length(m
));
1567 msghdr
= (struct cfil_msg_hdr
*)mbuf_data(m
);
1568 if (msghdr
->cfm_version
!= CFM_VERSION_CURRENT
) {
1569 CFIL_LOG(LOG_ERR
, "bad version %u", msghdr
->cfm_version
);
1573 if (msghdr
->cfm_type
!= CFM_TYPE_ACTION
) {
1574 CFIL_LOG(LOG_ERR
, "bad type %u", msghdr
->cfm_type
);
1578 /* Validate action operation */
1579 switch (msghdr
->cfm_op
) {
1580 case CFM_OP_DATA_UPDATE
:
1582 &cfil_stats
.cfs_ctl_action_data_update
);
1585 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_drop
);
1587 case CFM_OP_BLESS_CLIENT
:
1588 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_bless_client
)) {
1589 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1591 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1596 error
= cfil_action_bless_client(kcunit
, msghdr
);
1599 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_op
);
1600 CFIL_LOG(LOG_ERR
, "bad op %u", msghdr
->cfm_op
);
1604 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_action
)) {
1605 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1607 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1612 cfil_rw_lock_shared(&cfil_lck_rw
);
1613 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1614 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1617 cfil_rw_unlock_shared(&cfil_lck_rw
);
1620 cfil_rw_unlock_shared(&cfil_lck_rw
);
1622 // Search for socket (TCP+UDP and lock so)
1623 so
= cfil_socket_from_sock_id(msghdr
->cfm_sock_id
, false);
1625 CFIL_LOG(LOG_NOTICE
, "bad sock_id %llx",
1626 msghdr
->cfm_sock_id
);
1631 cfil_info
= so
->so_cfil_db
!= NULL
?
1632 cfil_db_get_cfil_info(so
->so_cfil_db
, msghdr
->cfm_sock_id
) : so
->so_cfil
;
1634 if (cfil_info
== NULL
) {
1635 CFIL_LOG(LOG_NOTICE
, "so %llx <id %llu> not attached",
1636 (uint64_t)VM_KERNEL_ADDRPERM(so
), msghdr
->cfm_sock_id
);
1639 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1640 CFIL_LOG(LOG_NOTICE
, "so %llx drop set",
1641 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1645 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1646 if (entry
->cfe_filter
== NULL
) {
1647 CFIL_LOG(LOG_NOTICE
, "so %llx no filter",
1648 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1653 if (entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
)
1654 entry
->cfe_flags
|= CFEF_DATA_START
;
1657 "so %llx attached not sent for %u",
1658 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1663 microuptime(&entry
->cfe_last_action
);
1664 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_action
, &cfil_info
->cfi_first_event
, msghdr
->cfm_op
);
1666 action_msg
= (struct cfil_msg_action
*)msghdr
;
1668 switch (msghdr
->cfm_op
) {
1669 case CFM_OP_DATA_UPDATE
:
1671 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1672 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1673 cfil_info
->cfi_sock_id
,
1674 action_msg
->cfa_in_peek_offset
, action_msg
->cfa_in_pass_offset
,
1675 action_msg
->cfa_out_peek_offset
, action_msg
->cfa_out_pass_offset
);
1677 if (action_msg
->cfa_out_peek_offset
!= 0 ||
1678 action_msg
->cfa_out_pass_offset
!= 0)
1679 error
= cfil_action_data_pass(so
, cfil_info
, kcunit
, 1,
1680 action_msg
->cfa_out_pass_offset
,
1681 action_msg
->cfa_out_peek_offset
);
1682 if (error
== EJUSTRETURN
)
1686 if (action_msg
->cfa_in_peek_offset
!= 0 ||
1687 action_msg
->cfa_in_pass_offset
!= 0)
1688 error
= cfil_action_data_pass(so
, cfil_info
, kcunit
, 0,
1689 action_msg
->cfa_in_pass_offset
,
1690 action_msg
->cfa_in_peek_offset
);
1691 if (error
== EJUSTRETURN
)
1696 error
= cfil_action_drop(so
, cfil_info
, kcunit
);
1704 socket_unlock(so
, 1);
1709 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_ok
);
1711 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_bad
);
1717 cfil_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1718 int opt
, void *data
, size_t *len
)
1720 #pragma unused(kctlref, opt)
1721 struct cfil_info
*cfil_info
= NULL
;
1723 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1725 CFIL_LOG(LOG_NOTICE
, "");
1727 cfil_rw_lock_shared(&cfil_lck_rw
);
1729 if (content_filters
== NULL
) {
1730 CFIL_LOG(LOG_ERR
, "no content filter");
1734 if (kcunit
> MAX_CONTENT_FILTER
) {
1735 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1736 kcunit
, MAX_CONTENT_FILTER
);
1740 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1741 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1747 case CFIL_OPT_NECP_CONTROL_UNIT
:
1748 if (*len
< sizeof(uint32_t)) {
1749 CFIL_LOG(LOG_ERR
, "len too small %lu", *len
);
1754 *(uint32_t *)data
= cfc
->cf_necp_control_unit
;
1757 case CFIL_OPT_GET_SOCKET_INFO
:
1758 if (*len
!= sizeof(struct cfil_opt_sock_info
)) {
1759 CFIL_LOG(LOG_ERR
, "len does not match %lu", *len
);
1764 CFIL_LOG(LOG_ERR
, "data not passed");
1769 struct cfil_opt_sock_info
*sock_info
=
1770 (struct cfil_opt_sock_info
*) data
;
1772 // Unlock here so that we never hold both cfil_lck_rw and the
1773 // socket_lock at the same time. Otherwise, this can deadlock
1774 // because soclose() takes the socket_lock and then exclusive
1775 // cfil_lck_rw and we require the opposite order.
1777 // WARNING: Be sure to never use anything protected
1778 // by cfil_lck_rw beyond this point.
1779 // WARNING: Be sure to avoid fallthrough and
1780 // goto return_already_unlocked from this branch.
1781 cfil_rw_unlock_shared(&cfil_lck_rw
);
1783 // Search (TCP+UDP) and lock socket
1784 struct socket
*sock
=
1785 cfil_socket_from_sock_id(sock_info
->cfs_sock_id
, false);
1788 CFIL_LOG(LOG_ERR
, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
1789 sock_info
->cfs_sock_id
);
1792 goto return_already_unlocked
;
1795 cfil_info
= (sock
->so_cfil_db
!= NULL
) ?
1796 cfil_db_get_cfil_info(sock
->so_cfil_db
, sock_info
->cfs_sock_id
) : sock
->so_cfil
;
1798 if (cfil_info
== NULL
) {
1800 CFIL_LOG(LOG_ERR
, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
1801 (uint64_t)VM_KERNEL_ADDRPERM(sock
));
1804 socket_unlock(sock
, 1);
1805 goto return_already_unlocked
;
1808 // Fill out family, type, and protocol
1809 sock_info
->cfs_sock_family
= sock
->so_proto
->pr_domain
->dom_family
;
1810 sock_info
->cfs_sock_type
= sock
->so_proto
->pr_type
;
1811 sock_info
->cfs_sock_protocol
= sock
->so_proto
->pr_protocol
;
1813 // Source and destination addresses
1814 struct inpcb
*inp
= sotoinpcb(sock
);
1815 if (inp
->inp_vflag
& INP_IPV6
) {
1816 struct in6_addr
*laddr
= NULL
, *faddr
= NULL
;
1817 u_int16_t lport
= 0, fport
= 0;
1819 cfil_get_flow_address_v6(cfil_info
->cfi_hash_entry
, inp
,
1820 &laddr
, &faddr
, &lport
, &fport
);
1821 fill_ip6_sockaddr_4_6(&sock_info
->cfs_local
, laddr
, lport
);
1822 fill_ip6_sockaddr_4_6(&sock_info
->cfs_remote
, faddr
, fport
);
1823 } else if (inp
->inp_vflag
& INP_IPV4
) {
1824 struct in_addr laddr
= {0}, faddr
= {0};
1825 u_int16_t lport
= 0, fport
= 0;
1827 cfil_get_flow_address(cfil_info
->cfi_hash_entry
, inp
,
1828 &laddr
, &faddr
, &lport
, &fport
);
1829 fill_ip_sockaddr_4_6(&sock_info
->cfs_local
, laddr
, lport
);
1830 fill_ip_sockaddr_4_6(&sock_info
->cfs_remote
, faddr
, fport
);
1834 sock_info
->cfs_pid
= sock
->last_pid
;
1835 memcpy(sock_info
->cfs_uuid
, sock
->last_uuid
, sizeof(uuid_t
));
1837 if (sock
->so_flags
& SOF_DELEGATED
) {
1838 sock_info
->cfs_e_pid
= sock
->e_pid
;
1839 memcpy(sock_info
->cfs_e_uuid
, sock
->e_uuid
, sizeof(uuid_t
));
1841 sock_info
->cfs_e_pid
= sock
->last_pid
;
1842 memcpy(sock_info
->cfs_e_uuid
, sock
->last_uuid
, sizeof(uuid_t
));
1845 socket_unlock(sock
, 1);
1847 goto return_already_unlocked
;
1849 error
= ENOPROTOOPT
;
1853 cfil_rw_unlock_shared(&cfil_lck_rw
);
1857 return_already_unlocked
:
1863 cfil_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1864 int opt
, void *data
, size_t len
)
1866 #pragma unused(kctlref, opt)
1868 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1870 CFIL_LOG(LOG_NOTICE
, "");
1872 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1874 if (content_filters
== NULL
) {
1875 CFIL_LOG(LOG_ERR
, "no content filter");
1879 if (kcunit
> MAX_CONTENT_FILTER
) {
1880 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1881 kcunit
, MAX_CONTENT_FILTER
);
1885 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1886 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1892 case CFIL_OPT_NECP_CONTROL_UNIT
:
1893 if (len
< sizeof(uint32_t)) {
1894 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1895 "len too small %lu", len
);
1899 if (cfc
->cf_necp_control_unit
!= 0) {
1900 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1902 cfc
->cf_necp_control_unit
);
1906 cfc
->cf_necp_control_unit
= *(uint32_t *)data
;
1909 error
= ENOPROTOOPT
;
1913 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1920 cfil_ctl_rcvd(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, int flags
)
1922 #pragma unused(kctlref, flags)
1923 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1924 struct socket
*so
= NULL
;
1926 struct cfil_entry
*entry
;
1927 struct cfil_info
*cfil_info
= NULL
;
1929 CFIL_LOG(LOG_INFO
, "");
1931 if (content_filters
== NULL
) {
1932 CFIL_LOG(LOG_ERR
, "no content filter");
1933 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1936 if (kcunit
> MAX_CONTENT_FILTER
) {
1937 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1938 kcunit
, MAX_CONTENT_FILTER
);
1939 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1942 cfil_rw_lock_shared(&cfil_lck_rw
);
1943 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1944 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1946 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1949 /* Let's assume the flow control is lifted */
1950 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
1951 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
1952 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1954 cfc
->cf_flags
&= ~CFF_FLOW_CONTROLLED
;
1956 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw
);
1957 LCK_RW_ASSERT(&cfil_lck_rw
, LCK_RW_ASSERT_SHARED
);
1960 * Flow control will be raised again as soon as an entry cannot enqueue
1961 * to the kernel control socket
1963 while ((cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) == 0) {
1964 verify_content_filter(cfc
);
1966 cfil_rw_lock_assert_held(&cfil_lck_rw
, 0);
1968 /* Find an entry that is flow controlled */
1969 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1970 if (entry
->cfe_cfil_info
== NULL
||
1971 entry
->cfe_cfil_info
->cfi_so
== NULL
)
1973 if ((entry
->cfe_flags
& CFEF_FLOW_CONTROLLED
) == 0)
1979 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_flow_lift
);
1981 cfil_info
= entry
->cfe_cfil_info
;
1982 so
= cfil_info
->cfi_so
;
1984 cfil_rw_unlock_shared(&cfil_lck_rw
);
1988 error
= cfil_acquire_sockbuf(so
, cfil_info
, 1);
1990 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, 1);
1991 cfil_release_sockbuf(so
, 1);
1995 error
= cfil_acquire_sockbuf(so
, cfil_info
, 0);
1997 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, 0);
1998 cfil_release_sockbuf(so
, 0);
2001 socket_lock_assert_owned(so
);
2002 socket_unlock(so
, 1);
2004 cfil_rw_lock_shared(&cfil_lck_rw
);
2007 cfil_rw_unlock_shared(&cfil_lck_rw
);
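/*
 * Sketch of the shared-to-exclusive upgrade idiom used by
 * cfil_ctl_rcvd() above when clearing CFF_FLOW_CONTROLLED
 * (illustrative only): if the upgrade fails, the shared lock has
 * already been dropped, so the exclusive lock must be taken from
 * scratch before modifying cf_flags, then downgraded back to shared.
 */
#if 0
    cfil_rw_lock_shared(&cfil_lck_rw);
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
            cfil_rw_lock_exclusive(&cfil_lck_rw); /* upgrade failed, shared lock was released */
        cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
        cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
    }
    cfil_rw_unlock_shared(&cfil_lck_rw);
#endif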
2013 struct kern_ctl_reg kern_ctl
;
2015 vm_size_t content_filter_size
= 0; /* size of content_filter */
2016 vm_size_t cfil_info_size
= 0; /* size of cfil_info */
2017 vm_size_t cfil_hash_entry_size
= 0; /* size of cfil_hash_entry */
2018 vm_size_t cfil_db_size
= 0; /* size of cfil_db */
2019 unsigned int mbuf_limit
= 0;
2021 CFIL_LOG(LOG_NOTICE
, "");
2024 * Compile time verifications
2026 _CASSERT(CFIL_MAX_FILTER_COUNT
== MAX_CONTENT_FILTER
);
2027 _CASSERT(sizeof(struct cfil_filter_stat
) % sizeof(uint32_t) == 0);
2028 _CASSERT(sizeof(struct cfil_entry_stat
) % sizeof(uint32_t) == 0);
2029 _CASSERT(sizeof(struct cfil_sock_stat
) % sizeof(uint32_t) == 0);
	 * Run-time verifications
2034 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_enqueued
,
2036 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_enqueued
,
2038 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_peeked
,
2040 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_peeked
,
2043 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_in_enqueued
,
2045 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_out_enqueued
,
2048 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_enqueued
,
2050 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_enqueued
,
2052 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_passed
,
2054 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_passed
,
2058 * Zone for content filters kernel control sockets
2060 content_filter_size
= sizeof(struct content_filter
);
2061 content_filter_zone
= zinit(content_filter_size
,
2062 CONTENT_FILTER_ZONE_MAX
* content_filter_size
,
2064 CONTENT_FILTER_ZONE_NAME
);
2065 if (content_filter_zone
== NULL
) {
2066 panic("%s: zinit(%s) failed", __func__
,
2067 CONTENT_FILTER_ZONE_NAME
);
2070 zone_change(content_filter_zone
, Z_CALLERACCT
, FALSE
);
2071 zone_change(content_filter_zone
, Z_EXPAND
, TRUE
);
2074 * Zone for per socket content filters
2076 cfil_info_size
= sizeof(struct cfil_info
);
2077 cfil_info_zone
= zinit(cfil_info_size
,
2078 CFIL_INFO_ZONE_MAX
* cfil_info_size
,
2080 CFIL_INFO_ZONE_NAME
);
2081 if (cfil_info_zone
== NULL
) {
2082 panic("%s: zinit(%s) failed", __func__
, CFIL_INFO_ZONE_NAME
);
2085 zone_change(cfil_info_zone
, Z_CALLERACCT
, FALSE
);
2086 zone_change(cfil_info_zone
, Z_EXPAND
, TRUE
);
2089 * Zone for content filters cfil hash entries and db
2091 cfil_hash_entry_size
= sizeof(struct cfil_hash_entry
);
2092 cfil_hash_entry_zone
= zinit(cfil_hash_entry_size
,
2093 CFIL_HASH_ENTRY_ZONE_MAX
* cfil_hash_entry_size
,
2095 CFIL_HASH_ENTRY_ZONE_NAME
);
2096 if (cfil_hash_entry_zone
== NULL
) {
2097 panic("%s: zinit(%s) failed", __func__
, CFIL_HASH_ENTRY_ZONE_NAME
);
2100 zone_change(cfil_hash_entry_zone
, Z_CALLERACCT
, FALSE
);
2101 zone_change(cfil_hash_entry_zone
, Z_EXPAND
, TRUE
);
2103 cfil_db_size
= sizeof(struct cfil_db
);
2104 cfil_db_zone
= zinit(cfil_db_size
,
2105 CFIL_DB_ZONE_MAX
* cfil_db_size
,
2108 if (cfil_db_zone
== NULL
) {
2109 panic("%s: zinit(%s) failed", __func__
, CFIL_DB_ZONE_NAME
);
2112 zone_change(cfil_db_zone
, Z_CALLERACCT
, FALSE
);
2113 zone_change(cfil_db_zone
, Z_EXPAND
, TRUE
);
2118 cfil_lck_grp_attr
= lck_grp_attr_alloc_init();
2119 if (cfil_lck_grp_attr
== NULL
) {
2120 panic("%s: lck_grp_attr_alloc_init failed", __func__
);
2123 cfil_lck_grp
= lck_grp_alloc_init("content filter",
2125 if (cfil_lck_grp
== NULL
) {
2126 panic("%s: lck_grp_alloc_init failed", __func__
);
2129 cfil_lck_attr
= lck_attr_alloc_init();
2130 if (cfil_lck_attr
== NULL
) {
2131 panic("%s: lck_attr_alloc_init failed", __func__
);
2134 lck_rw_init(&cfil_lck_rw
, cfil_lck_grp
, cfil_lck_attr
);
2136 TAILQ_INIT(&cfil_sock_head
);
	/*
	 * Register kernel control
	 */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
		sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
	kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_connect = cfil_ctl_connect;
	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
	kern_ctl.ctl_send = cfil_ctl_send;
	kern_ctl.ctl_getopt = cfil_ctl_getopt;
	kern_ctl.ctl_setopt = cfil_ctl_setopt;
	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
	error = ctl_register(&kern_ctl, &cfil_kctlref);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
		return;
	}

	// Spawn thread for garbage collection
	if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
		&cfil_udp_gc_thread) != KERN_SUCCESS) {
		panic_plain("%s: Can't create UDP GC thread", __func__);
		/* NOTREACHED */
	}
	/* this must not fail */
	VERIFY(cfil_udp_gc_thread != NULL);

	// Set UDP per-flow mbuf thresholds to 1/32 of platform max
	mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
	cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
	cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
}
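/*
 * Worked example for the UDP per-flow mbuf thresholds computed above
 * (numbers hypothetical): with nmbclusters = 65536 and 2KB clusters
 * (MCLSHIFT = 11), the platform mbuf space is 65536 << 11 = 128MB.
 * Shifting that right by UDP_FLOW_GC_MBUF_SHIFT (1/32) gives a 4MB
 * byte budget per flow, and that budget >> MCLSHIFT (2048 clusters)
 * becomes the per-flow mbuf count limit, assuming the result is not
 * clamped by UDP_FLOW_GC_MBUF_CNT_MAX.
 */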
2175 cfil_info_alloc(struct socket
*so
, struct cfil_hash_entry
*hash_entry
)
2178 struct cfil_info
*cfil_info
= NULL
;
2179 struct inpcb
*inp
= sotoinpcb(so
);
2181 CFIL_LOG(LOG_INFO
, "");
2183 socket_lock_assert_owned(so
);
2185 cfil_info
= zalloc(cfil_info_zone
);
2186 if (cfil_info
== NULL
)
2188 bzero(cfil_info
, sizeof(struct cfil_info
));
2190 cfil_queue_init(&cfil_info
->cfi_snd
.cfi_inject_q
);
2191 cfil_queue_init(&cfil_info
->cfi_rcv
.cfi_inject_q
);
2193 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2194 struct cfil_entry
*entry
;
2196 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2197 entry
->cfe_cfil_info
= cfil_info
;
2199 /* Initialize the filter entry */
2200 entry
->cfe_filter
= NULL
;
2201 entry
->cfe_flags
= 0;
2202 entry
->cfe_necp_control_unit
= 0;
2203 entry
->cfe_snd
.cfe_pass_offset
= 0;
2204 entry
->cfe_snd
.cfe_peek_offset
= 0;
2205 entry
->cfe_snd
.cfe_peeked
= 0;
2206 entry
->cfe_rcv
.cfe_pass_offset
= 0;
2207 entry
->cfe_rcv
.cfe_peek_offset
= 0;
2208 entry
->cfe_rcv
.cfe_peeked
= 0;
		 * Timestamp the last action to avoid prematurely
		 * triggering garbage collection
2213 microuptime(&entry
->cfe_last_action
);
2215 cfil_queue_init(&entry
->cfe_snd
.cfe_pending_q
);
2216 cfil_queue_init(&entry
->cfe_rcv
.cfe_pending_q
);
2217 cfil_queue_init(&entry
->cfe_snd
.cfe_ctl_q
);
2218 cfil_queue_init(&entry
->cfe_rcv
.cfe_ctl_q
);
2221 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2224 * Create a cfi_sock_id that's not the socket pointer!
2227 if (hash_entry
== NULL
) {
2228 // This is the TCP case, cfil_info is tracked per socket
2229 if (inp
->inp_flowhash
== 0)
2230 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
2232 so
->so_cfil
= cfil_info
;
2233 cfil_info
->cfi_so
= so
;
2234 cfil_info
->cfi_sock_id
=
2235 ((so
->so_gencnt
<< 32) | inp
->inp_flowhash
);
2237 // This is the UDP case, cfil_info is tracked in per-socket hash
2238 cfil_info
->cfi_so
= so
;
2239 hash_entry
->cfentry_cfil
= cfil_info
;
2240 cfil_info
->cfi_hash_entry
= hash_entry
;
2241 cfil_info
->cfi_sock_id
= ((so
->so_gencnt
<< 32) | (hash_entry
->cfentry_flowhash
& 0xffffffff));
2242 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2243 inp
->inp_flowhash
, so
->so_gencnt
, hash_entry
->cfentry_flowhash
, cfil_info
->cfi_sock_id
);
2245 // Wake up gc thread if this is first flow added
2246 if (cfil_sock_udp_attached_count
== 0) {
2247 thread_wakeup((caddr_t
)&cfil_sock_udp_attached_count
);
2250 cfil_sock_udp_attached_count
++;
2253 TAILQ_INSERT_TAIL(&cfil_sock_head
, cfil_info
, cfi_link
);
2255 cfil_sock_attached_count
++;
2257 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2260 if (cfil_info
!= NULL
)
2261 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_ok
);
2263 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_fail
);
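/*
 * Illustrative sketch (hypothetical helpers, not used elsewhere in this
 * file): the cfi_sock_id built by cfil_info_alloc() above packs the
 * socket generation count in the upper 32 bits and the flow hash in the
 * lower 32 bits, so either half can be recovered with a shift or mask.
 */
#if 0
static inline uint64_t
cfil_make_sock_id(uint64_t gencnt, uint32_t flowhash)
{
    return ((gencnt << 32) | flowhash);
}

static inline uint32_t
cfil_sock_id_flowhash(uint64_t sock_id)
{
    return ((uint32_t)(sock_id & 0xffffffff));
}
#endif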
2269 cfil_info_attach_unit(struct socket
*so
, uint32_t filter_control_unit
, struct cfil_info
*cfil_info
)
2274 CFIL_LOG(LOG_INFO
, "");
2276 socket_lock_assert_owned(so
);
2278 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2281 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
2283 struct content_filter
*cfc
= content_filters
[kcunit
- 1];
2284 struct cfil_entry
*entry
;
2288 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
2291 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2293 entry
->cfe_filter
= cfc
;
2294 entry
->cfe_necp_control_unit
= filter_control_unit
;
2295 TAILQ_INSERT_TAIL(&cfc
->cf_sock_entries
, entry
, cfe_link
);
2296 cfc
->cf_sock_count
++;
2297 verify_content_filter(cfc
);
2299 entry
->cfe_flags
|= CFEF_CFIL_ATTACHED
;
2303 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2309 cfil_info_free(struct cfil_info
*cfil_info
)
2312 uint64_t in_drain
= 0;
2313 uint64_t out_drained
= 0;
2315 if (cfil_info
== NULL
)
2318 CFIL_LOG(LOG_INFO
, "");
2320 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2323 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
2325 struct cfil_entry
*entry
;
2326 struct content_filter
*cfc
;
2328 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2330 /* Don't be silly and try to detach twice */
2331 if (entry
->cfe_filter
== NULL
)
2334 cfc
= content_filters
[kcunit
- 1];
2336 VERIFY(cfc
== entry
->cfe_filter
);
2338 entry
->cfe_filter
= NULL
;
2339 entry
->cfe_necp_control_unit
= 0;
2340 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
2341 cfc
->cf_sock_count
--;
2343 verify_content_filter(cfc
);
2345 if (cfil_info
->cfi_hash_entry
!= NULL
)
2346 cfil_sock_udp_attached_count
--;
2347 cfil_sock_attached_count
--;
2348 TAILQ_REMOVE(&cfil_sock_head
, cfil_info
, cfi_link
);
2350 out_drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
2351 in_drain
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
2353 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2354 struct cfil_entry
*entry
;
2356 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2357 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
2358 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_pending_q
);
2359 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
2360 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_ctl_q
);
2362 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2365 OSIncrementAtomic(&cfil_stats
.cfs_flush_out_free
);
2367 OSIncrementAtomic(&cfil_stats
.cfs_flush_in_free
);
2369 zfree(cfil_info_zone
, cfil_info
);
2373 * Entry point from Sockets layer
2374 * The socket is locked.
2377 cfil_sock_attach(struct socket
*so
)
2380 uint32_t filter_control_unit
;
2382 socket_lock_assert_owned(so
);
2384 /* Limit ourselves to TCP that are not MPTCP subflows */
2385 if ((so
->so_proto
->pr_domain
->dom_family
!= PF_INET
&&
2386 so
->so_proto
->pr_domain
->dom_family
!= PF_INET6
) ||
2387 so
->so_proto
->pr_type
!= SOCK_STREAM
||
2388 so
->so_proto
->pr_protocol
!= IPPROTO_TCP
||
2389 (so
->so_flags
& SOF_MP_SUBFLOW
) != 0 ||
2390 (so
->so_flags1
& SOF1_CONTENT_FILTER_SKIP
) != 0)
2393 filter_control_unit
= necp_socket_get_content_filter_control_unit(so
);
2394 if (filter_control_unit
== 0)
2397 if ((filter_control_unit
& NECP_MASK_USERSPACE_ONLY
) != 0) {
2398 OSIncrementAtomic(&cfil_stats
.cfs_sock_userspace_only
);
2401 if (cfil_active_count
== 0) {
2402 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_in_vain
);
2405 if (so
->so_cfil
!= NULL
) {
2406 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_already
);
2407 CFIL_LOG(LOG_ERR
, "already attached");
2409 cfil_info_alloc(so
, NULL
);
2410 if (so
->so_cfil
== NULL
) {
2412 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_no_mem
);
2416 if (cfil_info_attach_unit(so
, filter_control_unit
, so
->so_cfil
) == 0) {
2417 CFIL_LOG(LOG_ERR
, "cfil_info_attach_unit(%u) failed",
2418 filter_control_unit
);
2419 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_failed
);
2422 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u sockID %llx",
2423 (uint64_t)VM_KERNEL_ADDRPERM(so
),
2424 filter_control_unit
, so
->so_cfil
->cfi_sock_id
);
2426 so
->so_flags
|= SOF_CONTENT_FILTER
;
2427 OSIncrementAtomic(&cfil_stats
.cfs_sock_attached
);
2429 /* Hold a reference on the socket */
2432 error
= cfil_dispatch_attach_event(so
, so
->so_cfil
, filter_control_unit
);
2433 /* We can recover from flow control or out of memory errors */
2434 if (error
== ENOBUFS
|| error
== ENOMEM
)
2436 else if (error
!= 0)
2439 CFIL_INFO_VERIFY(so
->so_cfil
);
/*
 * Entry point from Sockets layer
 * The socket is locked.
 */
errno_t
cfil_sock_detach(struct socket *so)
{
	if (so->so_cfil) {
		if (so->so_flags & SOF_CONTENT_FILTER) {
			so->so_flags &= ~SOF_CONTENT_FILTER;
			VERIFY(so->so_usecount > 0);
			so->so_usecount--;
		}
		cfil_info_free(so->so_cfil);
		so->so_cfil = NULL;
		OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
	}
	return (0);
}
2470 cfil_dispatch_attach_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t filter_control_unit
)
2473 struct cfil_entry
*entry
= NULL
;
2474 struct cfil_msg_sock_attached msg_attached
;
2476 struct content_filter
*cfc
= NULL
;
2478 socket_lock_assert_owned(so
);
2480 cfil_rw_lock_shared(&cfil_lck_rw
);
2482 if (so
->so_proto
== NULL
|| so
->so_proto
->pr_domain
== NULL
) {
2487 * Find the matching filter unit
2489 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2490 cfc
= content_filters
[kcunit
- 1];
2494 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
2496 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2497 if (entry
->cfe_filter
== NULL
)
2500 VERIFY(cfc
== entry
->cfe_filter
);
2505 if (entry
== NULL
|| entry
->cfe_filter
== NULL
)
2508 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
))
2511 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u kcunit %u",
2512 (uint64_t)VM_KERNEL_ADDRPERM(so
), filter_control_unit
, kcunit
);
2514 /* Would be wasteful to try when flow controlled */
2515 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2520 bzero(&msg_attached
, sizeof(struct cfil_msg_sock_attached
));
2521 msg_attached
.cfs_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_attached
);
2522 msg_attached
.cfs_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2523 msg_attached
.cfs_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2524 msg_attached
.cfs_msghdr
.cfm_op
= CFM_OP_SOCKET_ATTACHED
;
2525 msg_attached
.cfs_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2527 msg_attached
.cfs_sock_family
= so
->so_proto
->pr_domain
->dom_family
;
2528 msg_attached
.cfs_sock_type
= so
->so_proto
->pr_type
;
2529 msg_attached
.cfs_sock_protocol
= so
->so_proto
->pr_protocol
;
2530 msg_attached
.cfs_pid
= so
->last_pid
;
2531 memcpy(msg_attached
.cfs_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2532 if (so
->so_flags
& SOF_DELEGATED
) {
2533 msg_attached
.cfs_e_pid
= so
->e_pid
;
2534 memcpy(msg_attached
.cfs_e_uuid
, so
->e_uuid
, sizeof(uuid_t
));
2536 msg_attached
.cfs_e_pid
= so
->last_pid
;
2537 memcpy(msg_attached
.cfs_e_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2541 CFIL_LOG(LOG_DEBUG
, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
2542 entry
->cfe_cfil_info
->cfi_sock_id
);
2545 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2546 entry
->cfe_filter
->cf_kcunit
,
2548 sizeof(struct cfil_msg_sock_attached
),
2551 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d", error
);
2554 microuptime(&entry
->cfe_last_event
);
2555 cfil_info
->cfi_first_event
.tv_sec
= entry
->cfe_last_event
.tv_sec
;
2556 cfil_info
->cfi_first_event
.tv_usec
= entry
->cfe_last_event
.tv_usec
;
2558 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
;
2559 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_ok
);
2562 /* We can recover from flow control */
2563 if (error
== ENOBUFS
) {
2564 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2565 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_flow_control
);
2567 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2568 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2570 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2572 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2575 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_fail
);
2577 cfil_rw_unlock_shared(&cfil_lck_rw
);
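/*
 * Illustrative user-space sketch (not kernel code): reading the
 * CFM_OP_SOCKET_ATTACHED event that cfil_dispatch_attach_event()
 * enqueues on the kernel control socket.  kctl_fd is a hypothetical
 * connected control socket; partial reads and error handling are
 * omitted.
 */
#if 0
    struct cfil_msg_sock_attached attached;
    ssize_t n = recv(kctl_fd, &attached, sizeof(attached), 0);

    if (n == (ssize_t)sizeof(attached) &&
        attached.cfs_msghdr.cfm_op == CFM_OP_SOCKET_ATTACHED) {
        /* cfs_sock_id identifies this flow in every later message */
        printf("attached sock_id %llu pid %d family %d\n",
            (unsigned long long)attached.cfs_msghdr.cfm_sock_id,
            (int)attached.cfs_pid, (int)attached.cfs_sock_family);
    }
#endif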
2583 cfil_dispatch_disconnect_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
2586 struct mbuf
*msg
= NULL
;
2587 struct cfil_entry
*entry
;
2588 struct cfe_buf
*entrybuf
;
2589 struct cfil_msg_hdr msg_disconnected
;
2590 struct content_filter
*cfc
;
2592 socket_lock_assert_owned(so
);
2594 cfil_rw_lock_shared(&cfil_lck_rw
);
2596 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2598 entrybuf
= &entry
->cfe_snd
;
2600 entrybuf
= &entry
->cfe_rcv
;
2602 cfc
= entry
->cfe_filter
;
2606 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2607 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2610 * Send the disconnection event once
2612 if ((outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
)) ||
2613 (!outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
))) {
2614 CFIL_LOG(LOG_INFO
, "so %llx disconnect already sent",
2615 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2620 * We're not disconnected as long as some data is waiting
2621 * to be delivered to the filter
2623 if (outgoing
&& cfil_queue_empty(&entrybuf
->cfe_ctl_q
) == 0) {
2624 CFIL_LOG(LOG_INFO
, "so %llx control queue not empty",
2625 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2629 /* Would be wasteful to try when flow controlled */
2630 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2636 cfil_info_log(LOG_ERR
, cfil_info
, outgoing
?
2637 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
2638 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
2641 bzero(&msg_disconnected
, sizeof(struct cfil_msg_hdr
));
2642 msg_disconnected
.cfm_len
= sizeof(struct cfil_msg_hdr
);
2643 msg_disconnected
.cfm_version
= CFM_VERSION_CURRENT
;
2644 msg_disconnected
.cfm_type
= CFM_TYPE_EVENT
;
2645 msg_disconnected
.cfm_op
= outgoing
? CFM_OP_DISCONNECT_OUT
:
2646 CFM_OP_DISCONNECT_IN
;
2647 msg_disconnected
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2648 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2649 entry
->cfe_filter
->cf_kcunit
,
2651 sizeof(struct cfil_msg_hdr
),
2654 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
2658 microuptime(&entry
->cfe_last_event
);
2659 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_event
, &cfil_info
->cfi_first_event
, msg_disconnected
.cfm_op
);
2661 /* Remember we have sent the disconnection message */
2663 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_OUT
;
2664 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_out_event_ok
);
2666 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_IN
;
2667 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_in_event_ok
);
2670 if (error
== ENOBUFS
) {
2671 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2673 &cfil_stats
.cfs_disconnect_event_flow_control
);
2675 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2676 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2678 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2680 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2684 &cfil_stats
.cfs_disconnect_event_fail
);
2686 cfil_rw_unlock_shared(&cfil_lck_rw
);
2692 cfil_dispatch_closed_event(struct socket
*so
, struct cfil_info
*cfil_info
, int kcunit
)
2694 struct cfil_entry
*entry
;
2695 struct cfil_msg_sock_closed msg_closed
;
2697 struct content_filter
*cfc
;
2699 socket_lock_assert_owned(so
);
2701 cfil_rw_lock_shared(&cfil_lck_rw
);
2703 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2704 cfc
= entry
->cfe_filter
;
2708 CFIL_LOG(LOG_INFO
, "so %llx kcunit %d",
2709 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
2711 /* Would be wasteful to try when flow controlled */
2712 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2717 * Send a single closed message per filter
2719 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_CLOSED
) != 0)
2721 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0)
2724 microuptime(&entry
->cfe_last_event
);
2725 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_event
, &cfil_info
->cfi_first_event
, CFM_OP_SOCKET_CLOSED
);
2727 bzero(&msg_closed
, sizeof(struct cfil_msg_sock_closed
));
2728 msg_closed
.cfc_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_closed
);
2729 msg_closed
.cfc_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2730 msg_closed
.cfc_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2731 msg_closed
.cfc_msghdr
.cfm_op
= CFM_OP_SOCKET_CLOSED
;
2732 msg_closed
.cfc_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2733 msg_closed
.cfc_first_event
.tv_sec
= cfil_info
->cfi_first_event
.tv_sec
;
2734 msg_closed
.cfc_first_event
.tv_usec
= cfil_info
->cfi_first_event
.tv_usec
;
2735 memcpy(msg_closed
.cfc_op_time
, cfil_info
->cfi_op_time
, sizeof(uint32_t)*CFI_MAX_TIME_LOG_ENTRY
);
2736 memcpy(msg_closed
.cfc_op_list
, cfil_info
->cfi_op_list
, sizeof(unsigned char)*CFI_MAX_TIME_LOG_ENTRY
);
2737 msg_closed
.cfc_op_list_ctr
= cfil_info
->cfi_op_list_ctr
;
2740 CFIL_LOG(LOG_ERR
, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed
.cfc_msghdr
.cfm_sock_id
, cfil_info
->cfi_op_list_ctr
, cfil_info
->cfi_first_event
.tv_sec
, cfil_info
->cfi_first_event
.tv_usec
);
2743 if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
2744 msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
2746 for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
2747 CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
2751 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2752 entry
->cfe_filter
->cf_kcunit
,
2754 sizeof(struct cfil_msg_sock_closed
),
2757 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d",
2762 entry
->cfe_flags
|= CFEF_SENT_SOCK_CLOSED
;
2763 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_ok
);
2765 /* We can recover from flow control */
2766 if (error
== ENOBUFS
) {
2767 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2768 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_flow_control
);
2770 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2771 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2773 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2775 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2778 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_fail
);
2780 cfil_rw_unlock_shared(&cfil_lck_rw
);
static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in6_addr *ip6, u_int16_t port)
{
	struct sockaddr_in6 *sin6 = &sin46->sin6;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_len = sizeof(*sin6);
	sin6->sin6_port = port;
	sin6->sin6_addr = *ip6;
	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
		sin6->sin6_addr.s6_addr16[1] = 0;
	}
}

static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in_addr ip, u_int16_t port)
{
	struct sockaddr_in *sin = &sin46->sin;

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_port = port;
	sin->sin_addr.s_addr = ip.s_addr;
}
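/*
 * Minimal usage sketch for the helper above (hypothetical values):
 * populating one half of the sockaddr union for an IPv4 flow, with the
 * address and port already in network byte order, as the attach and
 * data event code does.
 */
#if 0
    union sockaddr_in_4_6 src;
    struct in_addr ip = { .s_addr = htonl(INADDR_LOOPBACK) };

    fill_ip_sockaddr_4_6(&src, ip, htons(8080));
    /* src.sin now describes AF_INET 127.0.0.1:8080 */
#endif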
static void
cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
	struct in6_addr **laddr, struct in6_addr **faddr,
	u_int16_t *lport, u_int16_t *fport)
{
	if (entry != NULL) {
		*laddr = &entry->cfentry_laddr.addr6;
		*faddr = &entry->cfentry_faddr.addr6;
		*lport = entry->cfentry_lport;
		*fport = entry->cfentry_fport;
	} else {
		*laddr = &inp->in6p_laddr;
		*faddr = &inp->in6p_faddr;
		*lport = inp->inp_lport;
		*fport = inp->inp_fport;
	}
}

static void
cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
	struct in_addr *laddr, struct in_addr *faddr,
	u_int16_t *lport, u_int16_t *fport)
{
	if (entry != NULL) {
		*laddr = entry->cfentry_laddr.addr46.ia46_addr4;
		*faddr = entry->cfentry_faddr.addr46.ia46_addr4;
		*lport = entry->cfentry_lport;
		*fport = entry->cfentry_fport;
	} else {
		*laddr = inp->inp_laddr;
		*faddr = inp->inp_faddr;
		*lport = inp->inp_lport;
		*fport = inp->inp_fport;
	}
}
2851 cfil_dispatch_data_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
2852 struct mbuf
*data
, unsigned int copyoffset
, unsigned int copylen
)
2855 struct mbuf
*copy
= NULL
;
2856 struct mbuf
*msg
= NULL
;
2857 unsigned int one
= 1;
2858 struct cfil_msg_data_event
*data_req
;
2860 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
2861 struct cfil_entry
*entry
;
2862 struct cfe_buf
*entrybuf
;
2863 struct content_filter
*cfc
;
2866 cfil_rw_lock_shared(&cfil_lck_rw
);
2868 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2870 entrybuf
= &entry
->cfe_snd
;
2872 entrybuf
= &entry
->cfe_rcv
;
2874 cfc
= entry
->cfe_filter
;
2878 data
= cfil_data_start(data
);
2879 if (data
== NULL
|| (data
->m_flags
& M_PKTHDR
) == 0) {
2880 CFIL_LOG(LOG_ERR
, "NOT PKTHDR");
2884 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2885 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2887 socket_lock_assert_owned(so
);
2889 /* Would be wasteful to try */
2890 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2895 /* Make a copy of the data to pass to kernel control socket */
2896 copy
= m_copym_mode(data
, copyoffset
, copylen
, M_DONTWAIT
,
2899 CFIL_LOG(LOG_ERR
, "m_copym_mode() failed");
2904 /* We need an mbuf packet for the message header */
2905 hdrsize
= sizeof(struct cfil_msg_data_event
);
2906 error
= mbuf_allocpacket(MBUF_DONTWAIT
, hdrsize
, &one
, &msg
);
2908 CFIL_LOG(LOG_ERR
, "mbuf_allocpacket() failed");
2911 * ENOBUFS is to indicate flow control
2916 mbuf_setlen(msg
, hdrsize
);
2917 mbuf_pkthdr_setlen(msg
, hdrsize
+ copylen
);
2919 data_req
= (struct cfil_msg_data_event
*)mbuf_data(msg
);
2920 bzero(data_req
, hdrsize
);
2921 data_req
->cfd_msghdr
.cfm_len
= hdrsize
+ copylen
;
2922 data_req
->cfd_msghdr
.cfm_version
= 1;
2923 data_req
->cfd_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2924 data_req
->cfd_msghdr
.cfm_op
=
2925 outgoing
? CFM_OP_DATA_OUT
: CFM_OP_DATA_IN
;
2926 data_req
->cfd_msghdr
.cfm_sock_id
=
2927 entry
->cfe_cfil_info
->cfi_sock_id
;
2928 data_req
->cfd_start_offset
= entrybuf
->cfe_peeked
;
2929 data_req
->cfd_end_offset
= entrybuf
->cfe_peeked
+ copylen
;
2933 * For non connected sockets need to copy addresses from passed
2936 if (inp
->inp_vflag
& INP_IPV6
) {
2937 struct in6_addr
*laddr
= NULL
, *faddr
= NULL
;
2938 u_int16_t lport
= 0, fport
= 0;
2940 cfil_get_flow_address_v6(cfil_info
->cfi_hash_entry
, inp
,
2941 &laddr
, &faddr
, &lport
, &fport
);
2943 fill_ip6_sockaddr_4_6(&data_req
->cfc_src
, laddr
, lport
);
2944 fill_ip6_sockaddr_4_6(&data_req
->cfc_dst
, faddr
, fport
);
2946 fill_ip6_sockaddr_4_6(&data_req
->cfc_src
, faddr
, fport
);
2947 fill_ip6_sockaddr_4_6(&data_req
->cfc_dst
, laddr
, lport
);
2949 } else if (inp
->inp_vflag
& INP_IPV4
) {
2950 struct in_addr laddr
= {0}, faddr
= {0};
2951 u_int16_t lport
= 0, fport
= 0;
2953 cfil_get_flow_address(cfil_info
->cfi_hash_entry
, inp
,
2954 &laddr
, &faddr
, &lport
, &fport
);
2957 fill_ip_sockaddr_4_6(&data_req
->cfc_src
, laddr
, lport
);
2958 fill_ip_sockaddr_4_6(&data_req
->cfc_dst
, faddr
, fport
);
2960 fill_ip_sockaddr_4_6(&data_req
->cfc_src
, faddr
, fport
);
2961 fill_ip_sockaddr_4_6(&data_req
->cfc_dst
, laddr
, lport
);
2966 CFI_ADD_TIME_LOG(cfil_info
, &tv
, &cfil_info
->cfi_first_event
, data_req
->cfd_msghdr
.cfm_op
);
2968 /* Pass the message to the content filter */
2969 error
= ctl_enqueuembuf(entry
->cfe_filter
->cf_kcref
,
2970 entry
->cfe_filter
->cf_kcunit
,
2973 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
2977 entry
->cfe_flags
&= ~CFEF_FLOW_CONTROLLED
;
2978 OSIncrementAtomic(&cfil_stats
.cfs_data_event_ok
);
2981 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
2982 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
, (uint64_t)VM_KERNEL_ADDRPERM(data
), copyoffset
, copylen
);
2986 if (error
== ENOBUFS
) {
2987 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2989 &cfil_stats
.cfs_data_event_flow_control
);
2991 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2992 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2994 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2996 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2999 OSIncrementAtomic(&cfil_stats
.cfs_data_event_fail
);
3001 cfil_rw_unlock_shared(&cfil_lck_rw
);
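/*
 * Illustrative sketch (user-space view, simplified): a data event built
 * by cfil_dispatch_data_event() is a struct cfil_msg_data_event header
 * with the copied payload bytes following it, and the span it covers in
 * the flow's byte stream is [cfd_start_offset, cfd_end_offset).  The
 * buffer name is hypothetical.
 */
#if 0
    struct cfil_msg_data_event *evt = (struct cfil_msg_data_event *)buf;
    size_t payload_len = evt->cfd_msghdr.cfm_len - sizeof(*evt);
    uint8_t *payload = (uint8_t *)(evt + 1);

    /* the payload length matches the advertised stream span */
    assert(payload_len ==
        (size_t)(evt->cfd_end_offset - evt->cfd_start_offset));
#endif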
3007 * Process the queue of data waiting to be delivered to content filter
3010 cfil_data_service_ctl_q(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3013 struct mbuf
*data
, *tmp
= NULL
;
3014 unsigned int datalen
= 0, copylen
= 0, copyoffset
= 0;
3015 struct cfil_entry
*entry
;
3016 struct cfe_buf
*entrybuf
;
3017 uint64_t currentoffset
= 0;
3019 if (cfil_info
== NULL
)
3022 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3023 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3025 socket_lock_assert_owned(so
);
3027 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3029 entrybuf
= &entry
->cfe_snd
;
3031 entrybuf
= &entry
->cfe_rcv
;
3033 /* Send attached message if not yet done */
3034 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
3035 error
= cfil_dispatch_attach_event(so
, cfil_info
, kcunit
);
3037 /* We can recover from flow control */
3038 if (error
== ENOBUFS
|| error
== ENOMEM
)
3042 } else if ((entry
->cfe_flags
& CFEF_DATA_START
) == 0) {
3043 OSIncrementAtomic(&cfil_stats
.cfs_ctl_q_not_started
);
3048 CFIL_LOG(LOG_DEBUG
, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3049 entrybuf
->cfe_pass_offset
,
3050 entrybuf
->cfe_peeked
,
3051 entrybuf
->cfe_peek_offset
);
3054 /* Move all data that can pass */
3055 while ((data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
)) != NULL
&&
3056 entrybuf
->cfe_ctl_q
.q_start
< entrybuf
->cfe_pass_offset
) {
3057 datalen
= cfil_data_length(data
, NULL
, NULL
);
3060 if (entrybuf
->cfe_ctl_q
.q_start
+ datalen
<=
3061 entrybuf
->cfe_pass_offset
) {
3063 * The first mbuf can fully pass
3068 * The first mbuf can partially pass
3070 copylen
= entrybuf
->cfe_pass_offset
-
3071 entrybuf
->cfe_ctl_q
.q_start
;
3073 VERIFY(copylen
<= datalen
);
3077 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3078 "datalen %u copylen %u",
3079 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3080 entrybuf
->cfe_ctl_q
.q_start
,
3081 entrybuf
->cfe_peeked
,
3082 entrybuf
->cfe_pass_offset
,
3083 entrybuf
->cfe_peek_offset
,
3088 * Data that passes has been peeked at explicitly or
3091 if (entrybuf
->cfe_ctl_q
.q_start
+ copylen
>
3092 entrybuf
->cfe_peeked
)
3093 entrybuf
->cfe_peeked
=
3094 entrybuf
->cfe_ctl_q
.q_start
+ copylen
;
3096 * Stop on partial pass
3098 if (copylen
< datalen
)
3101 /* All good, move full data from ctl queue to pending queue */
3102 cfil_queue_remove(&entrybuf
->cfe_ctl_q
, data
, datalen
);
3104 cfil_queue_enqueue(&entrybuf
->cfe_pending_q
, data
, datalen
);
3106 OSAddAtomic64(datalen
,
3107 &cfil_stats
.cfs_pending_q_out_enqueued
);
3109 OSAddAtomic64(datalen
,
3110 &cfil_stats
.cfs_pending_q_in_enqueued
);
3112 CFIL_INFO_VERIFY(cfil_info
);
3115 "%llx first %llu peeked %llu pass %llu peek %llu"
3116 "datalen %u copylen %u",
3117 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3118 entrybuf
->cfe_ctl_q
.q_start
,
3119 entrybuf
->cfe_peeked
,
3120 entrybuf
->cfe_pass_offset
,
3121 entrybuf
->cfe_peek_offset
,
3125 /* Now deal with remaining data the filter wants to peek at */
3126 for (data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
),
3127 currentoffset
= entrybuf
->cfe_ctl_q
.q_start
;
3128 data
!= NULL
&& currentoffset
< entrybuf
->cfe_peek_offset
;
3129 data
= cfil_queue_next(&entrybuf
->cfe_ctl_q
, data
),
3130 currentoffset
+= datalen
) {
3131 datalen
= cfil_data_length(data
, NULL
, NULL
);
3134 /* We've already peeked at this mbuf */
3135 if (currentoffset
+ datalen
<= entrybuf
->cfe_peeked
)
3138 * The data in the first mbuf may have been
3139 * partially peeked at
3141 copyoffset
= entrybuf
->cfe_peeked
- currentoffset
;
3142 VERIFY(copyoffset
< datalen
);
3143 copylen
= datalen
- copyoffset
;
3144 VERIFY(copylen
<= datalen
);
3146 * Do not copy more than needed
3148 if (currentoffset
+ copyoffset
+ copylen
>
3149 entrybuf
->cfe_peek_offset
) {
3150 copylen
= entrybuf
->cfe_peek_offset
-
3151 (currentoffset
+ copyoffset
);
3156 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3157 "datalen %u copylen %u copyoffset %u",
3158 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3160 entrybuf
->cfe_peeked
,
3161 entrybuf
->cfe_pass_offset
,
3162 entrybuf
->cfe_peek_offset
,
3163 datalen
, copylen
, copyoffset
);
3167 * Stop if there is nothing more to peek at
3172 * Let the filter get a peek at this span of data
3174 error
= cfil_dispatch_data_event(so
, cfil_info
, kcunit
,
3175 outgoing
, data
, copyoffset
, copylen
);
3177 /* On error, leave data in ctl_q */
3180 entrybuf
->cfe_peeked
+= copylen
;
3182 OSAddAtomic64(copylen
,
3183 &cfil_stats
.cfs_ctl_q_out_peeked
);
3185 OSAddAtomic64(copylen
,
3186 &cfil_stats
.cfs_ctl_q_in_peeked
);
3188 /* Stop when data could not be fully peeked at */
3189 if (copylen
+ copyoffset
< datalen
)
3192 CFIL_INFO_VERIFY(cfil_info
);
3195 "%llx first %llu peeked %llu pass %llu peek %llu"
3196 "datalen %u copylen %u copyoffset %u",
3197 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3199 entrybuf
->cfe_peeked
,
3200 entrybuf
->cfe_pass_offset
,
3201 entrybuf
->cfe_peek_offset
,
3202 datalen
, copylen
, copyoffset
);
3205 * Process data that has passed the filter
3207 error
= cfil_service_pending_queue(so
, cfil_info
, kcunit
, outgoing
);
3209 CFIL_LOG(LOG_ERR
, "cfil_service_pending_queue() error %d",
3215 * Dispatch disconnect events that could not be sent
3217 if (cfil_info
== NULL
)
3219 else if (outgoing
) {
3220 if ((cfil_info
->cfi_flags
& CFIF_SHUT_WR
) &&
3221 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
))
3222 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 1);
3224 if ((cfil_info
->cfi_flags
& CFIF_SHUT_RD
) &&
3225 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
))
3226 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 0);
3231 "first %llu peeked %llu pass %llu peek %llu",
3232 entrybuf
->cfe_ctl_q
.q_start
,
3233 entrybuf
->cfe_peeked
,
3234 entrybuf
->cfe_pass_offset
,
3235 entrybuf
->cfe_peek_offset
);
3237 CFIL_INFO_VERIFY(cfil_info
);
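/*
 * Worked example for the control-queue pass logic above (hypothetical
 * numbers): with q_start = 1000, a 600-byte mbuf at the head of the
 * control queue and cfe_pass_offset = 1400, the mbuf can only partially
 * pass, so copylen = cfe_pass_offset - q_start = 400; cfe_peeked is
 * advanced to at least q_start + copylen = 1400, and the loop stops on
 * the partial pass because copylen < datalen.
 */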
3242 * cfil_data_filter()
3244 * Process data for a content filter installed on a socket
3247 cfil_data_filter(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
3248 struct mbuf
*data
, uint64_t datalen
)
3251 struct cfil_entry
*entry
;
3252 struct cfe_buf
*entrybuf
;
3254 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3255 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3257 socket_lock_assert_owned(so
);
3259 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3261 entrybuf
= &entry
->cfe_snd
;
3263 entrybuf
= &entry
->cfe_rcv
;
3265 /* Are we attached to the filter? */
3266 if (entry
->cfe_filter
== NULL
) {
3271 /* Dispatch to filters */
3272 cfil_queue_enqueue(&entrybuf
->cfe_ctl_q
, data
, datalen
);
3274 OSAddAtomic64(datalen
,
3275 &cfil_stats
.cfs_ctl_q_out_enqueued
);
3277 OSAddAtomic64(datalen
,
3278 &cfil_stats
.cfs_ctl_q_in_enqueued
);
3280 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, outgoing
);
3282 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
3286 * We have to return EJUSTRETURN in all cases to avoid double free
3289 error
= EJUSTRETURN
;
3291 CFIL_INFO_VERIFY(cfil_info
);
3293 CFIL_LOG(LOG_INFO
, "return %d", error
);
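/*
 * Sketch of the caller-side convention implied by the EJUSTRETURN
 * return above (illustrative, simplified): once the data has been
 * enqueued for the filters, the socket layer must neither free nor
 * transmit the mbufs itself.
 */
#if 0
    error = cfil_sock_data_out(so, to, data, control, flags);
    if (error == EJUSTRETURN) {
        /* mbufs are now owned by the content filter subsystem */
        data = NULL;
        control = NULL;
        error = 0;
    }
#endif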
3298 * cfil_service_inject_queue() re-inject data that passed the
3302 cfil_service_inject_queue(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
3305 unsigned int datalen
;
3309 struct cfi_buf
*cfi_buf
;
3310 struct cfil_queue
*inject_q
;
3311 int need_rwakeup
= 0;
3314 if (cfil_info
== NULL
)
3317 socket_lock_assert_owned(so
);
3320 cfi_buf
= &cfil_info
->cfi_snd
;
3321 cfil_info
->cfi_flags
&= ~CFIF_RETRY_INJECT_OUT
;
3323 cfi_buf
= &cfil_info
->cfi_rcv
;
3324 cfil_info
->cfi_flags
&= ~CFIF_RETRY_INJECT_IN
;
3326 inject_q
= &cfi_buf
->cfi_inject_q
;
3328 if (cfil_queue_empty(inject_q
))
3331 #if DATA_DEBUG | VERDICT_DEBUG
3332 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3333 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
, cfil_queue_len(inject_q
));
3336 while ((data
= cfil_queue_first(inject_q
)) != NULL
) {
3337 datalen
= cfil_data_length(data
, &mbcnt
, &mbnum
);
3340 CFIL_LOG(LOG_DEBUG
, "CFIL: SERVICE INJECT-Q: <%s>: <so %llx> data %llx datalen %u (mbcnt %u)",
3341 remote_addr_ptr
? "UNCONNECTED" : "CONNECTED",
3342 (uint64_t)VM_KERNEL_ADDRPERM(so
), (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, mbcnt
);
3345 /* Remove data from queue and adjust stats */
3346 cfil_queue_remove(inject_q
, data
, datalen
);
3347 cfi_buf
->cfi_pending_first
+= datalen
;
3348 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
3349 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
3350 cfil_info_buf_verify(cfi_buf
);
3353 error
= sosend_reinject(so
, NULL
, data
, NULL
, 0);
3356 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: Error: sosend_reinject() failed");
3357 CFIL_LOG(LOG_ERR
, "### sosend() failed %d", error
);
3361 // At least one injection succeeded, need to wake up pending threads.
3364 data
->m_flags
|= M_SKIPCFIL
;
			 * NOTE: We currently only support TCP and UDP.
			 * For RAWIP, MPTCP and message TCP we'll
			 * need to call the appropriate sbappendxxx()
			 * or fix sock_inject_data_in()
			 */
3372 if (IS_UDP(so
) == TRUE
) {
3373 if (sbappendchain(&so
->so_rcv
, data
, 0))
3376 if (sbappendstream(&so
->so_rcv
, data
))
3382 OSAddAtomic64(datalen
,
3383 &cfil_stats
.cfs_inject_q_out_passed
);
3385 OSAddAtomic64(datalen
,
3386 &cfil_stats
.cfs_inject_q_in_passed
);
3391 #if DATA_DEBUG | VERDICT_DEBUG
3392 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
3393 (uint64_t)VM_KERNEL_ADDRPERM(so
), count
);
	/* A single wakeup for several packets is more efficient */
3398 if (outgoing
== TRUE
)
3404 if (error
!= 0 && cfil_info
) {
3405 if (error
== ENOBUFS
)
3406 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nobufs
);
3407 if (error
== ENOMEM
)
3408 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nomem
);
3411 cfil_info
->cfi_flags
|= CFIF_RETRY_INJECT_OUT
;
3412 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_out_fail
);
3414 cfil_info
->cfi_flags
|= CFIF_RETRY_INJECT_IN
;
3415 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_in_fail
);
3422 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_SHUT_WR
)) {
3423 cfil_sock_notify_shutdown(so
, SHUT_WR
);
3424 if (cfil_sock_data_pending(&so
->so_snd
) == 0)
3425 soshutdownlock_final(so
, SHUT_WR
);
3427 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
3428 if (cfil_filters_attached(so
) == 0) {
3429 CFIL_LOG(LOG_INFO
, "so %llx waking",
3430 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3431 wakeup((caddr_t
)cfil_info
);
3435 CFIL_INFO_VERIFY(cfil_info
);
3441 cfil_service_pending_queue(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3443 uint64_t passlen
, curlen
;
3445 unsigned int datalen
;
3447 struct cfil_entry
*entry
;
3448 struct cfe_buf
*entrybuf
;
3449 struct cfil_queue
*pending_q
;
3451 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3452 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3454 socket_lock_assert_owned(so
);
3456 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3458 entrybuf
= &entry
->cfe_snd
;
3460 entrybuf
= &entry
->cfe_rcv
;
3462 pending_q
= &entrybuf
->cfe_pending_q
;
3464 passlen
= entrybuf
->cfe_pass_offset
- pending_q
->q_start
;
3467 * Locate the chunks of data that we can pass to the next filter
3468 * A data chunk must be on mbuf boundaries
3471 while ((data
= cfil_queue_first(pending_q
)) != NULL
) {
3472 datalen
= cfil_data_length(data
, NULL
, NULL
);
3476 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
3477 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
,
3481 if (curlen
+ datalen
> passlen
)
3484 cfil_queue_remove(pending_q
, data
, datalen
);
3489 kcunit
<= MAX_CONTENT_FILTER
;
3491 error
= cfil_data_filter(so
, cfil_info
, kcunit
, outgoing
,
3493 /* 0 means passed so we can continue */
3497 /* When data has passed all filters, re-inject */
3501 &cfil_info
->cfi_snd
.cfi_inject_q
,
3503 OSAddAtomic64(datalen
,
3504 &cfil_stats
.cfs_inject_q_out_enqueued
);
3507 &cfil_info
->cfi_rcv
.cfi_inject_q
,
3509 OSAddAtomic64(datalen
,
3510 &cfil_stats
.cfs_inject_q_in_enqueued
);
3515 CFIL_INFO_VERIFY(cfil_info
);
3521 cfil_update_data_offsets(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
3522 uint64_t pass_offset
, uint64_t peek_offset
)
3525 struct cfil_entry
*entry
= NULL
;
3526 struct cfe_buf
*entrybuf
;
3529 CFIL_LOG(LOG_INFO
, "pass %llu peek %llu", pass_offset
, peek_offset
);
3531 socket_lock_assert_owned(so
);
3533 if (cfil_info
== NULL
) {
3534 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
3535 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3538 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
3539 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3540 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3545 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3547 entrybuf
= &entry
->cfe_snd
;
3549 entrybuf
= &entry
->cfe_rcv
;
3551 /* Record updated offsets for this content filter */
3552 if (pass_offset
> entrybuf
->cfe_pass_offset
) {
3553 entrybuf
->cfe_pass_offset
= pass_offset
;
3555 if (entrybuf
->cfe_peek_offset
< entrybuf
->cfe_pass_offset
)
3556 entrybuf
->cfe_peek_offset
= entrybuf
->cfe_pass_offset
;
3559 CFIL_LOG(LOG_INFO
, "pass_offset %llu <= cfe_pass_offset %llu",
3560 pass_offset
, entrybuf
->cfe_pass_offset
);
3562 /* Filter does not want or need to see data that's allowed to pass */
3563 if (peek_offset
> entrybuf
->cfe_pass_offset
&&
3564 peek_offset
> entrybuf
->cfe_peek_offset
) {
3565 entrybuf
->cfe_peek_offset
= peek_offset
;
3572 /* Move data held in control queue to pending queue if needed */
3573 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, outgoing
);
3575 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
3579 error
= EJUSTRETURN
;
3583 * The filter is effectively detached when pass all from both sides
3584 * or when the socket is closed and no more data is waiting
3585 * to be delivered to the filter
3587 if (entry
!= NULL
&&
3588 ((entry
->cfe_snd
.cfe_pass_offset
== CFM_MAX_OFFSET
&&
3589 entry
->cfe_rcv
.cfe_pass_offset
== CFM_MAX_OFFSET
) ||
3590 ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
3591 cfil_queue_empty(&entry
->cfe_snd
.cfe_ctl_q
) &&
3592 cfil_queue_empty(&entry
->cfe_rcv
.cfe_ctl_q
)))) {
3593 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
3595 cfil_info_log(LOG_ERR
, cfil_info
, outgoing
?
3596 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
3597 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
3599 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
3600 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3601 if ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
3602 cfil_filters_attached(so
) == 0) {
3604 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: WAKING");
3606 CFIL_LOG(LOG_INFO
, "so %llx waking",
3607 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3608 wakeup((caddr_t
)cfil_info
);
3611 CFIL_INFO_VERIFY(cfil_info
);
3612 CFIL_LOG(LOG_INFO
, "return %d", error
);
3617 * Update pass offset for socket when no data is pending
3620 cfil_set_socket_pass_offset(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
3622 struct cfi_buf
*cfi_buf
;
3623 struct cfil_entry
*entry
;
3624 struct cfe_buf
*entrybuf
;
3626 uint64_t pass_offset
= 0;
3628 if (cfil_info
== NULL
)
3631 CFIL_LOG(LOG_INFO
, "so %llx outgoing %d",
3632 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
);
3634 socket_lock_assert_owned(so
);
3637 cfi_buf
= &cfil_info
->cfi_snd
;
3639 cfi_buf
= &cfil_info
->cfi_rcv
;
3641 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
3642 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
,
3643 cfi_buf
->cfi_pending_first
, cfi_buf
->cfi_pending_last
);
3645 if (cfi_buf
->cfi_pending_last
- cfi_buf
->cfi_pending_first
== 0) {
3646 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3647 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3649 /* Are we attached to a filter? */
3650 if (entry
->cfe_filter
== NULL
)
3654 entrybuf
= &entry
->cfe_snd
;
3656 entrybuf
= &entry
->cfe_rcv
;
3658 if (pass_offset
== 0 ||
3659 entrybuf
->cfe_pass_offset
< pass_offset
)
3660 pass_offset
= entrybuf
->cfe_pass_offset
;
3662 cfi_buf
->cfi_pass_offset
= pass_offset
;
3665 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
3666 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, cfi_buf
->cfi_pass_offset
);
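/*
 * Worked example for the loop above (hypothetical numbers): with two
 * attached filters whose cfe_pass_offset values are 5000 and 3200, and
 * no data pending, cfi_pass_offset becomes 3200 -- the socket only
 * treats bytes as passed once the most restrictive filter has passed
 * them.
 */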
int
cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
	uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_INFO, "so %llx %s dropped",
			(uint64_t)VM_KERNEL_ADDRPERM(so),
			outgoing ? "out" : "in");
		goto release;
	}

	error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
		pass_offset, peek_offset);

	cfil_service_inject_queue(so, cfil_info, outgoing);

	cfil_set_socket_pass_offset(so, cfil_info, outgoing);
release:
	CFIL_INFO_VERIFY(cfil_info);
	cfil_release_sockbuf(so, outgoing);

	return (error);
}
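/*
 * Illustrative user-space sketch (simplified, assuming the
 * cfil_msg_action layout from <net/content_filter.h>): an agent that
 * wants to pass a flow in both directions for good can send a
 * CFM_OP_DATA_UPDATE action with CFM_MAX_OFFSET pass offsets, which
 * ultimately lands in cfil_action_data_pass() above.  kctl_fd and
 * sock_id are hypothetical values held by the agent.
 */
#if 0
    struct cfil_msg_action action;

    bzero(&action, sizeof(action));
    action.cfa_msghdr.cfm_len = sizeof(action);
    action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
    action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
    action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
    action.cfa_msghdr.cfm_sock_id = sock_id;    /* from the attach event */
    action.cfa_out_pass_offset = CFM_MAX_OFFSET;
    action.cfa_in_pass_offset = CFM_MAX_OFFSET;

    (void)send(kctl_fd, &action, sizeof(action), 0);
#endif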
3704 cfil_flush_queues(struct socket
*so
, struct cfil_info
*cfil_info
)
3706 struct cfil_entry
*entry
;
3710 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || cfil_info
== NULL
)
3713 socket_lock_assert_owned(so
);
3716 * Flush the output queues and ignore errors as long as
3719 (void) cfil_acquire_sockbuf(so
, cfil_info
, 1);
3720 if (cfil_info
!= NULL
) {
3722 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3723 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3725 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
3726 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
3728 drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
3731 if (cfil_info
->cfi_flags
& CFIF_DROP
)
3733 &cfil_stats
.cfs_flush_out_drop
);
3736 &cfil_stats
.cfs_flush_out_close
);
3739 cfil_release_sockbuf(so
, 1);
3742 * Flush the input queues
3744 (void) cfil_acquire_sockbuf(so
, cfil_info
, 0);
3745 if (cfil_info
!= NULL
) {
3747 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3748 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3750 drained
+= cfil_queue_drain(
3751 &entry
->cfe_rcv
.cfe_ctl_q
);
3752 drained
+= cfil_queue_drain(
3753 &entry
->cfe_rcv
.cfe_pending_q
);
3755 drained
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
3758 if (cfil_info
->cfi_flags
& CFIF_DROP
)
3760 &cfil_stats
.cfs_flush_in_drop
);
3763 &cfil_stats
.cfs_flush_in_close
);
3766 cfil_release_sockbuf(so
, 0);
3768 CFIL_INFO_VERIFY(cfil_info
);
3772 cfil_action_drop(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
)
3775 struct cfil_entry
*entry
;
3778 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || cfil_info
== NULL
)
3781 socket_lock_assert_owned(so
);
3783 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3785 /* Are we attached to the filter? */
3786 if (entry
->cfe_filter
== NULL
)
3789 cfil_info
->cfi_flags
|= CFIF_DROP
;
3794 * Force the socket to be marked defunct
3795 * (forcing fixed along with rdar://19391339)
3797 if (so
->so_cfil_db
== NULL
) {
3798 error
= sosetdefunct(p
, so
,
3799 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
,
3802 /* Flush the socket buffer and disconnect */
3804 error
= sodefunct(p
, so
,
3805 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
);
3808 /* The filter is done, mark as detached */
3809 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
3811 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: DROP - DETACH");
3813 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
3814 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3816 /* Pending data needs to go */
3817 cfil_flush_queues(so
, cfil_info
);
3819 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
3820 if (cfil_filters_attached(so
) == 0) {
3821 CFIL_LOG(LOG_INFO
, "so %llx waking",
3822 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3823 wakeup((caddr_t
)cfil_info
);
3831 cfil_action_bless_client(uint32_t kcunit
, struct cfil_msg_hdr
*msghdr
)
3834 struct cfil_info
*cfil_info
= NULL
;
3836 bool cfil_attached
= false;
3837 struct cfil_msg_bless_client
*blessmsg
= (struct cfil_msg_bless_client
*)msghdr
;
3839 // Search and lock socket
3840 struct socket
*so
= cfil_socket_from_client_uuid(blessmsg
->cfb_client_uuid
, &cfil_attached
);
3844 // The client gets a pass automatically
3845 cfil_info
= (so
->so_cfil_db
!= NULL
) ?
3846 cfil_db_get_cfil_info(so
->so_cfil_db
, msghdr
->cfm_sock_id
) : so
->so_cfil
;
3848 if (cfil_attached
) {
3850 if (cfil_info
!= NULL
) {
3851 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
3852 cfil_info
->cfi_hash_entry
? "UDP" : "TCP",
3853 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3854 cfil_info
->cfi_sock_id
);
3857 (void)cfil_action_data_pass(so
, cfil_info
, kcunit
, 1, CFM_MAX_OFFSET
, CFM_MAX_OFFSET
);
3858 (void)cfil_action_data_pass(so
, cfil_info
, kcunit
, 0, CFM_MAX_OFFSET
, CFM_MAX_OFFSET
);
3860 so
->so_flags1
|= SOF1_CONTENT_FILTER_SKIP
;
3862 socket_unlock(so
, 1);
static int
cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
{
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL)
			continue;

		if (outgoing)
			entrybuf = &entry->cfe_snd;
		else
			entrybuf = &entry->cfe_rcv;

		entrybuf->cfe_ctl_q.q_start += datalen;
		entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
		entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;

		entrybuf->cfe_ctl_q.q_end += datalen;

		entrybuf->cfe_pending_q.q_start += datalen;
		entrybuf->cfe_pending_q.q_end += datalen;
	}
	CFIL_INFO_VERIFY(cfil_info);
	return (0);
}
3906 cfil_data_common(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
, struct sockaddr
*to
,
3907 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
3909 #pragma unused(to, control, flags)
3911 unsigned int datalen
;
3915 struct cfi_buf
*cfi_buf
;
3916 struct mbuf
*chain
= NULL
;
3918 if (cfil_info
== NULL
) {
3919 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
3920 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3923 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
3924 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3925 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3930 datalen
= cfil_data_length(data
, &mbcnt
, &mbnum
);
3933 cfi_buf
= &cfil_info
->cfi_snd
;
3935 cfi_buf
= &cfil_info
->cfi_rcv
;
3937 cfi_buf
->cfi_pending_last
+= datalen
;
3938 cfi_buf
->cfi_pending_mbcnt
+= mbcnt
;
3939 cfi_buf
->cfi_pending_mbnum
+= mbnum
;
3942 if (cfi_buf
->cfi_pending_mbnum
> cfil_udp_gc_mbuf_num_max
||
3943 cfi_buf
->cfi_pending_mbcnt
> cfil_udp_gc_mbuf_cnt_max
) {
3944 cfi_buf
->cfi_tail_drop_cnt
++;
3945 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
3946 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
3951 cfil_info_buf_verify(cfi_buf
);
3954 CFIL_LOG(LOG_DEBUG
, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
3955 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3956 outgoing
? "OUT" : "IN",
3957 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, data
->m_flags
,
3958 (uint64_t)VM_KERNEL_ADDRPERM(data
->m_nextpkt
),
3959 cfi_buf
->cfi_pending_last
,
3960 cfi_buf
->cfi_pending_mbcnt
,
3961 cfi_buf
->cfi_pass_offset
);
3964 /* Fast path when below pass offset */
3965 if (cfi_buf
->cfi_pending_last
<= cfi_buf
->cfi_pass_offset
) {
3966 cfil_update_entry_offsets(so
, cfil_info
, outgoing
, datalen
);
3968 CFIL_LOG(LOG_DEBUG
, "CFIL: QUEUEING DATA: FAST PATH");
3971 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3972 // Is cfil attached to this filter?
3973 if (IS_ENTRY_ATTACHED(cfil_info
, kcunit
)) {
				 * Chain addr (incoming only TBD), control (optional) and data into one chain.
				 * This full chain will be reinjected into socket after receiving verdict.
3979 (void) cfil_udp_save_socket_state(cfil_info
, data
);
3980 chain
= sbconcat_mbufs(NULL
, outgoing
? NULL
: to
, data
, control
);
3981 if (chain
== NULL
) {
3986 error
= cfil_data_filter(so
, cfil_info
, kcunit
, outgoing
, data
,
3989 /* 0 means passed so continue with next filter */
3995 /* Move cursor if no filter claimed the data */
3997 cfi_buf
->cfi_pending_first
+= datalen
;
3998 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
3999 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
4000 cfil_info_buf_verify(cfi_buf
);
4003 CFIL_INFO_VERIFY(cfil_info
);
4009 * Callback from socket layer sosendxxx()
4012 cfil_sock_data_out(struct socket
*so
, struct sockaddr
*to
,
4013 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4018 return (cfil_sock_udp_handle_data(TRUE
, so
, NULL
, to
, data
, control
, flags
));
4021 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
4024 socket_lock_assert_owned(so
);
4026 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
4027 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4028 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4031 if (control
!= NULL
) {
4032 CFIL_LOG(LOG_ERR
, "so %llx control",
4033 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4034 OSIncrementAtomic(&cfil_stats
.cfs_data_out_control
);
4036 if ((flags
& MSG_OOB
)) {
4037 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
4038 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4039 OSIncrementAtomic(&cfil_stats
.cfs_data_out_oob
);
4041 if ((so
->so_snd
.sb_flags
& SB_LOCK
) == 0)
4042 panic("so %p SB_LOCK not set", so
);
4044 if (so
->so_snd
.sb_cfil_thread
!= NULL
)
4045 panic("%s sb_cfil_thread %p not NULL", __func__
,
4046 so
->so_snd
.sb_cfil_thread
);
4048 error
= cfil_data_common(so
, so
->so_cfil
, 1, to
, data
, control
, flags
);
4054 * Callback from socket layer sbappendxxx()
4057 cfil_sock_data_in(struct socket
*so
, struct sockaddr
*from
,
4058 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4063 return (cfil_sock_udp_handle_data(FALSE
, so
, NULL
, from
, data
, control
, flags
));
4066 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
4069 socket_lock_assert_owned(so
);
4071 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
4072 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4073 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4076 if (control
!= NULL
) {
4077 CFIL_LOG(LOG_ERR
, "so %llx control",
4078 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4079 OSIncrementAtomic(&cfil_stats
.cfs_data_in_control
);
4081 if (data
->m_type
== MT_OOBDATA
) {
4082 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
4083 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4084 OSIncrementAtomic(&cfil_stats
.cfs_data_in_oob
);
4086 error
= cfil_data_common(so
, so
->so_cfil
, 0, from
, data
, control
, flags
);
/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there's outgoing data in process.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read anymore data.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
    int error = 0;

    if (IS_UDP(so)) {
        return (cfil_sock_udp_shutdown(so, how));
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        goto done;

    socket_lock_assert_owned(so);

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

    /*
     * Check the state of the socket before the content filter
     */
    if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
        /* read already shut down */
        error = ENOTCONN;
        goto done;
    }
    if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
        /* write already shut down */
        error = ENOTCONN;
        goto done;
    }

    if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        goto done;
    }

    /*
     * shutdown read: SHUT_RD or SHUT_RDWR
     */
    if (*how != SHUT_WR) {
        if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
            error = ENOTCONN;
            goto done;
        }
        so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
        cfil_sock_notify_shutdown(so, SHUT_RD);
    }
    /*
     * shutdown write: SHUT_WR or SHUT_RDWR
     */
    if (*how != SHUT_RD) {
        if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
            error = ENOTCONN;
            goto done;
        }
        so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
        cfil_sock_notify_shutdown(so, SHUT_WR);
        /*
         * When outgoing data is pending, we delay the shutdown at the
         * protocol level until the content filters give the final
         * verdict on the pending data.
         */
        if (cfil_sock_data_pending(&so->so_snd) != 0) {
            /*
             * When shutting down the read and write sides at once
             * we can proceed to the final shutdown of the read
             * side. Otherwise, we just return.
             */
            if (*how == SHUT_WR) {
                error = EJUSTRETURN;
            } else if (*how == SHUT_RDWR) {
                *how = SHUT_RD;
            }
        }
    }
done:
    return (error);
}
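/*
 * Illustration (added; not part of the original source): a minimal sketch of
 * how a caller in the socket layer might interpret the EJUSTRETURN convention
 * used above. The helper name is hypothetical; it only shows that EJUSTRETURN
 * means "the filter delayed the write-side shutdown, do not touch the protocol
 * yet", while any other non-zero error is propagated.
 *
 *	static int
 *	example_shutdown_path(struct socket *so, int how)
 *	{
 *		int error = cfil_sock_shutdown(so, &how);
 *
 *		if (error == EJUSTRETURN)
 *			return (0);     // deferred until filters pass the pending data
 *		if (error != 0)
 *			return (error); // e.g. ENOTCONN when that side is already shut
 *		// otherwise proceed with the protocol-level shutdown using the
 *		// possibly rewritten 'how' (SHUT_RDWR may have been narrowed to SHUT_RD)
 *		return (0);
 *	}
 */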
/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
    errno_t error = 0;
    int kcunit;

    if (IS_UDP(so)) {
        cfil_sock_udp_is_closed(so);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        /* Let the filters know of the closing */
        error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
    }

    /* Last chance to push passed data out */
    error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
    if (error == 0)
        cfil_service_inject_queue(so, so->so_cfil, 1);
    cfil_release_sockbuf(so, 1);

    so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

    /* Pending data needs to go */
    cfil_flush_queues(so, so->so_cfil);

    CFIL_INFO_VERIFY(so->so_cfil);
}
/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
    errno_t error = 0;
    int kcunit;

    if (IS_UDP(so)) {
        cfil_sock_udp_notify_shutdown(so, how, 0, 0);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), how);

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        /* Disconnect incoming side */
        if (how != SHUT_WR)
            error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
        /* Disconnect outgoing side */
        if (how != SHUT_RD)
            error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
    }
}
static int
cfil_filters_attached(struct socket *so)
{
    struct cfil_entry *entry;
    uint32_t kcunit;
    int attached = 0;

    if (IS_UDP(so))
        return cfil_filters_udp_attached(so, FALSE);

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return (0);

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        entry = &so->so_cfil->cfi_entries[kcunit - 1];

        /* Are we attached to the filter? */
        if (entry->cfe_filter == NULL)
            continue;
        if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
            continue;
        if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
            continue;
        attached = 1;
        break;
    }

    return (attached);
}
/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop
 */
void
cfil_sock_close_wait(struct socket *so)
{
    lck_mtx_t *mutex_held;
    struct timespec ts;
    int error;

    if (IS_UDP(so)) {
        cfil_sock_udp_close_wait(so);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

    while (cfil_filters_attached(so)) {
        /*
         * Notify the filters we are going away so they can detach
         */
        cfil_sock_notify_shutdown(so, SHUT_RDWR);

        /*
         * Make sure we need to wait after the filters are notified
         * of the disconnection
         */
        if (cfil_filters_attached(so) == 0)
            break;

        CFIL_LOG(LOG_INFO, "so %llx waiting",
            (uint64_t)VM_KERNEL_ADDRPERM(so));

        ts.tv_sec = cfil_close_wait_timeout / 1000;
        ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
            NSEC_PER_USEC * 1000;

        OSIncrementAtomic(&cfil_stats.cfs_close_wait);
        so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
        error = msleep((caddr_t)so->so_cfil, mutex_held,
            PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
        so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

        CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
            (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

        /*
         * Force close in case of timeout
         */
        if (error != 0) {
            OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
            break;
        }
    }
}
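/*
 * Worked example (added for clarity; not in the original source): the wait
 * above converts cfil_close_wait_timeout, expressed in milliseconds, into a
 * struct timespec. Assuming a hypothetical timeout of 1500 ms:
 *
 *	ts.tv_sec  = 1500 / 1000;                          // 1 second
 *	ts.tv_nsec = (1500 % 1000) * NSEC_PER_USEC * 1000; // 500 * 1000 * 1000 = 500,000,000 ns
 *
 * so msleep() blocks for at most 1.5 seconds per loop iteration before the
 * timeout path force-closes the connection.
 */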
/*
 * Returns the size of the data held by the content filter by using
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
    struct socket *so = sb->sb_so;
    uint64_t pending = 0;

    if (IS_UDP(so))
        return (cfil_sock_udp_data_pending(sb, FALSE));

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
        struct cfi_buf *cfi_buf;

        socket_lock_assert_owned(so);

        if ((sb->sb_flags & SB_RECV) == 0)
            cfi_buf = &so->so_cfil->cfi_snd;
        else
            cfi_buf = &so->so_cfil->cfi_rcv;

        pending = cfi_buf->cfi_pending_last -
            cfi_buf->cfi_pending_first;

        /*
         * If we are limited by the "chars of mbufs used" roughly
         * adjust so we won't overcommit
         */
        if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
            pending = cfi_buf->cfi_pending_mbcnt;
    }

    VERIFY(pending < INT32_MAX);

    return (int32_t)(pending);
}
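/*
 * Numeric example (added; illustrative only): if the filter holds byte
 * offsets [cfi_pending_first, cfi_pending_last) = [1000, 6000), the nominal
 * pending count is 5000 bytes, but if only cfi_pending_mbcnt = 4096 chars of
 * mbuf storage are actually in use, the function reports 4096 so the socket
 * buffer accounting is not overcommitted.
 */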
/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
    struct socket *so = sb->sb_so;
    uint64_t pending = 0;

    if (IS_UDP(so))
        return (cfil_sock_udp_data_pending(sb, TRUE));

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
        so->so_snd.sb_cfil_thread != current_thread()) {
        struct cfi_buf *cfi_buf;

        socket_lock_assert_owned(so);

        if ((sb->sb_flags & SB_RECV) == 0)
            cfi_buf = &so->so_cfil->cfi_snd;
        else
            cfi_buf = &so->so_cfil->cfi_rcv;

        pending = cfi_buf->cfi_pending_last -
            cfi_buf->cfi_pending_first;

        /*
         * If we are limited by the "chars of mbufs used" roughly
         * adjust so we won't overcommit
         */
        if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
            pending = cfi_buf->cfi_pending_mbcnt;
    }

    VERIFY(pending < INT32_MAX);

    return (int32_t)(pending);
}
/*
 * A callback from the socket and protocol layer when data becomes
 * available in the socket buffer to give a chance for the content filter
 * to re-inject data that was held back
 */
void
cfil_sock_buf_update(struct sockbuf *sb)
{
    int outgoing;
    int error;
    struct socket *so = sb->sb_so;

    if (IS_UDP(so)) {
        cfil_sock_udp_buf_update(sb);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    socket_lock_assert_owned(so);

    if ((sb->sb_flags & SB_RECV) == 0) {
        if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
            return;
        outgoing = 1;
        OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
    } else {
        if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
            return;
        outgoing = 0;
        OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
    }

    CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

    error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
    if (error == 0)
        cfil_service_inject_queue(so, so->so_cfil, outgoing);
    cfil_release_sockbuf(so, outgoing);
}
static int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
    int error = 0;
    size_t len = 0;
    u_int32_t i;

    /* Read only */
    if (req->newptr != USER_ADDR_NULL)
        return (EPERM);

    cfil_rw_lock_shared(&cfil_lck_rw);

    for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
        struct cfil_filter_stat filter_stat;
        struct content_filter *cfc = content_filters[i];

        if (cfc == NULL)
            continue;

        /* If just asking for the size */
        if (req->oldptr == USER_ADDR_NULL) {
            len += sizeof(struct cfil_filter_stat);
            continue;
        }

        bzero(&filter_stat, sizeof(struct cfil_filter_stat));
        filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
        filter_stat.cfs_filter_id = cfc->cf_kcunit;
        filter_stat.cfs_flags = cfc->cf_flags;
        filter_stat.cfs_sock_count = cfc->cf_sock_count;
        filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

        error = SYSCTL_OUT(req, &filter_stat,
            sizeof (struct cfil_filter_stat));
        if (error != 0)
            break;
    }
    /* If just asking for the size */
    if (req->oldptr == USER_ADDR_NULL)
        req->oldidx = len;

    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (req->oldptr != USER_ADDR_NULL) {
        for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
            cfil_filter_show(i);
        }
    }

    return (error);
}
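/*
 * Usage sketch (added; not part of the original file): assuming this handler
 * is registered under a "net.cfil.filter_list" sysctl name (the registration
 * lives elsewhere in this file, so the exact name is an assumption), a user
 * space tool could size and fetch the stats with the usual two-call pattern:
 *
 *	size_t len = 0;
 *	// first call: oldp == NULL, the handler reports the needed length
 *	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 && len > 0) {
 *		struct cfil_filter_stat *stats = malloc(len);
 *		if (stats != NULL &&
 *		    sysctlbyname("net.cfil.filter_list", stats, &len, NULL, 0) == 0) {
 *			// one struct cfil_filter_stat per active content filter
 *		}
 *	}
 *	// writes are rejected above with EPERM (req->newptr check)
 */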
static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
    int error = 0;
    u_int32_t i;
    struct cfil_info *cfi;

    /* Read only */
    if (req->newptr != USER_ADDR_NULL)
        return (EPERM);

    cfil_rw_lock_shared(&cfil_lck_rw);

    /*
     * If just asking for the size,
     */
    if (req->oldptr == USER_ADDR_NULL) {
        req->oldidx = cfil_sock_attached_count *
            sizeof(struct cfil_sock_stat);
        /* Bump the length in case new sockets gets attached */
        req->oldidx += req->oldidx >> 3;
        goto done;
    }

    TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
        struct cfil_entry *entry;
        struct cfil_sock_stat stat;
        struct socket *so = cfi->cfi_so;

        bzero(&stat, sizeof(struct cfil_sock_stat));
        stat.cfs_len = sizeof(struct cfil_sock_stat);
        stat.cfs_sock_id = cfi->cfi_sock_id;
        stat.cfs_flags = cfi->cfi_flags;

        stat.cfs_pid = so->last_pid;
        memcpy(stat.cfs_uuid, so->last_uuid, sizeof(uuid_t));
        if (so->so_flags & SOF_DELEGATED) {
            stat.cfs_e_pid = so->e_pid;
            memcpy(stat.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
        } else {
            stat.cfs_e_pid = so->last_pid;
            memcpy(stat.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
        }

        stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
        stat.cfs_sock_type = so->so_proto->pr_type;
        stat.cfs_sock_protocol = so->so_proto->pr_protocol;

        stat.cfs_snd.cbs_pending_first =
            cfi->cfi_snd.cfi_pending_first;
        stat.cfs_snd.cbs_pending_last =
            cfi->cfi_snd.cfi_pending_last;
        stat.cfs_snd.cbs_inject_q_len =
            cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
        stat.cfs_snd.cbs_pass_offset =
            cfi->cfi_snd.cfi_pass_offset;

        stat.cfs_rcv.cbs_pending_first =
            cfi->cfi_rcv.cfi_pending_first;
        stat.cfs_rcv.cbs_pending_last =
            cfi->cfi_rcv.cfi_pending_last;
        stat.cfs_rcv.cbs_inject_q_len =
            cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
        stat.cfs_rcv.cbs_pass_offset =
            cfi->cfi_rcv.cfi_pass_offset;

        for (i = 0; i < MAX_CONTENT_FILTER; i++) {
            struct cfil_entry_stat *estat;
            struct cfe_buf *ebuf;
            struct cfe_buf_stat *sbuf;

            entry = &cfi->cfi_entries[i];

            estat = &stat.ces_entries[i];

            estat->ces_len = sizeof(struct cfil_entry_stat);
            estat->ces_filter_id = entry->cfe_filter ?
                entry->cfe_filter->cf_kcunit : 0;
            estat->ces_flags = entry->cfe_flags;
            estat->ces_necp_control_unit =
                entry->cfe_necp_control_unit;

            estat->ces_last_event.tv_sec =
                (int64_t)entry->cfe_last_event.tv_sec;
            estat->ces_last_event.tv_usec =
                (int64_t)entry->cfe_last_event.tv_usec;

            estat->ces_last_action.tv_sec =
                (int64_t)entry->cfe_last_action.tv_sec;
            estat->ces_last_action.tv_usec =
                (int64_t)entry->cfe_last_action.tv_usec;

            ebuf = &entry->cfe_snd;
            sbuf = &estat->ces_snd;
            sbuf->cbs_pending_first =
                cfil_queue_offset_first(&ebuf->cfe_pending_q);
            sbuf->cbs_pending_last =
                cfil_queue_offset_last(&ebuf->cfe_pending_q);
            sbuf->cbs_ctl_first =
                cfil_queue_offset_first(&ebuf->cfe_ctl_q);
            sbuf->cbs_ctl_last =
                cfil_queue_offset_last(&ebuf->cfe_ctl_q);
            sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
            sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
            sbuf->cbs_peeked = ebuf->cfe_peeked;

            ebuf = &entry->cfe_rcv;
            sbuf = &estat->ces_rcv;
            sbuf->cbs_pending_first =
                cfil_queue_offset_first(&ebuf->cfe_pending_q);
            sbuf->cbs_pending_last =
                cfil_queue_offset_last(&ebuf->cfe_pending_q);
            sbuf->cbs_ctl_first =
                cfil_queue_offset_first(&ebuf->cfe_ctl_q);
            sbuf->cbs_ctl_last =
                cfil_queue_offset_last(&ebuf->cfe_ctl_q);
            sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
            sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
            sbuf->cbs_peeked = ebuf->cfe_peeked;
        }
        error = SYSCTL_OUT(req, &stat,
            sizeof (struct cfil_sock_stat));
        if (error != 0)
            break;
    }
done:
    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (req->oldptr != USER_ADDR_NULL) {
        cfil_info_show();
    }

    return (error);
}
/*
 * UDP Socket Support
 */
static void
cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
{
    char local[MAX_IPv6_STR_LEN+6];
    char remote[MAX_IPv6_STR_LEN+6];
    const void *addr;

    // No sock or not UDP, no-op
    if (so == NULL || entry == NULL) {
        return;
    }

    local[0] = remote[0] = 0x0;

    switch (entry->cfentry_family) {
    case AF_INET6:
        addr = &entry->cfentry_laddr.addr6;
        inet_ntop(AF_INET6, addr, local, sizeof(local));
        addr = &entry->cfentry_faddr.addr6;
        inet_ntop(AF_INET6, addr, remote, sizeof(local));
        break;
    case AF_INET:
        addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
        inet_ntop(AF_INET, addr, local, sizeof(local));
        addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
        inet_ntop(AF_INET, addr, remote, sizeof(local));
        break;
    default:
        return;
    }

    CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
        msg,
        (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
        ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
}
static void
cfil_inp_log(int level, struct socket *so, const char* msg)
{
    struct inpcb *inp = NULL;
    char local[MAX_IPv6_STR_LEN+6];
    char remote[MAX_IPv6_STR_LEN+6];
    const void *addr;

    if (so == NULL) {
        return;
    }

    inp = sotoinpcb(so);
    if (inp == NULL) {
        return;
    }

    local[0] = remote[0] = 0x0;

    if (inp->inp_vflag & INP_IPV6) {
        addr = &inp->in6p_laddr.s6_addr32;
        inet_ntop(AF_INET6, addr, local, sizeof(local));
        addr = &inp->in6p_faddr.s6_addr32;
        inet_ntop(AF_INET6, addr, remote, sizeof(local));
    } else {
        addr = &inp->inp_laddr.s_addr;
        inet_ntop(AF_INET, addr, local, sizeof(local));
        addr = &inp->inp_faddr.s_addr;
        inet_ntop(AF_INET, addr, remote, sizeof(local));
    }

    if (so->so_cfil != NULL)
        CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
            msg, IS_UDP(so) ? "UDP" : "TCP",
            (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
            ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
    else
        CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
            msg, IS_UDP(so) ? "UDP" : "TCP",
            (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
            ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
}
static void
cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
{
    if (cfil_info == NULL)
        return;

    if (cfil_info->cfi_hash_entry != NULL)
        cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
    else
        cfil_inp_log(level, cfil_info->cfi_so, msg);
}
int
cfil_db_init(struct socket *so)
{
    int error = 0;
    struct cfil_db *db = NULL;

    CFIL_LOG(LOG_INFO, "");

    db = zalloc(cfil_db_zone);
    if (db == NULL) {
        error = ENOMEM;
        goto done;
    }
    bzero(db, sizeof(struct cfil_db));
    db->cfdb_so = so;
    db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
    if (db->cfdb_hashbase == NULL) {
        zfree(cfil_db_zone, db);
        db = NULL;
        error = ENOMEM;
        goto done;
    }

    so->so_cfil_db = db;

done:
    return (error);
}
void
cfil_db_free(struct socket *so)
{
    struct cfil_hash_entry *entry = NULL;
    struct cfil_hash_entry *temp_entry = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (so == NULL || so->so_cfil_db == NULL) {
        return;
    }
    db = so->so_cfil_db;

    CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
        (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);

    for (int i = 0; i < CFILHASHSIZE; i++) {
        cfilhash = &db->cfdb_hashbase[i];
        LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
            if (entry->cfentry_cfil != NULL) {
                cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
                cfil_info_free(entry->cfentry_cfil);
                OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
                entry->cfentry_cfil = NULL;
            }

            cfil_db_delete_entry(db, entry);
            if (so->so_flags & SOF_CONTENT_FILTER) {
                if (db->cfdb_count == 0)
                    so->so_flags &= ~SOF_CONTENT_FILTER;
                VERIFY(so->so_usecount > 0);
                so->so_usecount--;
            }
        }
    }

    // Make sure all entries are cleaned up!
    VERIFY(db->cfdb_count == 0);
    CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);

    FREE(db->cfdb_hashbase, M_CFIL);
    zfree(cfil_db_zone, db);
    so->so_cfil_db = NULL;
}
static void
fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
{
    struct sockaddr_in *sin = NULL;
    struct sockaddr_in6 *sin6 = NULL;

    if (entry == NULL || addr == NULL) {
        return;
    }

    switch (addr->sa_family) {
    case AF_INET:
        sin = satosin(addr);
        if (sin->sin_len != sizeof(*sin)) {
            return;
        }
        if (isLocal == TRUE) {
            entry->cfentry_lport = sin->sin_port;
            entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
        } else {
            entry->cfentry_fport = sin->sin_port;
            entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
        }
        entry->cfentry_family = AF_INET;
        return;
    case AF_INET6:
        sin6 = satosin6(addr);
        if (sin6->sin6_len != sizeof(*sin6)) {
            return;
        }
        if (isLocal == TRUE) {
            entry->cfentry_lport = sin6->sin6_port;
            entry->cfentry_laddr.addr6 = sin6->sin6_addr;
        } else {
            entry->cfentry_fport = sin6->sin6_port;
            entry->cfentry_faddr.addr6 = sin6->sin6_addr;
        }
        entry->cfentry_family = AF_INET6;
        return;
    default:
        return;
    }
}
static void
fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
{
    if (entry == NULL || inp == NULL) {
        return;
    }

    if (inp->inp_vflag & INP_IPV4) {
        if (isLocal == TRUE) {
            entry->cfentry_lport = inp->inp_lport;
            entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
        } else {
            entry->cfentry_fport = inp->inp_fport;
            entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
        }
        entry->cfentry_family = AF_INET;
    } else if (inp->inp_vflag & INP_IPV6) {
        if (isLocal == TRUE) {
            entry->cfentry_lport = inp->inp_lport;
            entry->cfentry_laddr.addr6 = inp->in6p_laddr;
        } else {
            entry->cfentry_fport = inp->inp_fport;
            entry->cfentry_faddr.addr6 = inp->in6p_faddr;
        }
        entry->cfentry_family = AF_INET6;
    }
}
static bool
check_port(struct sockaddr *addr, u_short port)
{
    struct sockaddr_in *sin = NULL;
    struct sockaddr_in6 *sin6 = NULL;

    if (addr == NULL || port == 0) {
        return FALSE;
    }

    switch (addr->sa_family) {
    case AF_INET:
        sin = satosin(addr);
        if (sin->sin_len != sizeof(*sin)) {
            return FALSE;
        }
        if (port == ntohs(sin->sin_port)) {
            return TRUE;
        }
        break;
    case AF_INET6:
        sin6 = satosin6(addr);
        if (sin6->sin6_len != sizeof(*sin6)) {
            return FALSE;
        }
        if (port == ntohs(sin6->sin6_port)) {
            return TRUE;
        }
        break;
    default:
        break;
    }
    return FALSE;
}
struct cfil_hash_entry *
cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
{
    struct cfilhashhead *cfilhash = NULL;
    u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
    struct cfil_hash_entry *nextentry;

    if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
        return NULL;
    }

    flowhash &= db->cfdb_hashmask;
    cfilhash = &db->cfdb_hashbase[flowhash];

    LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
        if (nextentry->cfentry_cfil != NULL &&
            nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
            CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
                (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
            return nextentry;
        }
    }

    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
        (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
    return NULL;
}
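/*
 * Added note (illustrative; not from the original source): the UDP flow
 * sock_id carries the flow hash in its low 32 bits, which is why the lookup
 * above can recover the bucket without re-hashing the addresses:
 *
 *	u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff); // low 32 bits
 *	u_int32_t bucket   = flowhash & db->cfdb_hashmask;        // power-of-two mask
 *	// db->cfdb_hashbase[bucket] is the only chain that can hold the entry,
 *	// and the full 64-bit cfi_sock_id disambiguates entries within it.
 */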
struct cfil_hash_entry *
cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
    struct cfil_hash_entry matchentry;
    struct cfil_hash_entry *nextentry = NULL;
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
    int inp_hash_element = 0;
    struct cfilhashhead *cfilhash = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (inp == NULL) {
        goto done;
    }

    if (local != NULL) {
        fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
    } else {
        fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
    }
    if (remote != NULL) {
        fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
    } else {
        fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
    }

    if (inp->inp_vflag & INP_IPV6) {
        hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
        hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
    } else {
        hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
        hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
    }

    inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
        matchentry.cfentry_lport, matchentry.cfentry_fport);
    inp_hash_element &= db->cfdb_hashmask;

    cfilhash = &db->cfdb_hashbase[inp_hash_element];

    LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
        if ((inp->inp_vflag & INP_IPV6) &&
            nextentry->cfentry_lport == matchentry.cfentry_lport &&
            nextentry->cfentry_fport == matchentry.cfentry_fport &&
            IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
            IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
            return nextentry;
        } else if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
            nextentry->cfentry_fport == matchentry.cfentry_fport &&
            nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
            nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
            return nextentry;
        }
    }

done:
    cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
    return NULL;
}
void
cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
{
    if (hash_entry == NULL)
        return;

    LIST_REMOVE(hash_entry, cfentry_link);
    zfree(cfil_hash_entry_zone, hash_entry);
    db->cfdb_count--;
    if (db->cfdb_only_entry == hash_entry)
        db->cfdb_only_entry = NULL;
}
struct cfil_hash_entry *
cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
    struct cfil_hash_entry *entry = NULL;
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
    int inp_hash_element = 0;
    struct cfilhashhead *cfilhash = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (inp == NULL) {
        goto done;
    }

    entry = zalloc(cfil_hash_entry_zone);
    if (entry == NULL) {
        goto done;
    }
    bzero(entry, sizeof(struct cfil_hash_entry));

    if (local != NULL) {
        fill_cfil_hash_entry_from_address(entry, TRUE, local);
    } else {
        fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
    }
    if (remote != NULL) {
        fill_cfil_hash_entry_from_address(entry, FALSE, remote);
    } else {
        fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
    }
    entry->cfentry_lastused = net_uptime();

    if (inp->inp_vflag & INP_IPV6) {
        hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
        hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
    } else {
        hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
        hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
    }
    entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
        entry->cfentry_lport, entry->cfentry_fport);
    inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;

    cfilhash = &db->cfdb_hashbase[inp_hash_element];

    LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
    db->cfdb_count++;
    db->cfdb_only_entry = entry;
    cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");

done:
    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
    return entry;
}
struct cfil_info *
cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
{
    struct cfil_hash_entry *hash_entry = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (db == NULL || id == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
            (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), id);
        return NULL;
    }

    // This is an optimization for connected UDP socket which only has one flow.
    // No need to do the hash lookup.
    if (db->cfdb_count == 1) {
        if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
            db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
            return (db->cfdb_only_entry->cfentry_cfil);
        }
    }

    hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
    return (hash_entry != NULL ? hash_entry->cfentry_cfil : NULL);
}
struct cfil_hash_entry *
cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
{
#pragma unused(so, filter_control_unit, outgoing, local, remote)
    struct cfil_hash_entry *hash_entry = NULL;
    int error = 0;

    socket_lock_assert_owned(so);

    // If new socket, allocate cfil db
    if (so->so_cfil_db == NULL) {
        if (cfil_db_init(so) != 0) {
            return (NULL);
        }
    }

    // See if flow already exists.
    hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
    if (hash_entry != NULL) {
        return (hash_entry);
    }

    hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
    if (hash_entry == NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
        CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
        return (NULL);
    }

    if (cfil_info_alloc(so, hash_entry) == NULL ||
        hash_entry->cfentry_cfil == NULL) {
        cfil_db_delete_entry(so->so_cfil_db, hash_entry);
        CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
        return (NULL);
    }

    cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");

    if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
        CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
            filter_control_unit);
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
        return (NULL);
    }
    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
        (uint64_t)VM_KERNEL_ADDRPERM(so),
        filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);

    so->so_flags |= SOF_CONTENT_FILTER;
    OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

    /* Hold a reference on the socket for each flow */
    so->so_usecount++;

    error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, filter_control_unit);
    /* We can recover from flow control or out of memory errors */
    if (error != 0 && error != ENOBUFS && error != ENOMEM)
        return (NULL);

    CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
    return (hash_entry);
}
errno_t
cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
    struct sockaddr *local, struct sockaddr *remote,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(outgoing, so, local, remote, data, control, flags)
    errno_t error = 0;
    uint32_t filter_control_unit;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_info *cfil_info = NULL;

    socket_lock_assert_owned(so);

    if (cfil_active_count == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
        return (error);
    }

    filter_control_unit = necp_socket_get_content_filter_control_unit(so);
    if (filter_control_unit == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
        return (error);
    }

    if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
        OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
        return (error);
    }

    hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
    if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
        CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
        return (EPIPE);
    }
    // Update last used timestamp, this is for flow Idle TO
    hash_entry->cfentry_lastused = net_uptime();
    cfil_info = hash_entry->cfentry_cfil;

    if (cfil_info->cfi_flags & CFIF_DROP) {
        cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
        return (EPIPE);
    }
    if (control != NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
    }
    if (data->m_type == MT_OOBDATA) {
        CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
    }

    error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);

    return (error);
}
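/*
 * Added summary (not in the original source): unlike TCP, where a single
 * so_cfil tracks the whole connection, every UDP datagram is first mapped to
 * a flow entry keyed by its address 4-tuple before being run through
 * cfil_data_common(). A rough sketch of the per-datagram sequence above:
 *
 *	// 1. bail out cheaply when no filter applies
 *	//    (no active filter, no NECP control unit, or user-space-only unit)
 *	// 2. hash_entry = cfil_sock_udp_get_flow(so, unit, outgoing, local, remote);
 *	//    creates the cfil_db / hash entry / cfil_info on first use
 *	// 3. refresh hash_entry->cfentry_lastused so the idle GC keeps the flow
 *	// 4. honor CFIF_DROP with EPIPE, then hand the mbuf chain to
 *	//    cfil_data_common() for the usual pass/peek accounting
 */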
/*
 * Go through all UDP flows for specified socket and returns TRUE if
 * any flow is still attached. If need_wait is TRUE, wait on first
 * attached flow.
 */
static int
cfil_filters_udp_attached(struct socket *so, bool need_wait)
{
    struct timespec ts;
    lck_mtx_t *mutex_held;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    struct cfil_info *cfil_info = NULL;
    struct cfil_entry *entry = NULL;
    errno_t error = 0;
    int kcunit;
    int attached = 0;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {

        if (so->so_proto->pr_getlock != NULL)
            mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        else
            mutex_held = so->so_proto->pr_domain->dom_mtx;
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {

                if (hash_entry->cfentry_cfil != NULL) {

                    cfil_info = hash_entry->cfentry_cfil;
                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        entry = &cfil_info->cfi_entries[kcunit - 1];

                        /* Are we attached to the filter? */
                        if (entry->cfe_filter == NULL) {
                            continue;
                        }

                        if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
                            continue;
                        if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
                            continue;

                        attached = 1;

                        if (need_wait == TRUE) {
                            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");

                            ts.tv_sec = cfil_close_wait_timeout / 1000;
                            ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
                                NSEC_PER_USEC * 1000;

                            OSIncrementAtomic(&cfil_stats.cfs_close_wait);
                            cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
                            error = msleep((caddr_t)cfil_info, mutex_held,
                                PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
                            cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;

                            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");

                            /*
                             * Force close in case of timeout
                             */
                            if (error != 0) {
                                OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
                                cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
                                entry->cfe_flags |= CFEF_CFIL_DETACHED;
                            }
                        }
                        goto done;
                    }
                }
            }
        }
    }

done:
    return (attached);
}
int32_t
cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
{
    struct socket *so = sb->sb_so;
    struct cfi_buf *cfi_buf;
    uint64_t pending = 0;
    uint64_t total_pending = 0;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
        (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {

                if (hash_entry->cfentry_cfil != NULL) {
                    if ((sb->sb_flags & SB_RECV) == 0)
                        cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
                    else
                        cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;

                    pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
                    /*
                     * If we are limited by the "chars of mbufs used" roughly
                     * adjust so we won't overcommit
                     */
                    if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
                        pending = cfi_buf->cfi_pending_mbcnt;

                    total_pending += pending;
                }
            }
        }

        VERIFY(total_pending < INT32_MAX);
        CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
            (uint64_t)VM_KERNEL_ADDRPERM(so),
            total_pending, check_thread);
    }

    return (int32_t)(total_pending);
}
int
cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int done_count = 0;
    int kcunit;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {

                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;

                    // This flow is marked as DROP
                    if (cfil_info->cfi_flags & drop_flag) {
                        done_count++;
                        continue;
                    }

                    // This flow has been shut already, skip
                    if (cfil_info->cfi_flags & shut_flag) {
                        continue;
                    }
                    // Mark flow as shut
                    cfil_info->cfi_flags |= shut_flag;
                    done_count++;

                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        /* Disconnect incoming side */
                        if (how != SHUT_WR) {
                            error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
                        }
                        /* Disconnect outgoing side */
                        if (how != SHUT_RD) {
                            error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
                        }
                    }
                }
            }
        }
    }

    if (done_count == 0) {
        error = ENOTCONN;
    }
    return (error);
}
int
cfil_sock_udp_shutdown(struct socket *so, int *how)
{
    int error = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL))
        goto done;

    socket_lock_assert_owned(so);

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

    /*
     * Check the state of the socket before the content filter
     */
    if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
        /* read already shut down */
        error = ENOTCONN;
        goto done;
    }
    if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
        /* write already shut down */
        error = ENOTCONN;
        goto done;
    }

    /*
     * shutdown read: SHUT_RD or SHUT_RDWR
     */
    if (*how != SHUT_WR) {
        error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
        if (error != 0)
            goto done;
    }
    /*
     * shutdown write: SHUT_WR or SHUT_RDWR
     */
    if (*how != SHUT_RD) {
        error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
        if (error != 0)
            goto done;

        /*
         * When outgoing data is pending, we delay the shutdown at the
         * protocol level until the content filters give the final
         * verdict on the pending data.
         */
        if (cfil_sock_data_pending(&so->so_snd) != 0) {
            /*
             * When shutting down the read and write sides at once
             * we can proceed to the final shutdown of the read
             * side. Otherwise, we just return.
             */
            if (*how == SHUT_WR) {
                error = EJUSTRETURN;
            } else if (*how == SHUT_RDWR) {
                *how = SHUT_RD;
            }
        }
    }
done:
    return (error);
}
void
cfil_sock_udp_close_wait(struct socket *so)
{
    socket_lock_assert_owned(so);

    while (cfil_filters_udp_attached(so, FALSE)) {
        /*
         * Notify the filters we are going away so they can detach
         */
        cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);

        /*
         * Make sure we need to wait after the filters are notified
         * of the disconnection
         */
        if (cfil_filters_udp_attached(so, TRUE) == 0)
            break;
    }
}
void
cfil_sock_udp_is_closed(struct socket *so)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int kcunit;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {

                    cfil_info = hash_entry->cfentry_cfil;

                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        /* Let the filters know of the closing */
                        error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
                    }

                    /* Last chance to push passed data out */
                    error = cfil_acquire_sockbuf(so, cfil_info, 1);
                    if (error == 0)
                        cfil_service_inject_queue(so, cfil_info, 1);
                    cfil_release_sockbuf(so, 1);

                    cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;

                    /* Pending data needs to go */
                    cfil_flush_queues(so, cfil_info);

                    CFIL_INFO_VERIFY(cfil_info);
                }
            }
        }
    }
}
void
cfil_sock_udp_buf_update(struct sockbuf *sb)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int outgoing;
    struct socket *so = sb->sb_so;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {

                    cfil_info = hash_entry->cfentry_cfil;

                    if ((sb->sb_flags & SB_RECV) == 0) {
                        if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
                            return;
                        outgoing = 1;
                        OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
                    } else {
                        if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
                            return;
                        outgoing = 0;
                        OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
                    }

                    CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
                        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

                    error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
                    if (error == 0)
                        cfil_service_inject_queue(so, cfil_info, outgoing);
                    cfil_release_sockbuf(so, outgoing);
                }
            }
        }
    }
}
void
cfil_filter_show(u_int32_t kcunit)
{
    struct content_filter *cfc = NULL;
    struct cfil_entry *entry;
    int count = 0;

    if (content_filters == NULL) {
        return;
    }
    if (kcunit > MAX_CONTENT_FILTER) {
        return;
    }

    cfil_rw_lock_shared(&cfil_lck_rw);

    if (content_filters[kcunit - 1] == NULL) {
        cfil_rw_unlock_shared(&cfil_lck_rw);
        return;
    }
    cfc = content_filters[kcunit - 1];

    CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
        kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
    if (cfc->cf_flags & CFF_DETACHING)
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
    if (cfc->cf_flags & CFF_ACTIVE)
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED)
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");

    TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {

        if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
            struct cfil_info *cfil_info = entry->cfe_cfil_info;

            count++;

            if (entry->cfe_flags & CFEF_CFIL_DETACHED)
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
            else
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
        }
    }

    CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);

    cfil_rw_unlock_shared(&cfil_lck_rw);
}
void
cfil_info_show(void)
{
    struct cfil_info *cfil_info;
    int count = 0;

    cfil_rw_lock_shared(&cfil_lck_rw);

    CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);

    TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {

        count++;

        cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");

        if (cfil_info->cfi_flags & CFIF_DROP)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
        if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
        if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
        if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
        if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
        if (cfil_info->cfi_flags & CFIF_SHUT_WR)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
        if (cfil_info->cfi_flags & CFIF_SHUT_RD)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
    }

    CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);

    cfil_rw_unlock_shared(&cfil_lck_rw);
}
bool
cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
{
    if (cfil_info && cfil_info->cfi_hash_entry &&
        (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
        return true;
    }
    return false;
}
bool
cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
{
    struct cfil_entry *entry;
    struct timeval current_tv;
    struct timeval diff_time;

    if (cfil_info == NULL)
        return false;

    /*
     * If we have queued up more data than passed offset and we haven't received
     * an action from user space for a while (the user space filter might have crashed),
     * return action timed out.
     */
    if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
        cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {

        microuptime(&current_tv);

        for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &cfil_info->cfi_entries[kcunit - 1];

            if (entry->cfe_filter == NULL)
                continue;

            if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
                cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
                // haven't gotten an action from this filter, check timeout
                timersub(&current_tv, &entry->cfe_last_action, &diff_time);
                if (diff_time.tv_sec >= timeout) {
                    cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
                    return true;
                }
            }
        }
    }
    return false;
}
bool
cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
{
    if (cfil_info == NULL)
        return false;

    /*
     * Clean up flow if it exceeded queue thresholds
     */
    if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
        cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
        CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
            cfil_udp_gc_mbuf_num_max,
            cfil_udp_gc_mbuf_cnt_max,
            cfil_info->cfi_snd.cfi_tail_drop_cnt,
            cfil_info->cfi_rcv.cfi_tail_drop_cnt);
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
        return true;
    }

    return false;
}
static void
cfil_udp_gc_thread_sleep(bool forever)
{
    if (forever) {
        (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
            THREAD_INTERRUPTIBLE);
    } else {
        uint64_t deadline = 0;
        nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
        clock_absolutetime_interval_to_deadline(deadline, &deadline);

        (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
            THREAD_INTERRUPTIBLE, deadline);
    }
}
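/*
 * Added note: the non-"forever" case arms a relative deadline. A minimal
 * sketch of the two-step conversion used above:
 *
 *	uint64_t deadline = 0;
 *	nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
 *	    // nanoseconds -> mach absolute-time units
 *	clock_absolutetime_interval_to_deadline(deadline, &deadline);
 *	    // interval -> absolute wake-up time (now + interval)
 *
 * assert_wait_deadline() then parks the GC thread until either the deadline
 * fires or a wakeup is posted on &cfil_sock_udp_attached_count.
 */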
static void
cfil_udp_gc_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)

    ASSERT(cfil_udp_gc_thread == current_thread());
    thread_set_thread_name(current_thread(), "CFIL_UPD_GC");

    // Kick off gc shortly
    cfil_udp_gc_thread_sleep(false);
    thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
    /* NOTREACHED */
}
static void
cfil_info_udp_expire(void *v, wait_result_t w)
{
#pragma unused(v, w)

    static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
    static uint32_t expired_count = 0;

    struct cfil_info *cfil_info;
    struct cfil_hash_entry *hash_entry;
    struct cfil_db *db;
    struct socket *so = NULL;
    u_int32_t current_time = 0;

    current_time = net_uptime();

    // Get all expired UDP flow ids
    cfil_rw_lock_shared(&cfil_lck_rw);

    if (cfil_sock_udp_attached_count == 0) {
        cfil_rw_unlock_shared(&cfil_lck_rw);
        goto go_sleep;
    }

    TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
        if (expired_count >= UDP_FLOW_GC_MAX_COUNT)
            break;

        if (IS_UDP(cfil_info->cfi_so)) {
            if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
                cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
                cfil_info_buffer_threshold_exceeded(cfil_info)) {
                expired_array[expired_count] = cfil_info->cfi_sock_id;
                expired_count++;
            }
        }
    }
    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (expired_count == 0)
        goto go_sleep;

    for (uint32_t i = 0; i < expired_count; i++) {

        // Search for socket (UDP only and lock so)
        so = cfil_socket_from_sock_id(expired_array[i], true);
        if (so == NULL) {
            continue;
        }

        cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
        if (cfil_info == NULL) {
            goto unlock;
        }

        db = so->so_cfil_db;
        hash_entry = cfil_info->cfi_hash_entry;

        if (db == NULL || hash_entry == NULL) {
            goto unlock;
        }

#if GC_DEBUG || LIFECYCLE_DEBUG
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
#endif

        cfil_db_delete_entry(db, hash_entry);
        cfil_info_free(cfil_info);
        OSIncrementAtomic(&cfil_stats.cfs_sock_detached);

        if (so->so_flags & SOF_CONTENT_FILTER) {
            if (db->cfdb_count == 0)
                so->so_flags &= ~SOF_CONTENT_FILTER;
            VERIFY(so->so_usecount > 0);
            so->so_usecount--;
        }

unlock:
        socket_unlock(so, 1);
    }

    CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
    expired_count = 0;

go_sleep:

    // Sleep forever (until woken up) if no more UDP flow to clean
    cfil_rw_lock_shared(&cfil_lck_rw);
    cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
    cfil_rw_unlock_shared(&cfil_lck_rw);
    thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
    /* NOTREACHED */
}
static void
cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;
    struct cfil_hash_entry *hash_entry = NULL;

    if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
        cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
        return;
    }

    /* Allocate a tag */
    tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
        sizeof(struct cfil_tag), M_DONTWAIT, m);

    if (tag) {
        ctag = (struct cfil_tag *)(tag + 1);
        ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
        ctag->cfil_so_options = cfil_info->cfi_so->so_options;

        hash_entry = cfil_info->cfi_hash_entry;
        if (hash_entry->cfentry_family == AF_INET6) {
            fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
                &hash_entry->cfentry_faddr.addr6,
                hash_entry->cfentry_fport);
        } else if (hash_entry->cfentry_family == AF_INET) {
            fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
                hash_entry->cfentry_faddr.addr46.ia46_addr4,
                hash_entry->cfentry_fport);
        }
        m_tag_prepend(m, tag);
    }
    return;
}
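/*
 * Added usage sketch (hypothetical caller; not from the original source): the
 * mbuf tag attached above lets a held datagram be re-injected later with the
 * socket state it had when it was first intercepted. A consumer would
 * typically do:
 *
 *	uint32_t state_cnt = 0;
 *	short so_options = 0;
 *	struct sockaddr *faddr = NULL;
 *	struct m_tag *tag = cfil_udp_get_socket_state(m, &state_cnt, &so_options, &faddr);
 *	if (tag != NULL) {
 *		// compare state_cnt with so->so_state_change_cnt, use faddr as the
 *		// destination, then free the unlinked tag with m_tag_free(tag)
 *	}
 */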
struct m_tag *
cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
    struct sockaddr **faddr)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;

    tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
    if (tag) {
        ctag = (struct cfil_tag *)(tag + 1);
        if (state_change_cnt)
            *state_change_cnt = ctag->cfil_so_state_change_cnt;
        if (options)
            *options = ctag->cfil_so_options;
        if (faddr)
            *faddr = (struct sockaddr *) &ctag->cfil_faddr;

        /*
         * Unlink tag and hand it over to caller.
         * Note that caller will be responsible to free it.
         */
        m_tag_unlink(m, tag);