/*
 * Copyright (c) 2013-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * It should be noted that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters
 *   for the sake of simplicity of the implementation.
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT.
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
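 *
 * As an illustrative sketch only (user space code, not part of this file;
 * error handling is omitted and the control unit value and variable names
 * are hypothetical), a filter agent could attach and register its NECP
 * filter control unit roughly like this:
 *
 *	struct ctl_info info;
 *	struct sockaddr_ctl addr;
 *	uint32_t necp_control_unit = 12;   // same unit used in its NECP rules
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	bzero(&info, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);     // resolve the control name to an id
 *
 *	bzero(&addr, sizeof(addr));
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 1;                  // kcunit, 1..MAX_CONTENT_FILTER
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &necp_control_unit, sizeof(necp_control_unit));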
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (this is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells whether
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
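 *
 * For illustration only (a user space sketch, not part of this file; the
 * buffer size and variable names are hypothetical), an agent's receive loop
 * would read whole messages and dispatch on the common header:
 *
 *	uint8_t buf[8192];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *
 *	if (n >= (ssize_t)sizeof(*hdr) &&
 *	    hdr->cfm_version == CFM_VERSION_CURRENT &&
 *	    hdr->cfm_type == CFM_TYPE_EVENT) {
 *		// hdr->cfm_op says which event this is and
 *		// hdr->cfm_sock_id identifies the flow it is about
 *	}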
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap only after 468 years at 10 Gbit/sec:
 *   2^64 / ((10^9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
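 *
 * For example, if the first CFM_OP_DATA_OUT event for a flow describes a span
 * with start offset 0 and end offset 1460, the event carries
 * 1460 - 0 = 1460 bytes of content, and the next CFM_OP_DATA_OUT event for
 * that direction will have a start offset of 1460.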
 *
 * There are two kinds of primary content filter actions:
 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shutdown the socket and disallow further data flow
 *
 * There is also an action to mark a given client flow as already filtered
 * at a higher level, CFM_OP_BLESS_CLIENT.
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
 * When a pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * message, the data flow in that direction becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see, by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default, until a CFM_OP_DATA_UPDATE action message
 * with a peek offset greater than 0 is sent by the user space filter agent.
 * When a peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * message, the flow of data events becomes unrestricted.
 *
 * Note that a peek offset cannot be smaller than the corresponding pass offset.
 * Also a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
 * to set too small a peek value is silently ignored.
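 *
 * As an illustrative sketch only (user space, not part of this file; fd is
 * the connected control socket and sock_id comes from a previously received
 * event), an agent that is done inspecting a flow could open both directions
 * with a single CFM_OP_DATA_UPDATE. The header is the first member of every
 * message, so it can be filled through a cast:
 *
 *	struct cfil_msg_action action;
 *	struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)&action;
 *
 *	bzero(&action, sizeof(action));
 *	hdr->cfm_len = sizeof(action);
 *	hdr->cfm_version = CFM_VERSION_CURRENT;
 *	hdr->cfm_type = CFM_TYPE_ACTION;
 *	hdr->cfm_op = CFM_OP_DATA_UPDATE;
 *	hdr->cfm_sock_id = sock_id;
 *	action.cfa_out_pass_offset = CFM_MAX_OFFSET;  // let all outgoing data pass
 *	action.cfa_out_peek_offset = CFM_MAX_OFFSET;
 *	action.cfa_in_pass_offset = CFM_MAX_OFFSET;   // let all incoming data pass
 *	action.cfa_in_peek_offset = CFM_MAX_OFFSET;
 *	send(fd, &action, sizeof(action), 0);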
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected;
 * - A content filter specific state in a set of "struct cfil_entry".
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
 * to the kernel control socket unit it is attached to and also has a pointer
 * to the corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and are pending for a filtering decision.
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead will sit in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offset of the data span of the list
 * of mbufs.
 *
 * The data moves into the three content filter queues according to this
 * sequence (a worked example follows):
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: The sequence (a),(b) may be repeated several times if there is more
 * than one content filter attached to the TCP/IP socket.
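 *
 * For example, with two attached filters, a 1000-byte mbuf written on the
 * socket first sits on filter 1's "cfe_ctl_q", moves to filter 1's
 * "cfe_pending_q" once its CFM_OP_DATA_OUT event has been sent, then, after
 * filter 1 allows it to pass, repeats the same two steps for filter 2, and
 * finally lands on the "cfi_inject_q" to be re-injected into the TCP/IP
 * socket buffer.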
 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and that is waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * TCP/IP socket buffer.
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 *
 * The processing overhead of the content filtering may have an effect on
 * flow control by adding noticeable delays and cannot be eliminated --
 * care must be taken by the user space filter agent to minimize the
 * processing overhead.
 *
 * The amount of data being filtered is kept in buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
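 *
 * Conceptually (this is a sketch, not the actual sbspace() code), the
 * adjustment amounts to something like:
 *
 *	avail = sb->sb_hiwat - sb->sb_cc;
 *	avail -= (int64_t)(cfi_pending_last - cfi_pending_first); // held by cfil
 *	if (avail < 0)
 *		avail = 0;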
 *
 * LOCKING STRATEGY
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
 * is held. That's why we have some sequences where we drop the cfil read-write
 * lock before taking the TCP/IP socket lock.
 *
 * It is also important to lock the TCP/IP socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" lock protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually, "cfe_link" and "cfe_filter" are protected both by
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusive and the socket is locked.
 *
 * To read the other fields of "struct content_filter" we have to take
 * "cfil_lck_rw" in shared mode.
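 *
 * For illustration (a sketch of the typical ordering, not a specific code
 * path), a thread that needs the socket lock while holding the shared cfil
 * lock drops the cfil lock first:
 *
 *	cfil_rw_lock_shared(&cfil_lck_rw);
 *	// ... find the entry / cfil_info of interest ...
 *	cfil_rw_unlock_shared(&cfil_lck_rw);
 *	socket_lock(so, 1);
 *	// ... operate on the socket and its cfil_info ...
 *	socket_unlock(so, 1);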
 *
 * LIMITATIONS
 *
 * - For TCP sockets only
 *
 * - Does not support TCP unordered messages
 *
 * TODO: if datagram support is added, enqueue control and address mbufs as well
 */
#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/mbuf.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#include <libkern/libkern.h>
#include <kern/sched_prim.h>

#define MAX_CONTENT_FILTER 2
/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
    kern_ctl_ref        cf_kcref;
    u_int32_t           cf_kcunit;
    uint32_t            cf_flags;

    uint32_t            cf_necp_control_unit;

    uint32_t            cf_sock_count;
    TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define CFF_ACTIVE           0x01
#define CFF_DETACHING        0x02
#define CFF_FLOW_CONTROLLED  0x04
struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;             /* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;      /* Number of TCP socket attachments */
uint32_t cfil_sock_udp_attached_count = 0;  /* Number of UDP socket attachments */
uint32_t cfil_close_wait_timeout = 1000;    /* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);
#define CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void *cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void *cfil_rw_unlock_history[CFIL_RW_LCK_MAX];

#define CONTENT_FILTER_ZONE_NAME  "content_filter"
#define CONTENT_FILTER_ZONE_MAX   10
static struct zone *content_filter_zone = NULL;   /* zone for content_filter */

#define CFIL_INFO_ZONE_NAME  "cfil_info"
#define CFIL_INFO_ZONE_MAX   1024
static struct zone *cfil_info_zone = NULL;        /* zone for cfil_info */
MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
    uint64_t            q_start;    /* offset of first byte in queue */
    uint64_t            q_end;      /* offset of last byte in queue */
    struct cfil_mqhead  q_mq;
};
/*
 * There is one entry per content filter.
 */
struct cfil_entry {
    TAILQ_ENTRY(cfil_entry) cfe_link;
    struct content_filter   *cfe_filter;

    struct cfil_info        *cfe_cfil_info;
    uint32_t                cfe_flags;
    uint32_t                cfe_necp_control_unit;
    struct timeval          cfe_last_event;     /* To user space */
    struct timeval          cfe_last_action;    /* From user space */

    struct cfe_buf {
        /*
         * cfe_pending_q holds data that has been delivered to
         * the filter and for which we are waiting for an action
         */
        struct cfil_queue   cfe_pending_q;
        /*
         * This queue is for data that has not been delivered to
         * the content filter (new data, pass peek or flow control)
         */
        struct cfil_queue   cfe_ctl_q;

        uint64_t            cfe_pass_offset;
        uint64_t            cfe_peek_offset;
        uint64_t            cfe_peeked;
    } cfe_snd, cfe_rcv;
};

#define CFEF_CFIL_ATTACHED        0x0001  /* was attached to filter */
#define CFEF_SENT_SOCK_ATTACHED   0x0002  /* sock attach event was sent */
#define CFEF_DATA_START           0x0004  /* can send data event */
#define CFEF_FLOW_CONTROLLED      0x0008  /* wait for flow control lift */
#define CFEF_SENT_DISCONNECT_IN   0x0010  /* event was sent */
#define CFEF_SENT_DISCONNECT_OUT  0x0020  /* event was sent */
#define CFEF_SENT_SOCK_CLOSED     0x0040  /* closed event was sent */
#define CFEF_CFIL_DETACHED        0x0080  /* filter was detached */
#define CFI_ADD_TIME_LOG(cfil, t1, t0, op)                                      \
    struct timeval _tdiff;                                                      \
    if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) {                     \
        timersub(t1, t0, &_tdiff);                                              \
        (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000); \
        (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op;       \
        (cfil)->cfi_op_list_ctr++;                                              \
    }

struct cfil_hash_entry;
/*
 * There is a struct cfil_info per socket.
 */
struct cfil_info {
    TAILQ_ENTRY(cfil_info)  cfi_link;
    struct socket           *cfi_so;
    uint64_t                cfi_flags;
    uint64_t                cfi_sock_id;
    struct timeval64        cfi_first_event;
    uint32_t                cfi_op_list_ctr;
    uint32_t                cfi_op_time[CFI_MAX_TIME_LOG_ENTRY];  /* time interval in milliseconds since first event */
    unsigned char           cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];

    struct cfi_buf {
        /*
         * cfi_pending_first and cfi_pending_last describe the total
         * amount of data outstanding for all the filters on
         * this socket and data in the flow queue.
         * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
         */
        uint64_t    cfi_pending_first;
        uint64_t    cfi_pending_last;
        uint32_t    cfi_pending_mbcnt;
        uint32_t    cfi_pending_mbnum;
        uint32_t    cfi_tail_drop_cnt;
        /*
         * cfi_pass_offset is the minimum of all the filters
         */
        uint64_t    cfi_pass_offset;
        /*
         * cfi_inject_q holds data that needs to be re-injected
         * into the socket after filtering and that can
         * be queued because of flow control
         */
        struct cfil_queue   cfi_inject_q;
    } cfi_snd, cfi_rcv;

    struct cfil_entry       cfi_entries[MAX_CONTENT_FILTER];
    struct cfil_hash_entry  *cfi_hash_entry;
} __attribute__((aligned(8)));
#define CFIF_DROP              0x0001  /* drop action applied */
#define CFIF_CLOSE_WAIT        0x0002  /* waiting for filter to close */
#define CFIF_SOCK_CLOSED       0x0004  /* socket is closed */
#define CFIF_RETRY_INJECT_IN   0x0010  /* inject in failed */
#define CFIF_RETRY_INJECT_OUT  0x0020  /* inject out failed */
#define CFIF_SHUT_WR           0x0040  /* shutdown write */
#define CFIF_SHUT_RD           0x0080  /* shutdown read */

#define CFI_MASK_GENCNT    0xFFFFFFFF00000000  /* upper 32 bits */
#define CFI_SHIFT_GENCNT   32
#define CFI_MASK_FLOWHASH  0x00000000FFFFFFFF  /* lower 32 bits */
#define CFI_SHIFT_FLOWHASH 0

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
LIST_HEAD(cfilhashhead, cfil_hash_entry);
#define CFILHASHSIZE 16
#define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
#define IS_UDP(so) (so && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
#define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
    ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
#define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
    cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
#define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
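/*
 * Illustrative sketch of how the macros above are typically combined (the
 * variable names are hypothetical): a UDP flow's bucket in the per-socket
 * hash table is usually selected as
 *
 *	flowhash = CFIL_HASH(laddr, faddr, lport, fport);
 *	head = &db->cfdb_hashbase[flowhash & db->cfdb_hashmask];
 *
 * while IS_DNS(local, remote) lets the subsystem skip DNS/mDNS traffic on
 * ports 53 and 5353.
 */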
/*
 * UDP Garbage Collection:
 */
static struct thread *cfil_udp_gc_thread;
#define UDP_FLOW_GC_IDLE_TO            30   // Flow Idle Timeout in seconds
#define UDP_FLOW_GC_ACTION_TO          10   // Flow Action Timeout (no action from user space) in seconds
#define UDP_FLOW_GC_MAX_COUNT          100  // Max UDP flows to be handled per run
#define UDP_FLOW_GC_RUN_INTERVAL_NSEC  (10 * NSEC_PER_SEC)  // GC wakes up every 10 seconds

/*
 * UDP flow queue thresholds
 */
#define UDP_FLOW_GC_MBUF_CNT_MAX  (2 << MBSHIFT)                          // Max mbuf byte count in flow queue (2MB)
#define UDP_FLOW_GC_MBUF_NUM_MAX  (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT)  // Max mbuf count in flow queue (1K)
#define UDP_FLOW_GC_MBUF_SHIFT    5                                       // Shift to get 1/32 of platform limits
/*
 * UDP flow queue threshold globals:
 */
static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
/*
 * struct cfil_hash_entry
 *
 * Hash entry for cfil_info
 */
struct cfil_hash_entry {
    LIST_ENTRY(cfil_hash_entry) cfentry_link;
    struct cfil_info            *cfentry_cfil;
    u_short                     cfentry_fport;
    u_short                     cfentry_lport;
    sa_family_t                 cfentry_family;
    u_int32_t                   cfentry_flowhash;
    u_int32_t                   cfentry_lastused;
    union {
        /* foreign host table entry */
        struct in_addr_4in6 addr46;
        struct in6_addr     addr6;
    } cfentry_faddr;
    union {
        /* local host table entry */
        struct in_addr_4in6 addr46;
        struct in6_addr     addr6;
    } cfentry_laddr;
};
/*
 * struct cfil_db
 *
 * For each UDP socket, this is a hash table maintaining all the cfil_info
 * structs keyed by the flow 4-tuple <lport, fport, laddr, faddr>.
 */
struct cfil_db {
    struct socket           *cfdb_so;
    uint32_t                cfdb_count;       /* Number of total content filters */
    struct cfilhashhead     *cfdb_hashbase;
    u_long                  cfdb_hashmask;
    struct cfil_hash_entry  *cfdb_only_entry; /* Optimization for connected UDP */
};
/*
 * CFIL specific mbuf tag:
 * Save state of the socket at the point of data entry into cfil.
 * Use the saved state for reinjection at the protocol layer.
 */
struct cfil_tag {
    union sockaddr_in_4_6   cfil_faddr;
    uint32_t                cfil_so_state_change_cnt;
    short                   cfil_so_options;
};

#define CFIL_HASH_ENTRY_ZONE_NAME  "cfil_entry_hash"
#define CFIL_HASH_ENTRY_ZONE_MAX   1024
static struct zone *cfil_hash_entry_zone = NULL;

#define CFIL_DB_ZONE_NAME  "cfil_db"
#define CFIL_DB_ZONE_MAX   1024
static struct zone *cfil_db_zone = NULL;
struct cfil_stats cfil_stats;

/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;
int cfil_debug = 1;

// Debug controls added for selective debugging.
// Disabled for production. If enabled,
// these will have performance impact.
#define LIFECYCLE_DEBUG 0
#define VERDICT_DEBUG 0
/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
    struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
    struct sysctl_req *);
SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_stats, cfil_stats, "");
/*
 * Forward declaration to appease the compiler
 */
static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
    uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
    struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
    struct mbuf *, uint64_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
    struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
    struct in6_addr *, u_int16_t);

static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t);
static void cfil_info_free(struct cfil_info *);
static struct cfil_info *cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
static struct socket *cfil_socket_from_sock_id(cfil_sock_id_t, bool);
static struct socket *cfil_socket_from_client_uuid(uuid_t, bool *);
static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
    uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

static unsigned int cfil_data_length(struct mbuf *, int *, int *);
static errno_t cfil_db_init(struct socket *);
static void cfil_db_free(struct socket *so);
struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
    struct mbuf *, struct mbuf *, uint32_t);
static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
static void cfil_sock_udp_is_closed(struct socket *);
static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
static int cfil_sock_udp_shutdown(struct socket *, int *);
static void cfil_sock_udp_close_wait(struct socket *);
static void cfil_sock_udp_buf_update(struct sockbuf *);
static int cfil_filters_udp_attached(struct socket *, bool);
static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
    struct in6_addr **, struct in6_addr **,
    u_int16_t *, u_int16_t *);
static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
    struct in_addr *, struct in_addr *,
    u_int16_t *, u_int16_t *);
static void cfil_info_log(int, struct cfil_info *, const char *);
void cfil_filter_show(u_int32_t);
void cfil_info_show(void);
bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
bool cfil_info_action_timed_out(struct cfil_info *, int);
bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
static void cfil_udp_gc_thread_func(void *, wait_result_t);
static void cfil_info_udp_expire(void *, wait_result_t);

bool check_port(struct sockaddr *, u_short);
/*
 * Content filter global read write lock
 */

static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_lock_exclusive(lck);

    cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
    cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_unlock_exclusive(lck);

    cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
    cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_lock_shared(lck);

    cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
    cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_unlock_shared(lck);

    cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
    cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
    void *lr_saved;
    boolean_t upgraded;

    lr_saved = __builtin_return_address(0);

    upgraded = lck_rw_lock_shared_to_exclusive(lck);
    if (upgraded) {
        cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
        cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
    }
    return upgraded;
}

static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_lock_exclusive_to_shared(lck);

    cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
    cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
#pragma unused(lck, exclusive)
    LCK_RW_ASSERT(lck,
        exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}
/*
 * Return the number of bytes in the mbuf chain using the same
 * method as m_length() or sballoc()
 *
 * Returns data len - starting from PKT start
 * - retmbcnt - optional param to get total mbuf bytes in chain
 * - retmbnum - optional param to get number of mbufs in chain
 */
static unsigned int
cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
{
    struct mbuf *m0;
    unsigned int pktlen = 0;
    int mbcnt;
    int mbnum;

    // Locate the start of data
    for (m0 = m; m0 != NULL; m0 = m0->m_next) {
        if (m0->m_flags & M_PKTHDR) {
            break;
        }
    }
    if (m0 == NULL) {
        CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
        return 0;
    }
    m = m0;

    if (retmbcnt == NULL && retmbnum == NULL) {
        return m_length(m);
    }

    pktlen = 0;
    mbcnt = 0;
    mbnum = 0;
    for (m0 = m; m0 != NULL; m0 = m0->m_next) {
        pktlen += m0->m_len;
        mbnum++;
        mbcnt += MSIZE;
        if (m0->m_flags & M_EXT) {
            mbcnt += m0->m_ext.ext_size;
        }
    }
    if (retmbcnt) {
        *retmbcnt = mbcnt;
    }
    if (retmbnum) {
        *retmbnum = mbnum;
    }
    return pktlen;
}

static struct mbuf *
cfil_data_start(struct mbuf *m)
{
    struct mbuf *m0;

    // Locate the start of data
    for (m0 = m; m0 != NULL; m0 = m0->m_next) {
        if (m0->m_flags & M_PKTHDR) {
            break;
        }
    }
    return m0;
}
/*
 * Common mbuf queue utilities
 */

static inline void
cfil_queue_init(struct cfil_queue *cfq)
{
    cfq->q_start = 0;
    cfq->q_end = 0;
    MBUFQ_INIT(&cfq->q_mq);
}

static inline uint64_t
cfil_queue_drain(struct cfil_queue *cfq)
{
    uint64_t drained = cfq->q_start - cfq->q_end;

    cfq->q_start = 0;
    cfq->q_end = 0;
    MBUFQ_DRAIN(&cfq->q_mq);

    return drained;
}

/* Return 1 when empty, 0 otherwise */
static inline int
cfil_queue_empty(struct cfil_queue *cfq)
{
    return MBUFQ_EMPTY(&cfq->q_mq);
}

static inline uint64_t
cfil_queue_offset_first(struct cfil_queue *cfq)
{
    return cfq->q_start;
}

static inline uint64_t
cfil_queue_offset_last(struct cfil_queue *cfq)
{
    return cfq->q_end;
}

static inline uint64_t
cfil_queue_len(struct cfil_queue *cfq)
{
    return cfq->q_end - cfq->q_start;
}
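/*
 * Example (illustrative): a cfil_queue that has absorbed two mbuf chains of
 * 100 and 200 bytes has q_end - q_start == 300, so cfil_queue_len() returns
 * 300; removing the first chain with cfil_queue_remove() advances q_start by
 * 100 and cfil_queue_len() then returns 200.
 */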
/*
 * Routines to verify some fundamental assumptions
 */

static void
cfil_queue_verify(struct cfil_queue *cfq)
{
    mbuf_t chain;
    mbuf_t m;
    mbuf_t n;
    uint64_t queuesize = 0;

    /* Verify the offsets are ordered */
    VERIFY(cfq->q_start <= cfq->q_end);

    /*
     * When the queue is empty, the offsets are equal, otherwise the offsets
     * are different
     */
    VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
        (!MBUFQ_EMPTY(&cfq->q_mq) &&
        cfq->q_start != cfq->q_end));

    MBUFQ_FOREACH(chain, &cfq->q_mq) {
        size_t chainsize = 0;
        m = chain;
        unsigned int mlen = cfil_data_length(m, NULL, NULL);
        // skip the addr and control stuff if present
        m = cfil_data_start(m);

        if (m == NULL ||
            m == (void *)M_TAG_FREE_PATTERN ||
            m->m_next == (void *)M_TAG_FREE_PATTERN ||
            m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
            panic("%s - mq %p is free at %p", __func__,
                &cfq->q_mq, m);
        }
        for (n = m; n != NULL; n = n->m_next) {
            if (n->m_type != MT_DATA &&
                n->m_type != MT_HEADER &&
                n->m_type != MT_OOBDATA) {
                panic("%s - %p unsupported type %u", __func__,
                    n, n->m_type);
            }
            chainsize += n->m_len;
        }
        if (mlen != chainsize) {
            panic("%s - %p m_length() %u != chainsize %lu",
                __func__, m, mlen, chainsize);
        }
        queuesize += chainsize;
    }
    if (queuesize != cfq->q_end - cfq->q_start) {
        panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
            m, queuesize, cfq->q_end - cfq->q_start);
    }
}
static void
cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
    CFIL_QUEUE_VERIFY(cfq);

    MBUFQ_ENQUEUE(&cfq->q_mq, m);
    cfq->q_end += len;

    CFIL_QUEUE_VERIFY(cfq);
}

static void
cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
    CFIL_QUEUE_VERIFY(cfq);

    VERIFY(cfil_data_length(m, NULL, NULL) == len);

    MBUFQ_REMOVE(&cfq->q_mq, m);
    MBUFQ_NEXT(m) = NULL;
    cfq->q_start += len;

    CFIL_QUEUE_VERIFY(cfq);
}

static mbuf_t
cfil_queue_first(struct cfil_queue *cfq)
{
    return MBUFQ_FIRST(&cfq->q_mq);
}

static mbuf_t
cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
{
#pragma unused(cfq)
    return MBUFQ_NEXT(m);
}
static void
cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
{
    CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
    CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);

    /* Verify the queues are ordered so that pending is before ctl */
    VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);

    /* The peek offset cannot be less than the pass offset */
    VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);

    /* Make sure we've updated the offset we peeked at */
    VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
}

static void
cfil_entry_verify(struct cfil_entry *entry)
{
    cfil_entry_buf_verify(&entry->cfe_snd);
    cfil_entry_buf_verify(&entry->cfe_rcv);
}

static void
cfil_info_buf_verify(struct cfi_buf *cfi_buf)
{
    CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);

    VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
    VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
}

static void
cfil_info_verify(struct cfil_info *cfil_info)
{
    int i;

    if (cfil_info == NULL) {
        return;
    }

    cfil_info_buf_verify(&cfil_info->cfi_snd);
    cfil_info_buf_verify(&cfil_info->cfi_rcv);

    for (i = 0; i < MAX_CONTENT_FILTER; i++) {
        cfil_entry_verify(&cfil_info->cfi_entries[i]);
    }
}

static void
verify_content_filter(struct content_filter *cfc)
{
    struct cfil_entry *entry;
    uint32_t count = 0;

    VERIFY(cfc->cf_sock_count >= 0);

    TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
        count++;
        VERIFY(cfc == entry->cfe_filter);
    }
    VERIFY(count == cfc->cf_sock_count);
}
/*
 * Kernel control socket callbacks
 */
static errno_t
cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
    errno_t error = 0;
    struct content_filter *cfc = NULL;

    CFIL_LOG(LOG_NOTICE, "");

    cfc = zalloc(content_filter_zone);
    if (cfc == NULL) {
        CFIL_LOG(LOG_ERR, "zalloc failed");
        error = ENOMEM;
        goto done;
    }
    bzero(cfc, sizeof(struct content_filter));

    cfil_rw_lock_exclusive(&cfil_lck_rw);
    if (content_filters == NULL) {
        struct content_filter **tmp;

        cfil_rw_unlock_exclusive(&cfil_lck_rw);

        MALLOC(tmp,
            struct content_filter **,
            MAX_CONTENT_FILTER * sizeof(struct content_filter *),
            M_TEMP,
            M_WAITOK | M_ZERO);

        cfil_rw_lock_exclusive(&cfil_lck_rw);

        if (tmp == NULL && content_filters == NULL) {
            error = ENOMEM;
            cfil_rw_unlock_exclusive(&cfil_lck_rw);
            goto done;
        }
        /* Another thread may have won the race */
        if (content_filters != NULL) {
            FREE(tmp, M_TEMP);
        } else {
            content_filters = tmp;
        }
    }

    if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
        CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
        error = EINVAL;
    } else if (content_filters[sac->sc_unit - 1] != NULL) {
        CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
        error = EADDRINUSE;
    } else {
        /*
         * kernel control socket kcunit numbers start at 1
         */
        content_filters[sac->sc_unit - 1] = cfc;

        cfc->cf_kcref = kctlref;
        cfc->cf_kcunit = sac->sc_unit;
        TAILQ_INIT(&cfc->cf_sock_entries);

        *unitinfo = cfc;
        cfil_active_count++;
    }
    cfil_rw_unlock_exclusive(&cfil_lck_rw);
done:
    if (error != 0 && cfc != NULL) {
        zfree(content_filter_zone, cfc);
    }

    if (error == 0) {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
    } else {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
    }

    CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
        error, cfil_active_count, sac->sc_unit);

    return error;
}
static errno_t
cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
{
#pragma unused(kctlref)
    errno_t error = 0;
    struct content_filter *cfc;
    struct cfil_entry *entry;
    uint64_t sock_flow_id = 0;

    CFIL_LOG(LOG_NOTICE, "");

    if (content_filters == NULL) {
        CFIL_LOG(LOG_ERR, "no content filter");
        error = EINVAL;
        goto done;
    }
    if (kcunit > MAX_CONTENT_FILTER) {
        CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
            kcunit, MAX_CONTENT_FILTER);
        error = EINVAL;
        goto done;
    }

    cfc = (struct content_filter *)unitinfo;
    if (cfc == NULL) {
        goto done;
    }

    cfil_rw_lock_exclusive(&cfil_lck_rw);
    if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
        CFIL_LOG(LOG_ERR, "bad unit info %u)",
            kcunit);
        cfil_rw_unlock_exclusive(&cfil_lck_rw);
        goto done;
    }
    cfc->cf_flags |= CFF_DETACHING;
    /*
     * Remove all sockets from the filter
     */
    while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
        cfil_rw_lock_assert_held(&cfil_lck_rw, 1);

        verify_content_filter(cfc);
        /*
         * Accept all outstanding data by pushing to next filter
         * or back to socket.
         *
         * TBD: Actually we should make sure all data has been pushed
         * before detaching the filter.
         */
        if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
            struct cfil_info *cfil_info = entry->cfe_cfil_info;
            struct socket *so = cfil_info->cfi_so;
            sock_flow_id = cfil_info->cfi_sock_id;

            /* Need to let data flow immediately */
            entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
                CFEF_DATA_START;

            /*
             * Respect locking hierarchy
             */
            cfil_rw_unlock_exclusive(&cfil_lck_rw);

            socket_lock(so, 1);

            /*
             * When cfe_filter is NULL the filter is detached
             * and the entry has been removed from cf_sock_entries
             */
            if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
                cfil_rw_lock_exclusive(&cfil_lck_rw);
                goto release;
            }

            (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
                CFM_MAX_OFFSET,
                CFM_MAX_OFFSET);

            (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
                CFM_MAX_OFFSET,
                CFM_MAX_OFFSET);

            cfil_rw_lock_exclusive(&cfil_lck_rw);

            /*
             * Check again to make sure the cfil_info is still valid
             * as the socket may have been unlocked when calling
             * cfil_acquire_sockbuf()
             */
            if (entry->cfe_filter == NULL ||
                (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
                goto release;
            }

            /* The filter is now detached */
            entry->cfe_flags |= CFEF_CFIL_DETACHED;
            cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");

            CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
                (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
            if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
                cfil_filters_attached(so) == 0) {
                CFIL_LOG(LOG_NOTICE, "so %llx waking",
                    (uint64_t)VM_KERNEL_ADDRPERM(so));
                wakeup((caddr_t)cfil_info);
            }

            /*
             * Remove the filter entry from the content filter
             * but leave the rest of the state intact as the queues
             * may not be empty yet
             */
            entry->cfe_filter = NULL;
            entry->cfe_necp_control_unit = 0;

            TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
            cfc->cf_sock_count--;
release:
            socket_unlock(so, 1);
        }
    }
    verify_content_filter(cfc);

    VERIFY(cfc->cf_sock_count == 0);

    /*
     * Make filter inactive
     */
    content_filters[kcunit - 1] = NULL;
    cfil_active_count--;
    cfil_rw_unlock_exclusive(&cfil_lck_rw);

    zfree(content_filter_zone, cfc);
done:
    if (error == 0) {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
    } else {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
    }

    CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
        error, cfil_active_count, kcunit);

    return error;
}
1330 * cfil_acquire_sockbuf()
1332 * Prevent any other thread from acquiring the sockbuf
1333 * We use sb_cfil_thread as a semaphore to prevent other threads from
1334 * messing with the sockbuf -- see sblock()
1335 * Note: We do not set SB_LOCK here because the thread may check or modify
1336 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1337 * sblock(), sbunlock() or sodefunct()
1340 cfil_acquire_sockbuf(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
1342 thread_t tp
= current_thread();
1343 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1344 lck_mtx_t
*mutex_held
;
1348 * Wait until no thread is holding the sockbuf and other content
1349 * filter threads have released the sockbuf
1351 while ((sb
->sb_flags
& SB_LOCK
) ||
1352 (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)) {
1353 if (so
->so_proto
->pr_getlock
!= NULL
) {
1354 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1356 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1359 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1362 VERIFY(sb
->sb_wantlock
!= 0);
1364 msleep(&sb
->sb_flags
, mutex_held
, PSOCK
, "cfil_acquire_sockbuf",
1367 VERIFY(sb
->sb_wantlock
!= 0);
1371 * Use reference count for repetitive calls on same thread
1373 if (sb
->sb_cfil_refs
== 0) {
1374 VERIFY(sb
->sb_cfil_thread
== NULL
);
1375 VERIFY((sb
->sb_flags
& SB_LOCK
) == 0);
1377 sb
->sb_cfil_thread
= tp
;
1378 sb
->sb_flags
|= SB_LOCK
;
1382 /* We acquire the socket buffer when we need to cleanup */
1383 if (cfil_info
== NULL
) {
1384 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
1385 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1387 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1388 CFIL_LOG(LOG_ERR
, "so %llx drop set",
1389 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1397 cfil_release_sockbuf(struct socket
*so
, int outgoing
)
1399 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1400 thread_t tp
= current_thread();
1402 socket_lock_assert_owned(so
);
1404 if (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
) {
1405 panic("%s sb_cfil_thread %p not current %p", __func__
,
1406 sb
->sb_cfil_thread
, tp
);
1409 * Don't panic if we are defunct because SB_LOCK has
1410 * been cleared by sodefunct()
1412 if (!(so
->so_flags
& SOF_DEFUNCT
) && !(sb
->sb_flags
& SB_LOCK
)) {
1413 panic("%s SB_LOCK not set on %p", __func__
,
1417 * We can unlock when the thread unwinds to the last reference
1420 if (sb
->sb_cfil_refs
== 0) {
1421 sb
->sb_cfil_thread
= NULL
;
1422 sb
->sb_flags
&= ~SB_LOCK
;
1424 if (sb
->sb_wantlock
> 0) {
1425 wakeup(&sb
->sb_flags
);
1431 cfil_sock_id_from_socket(struct socket
*so
)
1433 if ((so
->so_flags
& SOF_CONTENT_FILTER
) && so
->so_cfil
) {
1434 return so
->so_cfil
->cfi_sock_id
;
1436 return CFIL_SOCK_ID_NONE
;
1441 cfil_socket_safe_lock(struct inpcb
*inp
)
1443 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1444 socket_lock(inp
->inp_socket
, 1);
1445 if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) != WNT_STOPUSING
) {
1448 socket_unlock(inp
->inp_socket
, 1);
1453 static struct socket
*
1454 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id
, bool udp_only
)
1456 struct socket
*so
= NULL
;
1457 u_int64_t gencnt
= cfil_sock_id
>> 32;
1458 u_int32_t flowhash
= (u_int32_t
)(cfil_sock_id
& 0x0ffffffff);
1459 struct inpcb
*inp
= NULL
;
1460 struct inpcbinfo
*pcbinfo
= NULL
;
1463 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id
, gencnt
, flowhash
);
1471 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1472 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1473 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1474 inp
->inp_socket
!= NULL
&&
1475 inp
->inp_flowhash
== flowhash
&&
1476 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
&&
1477 inp
->inp_socket
->so_cfil
!= NULL
) {
1478 if (cfil_socket_safe_lock(inp
)) {
1479 so
= inp
->inp_socket
;
1484 lck_rw_done(pcbinfo
->ipi_lock
);
1492 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1493 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1494 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1495 inp
->inp_socket
!= NULL
&&
1496 inp
->inp_socket
->so_cfil_db
!= NULL
&&
1497 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
) {
1498 if (cfil_socket_safe_lock(inp
)) {
1499 so
= inp
->inp_socket
;
1504 lck_rw_done(pcbinfo
->ipi_lock
);
1508 OSIncrementAtomic(&cfil_stats
.cfs_sock_id_not_found
);
1510 "no socket for sock_id %llx gencnt %llx flowhash %x",
1511 cfil_sock_id
, gencnt
, flowhash
);
1517 static struct socket
*
1518 cfil_socket_from_client_uuid(uuid_t necp_client_uuid
, bool *cfil_attached
)
1520 struct socket
*so
= NULL
;
1521 struct inpcb
*inp
= NULL
;
1522 struct inpcbinfo
*pcbinfo
= &tcbinfo
;
1524 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1525 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1526 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1527 inp
->inp_socket
!= NULL
&&
1528 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1529 *cfil_attached
= (inp
->inp_socket
->so_cfil
!= NULL
);
1530 if (cfil_socket_safe_lock(inp
)) {
1531 so
= inp
->inp_socket
;
1536 lck_rw_done(pcbinfo
->ipi_lock
);
1542 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1543 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1544 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1545 inp
->inp_socket
!= NULL
&&
1546 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1547 *cfil_attached
= (inp
->inp_socket
->so_cfil_db
!= NULL
);
1548 if (cfil_socket_safe_lock(inp
)) {
1549 so
= inp
->inp_socket
;
1554 lck_rw_done(pcbinfo
->ipi_lock
);
1561 cfil_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, mbuf_t m
,
1564 #pragma unused(kctlref, flags)
1566 struct cfil_msg_hdr
*msghdr
;
1567 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1569 struct cfil_msg_action
*action_msg
;
1570 struct cfil_entry
*entry
;
1571 struct cfil_info
*cfil_info
= NULL
;
1573 CFIL_LOG(LOG_INFO
, "");
1575 if (content_filters
== NULL
) {
1576 CFIL_LOG(LOG_ERR
, "no content filter");
1580 if (kcunit
> MAX_CONTENT_FILTER
) {
1581 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1582 kcunit
, MAX_CONTENT_FILTER
);
1587 if (m_length(m
) < sizeof(struct cfil_msg_hdr
)) {
1588 CFIL_LOG(LOG_ERR
, "too short %u", m_length(m
));
1592 msghdr
= (struct cfil_msg_hdr
*)mbuf_data(m
);
1593 if (msghdr
->cfm_version
!= CFM_VERSION_CURRENT
) {
1594 CFIL_LOG(LOG_ERR
, "bad version %u", msghdr
->cfm_version
);
1598 if (msghdr
->cfm_type
!= CFM_TYPE_ACTION
) {
1599 CFIL_LOG(LOG_ERR
, "bad type %u", msghdr
->cfm_type
);
1603 /* Validate action operation */
1604 switch (msghdr
->cfm_op
) {
1605 case CFM_OP_DATA_UPDATE
:
1607 &cfil_stats
.cfs_ctl_action_data_update
);
1610 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_drop
);
1612 case CFM_OP_BLESS_CLIENT
:
1613 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_bless_client
)) {
1614 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1616 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1621 error
= cfil_action_bless_client(kcunit
, msghdr
);
1624 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_op
);
1625 CFIL_LOG(LOG_ERR
, "bad op %u", msghdr
->cfm_op
);
1629 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_action
)) {
1630 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1632 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1637 cfil_rw_lock_shared(&cfil_lck_rw
);
1638 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1639 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1642 cfil_rw_unlock_shared(&cfil_lck_rw
);
1645 cfil_rw_unlock_shared(&cfil_lck_rw
);
1647 // Search for socket (TCP+UDP and lock so)
1648 so
= cfil_socket_from_sock_id(msghdr
->cfm_sock_id
, false);
1650 CFIL_LOG(LOG_NOTICE
, "bad sock_id %llx",
1651 msghdr
->cfm_sock_id
);
1656 cfil_info
= so
->so_cfil_db
!= NULL
?
1657 cfil_db_get_cfil_info(so
->so_cfil_db
, msghdr
->cfm_sock_id
) : so
->so_cfil
;
1659 if (cfil_info
== NULL
) {
1660 CFIL_LOG(LOG_NOTICE
, "so %llx <id %llu> not attached",
1661 (uint64_t)VM_KERNEL_ADDRPERM(so
), msghdr
->cfm_sock_id
);
1664 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1665 CFIL_LOG(LOG_NOTICE
, "so %llx drop set",
1666 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1670 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1671 if (entry
->cfe_filter
== NULL
) {
1672 CFIL_LOG(LOG_NOTICE
, "so %llx no filter",
1673 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1678 if (entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) {
1679 entry
->cfe_flags
|= CFEF_DATA_START
;
1682 "so %llx attached not sent for %u",
1683 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1688 microuptime(&entry
->cfe_last_action
);
1689 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_action
, &cfil_info
->cfi_first_event
, msghdr
->cfm_op
);
1691 action_msg
= (struct cfil_msg_action
*)msghdr
;
1693 switch (msghdr
->cfm_op
) {
1694 case CFM_OP_DATA_UPDATE
:
1696 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1697 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1698 cfil_info
->cfi_sock_id
,
1699 action_msg
->cfa_in_peek_offset
, action_msg
->cfa_in_pass_offset
,
1700 action_msg
->cfa_out_peek_offset
, action_msg
->cfa_out_pass_offset
);
1702 if (action_msg
->cfa_out_peek_offset
!= 0 ||
1703 action_msg
->cfa_out_pass_offset
!= 0) {
1704 error
= cfil_action_data_pass(so
, cfil_info
, kcunit
, 1,
1705 action_msg
->cfa_out_pass_offset
,
1706 action_msg
->cfa_out_peek_offset
);
1708 if (error
== EJUSTRETURN
) {
1714 if (action_msg
->cfa_in_peek_offset
!= 0 ||
1715 action_msg
->cfa_in_pass_offset
!= 0) {
1716 error
= cfil_action_data_pass(so
, cfil_info
, kcunit
, 0,
1717 action_msg
->cfa_in_pass_offset
,
1718 action_msg
->cfa_in_peek_offset
);
1720 if (error
== EJUSTRETURN
) {
1726 error
= cfil_action_drop(so
, cfil_info
, kcunit
);
1734 socket_unlock(so
, 1);
1739 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_ok
);
1741 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_bad
);
1748 cfil_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1749 int opt
, void *data
, size_t *len
)
1751 #pragma unused(kctlref, opt)
1752 struct cfil_info
*cfil_info
= NULL
;
1754 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1756 CFIL_LOG(LOG_NOTICE
, "");
1758 cfil_rw_lock_shared(&cfil_lck_rw
);
1760 if (content_filters
== NULL
) {
1761 CFIL_LOG(LOG_ERR
, "no content filter");
1765 if (kcunit
> MAX_CONTENT_FILTER
) {
1766 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1767 kcunit
, MAX_CONTENT_FILTER
);
1771 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1772 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1778 case CFIL_OPT_NECP_CONTROL_UNIT
:
1779 if (*len
< sizeof(uint32_t)) {
1780 CFIL_LOG(LOG_ERR
, "len too small %lu", *len
);
1785 *(uint32_t *)data
= cfc
->cf_necp_control_unit
;
1788 case CFIL_OPT_GET_SOCKET_INFO
:
1789 if (*len
!= sizeof(struct cfil_opt_sock_info
)) {
1790 CFIL_LOG(LOG_ERR
, "len does not match %lu", *len
);
1795 CFIL_LOG(LOG_ERR
, "data not passed");
1800 struct cfil_opt_sock_info
*sock_info
=
1801 (struct cfil_opt_sock_info
*) data
;
1803 // Unlock here so that we never hold both cfil_lck_rw and the
1804 // socket_lock at the same time. Otherwise, this can deadlock
1805 // because soclose() takes the socket_lock and then exclusive
1806 // cfil_lck_rw and we require the opposite order.
1808 // WARNING: Be sure to never use anything protected
1809 // by cfil_lck_rw beyond this point.
1810 // WARNING: Be sure to avoid fallthrough and
1811 // goto return_already_unlocked from this branch.
1812 cfil_rw_unlock_shared(&cfil_lck_rw
);
1814 // Search (TCP+UDP) and lock socket
1815 struct socket
*sock
=
1816 cfil_socket_from_sock_id(sock_info
->cfs_sock_id
, false);
1819 CFIL_LOG(LOG_ERR
, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
1820 sock_info
->cfs_sock_id
);
1823 goto return_already_unlocked
;
1826 cfil_info
= (sock
->so_cfil_db
!= NULL
) ?
1827 cfil_db_get_cfil_info(sock
->so_cfil_db
, sock_info
->cfs_sock_id
) : sock
->so_cfil
;
1829 if (cfil_info
== NULL
) {
1831 CFIL_LOG(LOG_ERR
, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
1832 (uint64_t)VM_KERNEL_ADDRPERM(sock
));
1835 socket_unlock(sock
, 1);
1836 goto return_already_unlocked
;
1839 // Fill out family, type, and protocol
1840 sock_info
->cfs_sock_family
= sock
->so_proto
->pr_domain
->dom_family
;
1841 sock_info
->cfs_sock_type
= sock
->so_proto
->pr_type
;
1842 sock_info
->cfs_sock_protocol
= sock
->so_proto
->pr_protocol
;
1844 // Source and destination addresses
1845 struct inpcb
*inp
= sotoinpcb(sock
);
1846 if (inp
->inp_vflag
& INP_IPV6
) {
1847 struct in6_addr
*laddr
= NULL
, *faddr
= NULL
;
1848 u_int16_t lport
= 0, fport
= 0;
1850 cfil_get_flow_address_v6(cfil_info
->cfi_hash_entry
, inp
,
1851 &laddr
, &faddr
, &lport
, &fport
);
1852 fill_ip6_sockaddr_4_6(&sock_info
->cfs_local
, laddr
, lport
);
1853 fill_ip6_sockaddr_4_6(&sock_info
->cfs_remote
, faddr
, fport
);
1854 } else if (inp
->inp_vflag
& INP_IPV4
) {
1855 struct in_addr laddr
= {0}, faddr
= {0};
1856 u_int16_t lport
= 0, fport
= 0;
1858 cfil_get_flow_address(cfil_info
->cfi_hash_entry
, inp
,
1859 &laddr
, &faddr
, &lport
, &fport
);
1860 fill_ip_sockaddr_4_6(&sock_info
->cfs_local
, laddr
, lport
);
1861 fill_ip_sockaddr_4_6(&sock_info
->cfs_remote
, faddr
, fport
);
1865 sock_info
->cfs_pid
= sock
->last_pid
;
1866 memcpy(sock_info
->cfs_uuid
, sock
->last_uuid
, sizeof(uuid_t
));
1868 if (sock
->so_flags
& SOF_DELEGATED
) {
1869 sock_info
->cfs_e_pid
= sock
->e_pid
;
1870 memcpy(sock_info
->cfs_e_uuid
, sock
->e_uuid
, sizeof(uuid_t
));
1872 sock_info
->cfs_e_pid
= sock
->last_pid
;
1873 memcpy(sock_info
->cfs_e_uuid
, sock
->last_uuid
, sizeof(uuid_t
));
1876 socket_unlock(sock
, 1);
1878 goto return_already_unlocked
;
1880 error
= ENOPROTOOPT
;
1884 cfil_rw_unlock_shared(&cfil_lck_rw
);
1888 return_already_unlocked
:
1894 cfil_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1895 int opt
, void *data
, size_t len
)
1897 #pragma unused(kctlref, opt)
1899 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1901 CFIL_LOG(LOG_NOTICE
, "");
1903 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1905 if (content_filters
== NULL
) {
1906 CFIL_LOG(LOG_ERR
, "no content filter");
1910 if (kcunit
> MAX_CONTENT_FILTER
) {
1911 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1912 kcunit
, MAX_CONTENT_FILTER
);
1916 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1917 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1923 case CFIL_OPT_NECP_CONTROL_UNIT
:
1924 if (len
< sizeof(uint32_t)) {
1925 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1926 "len too small %lu", len
);
1930 if (cfc
->cf_necp_control_unit
!= 0) {
1931 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1933 cfc
->cf_necp_control_unit
);
1937 cfc
->cf_necp_control_unit
= *(uint32_t *)data
;
1940 error
= ENOPROTOOPT
;
1944 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
static void
cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
{
#pragma unused(kctlref, flags)
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so = NULL;
	int error;
	struct cfil_entry *entry;
	struct cfil_info *cfil_info = NULL;

	CFIL_LOG(LOG_INFO, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		return;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		return;
	}
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		goto done;
	}
	/* Let's assume the flow control is lifted */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;

		cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
		LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
	}
	/*
	 * Flow control will be raised again as soon as an entry cannot enqueue
	 * to the kernel control socket
	 */
	while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
		verify_content_filter(cfc);

		cfil_rw_lock_assert_held(&cfil_lck_rw, 0);

		/* Find an entry that is flow controlled */
		TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
			if (entry->cfe_cfil_info == NULL ||
			    entry->cfe_cfil_info->cfi_so == NULL) {
				continue;
			}
			if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
				continue;
			}
			break;
		}
		if (entry == NULL) {
			break;
		}

		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);

		cfil_info = entry->cfe_cfil_info;
		so = cfil_info->cfi_so;

		cfil_rw_unlock_shared(&cfil_lck_rw);
		socket_lock(so, 1);

		do {
			error = cfil_acquire_sockbuf(so, cfil_info, 1);
			if (error == 0) {
				error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
			}
			cfil_release_sockbuf(so, 1);
			if (error != 0) {
				break;
			}

			error = cfil_acquire_sockbuf(so, cfil_info, 0);
			if (error == 0) {
				error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
			}
			cfil_release_sockbuf(so, 0);
		} while (0);

		socket_lock_assert_owned(so);
		socket_unlock(so, 1);

		cfil_rw_lock_shared(&cfil_lck_rw);
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);
}
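/*
 * cfil_init(): subsystem initialization -- compile time and runtime sanity
 * checks, zone creation for the content filter structures, lock setup,
 * registration of the CONTENT_FILTER_CONTROL_NAME kernel control and of
 * the UDP garbage collection thread.
 */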
void
cfil_init(void)
{
	struct kern_ctl_reg kern_ctl;
	errno_t error = 0;
	vm_size_t content_filter_size = 0;	/* size of content_filter */
	vm_size_t cfil_info_size = 0;		/* size of cfil_info */
	vm_size_t cfil_hash_entry_size = 0;	/* size of cfil_hash_entry */
	vm_size_t cfil_db_size = 0;		/* size of cfil_db */
	unsigned int mbuf_limit = 0;

	CFIL_LOG(LOG_NOTICE, "");

	/*
	 * Compile time verifications
	 */
	_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
	_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);

	/*
	 * Runtime verifications
	 */
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued, sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued, sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked, sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked, sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued, sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued, sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued, sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued, sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed, sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed, sizeof(uint32_t)));

	/*
	 * Zone for content filters kernel control sockets
	 */
	content_filter_size = sizeof(struct content_filter);
	content_filter_zone = zinit(content_filter_size,
	    CONTENT_FILTER_ZONE_MAX * content_filter_size,
	    0, CONTENT_FILTER_ZONE_NAME);
	if (content_filter_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__,
		    CONTENT_FILTER_ZONE_NAME);
	}
	zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
	zone_change(content_filter_zone, Z_EXPAND, TRUE);

	/*
	 * Zone for per socket content filters
	 */
	cfil_info_size = sizeof(struct cfil_info);
	cfil_info_zone = zinit(cfil_info_size,
	    CFIL_INFO_ZONE_MAX * cfil_info_size,
	    0, CFIL_INFO_ZONE_NAME);
	if (cfil_info_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
	}
	zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
	zone_change(cfil_info_zone, Z_EXPAND, TRUE);

	/*
	 * Zone for content filters cfil hash entries and db
	 */
	cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
	cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
	    CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
	    0, CFIL_HASH_ENTRY_ZONE_NAME);
	if (cfil_hash_entry_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
	}
	zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
	zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);

	cfil_db_size = sizeof(struct cfil_db);
	cfil_db_zone = zinit(cfil_db_size,
	    CFIL_DB_ZONE_MAX * cfil_db_size,
	    0, CFIL_DB_ZONE_NAME);
	if (cfil_db_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
	}
	zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
	zone_change(cfil_db_zone, Z_EXPAND, TRUE);

	/*
	 * Allocate locks
	 */
	cfil_lck_grp_attr = lck_grp_attr_alloc_init();
	if (cfil_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed", __func__);
	}
	cfil_lck_grp = lck_grp_alloc_init("content filter",
	    cfil_lck_grp_attr);
	if (cfil_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed", __func__);
	}
	cfil_lck_attr = lck_attr_alloc_init();
	if (cfil_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed", __func__);
	}
	lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);

	TAILQ_INIT(&cfil_sock_head);

	/*
	 * Register kernel control
	 */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
	    sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
	kern_ctl.ctl_sendsize = 512 * 1024;	/* enough? */
	kern_ctl.ctl_recvsize = 512 * 1024;	/* enough? */
	kern_ctl.ctl_connect = cfil_ctl_connect;
	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
	kern_ctl.ctl_send = cfil_ctl_send;
	kern_ctl.ctl_getopt = cfil_ctl_getopt;
	kern_ctl.ctl_setopt = cfil_ctl_setopt;
	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
	error = ctl_register(&kern_ctl, &cfil_kctlref);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
		return;
	}

	// Spawn thread for garbage collection
	if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
	    &cfil_udp_gc_thread) != KERN_SUCCESS) {
		panic_plain("%s: Can't create UDP GC thread", __func__);
	}
	/* this must not fail */
	VERIFY(cfil_udp_gc_thread != NULL);

	// Set UDP per-flow mbuf thresholds to 1/32 of platform max
	mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
	cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
	cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
}
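/*
 * cfil_info_alloc() allocates and initializes the per-flow cfil_info.
 * For TCP (hash_entry == NULL) it is hung off so->so_cfil; for UDP it is
 * tracked by the per-socket hash entry.  The cfi_sock_id combines the
 * socket generation count and the flow hash so it is not a kernel pointer.
 */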
struct cfil_info *
cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
{
	int kcunit;
	struct cfil_info *cfil_info = NULL;
	struct inpcb *inp = sotoinpcb(so);

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_info = zalloc(cfil_info_zone);
	if (cfil_info == NULL) {
		goto done;
	}
	bzero(cfil_info, sizeof(struct cfil_info));

	cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
	cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		entry->cfe_cfil_info = cfil_info;

		/* Initialize the filter entry */
		entry->cfe_filter = NULL;
		entry->cfe_flags = 0;
		entry->cfe_necp_control_unit = 0;
		entry->cfe_snd.cfe_pass_offset = 0;
		entry->cfe_snd.cfe_peek_offset = 0;
		entry->cfe_snd.cfe_peeked = 0;
		entry->cfe_rcv.cfe_pass_offset = 0;
		entry->cfe_rcv.cfe_peek_offset = 0;
		entry->cfe_rcv.cfe_peeked = 0;
		/*
		 * Timestamp the last action to avoid prematurely
		 * triggering garbage collection
		 */
		microuptime(&entry->cfe_last_action);

		cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
		cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
	}

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	/*
	 * Create a cfi_sock_id that's not the socket pointer!
	 */
	if (hash_entry == NULL) {
		// This is the TCP case, cfil_info is tracked per socket
		if (inp->inp_flowhash == 0) {
			inp->inp_flowhash = inp_calc_flowhash(inp);
		}

		so->so_cfil = cfil_info;
		cfil_info->cfi_so = so;
		cfil_info->cfi_sock_id =
		    ((so->so_gencnt << 32) | inp->inp_flowhash);
	} else {
		// This is the UDP case, cfil_info is tracked in per-socket hash
		cfil_info->cfi_so = so;
		hash_entry->cfentry_cfil = cfil_info;
		cfil_info->cfi_hash_entry = hash_entry;
		cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
		    inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);

		// Wake up gc thread if this is first flow added
		if (cfil_sock_udp_attached_count == 0) {
			thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
		}

		cfil_sock_udp_attached_count++;
	}

	TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);

	cfil_sock_attached_count++;

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

done:
	if (cfil_info != NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
	} else {
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
	}

	return cfil_info;
}
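/*
 * cfil_info_attach_unit() binds the cfil_info entry to the content filter
 * whose NECP control unit matches the filter control unit returned by the
 * NECP lookup for this socket.
 */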
static int
cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
{
	int kcunit;
	int attached = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
	    content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
	    kcunit++) {
		struct content_filter *cfc = content_filters[kcunit - 1];
		struct cfil_entry *entry;

		if (cfc == NULL) {
			continue;
		}
		if (cfc->cf_necp_control_unit != filter_control_unit) {
			continue;
		}

		entry = &cfil_info->cfi_entries[kcunit - 1];

		entry->cfe_filter = cfc;
		entry->cfe_necp_control_unit = filter_control_unit;
		TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count++;
		verify_content_filter(cfc);
		attached = 1;
		entry->cfe_flags |= CFEF_CFIL_ATTACHED;
		break;
	}

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	return attached;
}
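/*
 * cfil_info_free() detaches the cfil_info from every content filter,
 * drains all of its queues and releases it back to its zone.
 */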
void
cfil_info_free(struct cfil_info *cfil_info)
{
	int kcunit;
	uint64_t in_drain = 0;
	uint64_t out_drained = 0;

	if (cfil_info == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
	    content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
	    kcunit++) {
		struct cfil_entry *entry;
		struct content_filter *cfc;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Don't be silly and try to detach twice */
		if (entry->cfe_filter == NULL) {
			continue;
		}

		cfc = content_filters[kcunit - 1];

		VERIFY(cfc == entry->cfe_filter);

		entry->cfe_filter = NULL;
		entry->cfe_necp_control_unit = 0;
		TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count--;

		verify_content_filter(cfc);
	}
	if (cfil_info->cfi_hash_entry != NULL) {
		cfil_sock_udp_attached_count--;
	}
	cfil_sock_attached_count--;
	TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);

	out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
	in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	if (out_drained) {
		OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
	}
	if (in_drain) {
		OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
	}

	zfree(cfil_info_zone, cfil_info);
}
/*
 * Entry point from Sockets layer
 * The socket is locked.
 */
errno_t
cfil_sock_attach(struct socket *so)
{
	errno_t error = 0;
	uint32_t filter_control_unit;

	socket_lock_assert_owned(so);

	/* Limit ourselves to TCP sockets that are not MPTCP subflows */
	if ((so->so_proto->pr_domain->dom_family != PF_INET &&
	    so->so_proto->pr_domain->dom_family != PF_INET6) ||
	    so->so_proto->pr_type != SOCK_STREAM ||
	    so->so_proto->pr_protocol != IPPROTO_TCP ||
	    (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
	    (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
		goto done;
	}

	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
	if (filter_control_unit == 0) {
		goto done;
	}

	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
		goto done;
	}
	if (cfil_active_count == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
		goto done;
	}
	if (so->so_cfil != NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
		CFIL_LOG(LOG_ERR, "already attached");
	} else {
		cfil_info_alloc(so, NULL);
		if (so->so_cfil == NULL) {
			error = ENOMEM;
			OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
			goto done;
		}
	}
	if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
		CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
		    filter_control_unit);
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
		goto done;
	}
	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
	    (uint64_t)VM_KERNEL_ADDRPERM(so),
	    filter_control_unit, so->so_cfil->cfi_sock_id);

	so->so_flags |= SOF_CONTENT_FILTER;
	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

	/* Hold a reference on the socket */
	so->so_usecount++;

	error = cfil_dispatch_attach_event(so, so->so_cfil, filter_control_unit);
	/* We can recover from flow control or out of memory errors */
	if (error == ENOBUFS || error == ENOMEM) {
		error = 0;
	} else if (error != 0) {
		goto done;
	}

	CFIL_INFO_VERIFY(so->so_cfil);
done:
	return error;
}
/*
 * Entry point from Sockets layer
 * The socket is locked.
 */
errno_t
cfil_sock_detach(struct socket *so)
{
	if (so->so_cfil) {
		if (so->so_flags & SOF_CONTENT_FILTER) {
			so->so_flags &= ~SOF_CONTENT_FILTER;
			VERIFY(so->so_usecount > 0);
			so->so_usecount--;
		}
		cfil_info_free(so->so_cfil);
		so->so_cfil = NULL;
		OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
	}
	return 0;
}
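/*
 * cfil_dispatch_attach_event() sends the CFM_OP_SOCKET_ATTACHED event to
 * the matching filter agent over its kernel control socket.  ENOBUFS from
 * ctl_enqueuedata() means the agent is flow controlled and the event will
 * be retried later.
 */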
2521 cfil_dispatch_attach_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t filter_control_unit
)
2524 struct cfil_entry
*entry
= NULL
;
2525 struct cfil_msg_sock_attached msg_attached
;
2527 struct content_filter
*cfc
= NULL
;
2529 socket_lock_assert_owned(so
);
2531 cfil_rw_lock_shared(&cfil_lck_rw
);
2533 if (so
->so_proto
== NULL
|| so
->so_proto
->pr_domain
== NULL
) {
2538 * Find the matching filter unit
2540 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2541 cfc
= content_filters
[kcunit
- 1];
2546 if (cfc
->cf_necp_control_unit
!= filter_control_unit
) {
2549 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2550 if (entry
->cfe_filter
== NULL
) {
2554 VERIFY(cfc
== entry
->cfe_filter
);
2559 if (entry
== NULL
|| entry
->cfe_filter
== NULL
) {
2563 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
)) {
2567 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u kcunit %u",
2568 (uint64_t)VM_KERNEL_ADDRPERM(so
), filter_control_unit
, kcunit
);
2570 /* Would be wasteful to try when flow controlled */
2571 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2576 bzero(&msg_attached
, sizeof(struct cfil_msg_sock_attached
));
2577 msg_attached
.cfs_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_attached
);
2578 msg_attached
.cfs_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2579 msg_attached
.cfs_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2580 msg_attached
.cfs_msghdr
.cfm_op
= CFM_OP_SOCKET_ATTACHED
;
2581 msg_attached
.cfs_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2583 msg_attached
.cfs_sock_family
= so
->so_proto
->pr_domain
->dom_family
;
2584 msg_attached
.cfs_sock_type
= so
->so_proto
->pr_type
;
2585 msg_attached
.cfs_sock_protocol
= so
->so_proto
->pr_protocol
;
2586 msg_attached
.cfs_pid
= so
->last_pid
;
2587 memcpy(msg_attached
.cfs_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2588 if (so
->so_flags
& SOF_DELEGATED
) {
2589 msg_attached
.cfs_e_pid
= so
->e_pid
;
2590 memcpy(msg_attached
.cfs_e_uuid
, so
->e_uuid
, sizeof(uuid_t
));
2592 msg_attached
.cfs_e_pid
= so
->last_pid
;
2593 memcpy(msg_attached
.cfs_e_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2597 CFIL_LOG(LOG_DEBUG
, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
2598 entry
->cfe_cfil_info
->cfi_sock_id
);
2601 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2602 entry
->cfe_filter
->cf_kcunit
,
2604 sizeof(struct cfil_msg_sock_attached
),
2607 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d", error
);
2610 microuptime(&entry
->cfe_last_event
);
2611 cfil_info
->cfi_first_event
.tv_sec
= entry
->cfe_last_event
.tv_sec
;
2612 cfil_info
->cfi_first_event
.tv_usec
= entry
->cfe_last_event
.tv_usec
;
2614 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
;
2615 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_ok
);
2618 /* We can recover from flow control */
2619 if (error
== ENOBUFS
) {
2620 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2621 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_flow_control
);
2623 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
2624 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2627 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2629 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2632 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_fail
);
2635 cfil_rw_unlock_shared(&cfil_lck_rw
);
2641 cfil_dispatch_disconnect_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
2644 struct mbuf
*msg
= NULL
;
2645 struct cfil_entry
*entry
;
2646 struct cfe_buf
*entrybuf
;
2647 struct cfil_msg_hdr msg_disconnected
;
2648 struct content_filter
*cfc
;
2650 socket_lock_assert_owned(so
);
2652 cfil_rw_lock_shared(&cfil_lck_rw
);
2654 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2656 entrybuf
= &entry
->cfe_snd
;
2658 entrybuf
= &entry
->cfe_rcv
;
2661 cfc
= entry
->cfe_filter
;
2666 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2667 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2670 * Send the disconnection event once
2672 if ((outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
)) ||
2673 (!outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
))) {
2674 CFIL_LOG(LOG_INFO
, "so %llx disconnect already sent",
2675 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2680 * We're not disconnected as long as some data is waiting
2681 * to be delivered to the filter
2683 if (outgoing
&& cfil_queue_empty(&entrybuf
->cfe_ctl_q
) == 0) {
2684 CFIL_LOG(LOG_INFO
, "so %llx control queue not empty",
2685 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2689 /* Would be wasteful to try when flow controlled */
2690 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2696 cfil_info_log(LOG_ERR
, cfil_info
, outgoing
?
2697 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
2698 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
2701 bzero(&msg_disconnected
, sizeof(struct cfil_msg_hdr
));
2702 msg_disconnected
.cfm_len
= sizeof(struct cfil_msg_hdr
);
2703 msg_disconnected
.cfm_version
= CFM_VERSION_CURRENT
;
2704 msg_disconnected
.cfm_type
= CFM_TYPE_EVENT
;
2705 msg_disconnected
.cfm_op
= outgoing
? CFM_OP_DISCONNECT_OUT
:
2706 CFM_OP_DISCONNECT_IN
;
2707 msg_disconnected
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2708 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2709 entry
->cfe_filter
->cf_kcunit
,
2711 sizeof(struct cfil_msg_hdr
),
2714 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
2718 microuptime(&entry
->cfe_last_event
);
2719 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_event
, &cfil_info
->cfi_first_event
, msg_disconnected
.cfm_op
);
2721 /* Remember we have sent the disconnection message */
2723 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_OUT
;
2724 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_out_event_ok
);
2726 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_IN
;
2727 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_in_event_ok
);
2730 if (error
== ENOBUFS
) {
2731 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2733 &cfil_stats
.cfs_disconnect_event_flow_control
);
2735 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
2736 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2739 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2741 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2745 &cfil_stats
.cfs_disconnect_event_fail
);
2748 cfil_rw_unlock_shared(&cfil_lck_rw
);
2754 cfil_dispatch_closed_event(struct socket
*so
, struct cfil_info
*cfil_info
, int kcunit
)
2756 struct cfil_entry
*entry
;
2757 struct cfil_msg_sock_closed msg_closed
;
2759 struct content_filter
*cfc
;
2761 socket_lock_assert_owned(so
);
2763 cfil_rw_lock_shared(&cfil_lck_rw
);
2765 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2766 cfc
= entry
->cfe_filter
;
2771 CFIL_LOG(LOG_INFO
, "so %llx kcunit %d",
2772 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
2774 /* Would be wasteful to try when flow controlled */
2775 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2780 * Send a single closed message per filter
2782 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_CLOSED
) != 0) {
2785 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
2789 microuptime(&entry
->cfe_last_event
);
2790 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_event
, &cfil_info
->cfi_first_event
, CFM_OP_SOCKET_CLOSED
);
2792 bzero(&msg_closed
, sizeof(struct cfil_msg_sock_closed
));
2793 msg_closed
.cfc_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_closed
);
2794 msg_closed
.cfc_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2795 msg_closed
.cfc_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2796 msg_closed
.cfc_msghdr
.cfm_op
= CFM_OP_SOCKET_CLOSED
;
2797 msg_closed
.cfc_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2798 msg_closed
.cfc_first_event
.tv_sec
= cfil_info
->cfi_first_event
.tv_sec
;
2799 msg_closed
.cfc_first_event
.tv_usec
= cfil_info
->cfi_first_event
.tv_usec
;
2800 memcpy(msg_closed
.cfc_op_time
, cfil_info
->cfi_op_time
, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY
);
2801 memcpy(msg_closed
.cfc_op_list
, cfil_info
->cfi_op_list
, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY
);
2802 msg_closed
.cfc_op_list_ctr
= cfil_info
->cfi_op_list_ctr
;
2805 CFIL_LOG(LOG_ERR
, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed
.cfc_msghdr
.cfm_sock_id
, cfil_info
->cfi_op_list_ctr
, cfil_info
->cfi_first_event
.tv_sec
, cfil_info
->cfi_first_event
.tv_usec
);
2808 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
2809 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
2811 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
2812 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
2816 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2817 entry
->cfe_filter
->cf_kcunit
,
2819 sizeof(struct cfil_msg_sock_closed
),
2822 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d",
2827 entry
->cfe_flags
|= CFEF_SENT_SOCK_CLOSED
;
2828 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_ok
);
2830 /* We can recover from flow control */
2831 if (error
== ENOBUFS
) {
2832 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2833 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_flow_control
);
2835 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
2836 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2839 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2841 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2844 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_fail
);
2847 cfil_rw_unlock_shared(&cfil_lck_rw
);
static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
    struct in6_addr *ip6, u_int16_t port)
{
	struct sockaddr_in6 *sin6 = &sin46->sin6;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_len = sizeof(*sin6);
	sin6->sin6_port = port;
	sin6->sin6_addr = *ip6;
	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
		sin6->sin6_addr.s6_addr16[1] = 0;
	}
}

static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
    struct in_addr ip, u_int16_t port)
{
	struct sockaddr_in *sin = &sin46->sin;

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_port = port;
	sin->sin_addr.s_addr = ip.s_addr;
}
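/*
 * The cfil_get_flow_address*() helpers return the local and foreign
 * addresses and ports of a flow: from the cfil hash entry for UDP flows,
 * otherwise from the inpcb of the TCP socket.
 */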
static void
cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
    struct in6_addr **laddr, struct in6_addr **faddr,
    u_int16_t *lport, u_int16_t *fport)
{
	if (entry != NULL) {
		*laddr = &entry->cfentry_laddr.addr6;
		*faddr = &entry->cfentry_faddr.addr6;
		*lport = entry->cfentry_lport;
		*fport = entry->cfentry_fport;
	} else {
		*laddr = &inp->in6p_laddr;
		*faddr = &inp->in6p_faddr;
		*lport = inp->inp_lport;
		*fport = inp->inp_fport;
	}
}

static void
cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
    struct in_addr *laddr, struct in_addr *faddr,
    u_int16_t *lport, u_int16_t *fport)
{
	if (entry != NULL) {
		*laddr = entry->cfentry_laddr.addr46.ia46_addr4;
		*faddr = entry->cfentry_faddr.addr46.ia46_addr4;
		*lport = entry->cfentry_lport;
		*fport = entry->cfentry_fport;
	} else {
		*laddr = inp->inp_laddr;
		*faddr = inp->inp_faddr;
		*lport = inp->inp_lport;
		*fport = inp->inp_fport;
	}
}
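/*
 * cfil_dispatch_data_event() copies the span of data to be peeked at
 * (m_copym_mode), prepends a cfil_msg_data_event header carrying the flow
 * addresses and offsets, and enqueues the message on the kernel control
 * socket with ctl_enqueuembuf().  ENOBUFS again signals flow control.
 */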
2918 cfil_dispatch_data_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
2919 struct mbuf
*data
, unsigned int copyoffset
, unsigned int copylen
)
2922 struct mbuf
*copy
= NULL
;
2923 struct mbuf
*msg
= NULL
;
2924 unsigned int one
= 1;
2925 struct cfil_msg_data_event
*data_req
;
2927 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
2928 struct cfil_entry
*entry
;
2929 struct cfe_buf
*entrybuf
;
2930 struct content_filter
*cfc
;
2933 cfil_rw_lock_shared(&cfil_lck_rw
);
2935 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2937 entrybuf
= &entry
->cfe_snd
;
2939 entrybuf
= &entry
->cfe_rcv
;
2942 cfc
= entry
->cfe_filter
;
2947 data
= cfil_data_start(data
);
2948 if (data
== NULL
|| (data
->m_flags
& M_PKTHDR
) == 0) {
2949 CFIL_LOG(LOG_ERR
, "NOT PKTHDR");
2953 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2954 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2956 socket_lock_assert_owned(so
);
2958 /* Would be wasteful to try */
2959 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2964 /* Make a copy of the data to pass to kernel control socket */
2965 copy
= m_copym_mode(data
, copyoffset
, copylen
, M_DONTWAIT
,
2968 CFIL_LOG(LOG_ERR
, "m_copym_mode() failed");
2973 /* We need an mbuf packet for the message header */
2974 hdrsize
= sizeof(struct cfil_msg_data_event
);
2975 error
= mbuf_allocpacket(MBUF_DONTWAIT
, hdrsize
, &one
, &msg
);
2977 CFIL_LOG(LOG_ERR
, "mbuf_allocpacket() failed");
2980 * ENOBUFS is to indicate flow control
2985 mbuf_setlen(msg
, hdrsize
);
2986 mbuf_pkthdr_setlen(msg
, hdrsize
+ copylen
);
2988 data_req
= (struct cfil_msg_data_event
*)mbuf_data(msg
);
2989 bzero(data_req
, hdrsize
);
2990 data_req
->cfd_msghdr
.cfm_len
= hdrsize
+ copylen
;
2991 data_req
->cfd_msghdr
.cfm_version
= 1;
2992 data_req
->cfd_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2993 data_req
->cfd_msghdr
.cfm_op
=
2994 outgoing
? CFM_OP_DATA_OUT
: CFM_OP_DATA_IN
;
2995 data_req
->cfd_msghdr
.cfm_sock_id
=
2996 entry
->cfe_cfil_info
->cfi_sock_id
;
2997 data_req
->cfd_start_offset
= entrybuf
->cfe_peeked
;
2998 data_req
->cfd_end_offset
= entrybuf
->cfe_peeked
+ copylen
;
3002 * For non connected sockets need to copy addresses from passed
3005 if (inp
->inp_vflag
& INP_IPV6
) {
3006 struct in6_addr
*laddr
= NULL
, *faddr
= NULL
;
3007 u_int16_t lport
= 0, fport
= 0;
3009 cfil_get_flow_address_v6(cfil_info
->cfi_hash_entry
, inp
,
3010 &laddr
, &faddr
, &lport
, &fport
);
3012 fill_ip6_sockaddr_4_6(&data_req
->cfc_src
, laddr
, lport
);
3013 fill_ip6_sockaddr_4_6(&data_req
->cfc_dst
, faddr
, fport
);
3015 fill_ip6_sockaddr_4_6(&data_req
->cfc_src
, faddr
, fport
);
3016 fill_ip6_sockaddr_4_6(&data_req
->cfc_dst
, laddr
, lport
);
3018 } else if (inp
->inp_vflag
& INP_IPV4
) {
3019 struct in_addr laddr
= {0}, faddr
= {0};
3020 u_int16_t lport
= 0, fport
= 0;
3022 cfil_get_flow_address(cfil_info
->cfi_hash_entry
, inp
,
3023 &laddr
, &faddr
, &lport
, &fport
);
3026 fill_ip_sockaddr_4_6(&data_req
->cfc_src
, laddr
, lport
);
3027 fill_ip_sockaddr_4_6(&data_req
->cfc_dst
, faddr
, fport
);
3029 fill_ip_sockaddr_4_6(&data_req
->cfc_src
, faddr
, fport
);
3030 fill_ip_sockaddr_4_6(&data_req
->cfc_dst
, laddr
, lport
);
3035 CFI_ADD_TIME_LOG(cfil_info
, &tv
, &cfil_info
->cfi_first_event
, data_req
->cfd_msghdr
.cfm_op
);
3037 /* Pass the message to the content filter */
3038 error
= ctl_enqueuembuf(entry
->cfe_filter
->cf_kcref
,
3039 entry
->cfe_filter
->cf_kcunit
,
3042 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
3046 entry
->cfe_flags
&= ~CFEF_FLOW_CONTROLLED
;
3047 OSIncrementAtomic(&cfil_stats
.cfs_data_event_ok
);
3050 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3051 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
, (uint64_t)VM_KERNEL_ADDRPERM(data
), copyoffset
, copylen
);
3055 if (error
== ENOBUFS
) {
3056 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
3058 &cfil_stats
.cfs_data_event_flow_control
);
3060 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
)) {
3061 cfil_rw_lock_exclusive(&cfil_lck_rw
);
3064 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
3066 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
3069 OSIncrementAtomic(&cfil_stats
.cfs_data_event_fail
);
3072 cfil_rw_unlock_shared(&cfil_lck_rw
);
3078 * Process the queue of data waiting to be delivered to content filter
3081 cfil_data_service_ctl_q(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3084 struct mbuf
*data
, *tmp
= NULL
;
3085 unsigned int datalen
= 0, copylen
= 0, copyoffset
= 0;
3086 struct cfil_entry
*entry
;
3087 struct cfe_buf
*entrybuf
;
3088 uint64_t currentoffset
= 0;
3090 if (cfil_info
== NULL
) {
3094 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3095 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3097 socket_lock_assert_owned(so
);
3099 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3101 entrybuf
= &entry
->cfe_snd
;
3103 entrybuf
= &entry
->cfe_rcv
;
3106 /* Send attached message if not yet done */
3107 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
3108 error
= cfil_dispatch_attach_event(so
, cfil_info
, kcunit
);
3110 /* We can recover from flow control */
3111 if (error
== ENOBUFS
|| error
== ENOMEM
) {
3116 } else if ((entry
->cfe_flags
& CFEF_DATA_START
) == 0) {
3117 OSIncrementAtomic(&cfil_stats
.cfs_ctl_q_not_started
);
3122 CFIL_LOG(LOG_DEBUG
, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3123 entrybuf
->cfe_pass_offset
,
3124 entrybuf
->cfe_peeked
,
3125 entrybuf
->cfe_peek_offset
);
3128 /* Move all data that can pass */
3129 while ((data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
)) != NULL
&&
3130 entrybuf
->cfe_ctl_q
.q_start
< entrybuf
->cfe_pass_offset
) {
3131 datalen
= cfil_data_length(data
, NULL
, NULL
);
3134 if (entrybuf
->cfe_ctl_q
.q_start
+ datalen
<=
3135 entrybuf
->cfe_pass_offset
) {
3137 * The first mbuf can fully pass
3142 * The first mbuf can partially pass
3144 copylen
= entrybuf
->cfe_pass_offset
-
3145 entrybuf
->cfe_ctl_q
.q_start
;
3147 VERIFY(copylen
<= datalen
);
3151 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3152 "datalen %u copylen %u",
3153 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3154 entrybuf
->cfe_ctl_q
.q_start
,
3155 entrybuf
->cfe_peeked
,
3156 entrybuf
->cfe_pass_offset
,
3157 entrybuf
->cfe_peek_offset
,
3162 * Data that passes has been peeked at explicitly or
3165 if (entrybuf
->cfe_ctl_q
.q_start
+ copylen
>
3166 entrybuf
->cfe_peeked
) {
3167 entrybuf
->cfe_peeked
=
3168 entrybuf
->cfe_ctl_q
.q_start
+ copylen
;
3171 * Stop on partial pass
3173 if (copylen
< datalen
) {
3177 /* All good, move full data from ctl queue to pending queue */
3178 cfil_queue_remove(&entrybuf
->cfe_ctl_q
, data
, datalen
);
3180 cfil_queue_enqueue(&entrybuf
->cfe_pending_q
, data
, datalen
);
3182 OSAddAtomic64(datalen
,
3183 &cfil_stats
.cfs_pending_q_out_enqueued
);
3185 OSAddAtomic64(datalen
,
3186 &cfil_stats
.cfs_pending_q_in_enqueued
);
3189 CFIL_INFO_VERIFY(cfil_info
);
3192 "%llx first %llu peeked %llu pass %llu peek %llu"
3193 "datalen %u copylen %u",
3194 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3195 entrybuf
->cfe_ctl_q
.q_start
,
3196 entrybuf
->cfe_peeked
,
3197 entrybuf
->cfe_pass_offset
,
3198 entrybuf
->cfe_peek_offset
,
3203 /* Now deal with remaining data the filter wants to peek at */
3204 for (data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
),
3205 currentoffset
= entrybuf
->cfe_ctl_q
.q_start
;
3206 data
!= NULL
&& currentoffset
< entrybuf
->cfe_peek_offset
;
3207 data
= cfil_queue_next(&entrybuf
->cfe_ctl_q
, data
),
3208 currentoffset
+= datalen
) {
3209 datalen
= cfil_data_length(data
, NULL
, NULL
);
3212 /* We've already peeked at this mbuf */
3213 if (currentoffset
+ datalen
<= entrybuf
->cfe_peeked
) {
3217 * The data in the first mbuf may have been
3218 * partially peeked at
3220 copyoffset
= entrybuf
->cfe_peeked
- currentoffset
;
3221 VERIFY(copyoffset
< datalen
);
3222 copylen
= datalen
- copyoffset
;
3223 VERIFY(copylen
<= datalen
);
3225 * Do not copy more than needed
3227 if (currentoffset
+ copyoffset
+ copylen
>
3228 entrybuf
->cfe_peek_offset
) {
3229 copylen
= entrybuf
->cfe_peek_offset
-
3230 (currentoffset
+ copyoffset
);
3235 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3236 "datalen %u copylen %u copyoffset %u",
3237 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3239 entrybuf
->cfe_peeked
,
3240 entrybuf
->cfe_pass_offset
,
3241 entrybuf
->cfe_peek_offset
,
3242 datalen
, copylen
, copyoffset
);
3246 * Stop if there is nothing more to peek at
3252 * Let the filter get a peek at this span of data
3254 error
= cfil_dispatch_data_event(so
, cfil_info
, kcunit
,
3255 outgoing
, data
, copyoffset
, copylen
);
3257 /* On error, leave data in ctl_q */
3260 entrybuf
->cfe_peeked
+= copylen
;
3262 OSAddAtomic64(copylen
,
3263 &cfil_stats
.cfs_ctl_q_out_peeked
);
3265 OSAddAtomic64(copylen
,
3266 &cfil_stats
.cfs_ctl_q_in_peeked
);
3269 /* Stop when data could not be fully peeked at */
3270 if (copylen
+ copyoffset
< datalen
) {
3274 CFIL_INFO_VERIFY(cfil_info
);
3277 "%llx first %llu peeked %llu pass %llu peek %llu"
3278 "datalen %u copylen %u copyoffset %u",
3279 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3281 entrybuf
->cfe_peeked
,
3282 entrybuf
->cfe_pass_offset
,
3283 entrybuf
->cfe_peek_offset
,
3284 datalen
, copylen
, copyoffset
);
3288 * Process data that has passed the filter
3290 error
= cfil_service_pending_queue(so
, cfil_info
, kcunit
, outgoing
);
3292 CFIL_LOG(LOG_ERR
, "cfil_service_pending_queue() error %d",
3298 * Dispatch disconnect events that could not be sent
3300 if (cfil_info
== NULL
) {
3302 } else if (outgoing
) {
3303 if ((cfil_info
->cfi_flags
& CFIF_SHUT_WR
) &&
3304 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
)) {
3305 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 1);
3308 if ((cfil_info
->cfi_flags
& CFIF_SHUT_RD
) &&
3309 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
)) {
3310 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 0);
3316 "first %llu peeked %llu pass %llu peek %llu",
3317 entrybuf
->cfe_ctl_q
.q_start
,
3318 entrybuf
->cfe_peeked
,
3319 entrybuf
->cfe_pass_offset
,
3320 entrybuf
->cfe_peek_offset
);
3322 CFIL_INFO_VERIFY(cfil_info
);
3327 * cfil_data_filter()
3329 * Process data for a content filter installed on a socket
3332 cfil_data_filter(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
3333 struct mbuf
*data
, uint64_t datalen
)
3336 struct cfil_entry
*entry
;
3337 struct cfe_buf
*entrybuf
;
3339 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3340 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3342 socket_lock_assert_owned(so
);
3344 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3346 entrybuf
= &entry
->cfe_snd
;
3348 entrybuf
= &entry
->cfe_rcv
;
3351 /* Are we attached to the filter? */
3352 if (entry
->cfe_filter
== NULL
) {
3357 /* Dispatch to filters */
3358 cfil_queue_enqueue(&entrybuf
->cfe_ctl_q
, data
, datalen
);
3360 OSAddAtomic64(datalen
,
3361 &cfil_stats
.cfs_ctl_q_out_enqueued
);
3363 OSAddAtomic64(datalen
,
3364 &cfil_stats
.cfs_ctl_q_in_enqueued
);
3367 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, outgoing
);
3369 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
3373 * We have to return EJUSTRETURN in all cases to avoid double free
3376 error
= EJUSTRETURN
;
3378 CFIL_INFO_VERIFY(cfil_info
);
3380 CFIL_LOG(LOG_INFO
, "return %d", error
);
3385 * cfil_service_inject_queue() re-inject data that passed the
3389 cfil_service_inject_queue(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
3392 unsigned int datalen
;
3396 struct cfi_buf
*cfi_buf
;
3397 struct cfil_queue
*inject_q
;
3398 int need_rwakeup
= 0;
3401 if (cfil_info
== NULL
) {
3405 socket_lock_assert_owned(so
);
3408 cfi_buf
= &cfil_info
->cfi_snd
;
3409 cfil_info
->cfi_flags
&= ~CFIF_RETRY_INJECT_OUT
;
3411 cfi_buf
= &cfil_info
->cfi_rcv
;
3412 cfil_info
->cfi_flags
&= ~CFIF_RETRY_INJECT_IN
;
3414 inject_q
= &cfi_buf
->cfi_inject_q
;
3416 if (cfil_queue_empty(inject_q
)) {
3420 #if DATA_DEBUG | VERDICT_DEBUG
3421 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3422 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
, cfil_queue_len(inject_q
));
3425 while ((data
= cfil_queue_first(inject_q
)) != NULL
) {
3426 datalen
= cfil_data_length(data
, &mbcnt
, &mbnum
);
3429 CFIL_LOG(LOG_DEBUG
, "CFIL: SERVICE INJECT-Q: <%s>: <so %llx> data %llx datalen %u (mbcnt %u)",
3430 remote_addr_ptr
? "UNCONNECTED" : "CONNECTED",
3431 (uint64_t)VM_KERNEL_ADDRPERM(so
), (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, mbcnt
);
3434 /* Remove data from queue and adjust stats */
3435 cfil_queue_remove(inject_q
, data
, datalen
);
3436 cfi_buf
->cfi_pending_first
+= datalen
;
3437 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
3438 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
3439 cfil_info_buf_verify(cfi_buf
);
3442 error
= sosend_reinject(so
, NULL
, data
, NULL
, 0);
3445 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: Error: sosend_reinject() failed");
3446 CFIL_LOG(LOG_ERR
, "### sosend() failed %d", error
);
3450 // At least one injection succeeded, need to wake up pending threads.
3453 data
->m_flags
|= M_SKIPCFIL
;
3456 * NOTE: We currently only support TCP and UDP.
3457 * For RAWIP, MPTCP and message TCP we'll
3458 * need to call the appropriate sbappendxxx()
3459 * of fix sock_inject_data_in()
3461 if (IS_UDP(so
) == TRUE
) {
3462 if (sbappendchain(&so
->so_rcv
, data
, 0)) {
3466 if (sbappendstream(&so
->so_rcv
, data
)) {
3473 OSAddAtomic64(datalen
,
3474 &cfil_stats
.cfs_inject_q_out_passed
);
3476 OSAddAtomic64(datalen
,
3477 &cfil_stats
.cfs_inject_q_in_passed
);
3483 #if DATA_DEBUG | VERDICT_DEBUG
3484 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
3485 (uint64_t)VM_KERNEL_ADDRPERM(so
), count
);
3488 /* A single wakeup is for several packets is more efficient */
3490 if (outgoing
== TRUE
) {
3497 if (error
!= 0 && cfil_info
) {
3498 if (error
== ENOBUFS
) {
3499 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nobufs
);
3501 if (error
== ENOMEM
) {
3502 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nomem
);
3506 cfil_info
->cfi_flags
|= CFIF_RETRY_INJECT_OUT
;
3507 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_out_fail
);
3509 cfil_info
->cfi_flags
|= CFIF_RETRY_INJECT_IN
;
3510 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_in_fail
);
3517 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_SHUT_WR
)) {
3518 cfil_sock_notify_shutdown(so
, SHUT_WR
);
3519 if (cfil_sock_data_pending(&so
->so_snd
) == 0) {
3520 soshutdownlock_final(so
, SHUT_WR
);
3523 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
3524 if (cfil_filters_attached(so
) == 0) {
3525 CFIL_LOG(LOG_INFO
, "so %llx waking",
3526 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3527 wakeup((caddr_t
)cfil_info
);
3531 CFIL_INFO_VERIFY(cfil_info
);
3537 cfil_service_pending_queue(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3539 uint64_t passlen
, curlen
;
3541 unsigned int datalen
;
3543 struct cfil_entry
*entry
;
3544 struct cfe_buf
*entrybuf
;
3545 struct cfil_queue
*pending_q
;
3547 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3548 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3550 socket_lock_assert_owned(so
);
3552 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3554 entrybuf
= &entry
->cfe_snd
;
3556 entrybuf
= &entry
->cfe_rcv
;
3559 pending_q
= &entrybuf
->cfe_pending_q
;
3561 passlen
= entrybuf
->cfe_pass_offset
- pending_q
->q_start
;
3564 * Locate the chunks of data that we can pass to the next filter
3565 * A data chunk must be on mbuf boundaries
3568 while ((data
= cfil_queue_first(pending_q
)) != NULL
) {
3569 datalen
= cfil_data_length(data
, NULL
, NULL
);
3573 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
3574 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
,
3578 if (curlen
+ datalen
> passlen
) {
3582 cfil_queue_remove(pending_q
, data
, datalen
);
3587 kcunit
<= MAX_CONTENT_FILTER
;
3589 error
= cfil_data_filter(so
, cfil_info
, kcunit
, outgoing
,
3591 /* 0 means passed so we can continue */
3596 /* When data has passed all filters, re-inject */
3600 &cfil_info
->cfi_snd
.cfi_inject_q
,
3602 OSAddAtomic64(datalen
,
3603 &cfil_stats
.cfs_inject_q_out_enqueued
);
3606 &cfil_info
->cfi_rcv
.cfi_inject_q
,
3608 OSAddAtomic64(datalen
,
3609 &cfil_stats
.cfs_inject_q_in_enqueued
);
3614 CFIL_INFO_VERIFY(cfil_info
);
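/*
 * cfil_update_data_offsets() records the pass and peek offsets received
 * from the filter agent, services the control queue accordingly and marks
 * the entry detached once both directions have passed up to CFM_MAX_OFFSET.
 */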
3620 cfil_update_data_offsets(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
3621 uint64_t pass_offset
, uint64_t peek_offset
)
3624 struct cfil_entry
*entry
= NULL
;
3625 struct cfe_buf
*entrybuf
;
3628 CFIL_LOG(LOG_INFO
, "pass %llu peek %llu", pass_offset
, peek_offset
);
3630 socket_lock_assert_owned(so
);
3632 if (cfil_info
== NULL
) {
3633 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
3634 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3637 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
3638 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3639 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3644 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3646 entrybuf
= &entry
->cfe_snd
;
3648 entrybuf
= &entry
->cfe_rcv
;
3651 /* Record updated offsets for this content filter */
3652 if (pass_offset
> entrybuf
->cfe_pass_offset
) {
3653 entrybuf
->cfe_pass_offset
= pass_offset
;
3655 if (entrybuf
->cfe_peek_offset
< entrybuf
->cfe_pass_offset
) {
3656 entrybuf
->cfe_peek_offset
= entrybuf
->cfe_pass_offset
;
3660 CFIL_LOG(LOG_INFO
, "pass_offset %llu <= cfe_pass_offset %llu",
3661 pass_offset
, entrybuf
->cfe_pass_offset
);
3663 /* Filter does not want or need to see data that's allowed to pass */
3664 if (peek_offset
> entrybuf
->cfe_pass_offset
&&
3665 peek_offset
> entrybuf
->cfe_peek_offset
) {
3666 entrybuf
->cfe_peek_offset
= peek_offset
;
3674 /* Move data held in control queue to pending queue if needed */
3675 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, outgoing
);
3677 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
3681 error
= EJUSTRETURN
;
3685 * The filter is effectively detached when pass all from both sides
3686 * or when the socket is closed and no more data is waiting
3687 * to be delivered to the filter
3689 if (entry
!= NULL
&&
3690 ((entry
->cfe_snd
.cfe_pass_offset
== CFM_MAX_OFFSET
&&
3691 entry
->cfe_rcv
.cfe_pass_offset
== CFM_MAX_OFFSET
) ||
3692 ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
3693 cfil_queue_empty(&entry
->cfe_snd
.cfe_ctl_q
) &&
3694 cfil_queue_empty(&entry
->cfe_rcv
.cfe_ctl_q
)))) {
3695 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
3697 cfil_info_log(LOG_ERR
, cfil_info
, outgoing
?
3698 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
3699 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
3701 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
3702 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3703 if ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
3704 cfil_filters_attached(so
) == 0) {
3706 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: WAKING");
3708 CFIL_LOG(LOG_INFO
, "so %llx waking",
3709 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3710 wakeup((caddr_t
)cfil_info
);
3713 CFIL_INFO_VERIFY(cfil_info
);
3714 CFIL_LOG(LOG_INFO
, "return %d", error
);
3719 * Update pass offset for socket when no data is pending
3722 cfil_set_socket_pass_offset(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
3724 struct cfi_buf
*cfi_buf
;
3725 struct cfil_entry
*entry
;
3726 struct cfe_buf
*entrybuf
;
3728 uint64_t pass_offset
= 0;
3730 if (cfil_info
== NULL
) {
3734 CFIL_LOG(LOG_INFO
, "so %llx outgoing %d",
3735 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
);
3737 socket_lock_assert_owned(so
);
3740 cfi_buf
= &cfil_info
->cfi_snd
;
3742 cfi_buf
= &cfil_info
->cfi_rcv
;
3745 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
3746 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
,
3747 cfi_buf
->cfi_pending_first
, cfi_buf
->cfi_pending_last
);
3749 if (cfi_buf
->cfi_pending_last
- cfi_buf
->cfi_pending_first
== 0) {
3750 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3751 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3753 /* Are we attached to a filter? */
3754 if (entry
->cfe_filter
== NULL
) {
3759 entrybuf
= &entry
->cfe_snd
;
3761 entrybuf
= &entry
->cfe_rcv
;
3764 if (pass_offset
== 0 ||
3765 entrybuf
->cfe_pass_offset
< pass_offset
) {
3766 pass_offset
= entrybuf
->cfe_pass_offset
;
3769 cfi_buf
->cfi_pass_offset
= pass_offset
;
3772 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
3773 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, cfi_buf
->cfi_pass_offset
);
int
cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_INFO, "so %llx %s dropped",
		    (uint64_t)VM_KERNEL_ADDRPERM(so),
		    outgoing ? "out" : "in");
		goto release;
	}

	error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
	    pass_offset, peek_offset);

	cfil_service_inject_queue(so, cfil_info, outgoing);

	cfil_set_socket_pass_offset(so, cfil_info, outgoing);
release:
	CFIL_INFO_VERIFY(cfil_info);
	cfil_release_sockbuf(so, outgoing);

	return error;
}
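/*
 * cfil_flush_queues() drains the control, pending and inject queues of
 * both directions, typically when the flow is dropped or closed.
 */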
3811 cfil_flush_queues(struct socket
*so
, struct cfil_info
*cfil_info
)
3813 struct cfil_entry
*entry
;
3817 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || cfil_info
== NULL
) {
3821 socket_lock_assert_owned(so
);
3824 * Flush the output queues and ignore errors as long as
3827 (void) cfil_acquire_sockbuf(so
, cfil_info
, 1);
3828 if (cfil_info
!= NULL
) {
3830 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3831 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3833 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
3834 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
3836 drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
3839 if (cfil_info
->cfi_flags
& CFIF_DROP
) {
3841 &cfil_stats
.cfs_flush_out_drop
);
3844 &cfil_stats
.cfs_flush_out_close
);
3848 cfil_release_sockbuf(so
, 1);
3851 * Flush the input queues
3853 (void) cfil_acquire_sockbuf(so
, cfil_info
, 0);
3854 if (cfil_info
!= NULL
) {
3856 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3857 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3859 drained
+= cfil_queue_drain(
3860 &entry
->cfe_rcv
.cfe_ctl_q
);
3861 drained
+= cfil_queue_drain(
3862 &entry
->cfe_rcv
.cfe_pending_q
);
3864 drained
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
3867 if (cfil_info
->cfi_flags
& CFIF_DROP
) {
3869 &cfil_stats
.cfs_flush_in_drop
);
3872 &cfil_stats
.cfs_flush_in_close
);
3876 cfil_release_sockbuf(so
, 0);
3878 CFIL_INFO_VERIFY(cfil_info
);
3882 cfil_action_drop(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
)
3885 struct cfil_entry
*entry
;
3888 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || cfil_info
== NULL
) {
3892 socket_lock_assert_owned(so
);
3894 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3896 /* Are we attached to the filter? */
3897 if (entry
->cfe_filter
== NULL
) {
3901 cfil_info
->cfi_flags
|= CFIF_DROP
;
3906 * Force the socket to be marked defunct
3907 * (forcing fixed along with rdar://19391339)
3909 if (so
->so_cfil_db
== NULL
) {
3910 error
= sosetdefunct(p
, so
,
3911 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
,
3914 /* Flush the socket buffer and disconnect */
3916 error
= sodefunct(p
, so
,
3917 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
);
3921 /* The filter is done, mark as detached */
3922 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
3924 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: DROP - DETACH");
3926 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
3927 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3929 /* Pending data needs to go */
3930 cfil_flush_queues(so
, cfil_info
);
3932 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
3933 if (cfil_filters_attached(so
) == 0) {
3934 CFIL_LOG(LOG_INFO
, "so %llx waking",
3935 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3936 wakeup((caddr_t
)cfil_info
);
3944 cfil_action_bless_client(uint32_t kcunit
, struct cfil_msg_hdr
*msghdr
)
3947 struct cfil_info
*cfil_info
= NULL
;
3949 bool cfil_attached
= false;
3950 struct cfil_msg_bless_client
*blessmsg
= (struct cfil_msg_bless_client
*)msghdr
;
3952 // Search and lock socket
3953 struct socket
*so
= cfil_socket_from_client_uuid(blessmsg
->cfb_client_uuid
, &cfil_attached
);
3957 // The client gets a pass automatically
3958 cfil_info
= (so
->so_cfil_db
!= NULL
) ?
3959 cfil_db_get_cfil_info(so
->so_cfil_db
, msghdr
->cfm_sock_id
) : so
->so_cfil
;
3961 if (cfil_attached
) {
3963 if (cfil_info
!= NULL
) {
3964 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
3965 cfil_info
->cfi_hash_entry
? "UDP" : "TCP",
3966 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3967 cfil_info
->cfi_sock_id
);
3970 (void)cfil_action_data_pass(so
, cfil_info
, kcunit
, 1, CFM_MAX_OFFSET
, CFM_MAX_OFFSET
);
3971 (void)cfil_action_data_pass(so
, cfil_info
, kcunit
, 0, CFM_MAX_OFFSET
, CFM_MAX_OFFSET
);
3973 so
->so_flags1
|= SOF1_CONTENT_FILTER_SKIP
;
3975 socket_unlock(so
, 1);
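/*
 * cfil_update_entry_offsets() is the fast path used when newly queued data
 * is already below every filter's pass offset: the queue bookkeeping of
 * each attached entry is advanced without dispatching any data event.
 */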
3982 cfil_update_entry_offsets(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
, unsigned int datalen
)
3984 struct cfil_entry
*entry
;
3985 struct cfe_buf
*entrybuf
;
3988 CFIL_LOG(LOG_INFO
, "so %llx outgoing %d datalen %u",
3989 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
, datalen
);
3991 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3992 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3994 /* Are we attached to the filter? */
3995 if (entry
->cfe_filter
== NULL
) {
4000 entrybuf
= &entry
->cfe_snd
;
4002 entrybuf
= &entry
->cfe_rcv
;
4005 entrybuf
->cfe_ctl_q
.q_start
+= datalen
;
4006 entrybuf
->cfe_pass_offset
= entrybuf
->cfe_ctl_q
.q_start
;
4007 entrybuf
->cfe_peeked
= entrybuf
->cfe_ctl_q
.q_start
;
4008 if (entrybuf
->cfe_peek_offset
< entrybuf
->cfe_pass_offset
) {
4009 entrybuf
->cfe_peek_offset
= entrybuf
->cfe_pass_offset
;
4012 entrybuf
->cfe_ctl_q
.q_end
+= datalen
;
4014 entrybuf
->cfe_pending_q
.q_start
+= datalen
;
4015 entrybuf
->cfe_pending_q
.q_end
+= datalen
;
4017 CFIL_INFO_VERIFY(cfil_info
);
4022 cfil_data_common(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
, struct sockaddr
*to
,
4023 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4025 #pragma unused(to, control, flags)
4027 unsigned int datalen
;
4031 struct cfi_buf
*cfi_buf
;
4032 struct mbuf
*chain
= NULL
;
4034 if (cfil_info
== NULL
) {
4035 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
4036 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4039 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
4040 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4041 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4046 datalen
= cfil_data_length(data
, &mbcnt
, &mbnum
);
4049 cfi_buf
= &cfil_info
->cfi_snd
;
4051 cfi_buf
= &cfil_info
->cfi_rcv
;
4054 cfi_buf
->cfi_pending_last
+= datalen
;
4055 cfi_buf
->cfi_pending_mbcnt
+= mbcnt
;
4056 cfi_buf
->cfi_pending_mbnum
+= mbnum
;
4059 if (cfi_buf
->cfi_pending_mbnum
> cfil_udp_gc_mbuf_num_max
||
4060 cfi_buf
->cfi_pending_mbcnt
> cfil_udp_gc_mbuf_cnt_max
) {
4061 cfi_buf
->cfi_tail_drop_cnt
++;
4062 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
4063 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
4068 cfil_info_buf_verify(cfi_buf
);
4071 CFIL_LOG(LOG_DEBUG
, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4072 (uint64_t)VM_KERNEL_ADDRPERM(so
),
4073 outgoing
? "OUT" : "IN",
4074 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, data
->m_flags
,
4075 (uint64_t)VM_KERNEL_ADDRPERM(data
->m_nextpkt
),
4076 cfi_buf
->cfi_pending_last
,
4077 cfi_buf
->cfi_pending_mbcnt
,
4078 cfi_buf
->cfi_pass_offset
);
4081 /* Fast path when below pass offset */
4082 if (cfi_buf
->cfi_pending_last
<= cfi_buf
->cfi_pass_offset
) {
4083 cfil_update_entry_offsets(so
, cfil_info
, outgoing
, datalen
);
4085 CFIL_LOG(LOG_DEBUG
, "CFIL: QUEUEING DATA: FAST PATH");
4088 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
4089 // Is cfil attached to this filter?
4090 if (IS_ENTRY_ATTACHED(cfil_info
, kcunit
)) {
4093 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4094 * This full chain will be reinjected into socket after recieving verdict.
4096 (void) cfil_udp_save_socket_state(cfil_info
, data
);
4097 chain
= sbconcat_mbufs(NULL
, outgoing
? NULL
: to
, data
, control
);
4098 if (chain
== NULL
) {
4103 error
= cfil_data_filter(so
, cfil_info
, kcunit
, outgoing
, data
,
4106 /* 0 means passed so continue with next filter */
4113 /* Move cursor if no filter claimed the data */
4115 cfi_buf
->cfi_pending_first
+= datalen
;
4116 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
4117 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
4118 cfil_info_buf_verify(cfi_buf
);
4121 CFIL_INFO_VERIFY(cfil_info
);
4127 * Callback from socket layer sosendxxx()
4130 cfil_sock_data_out(struct socket
*so
, struct sockaddr
*to
,
4131 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4136 return cfil_sock_udp_handle_data(TRUE
, so
, NULL
, to
, data
, control
, flags
);
4139 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
4143 socket_lock_assert_owned(so
);
4145 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
4146 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4147 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4150 if (control
!= NULL
) {
4151 CFIL_LOG(LOG_ERR
, "so %llx control",
4152 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4153 OSIncrementAtomic(&cfil_stats
.cfs_data_out_control
);
4155 if ((flags
& MSG_OOB
)) {
4156 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
4157 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4158 OSIncrementAtomic(&cfil_stats
.cfs_data_out_oob
);
4160 if ((so
->so_snd
.sb_flags
& SB_LOCK
) == 0) {
4161 panic("so %p SB_LOCK not set", so
);
4164 if (so
->so_snd
.sb_cfil_thread
!= NULL
) {
4165 panic("%s sb_cfil_thread %p not NULL", __func__
,
4166 so
->so_snd
.sb_cfil_thread
);
4169 error
= cfil_data_common(so
, so
->so_cfil
, 1, to
, data
, control
, flags
);
4175 * Callback from socket layer sbappendxxx()
4178 cfil_sock_data_in(struct socket
*so
, struct sockaddr
*from
,
4179 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4184 return cfil_sock_udp_handle_data(FALSE
, so
, NULL
, from
, data
, control
, flags
);
4187 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
) {
4191 socket_lock_assert_owned(so
);
4193 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
4194 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4195 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4198 if (control
!= NULL
) {
4199 CFIL_LOG(LOG_ERR
, "so %llx control",
4200 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4201 OSIncrementAtomic(&cfil_stats
.cfs_data_in_control
);
4203 if (data
->m_type
== MT_OOBDATA
) {
4204 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
4205 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4206 OSIncrementAtomic(&cfil_stats
.cfs_data_in_oob
);
4208 error
= cfil_data_common(so
, so
->so_cfil
, 0, from
, data
, control
, flags
);
/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there is outgoing data in process.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read any more data.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
	int error = 0;

	if (IS_UDP(so)) {
		return cfil_sock_udp_shutdown(so, how);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		goto done;
	}

	socket_lock_assert_owned(so);

	CFIL_LOG(LOG_INFO, "so %llx how %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

	/*
	 * Check the state of the socket before the content filter
	 */
	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
		/* read already shut down */
		error = ENOTCONN;
		goto done;
	}
	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
		/* write already shut down */
		error = ENOTCONN;
		goto done;
	}

	if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * shutdown read: SHUT_RD or SHUT_RDWR
	 */
	if (*how != SHUT_WR) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
		cfil_sock_notify_shutdown(so, SHUT_RD);
	}
	/*
	 * shutdown write: SHUT_WR or SHUT_RDWR
	 */
	if (*how != SHUT_RD) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
		cfil_sock_notify_shutdown(so, SHUT_WR);
		/*
		 * When outgoing data is pending, we delay the shutdown at the
		 * protocol level until the content filters give the final
		 * verdict on the pending data.
		 */
		if (cfil_sock_data_pending(&so->so_snd) != 0) {
			/*
			 * When shutting down the read and write sides at once
			 * we can proceed to the final shutdown of the read
			 * side. Otherwise, we just return.
			 */
			if (*how == SHUT_WR) {
				error = EJUSTRETURN;
			} else if (*how == SHUT_RDWR) {
				*how = SHUT_RD;
			}
		}
	}
done:
	return error;
}
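/*
 * Illustrative sketch (not part of the original file): how a socket-layer
 * caller of cfil_sock_shutdown() is expected to treat its return value.
 * The exact call site is an assumption here; only the EJUSTRETURN
 * convention and the possible downgrade of *how come from the function
 * above.
 *
 *    int how = SHUT_RDWR;
 *    int error = cfil_sock_shutdown(so, &how);
 *    if (error == EJUSTRETURN) {
 *        // Content filters still hold outgoing data: skip the protocol
 *        // level shutdown for now; it completes once the filters issue
 *        // their final verdict on the pending bytes.
 *        error = 0;
 *    } else if (error == 0) {
 *        // Proceed with the protocol shutdown using the possibly
 *        // downgraded value of how (SHUT_RDWR may have become SHUT_RD).
 *    }
 */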
/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
	errno_t error = 0;
	int kcunit;

	if (IS_UDP(so)) {
		cfil_sock_udp_is_closed(so);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Let the filters know of the closing */
		error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
	}

	/* Last chance to push passed data out */
	error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
	if (error == 0) {
		cfil_service_inject_queue(so, so->so_cfil, 1);
	}
	cfil_release_sockbuf(so, 1);

	so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

	/* Pending data needs to go */
	cfil_flush_queues(so, so->so_cfil);

	CFIL_INFO_VERIFY(so->so_cfil);
}
/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
	errno_t error = 0;
	int kcunit;

	if (IS_UDP(so)) {
		cfil_sock_udp_notify_shutdown(so, how, 0, 0);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "so %llx how %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), how);

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Disconnect incoming side */
		if (how != SHUT_WR) {
			error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
		}
		/* Disconnect outgoing side */
		if (how != SHUT_RD) {
			error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
		}
	}
}
static int
cfil_filters_attached(struct socket *so)
{
	struct cfil_entry *entry;
	uint32_t kcunit;
	int attached = 0;

	if (IS_UDP(so)) {
		return cfil_filters_udp_attached(so, FALSE);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return 0;
	}

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &so->so_cfil->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL) {
			continue;
		}
		if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
			continue;
		}
		if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
			continue;
		}
		attached = 1;
		break;
	}

	return attached;
}
/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop
 */
void
cfil_sock_close_wait(struct socket *so)
{
	lck_mtx_t *mutex_held;
	struct timespec ts;
	int error;

	if (IS_UDP(so)) {
		cfil_sock_udp_close_wait(so);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	while (cfil_filters_attached(so)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_notify_shutdown(so, SHUT_RDWR);

		/*
		 * Make sure we need to wait after the filters are notified
		 * of the disconnection
		 */
		if (cfil_filters_attached(so) == 0) {
			break;
		}

		CFIL_LOG(LOG_INFO, "so %llx waiting",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));

		ts.tv_sec = cfil_close_wait_timeout / 1000;
		ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
		    NSEC_PER_USEC * 1000;

		OSIncrementAtomic(&cfil_stats.cfs_close_wait);
		so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
		error = msleep((caddr_t)so->so_cfil, mutex_held,
		    PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
		so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

		CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

		/*
		 * Force close in case of timeout
		 */
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
			break;
		}
	}
}
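/*
 * Worked example (added for clarity, not in the original file): the close
 * wait timeout above is expressed in milliseconds and converted to a
 * struct timespec for msleep(). Assuming cfil_close_wait_timeout == 1500:
 *
 *    ts.tv_sec  = 1500 / 1000;                          // 1 second
 *    ts.tv_nsec = (1500 % 1000) * NSEC_PER_USEC * 1000;
 *               = 500 * 1000 * 1000;                    // 500,000,000 ns
 */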
/*
 * Returns the size of the data held by the content filter by using
 * the pending first and last offsets
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if (IS_UDP(so)) {
		return cfil_sock_udp_data_pending(sb, FALSE);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0) {
			cfi_buf = &so->so_cfil->cfi_snd;
		} else {
			cfi_buf = &so->so_cfil->cfi_rcv;
		}

		pending = cfi_buf->cfi_pending_last -
		    cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
			pending = cfi_buf->cfi_pending_mbcnt;
		}
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}
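/*
 * Worked example (added for clarity, not in the original file): the pending
 * byte count reported above is the distance between the pending offsets,
 * capped by the mbuf character count so we do not overcommit. For instance:
 *
 *    cfi_pending_first = 1000, cfi_pending_last = 5096
 *        pending = 5096 - 1000 = 4096 bytes
 *    cfi_pending_mbcnt = 2048
 *        pending > mbcnt, so 2048 is reported instead
 */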
/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if (IS_UDP(so)) {
		return cfil_sock_udp_data_pending(sb, TRUE);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
	    so->so_snd.sb_cfil_thread != current_thread()) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0) {
			cfi_buf = &so->so_cfil->cfi_snd;
		} else {
			cfi_buf = &so->so_cfil->cfi_rcv;
		}

		pending = cfi_buf->cfi_pending_last -
		    cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
			pending = cfi_buf->cfi_pending_mbcnt;
		}
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}
/*
 * A callback from the socket and protocol layer when data becomes
 * available in the socket buffer to give a chance for the content filter
 * to re-inject data that was held back
 */
void
cfil_sock_buf_update(struct sockbuf *sb)
{
	int outgoing;
	int error;
	struct socket *so = sb->sb_so;

	if (IS_UDP(so)) {
		cfil_sock_udp_buf_update(sb);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	socket_lock_assert_owned(so);

	if ((sb->sb_flags & SB_RECV) == 0) {
		if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
			return;
		}
		outgoing = 1;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
	} else {
		if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
			return;
		}
		outgoing = 0;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
	}

	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
	if (error == 0) {
		cfil_service_inject_queue(so, so->so_cfil, outgoing);
	}
	cfil_release_sockbuf(so, outgoing);
}
int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	size_t len = 0;
	u_int32_t i;

	/* Read-only sysctl */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	cfil_rw_lock_shared(&cfil_lck_rw);

	for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
		struct cfil_filter_stat filter_stat;
		struct content_filter *cfc = content_filters[i];

		if (cfc == NULL) {
			continue;
		}

		/* If just asking for the size */
		if (req->oldptr == USER_ADDR_NULL) {
			len += sizeof(struct cfil_filter_stat);
			continue;
		}

		bzero(&filter_stat, sizeof(struct cfil_filter_stat));
		filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
		filter_stat.cfs_filter_id = cfc->cf_kcunit;
		filter_stat.cfs_flags = cfc->cf_flags;
		filter_stat.cfs_sock_count = cfc->cf_sock_count;
		filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

		error = SYSCTL_OUT(req, &filter_stat,
		    sizeof(struct cfil_filter_stat));
		if (error != 0) {
			break;
		}
	}
	/* If just asking for the size */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = len;
	}

	cfil_rw_unlock_shared(&cfil_lck_rw);

	if (req->oldptr != USER_ADDR_NULL) {
		for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
			cfil_filter_show(i);
		}
	}

	return error;
}
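/*
 * Illustrative user space sketch (not part of the original file): reading
 * the filter statistics exported by the handler above with sysctlbyname().
 * The OID name "net.cfil.filter_list" is an assumption; only the two-pass
 * size-then-copy protocol and the cfs_len record framing come from the
 * code above.
 *
 *    size_t len = 0;
 *    if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&
 *        len > 0) {
 *        void *buf = malloc(len);
 *        if (buf != NULL &&
 *            sysctlbyname("net.cfil.filter_list", buf, &len, NULL, 0) == 0) {
 *            // buf holds an array of struct cfil_filter_stat records;
 *            // each record starts with cfs_len, so step by that length.
 *        }
 *        free(buf);
 *    }
 */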
int
sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	int i;
	struct cfil_info *cfi;

	/* Read-only sysctl */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	cfil_rw_lock_shared(&cfil_lck_rw);

	/*
	 * If just asking for the size,
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = cfil_sock_attached_count *
		    sizeof(struct cfil_sock_stat);
		/* Bump the length in case new sockets get attached */
		req->oldidx += req->oldidx >> 3;
		goto done;
	}

	TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
		struct cfil_entry *entry;
		struct cfil_sock_stat stat;
		struct socket *so = cfi->cfi_so;

		bzero(&stat, sizeof(struct cfil_sock_stat));
		stat.cfs_len = sizeof(struct cfil_sock_stat);
		stat.cfs_sock_id = cfi->cfi_sock_id;
		stat.cfs_flags = cfi->cfi_flags;

		if (so != NULL) {
			stat.cfs_pid = so->last_pid;
			memcpy(stat.cfs_uuid, so->last_uuid,
			    sizeof(uuid_t));
			if (so->so_flags & SOF_DELEGATED) {
				stat.cfs_e_pid = so->e_pid;
				memcpy(stat.cfs_e_uuid, so->e_uuid,
				    sizeof(uuid_t));
			} else {
				stat.cfs_e_pid = so->last_pid;
				memcpy(stat.cfs_e_uuid, so->last_uuid,
				    sizeof(uuid_t));
			}

			stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
			stat.cfs_sock_type = so->so_proto->pr_type;
			stat.cfs_sock_protocol = so->so_proto->pr_protocol;
		}

		stat.cfs_snd.cbs_pending_first =
		    cfi->cfi_snd.cfi_pending_first;
		stat.cfs_snd.cbs_pending_last =
		    cfi->cfi_snd.cfi_pending_last;
		stat.cfs_snd.cbs_inject_q_len =
		    cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
		stat.cfs_snd.cbs_pass_offset =
		    cfi->cfi_snd.cfi_pass_offset;

		stat.cfs_rcv.cbs_pending_first =
		    cfi->cfi_rcv.cfi_pending_first;
		stat.cfs_rcv.cbs_pending_last =
		    cfi->cfi_rcv.cfi_pending_last;
		stat.cfs_rcv.cbs_inject_q_len =
		    cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
		stat.cfs_rcv.cbs_pass_offset =
		    cfi->cfi_rcv.cfi_pass_offset;

		for (i = 0; i < MAX_CONTENT_FILTER; i++) {
			struct cfil_entry_stat *estat;
			struct cfe_buf *ebuf;
			struct cfe_buf_stat *sbuf;

			entry = &cfi->cfi_entries[i];

			estat = &stat.ces_entries[i];

			estat->ces_len = sizeof(struct cfil_entry_stat);
			estat->ces_filter_id = entry->cfe_filter ?
			    entry->cfe_filter->cf_kcunit : 0;
			estat->ces_flags = entry->cfe_flags;
			estat->ces_necp_control_unit =
			    entry->cfe_necp_control_unit;

			estat->ces_last_event.tv_sec =
			    (int64_t)entry->cfe_last_event.tv_sec;
			estat->ces_last_event.tv_usec =
			    (int64_t)entry->cfe_last_event.tv_usec;

			estat->ces_last_action.tv_sec =
			    (int64_t)entry->cfe_last_action.tv_sec;
			estat->ces_last_action.tv_usec =
			    (int64_t)entry->cfe_last_action.tv_usec;

			ebuf = &entry->cfe_snd;
			sbuf = &estat->ces_snd;
			sbuf->cbs_pending_first =
			    cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
			    cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
			    cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
			    cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;

			ebuf = &entry->cfe_rcv;
			sbuf = &estat->ces_rcv;
			sbuf->cbs_pending_first =
			    cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
			    cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
			    cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
			    cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;
		}
		error = SYSCTL_OUT(req, &stat,
		    sizeof(struct cfil_sock_stat));
		if (error != 0) {
			break;
		}
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);

	if (req->oldptr != USER_ADDR_NULL) {
		cfil_info_show();
	}

	return error;
}
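/*
 * Worked example (added for clarity, not in the original file): the size
 * estimate returned for the "just asking for the size" case above adds
 * one eighth of headroom so the caller's buffer survives sockets attaching
 * between the two sysctl passes. With 64 attached sockets and
 * S == sizeof(struct cfil_sock_stat):
 *
 *    req->oldidx = 64 * S;
 *    req->oldidx += req->oldidx >> 3;    // 64*S + 8*S = 72*S bytes
 */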
/*
 * UDP Socket Support
 */
static void
cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
{
	char local[MAX_IPv6_STR_LEN + 6];
	char remote[MAX_IPv6_STR_LEN + 6];
	const void *addr;

	// No sock or not UDP, no-op
	if (so == NULL || entry == NULL) {
		return;
	}

	local[0] = remote[0] = 0x0;

	switch (entry->cfentry_family) {
	case AF_INET6:
		addr = &entry->cfentry_laddr.addr6;
		inet_ntop(AF_INET6, addr, local, sizeof(local));
		addr = &entry->cfentry_faddr.addr6;
		inet_ntop(AF_INET6, addr, remote, sizeof(local));
		break;
	case AF_INET:
		addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
		inet_ntop(AF_INET, addr, local, sizeof(local));
		addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
		inet_ntop(AF_INET, addr, remote, sizeof(local));
		break;
	default:
		return;
	}

	CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
	    msg,
	    (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
	    ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
}
static void
cfil_inp_log(int level, struct socket *so, const char* msg)
{
	struct inpcb *inp = NULL;
	char local[MAX_IPv6_STR_LEN + 6];
	char remote[MAX_IPv6_STR_LEN + 6];
	const void *addr;

	if (so == NULL) {
		return;
	}

	inp = sotoinpcb(so);
	if (inp == NULL) {
		return;
	}

	local[0] = remote[0] = 0x0;

	if (inp->inp_vflag & INP_IPV6) {
		addr = &inp->in6p_laddr.s6_addr32;
		inet_ntop(AF_INET6, addr, local, sizeof(local));
		addr = &inp->in6p_faddr.s6_addr32;
		inet_ntop(AF_INET6, addr, remote, sizeof(local));
	} else {
		addr = &inp->inp_laddr.s_addr;
		inet_ntop(AF_INET, addr, local, sizeof(local));
		addr = &inp->inp_faddr.s_addr;
		inet_ntop(AF_INET, addr, remote, sizeof(local));
	}

	if (so->so_cfil != NULL) {
		CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
		    msg, IS_UDP(so) ? "UDP" : "TCP",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
		    ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
	} else {
		CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
		    msg, IS_UDP(so) ? "UDP" : "TCP",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
		    ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
	}
}
static void
cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
{
	if (cfil_info == NULL) {
		return;
	}

	if (cfil_info->cfi_hash_entry != NULL) {
		cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
	} else {
		cfil_inp_log(level, cfil_info->cfi_so, msg);
	}
}
int
cfil_db_init(struct socket *so)
{
	int error = 0;
	struct cfil_db *db = NULL;

	CFIL_LOG(LOG_INFO, "");

	db = zalloc(cfil_db_zone);
	if (db == NULL) {
		error = ENOMEM;
		goto done;
	}
	bzero(db, sizeof(struct cfil_db));
	db->cfdb_so = so;
	db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
	if (db->cfdb_hashbase == NULL) {
		zfree(cfil_db_zone, db);
		db = NULL;
		error = ENOMEM;
		goto done;
	}

	so->so_cfil_db = db;

done:
	return error;
}
void
cfil_db_free(struct socket *so)
{
	struct cfil_hash_entry *entry = NULL;
	struct cfil_hash_entry *temp_entry = NULL;
	struct cfilhashhead *cfilhash = NULL;
	struct cfil_db *db = NULL;

	CFIL_LOG(LOG_INFO, "");

	if (so == NULL || so->so_cfil_db == NULL) {
		return;
	}
	db = so->so_cfil_db;

	CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);

	for (int i = 0; i < CFILHASHSIZE; i++) {
		cfilhash = &db->cfdb_hashbase[i];
		LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
			if (entry->cfentry_cfil != NULL) {
				cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
				cfil_info_free(entry->cfentry_cfil);
				OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
				entry->cfentry_cfil = NULL;
			}

			cfil_db_delete_entry(db, entry);
			if (so->so_flags & SOF_CONTENT_FILTER) {
				if (db->cfdb_count == 0) {
					so->so_flags &= ~SOF_CONTENT_FILTER;
				}
				VERIFY(so->so_usecount > 0);
				so->so_usecount--;
			}
		}
	}

	// Make sure all entries are cleaned up!
	VERIFY(db->cfdb_count == 0);
	CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);

	FREE(db->cfdb_hashbase, M_CFIL);
	zfree(cfil_db_zone, db);
	so->so_cfil_db = NULL;
}
static void
fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
{
	struct sockaddr_in *sin = NULL;
	struct sockaddr_in6 *sin6 = NULL;

	if (entry == NULL || addr == NULL) {
		return;
	}

	switch (addr->sa_family) {
	case AF_INET:
		sin = satosin(addr);
		if (sin->sin_len != sizeof(*sin)) {
			return;
		}
		if (isLocal == TRUE) {
			entry->cfentry_lport = sin->sin_port;
			entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
		} else {
			entry->cfentry_fport = sin->sin_port;
			entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
		}
		entry->cfentry_family = AF_INET;
		return;
	case AF_INET6:
		sin6 = satosin6(addr);
		if (sin6->sin6_len != sizeof(*sin6)) {
			return;
		}
		if (isLocal == TRUE) {
			entry->cfentry_lport = sin6->sin6_port;
			entry->cfentry_laddr.addr6 = sin6->sin6_addr;
		} else {
			entry->cfentry_fport = sin6->sin6_port;
			entry->cfentry_faddr.addr6 = sin6->sin6_addr;
		}
		entry->cfentry_family = AF_INET6;
		return;
	default:
		return;
	}
}
static void
fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
{
	if (entry == NULL || inp == NULL) {
		return;
	}

	if (inp->inp_vflag & INP_IPV4) {
		if (isLocal == TRUE) {
			entry->cfentry_lport = inp->inp_lport;
			entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
		} else {
			entry->cfentry_fport = inp->inp_fport;
			entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
		}
		entry->cfentry_family = AF_INET;
	} else if (inp->inp_vflag & INP_IPV6) {
		if (isLocal == TRUE) {
			entry->cfentry_lport = inp->inp_lport;
			entry->cfentry_laddr.addr6 = inp->in6p_laddr;
		} else {
			entry->cfentry_fport = inp->inp_fport;
			entry->cfentry_faddr.addr6 = inp->in6p_faddr;
		}
		entry->cfentry_family = AF_INET6;
	}
}
bool
check_port(struct sockaddr *addr, u_short port)
{
	struct sockaddr_in *sin = NULL;
	struct sockaddr_in6 *sin6 = NULL;

	if (addr == NULL || port == 0) {
		return FALSE;
	}

	switch (addr->sa_family) {
	case AF_INET:
		sin = satosin(addr);
		if (sin->sin_len != sizeof(*sin)) {
			return FALSE;
		}
		if (port == ntohs(sin->sin_port)) {
			return TRUE;
		}
		break;
	case AF_INET6:
		sin6 = satosin6(addr);
		if (sin6->sin6_len != sizeof(*sin6)) {
			return FALSE;
		}
		if (port == ntohs(sin6->sin6_port)) {
			return TRUE;
		}
		break;
	default:
		break;
	}
	return FALSE;
}
struct cfil_hash_entry *
cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
{
	struct cfilhashhead *cfilhash = NULL;
	u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
	struct cfil_hash_entry *nextentry;

	if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
		return NULL;
	}

	flowhash &= db->cfdb_hashmask;
	cfilhash = &db->cfdb_hashbase[flowhash];

	LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
		if (nextentry->cfentry_cfil != NULL &&
		    nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
			CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
			    (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
			cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
			return nextentry;
		}
	}

	CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
	    (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
	return NULL;
}
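/*
 * Worked example (added for clarity, not in the original file): the lookup
 * above relies on the low 32 bits of a UDP sock_id carrying the flow hash
 * that was computed when the entry was added, so the bucket can be derived
 * directly from the id. For example, with cfdb_hashmask == 0xff:
 *
 *    sock_id  = 0x0000000500001234
 *    flowhash = (u_int32_t)(sock_id & 0x0ffffffff);   // 0x00001234
 *    bucket   = flowhash & 0xff;                      // 0x34
 */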
struct cfil_hash_entry *
cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
	struct cfil_hash_entry matchentry;
	struct cfil_hash_entry *nextentry = NULL;
	struct inpcb *inp = sotoinpcb(db->cfdb_so);
	u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
	int inp_hash_element = 0;
	struct cfilhashhead *cfilhash = NULL;

	CFIL_LOG(LOG_INFO, "");

	if (inp == NULL) {
		goto done;
	}

	if (local != NULL) {
		fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
	} else {
		fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
	}
	if (remote != NULL) {
		fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
	} else {
		fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
	}

	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
		hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
	} else {
		hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
		hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
	}

	inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
	    matchentry.cfentry_lport, matchentry.cfentry_fport);
	inp_hash_element &= db->cfdb_hashmask;

	cfilhash = &db->cfdb_hashbase[inp_hash_element];

	LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
		if ((inp->inp_vflag & INP_IPV6) &&
		    nextentry->cfentry_lport == matchentry.cfentry_lport &&
		    nextentry->cfentry_fport == matchentry.cfentry_fport &&
		    IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
		    IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
			cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
			return nextentry;
		} else if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
		    nextentry->cfentry_fport == matchentry.cfentry_fport &&
		    nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
		    nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
			cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
			return nextentry;
		}
	}

done:
	cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
	return NULL;
}
void
cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
{
	if (hash_entry == NULL) {
		return;
	}
	if (db == NULL || db->cfdb_count == 0) {
		return;
	}
	db->cfdb_count--;
	if (db->cfdb_only_entry == hash_entry) {
		db->cfdb_only_entry = NULL;
	}
	LIST_REMOVE(hash_entry, cfentry_link);
	zfree(cfil_hash_entry_zone, hash_entry);
}
struct cfil_hash_entry *
cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
	struct cfil_hash_entry *entry = NULL;
	struct inpcb *inp = sotoinpcb(db->cfdb_so);
	u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
	int inp_hash_element = 0;
	struct cfilhashhead *cfilhash = NULL;

	CFIL_LOG(LOG_INFO, "");

	if (inp == NULL) {
		goto done;
	}

	entry = zalloc(cfil_hash_entry_zone);
	if (entry == NULL) {
		goto done;
	}
	bzero(entry, sizeof(struct cfil_hash_entry));

	if (local != NULL) {
		fill_cfil_hash_entry_from_address(entry, TRUE, local);
	} else {
		fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
	}
	if (remote != NULL) {
		fill_cfil_hash_entry_from_address(entry, FALSE, remote);
	} else {
		fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
	}
	entry->cfentry_lastused = net_uptime();

	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
		hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
	} else {
		hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
		hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
	}
	entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
	    entry->cfentry_lport, entry->cfentry_fport);
	inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;

	cfilhash = &db->cfdb_hashbase[inp_hash_element];

	LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
	db->cfdb_count++;
	db->cfdb_only_entry = entry;
	cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");

done:
	CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
	return entry;
}
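/*
 * Illustrative usage sketch (not part of the original file): when a flow is
 * created for an outgoing sendto() on a UDP socket, the destination
 * sockaddr supplies the foreign address and port while the inpcb supplies
 * the local side, so the caller can pass NULL for local. The variable
 * names below are assumptions.
 *
 *    struct cfil_hash_entry *e;
 *    e = cfil_db_add_entry(so->so_cfil_db, NULL, to);
 *    if (e == NULL) {
 *        // allocation failed; the caller counts it as attach_no_mem
 *    }
 */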
struct cfil_info *
cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
{
	struct cfil_hash_entry *hash_entry = NULL;

	CFIL_LOG(LOG_INFO, "");

	if (db == NULL || id == 0) {
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
		    db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
		return NULL;
	}

	// This is an optimization for connected UDP socket which only has one flow.
	// No need to do the hash lookup.
	if (db->cfdb_count == 1) {
		if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
		    db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
			return db->cfdb_only_entry->cfentry_cfil;
		}
	}

	hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
	return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
}
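/*
 * Note (added for clarity, not in the original file): for a connected UDP
 * socket there is a single flow, so the cfdb_only_entry check above turns
 * the common lookup into a constant-time pointer comparison; only sockets
 * carrying several flows fall back to the hash walk in
 * cfil_db_lookup_entry_with_sockid(). A caller typically just checks for
 * NULL:
 *
 *    struct cfil_info *ci = cfil_db_get_cfil_info(db, sock_id);
 *    if (ci == NULL) {
 *        // sock_id does not name a live flow on this socket
 *    }
 */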
struct cfil_hash_entry *
cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
{
#pragma unused(so, filter_control_unit, outgoing, local, remote)
	struct cfil_hash_entry *hash_entry = NULL;
	errno_t error = 0;

	socket_lock_assert_owned(so);

	// If new socket, allocate cfil db
	if (so->so_cfil_db == NULL) {
		if (cfil_db_init(so) != 0) {
			return NULL;
		}
	}

	// See if flow already exists.
	hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
	if (hash_entry != NULL) {
		return hash_entry;
	}

	hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
	if (hash_entry == NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
		CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
		return NULL;
	}

	if (cfil_info_alloc(so, hash_entry) == NULL ||
	    hash_entry->cfentry_cfil == NULL) {
		cfil_db_delete_entry(so->so_cfil_db, hash_entry);
		CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
		return NULL;
	}

	cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");

	if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
		cfil_info_free(hash_entry->cfentry_cfil);
		cfil_db_delete_entry(so->so_cfil_db, hash_entry);
		CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
		    filter_control_unit);
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
		return NULL;
	}

	CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
	    (uint64_t)VM_KERNEL_ADDRPERM(so),
	    filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);

	so->so_flags |= SOF_CONTENT_FILTER;
	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

	/* Hold a reference on the socket for each flow */
	so->so_usecount++;

	error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, filter_control_unit);
	/* We can recover from flow control or out of memory errors */
	if (error != 0 && error != ENOBUFS && error != ENOMEM) {
		return NULL;
	}

	CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
	return hash_entry;
}
errno_t
cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
    struct sockaddr *local, struct sockaddr *remote,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(outgoing, so, local, remote, data, control, flags)
	errno_t error = 0;
	uint32_t filter_control_unit;
	struct cfil_hash_entry *hash_entry = NULL;
	struct cfil_info *cfil_info = NULL;

	socket_lock_assert_owned(so);

	if (cfil_active_count == 0) {
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
		return error;
	}

	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
	if (filter_control_unit == 0) {
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
		return error;
	}

	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
		OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
		return error;
	}

	hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
	if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
		return EPIPE;
	}
	// Update last used timestamp, this is for flow Idle TO
	hash_entry->cfentry_lastused = net_uptime();
	cfil_info = hash_entry->cfentry_cfil;

	if (cfil_info->cfi_flags & CFIF_DROP) {
		cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
		return EPIPE;
	}
	if (control != NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
	}
	if (data->m_type == MT_OOBDATA) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
	}

	error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);

	return error;
}
/*
 * Go through all UDP flows for specified socket and returns TRUE if
 * any flow is still attached. If need_wait is TRUE, wait on first
 * attached flow.
 */
static int
cfil_filters_udp_attached(struct socket *so, bool need_wait)
{
	struct timespec ts;
	lck_mtx_t *mutex_held;
	struct cfilhashhead *cfilhash = NULL;
	struct cfil_db *db = NULL;
	struct cfil_hash_entry *hash_entry = NULL;
	struct cfil_hash_entry *temp_hash_entry = NULL;
	struct cfil_info *cfil_info = NULL;
	struct cfil_entry *entry = NULL;
	errno_t error = 0;
	int kcunit;
	int attached = 0;
	uint64_t sock_flow_id = 0;

	socket_lock_assert_owned(so);

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
		if (so->so_proto->pr_getlock != NULL) {
			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
		} else {
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		}
		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

		db = so->so_cfil_db;

		for (int i = 0; i < CFILHASHSIZE; i++) {
			cfilhash = &db->cfdb_hashbase[i];

			LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
				if (hash_entry->cfentry_cfil != NULL) {
					cfil_info = hash_entry->cfentry_cfil;
					for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
						entry = &cfil_info->cfi_entries[kcunit - 1];

						/* Are we attached to the filter? */
						if (entry->cfe_filter == NULL) {
							continue;
						}

						if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
							continue;
						}
						if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
							continue;
						}

						attached = 1;

						if (need_wait == TRUE) {
							cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");

							ts.tv_sec = cfil_close_wait_timeout / 1000;
							ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
							    NSEC_PER_USEC * 1000;

							OSIncrementAtomic(&cfil_stats.cfs_close_wait);
							cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
							sock_flow_id = cfil_info->cfi_sock_id;

							error = msleep((caddr_t)cfil_info, mutex_held,
							    PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);

							// Woke up from sleep, validate if cfil_info is still valid
							if (so->so_cfil_db == NULL ||
							    (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
								// cfil_info is not valid, do not continue
								goto done;
							}

							cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;

							cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");

							/*
							 * Force close in case of timeout
							 */
							if (error != 0) {
								OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
								cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
								entry->cfe_flags |= CFEF_CFIL_DETACHED;
							}
						}
						goto done;
					}
				}
			}
		}
	}

done:
	return attached;
}
int32_t
cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
{
	struct socket *so = sb->sb_so;
	struct cfi_buf *cfi_buf;
	uint64_t pending = 0;
	uint64_t total_pending = 0;
	struct cfilhashhead *cfilhash = NULL;
	struct cfil_db *db = NULL;
	struct cfil_hash_entry *hash_entry = NULL;
	struct cfil_hash_entry *temp_hash_entry = NULL;

	socket_lock_assert_owned(so);

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
	    (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
		db = so->so_cfil_db;

		for (int i = 0; i < CFILHASHSIZE; i++) {
			cfilhash = &db->cfdb_hashbase[i];

			LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
				if (hash_entry->cfentry_cfil != NULL) {
					if ((sb->sb_flags & SB_RECV) == 0) {
						cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
					} else {
						cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
					}

					pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
					/*
					 * If we are limited by the "chars of mbufs used" roughly
					 * adjust so we won't overcommit
					 */
					if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
						pending = cfi_buf->cfi_pending_mbcnt;
					}

					total_pending += pending;
				}
			}
		}

		VERIFY(total_pending < INT32_MAX);
		CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
		    (uint64_t)VM_KERNEL_ADDRPERM(so),
		    total_pending, check_thread);
	}

	return (int32_t)(total_pending);
}
int
cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
{
	struct cfil_info *cfil_info = NULL;
	struct cfilhashhead *cfilhash = NULL;
	struct cfil_db *db = NULL;
	struct cfil_hash_entry *hash_entry = NULL;
	struct cfil_hash_entry *temp_hash_entry = NULL;
	errno_t error = 0;
	int done_count = 0;
	int kcunit;

	socket_lock_assert_owned(so);

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
		db = so->so_cfil_db;

		for (int i = 0; i < CFILHASHSIZE; i++) {
			cfilhash = &db->cfdb_hashbase[i];

			LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
				if (hash_entry->cfentry_cfil != NULL) {
					cfil_info = hash_entry->cfentry_cfil;

					// This flow is marked as DROP
					if (cfil_info->cfi_flags & drop_flag) {
						done_count++;
						continue;
					}

					// This flow has been shut already, skip
					if (cfil_info->cfi_flags & shut_flag) {
						continue;
					}
					// Mark flow as shut
					cfil_info->cfi_flags |= shut_flag;
					done_count++;

					for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
						/* Disconnect incoming side */
						if (how != SHUT_WR) {
							error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
						}
						/* Disconnect outgoing side */
						if (how != SHUT_RD) {
							error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
						}
					}
				}
			}
		}
	}

	if (done_count == 0) {
		error = ENOTCONN;
	}
	return error;
}
int
cfil_sock_udp_shutdown(struct socket *so, int *how)
{
	int error = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
		goto done;
	}

	socket_lock_assert_owned(so);

	CFIL_LOG(LOG_INFO, "so %llx how %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

	/*
	 * Check the state of the socket before the content filter
	 */
	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
		/* read already shut down */
		error = ENOTCONN;
		goto done;
	}
	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
		/* write already shut down */
		error = ENOTCONN;
		goto done;
	}

	/*
	 * shutdown read: SHUT_RD or SHUT_RDWR
	 */
	if (*how != SHUT_WR) {
		error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
		if (error != 0) {
			goto done;
		}
	}
	/*
	 * shutdown write: SHUT_WR or SHUT_RDWR
	 */
	if (*how != SHUT_RD) {
		error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
		if (error != 0) {
			goto done;
		}

		/*
		 * When outgoing data is pending, we delay the shutdown at the
		 * protocol level until the content filters give the final
		 * verdict on the pending data.
		 */
		if (cfil_sock_data_pending(&so->so_snd) != 0) {
			/*
			 * When shutting down the read and write sides at once
			 * we can proceed to the final shutdown of the read
			 * side. Otherwise, we just return.
			 */
			if (*how == SHUT_WR) {
				error = EJUSTRETURN;
			} else if (*how == SHUT_RDWR) {
				*how = SHUT_RD;
			}
		}
	}
done:
	return error;
}
void
cfil_sock_udp_close_wait(struct socket *so)
{
	socket_lock_assert_owned(so);

	while (cfil_filters_udp_attached(so, FALSE)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);

		/*
		 * Make sure we need to wait after the filters are notified
		 * of the disconnection
		 */
		if (cfil_filters_udp_attached(so, TRUE) == 0) {
			break;
		}
	}
}
void
cfil_sock_udp_is_closed(struct socket *so)
{
	struct cfil_info *cfil_info = NULL;
	struct cfilhashhead *cfilhash = NULL;
	struct cfil_db *db = NULL;
	struct cfil_hash_entry *hash_entry = NULL;
	struct cfil_hash_entry *temp_hash_entry = NULL;
	errno_t error = 0;
	int kcunit;

	socket_lock_assert_owned(so);

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
		db = so->so_cfil_db;

		for (int i = 0; i < CFILHASHSIZE; i++) {
			cfilhash = &db->cfdb_hashbase[i];

			LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
				if (hash_entry->cfentry_cfil != NULL) {
					cfil_info = hash_entry->cfentry_cfil;

					for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
						/* Let the filters know of the closing */
						error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
					}

					/* Last chance to push passed data out */
					error = cfil_acquire_sockbuf(so, cfil_info, 1);
					if (error == 0) {
						cfil_service_inject_queue(so, cfil_info, 1);
					}
					cfil_release_sockbuf(so, 1);

					cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;

					/* Pending data needs to go */
					cfil_flush_queues(so, cfil_info);

					CFIL_INFO_VERIFY(cfil_info);
				}
			}
		}
	}
}
void
cfil_sock_udp_buf_update(struct sockbuf *sb)
{
	struct cfil_info *cfil_info = NULL;
	struct cfilhashhead *cfilhash = NULL;
	struct cfil_db *db = NULL;
	struct cfil_hash_entry *hash_entry = NULL;
	struct cfil_hash_entry *temp_hash_entry = NULL;
	errno_t error = 0;
	int outgoing;
	struct socket *so = sb->sb_so;

	socket_lock_assert_owned(so);

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
		db = so->so_cfil_db;

		for (int i = 0; i < CFILHASHSIZE; i++) {
			cfilhash = &db->cfdb_hashbase[i];

			LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
				if (hash_entry->cfentry_cfil != NULL) {
					cfil_info = hash_entry->cfentry_cfil;

					if ((sb->sb_flags & SB_RECV) == 0) {
						if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
							return;
						}
						outgoing = 1;
						OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
					} else {
						if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
							return;
						}
						outgoing = 0;
						OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
					}

					CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
					    (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

					error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
					if (error == 0) {
						cfil_service_inject_queue(so, cfil_info, outgoing);
					}
					cfil_release_sockbuf(so, outgoing);
				}
			}
		}
	}
}
void
cfil_filter_show(u_int32_t kcunit)
{
	struct content_filter *cfc = NULL;
	struct cfil_entry *entry;
	int count = 0;

	if (content_filters == NULL) {
		return;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		return;
	}

	cfil_rw_lock_shared(&cfil_lck_rw);

	if (content_filters[kcunit - 1] == NULL) {
		cfil_rw_unlock_shared(&cfil_lck_rw);
		return;
	}
	cfc = content_filters[kcunit - 1];

	CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
	    kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
	if (cfc->cf_flags & CFF_DETACHING) {
		CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
	}
	if (cfc->cf_flags & CFF_ACTIVE) {
		CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
	}
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
	}

	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
		if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
			struct cfil_info *cfil_info = entry->cfe_cfil_info;

			count++;

			if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
				cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
			} else {
				cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
			}
		}
	}

	CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);

	cfil_rw_unlock_shared(&cfil_lck_rw);
}
void
cfil_info_show(void)
{
	struct cfil_info *cfil_info;
	int count = 0;

	cfil_rw_lock_shared(&cfil_lck_rw);

	CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);

	TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
		count++;

		cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");

		if (cfil_info->cfi_flags & CFIF_DROP) {
			CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
		}
		if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
			CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
		}
		if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
			CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
		}
		if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
			CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
		}
		if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
			CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
		}
		if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
			CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
		}
		if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
			CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
		}
	}

	CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);

	cfil_rw_unlock_shared(&cfil_lck_rw);
}
bool
cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
{
	if (cfil_info && cfil_info->cfi_hash_entry &&
	    (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
		return true;
	}
	return false;
}
bool
cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
{
	struct cfil_entry *entry;
	struct timeval current_tv;
	struct timeval diff_time;

	if (cfil_info == NULL) {
		return false;
	}

	/*
	 * If we have queued up more data than passed offset and we haven't received
	 * an action from user space for a while (the user space filter might have crashed),
	 * return action timed out.
	 */
	if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
	    cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
		microuptime(&current_tv);

		for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &cfil_info->cfi_entries[kcunit - 1];

			if (entry->cfe_filter == NULL) {
				continue;
			}

			if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
			    cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
				// haven't gotten an action from this filter, check timeout
				timersub(&current_tv, &entry->cfe_last_action, &diff_time);
				if (diff_time.tv_sec >= timeout) {
					cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
					return true;
				}
			}
		}
	}
	return false;
}
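/*
 * Worked example (added for clarity, not in the original file): the action
 * timeout above only fires while data is queued beyond what a filter has
 * already passed. Assuming timeout == 10 seconds:
 *
 *    cfi_snd.cfi_pending_last = 8192, cfe_snd.cfe_pass_offset = 4096
 *        -> 4096 bytes are still awaiting a verdict
 *    timersub(&current_tv, &entry->cfe_last_action, &diff_time);
 *    diff_time.tv_sec = 12  ->  12 >= 10, report the flow as timed out
 */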
bool
cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
{
	if (cfil_info == NULL) {
		return false;
	}

	/*
	 * Clean up flow if it exceeded queue thresholds
	 */
	if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
	    cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
		CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
		    cfil_udp_gc_mbuf_num_max,
		    cfil_udp_gc_mbuf_cnt_max,
		    cfil_info->cfi_snd.cfi_tail_drop_cnt,
		    cfil_info->cfi_rcv.cfi_tail_drop_cnt);
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
		return true;
	}

	return false;
}
static void
cfil_udp_gc_thread_sleep(bool forever)
{
	if (forever) {
		(void) assert_wait((event_t) &cfil_sock_udp_attached_count,
		    THREAD_INTERRUPTIBLE);
	} else {
		uint64_t deadline = 0;
		nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
		clock_absolutetime_interval_to_deadline(deadline, &deadline);

		(void) assert_wait_deadline(&cfil_sock_udp_attached_count,
		    THREAD_INTERRUPTIBLE, deadline);
	}
}
static void
cfil_udp_gc_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)

	ASSERT(cfil_udp_gc_thread == current_thread());
	thread_set_thread_name(current_thread(), "CFIL_UPD_GC");

	// Kick off gc shortly
	cfil_udp_gc_thread_sleep(false);
	thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
	/* NOTREACHED */
}
static void
cfil_info_udp_expire(void *v, wait_result_t w)
{
#pragma unused(v, w)

	static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
	static uint32_t expired_count = 0;

	struct cfil_info *cfil_info;
	struct cfil_hash_entry *hash_entry;
	struct cfil_db *db;
	struct socket *so;
	u_int32_t current_time = 0;

	current_time = net_uptime();

	// Get all expired UDP flow ids
	cfil_rw_lock_shared(&cfil_lck_rw);

	if (cfil_sock_udp_attached_count == 0) {
		cfil_rw_unlock_shared(&cfil_lck_rw);
		goto go_sleep;
	}

	TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
		if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
			break;
		}

		if (IS_UDP(cfil_info->cfi_so)) {
			if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
			    cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
			    cfil_info_buffer_threshold_exceeded(cfil_info)) {
				expired_array[expired_count] = cfil_info->cfi_sock_id;
				expired_count++;
			}
		}
	}
	cfil_rw_unlock_shared(&cfil_lck_rw);

	if (expired_count == 0) {
		goto go_sleep;
	}

	for (uint32_t i = 0; i < expired_count; i++) {
		// Search for socket (UDP only and lock so)
		so = cfil_socket_from_sock_id(expired_array[i], true);
		if (so == NULL) {
			continue;
		}

		cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
		if (cfil_info == NULL) {
			goto unlock;
		}

		db = so->so_cfil_db;
		hash_entry = cfil_info->cfi_hash_entry;

		if (db == NULL || hash_entry == NULL) {
			goto unlock;
		}

#if GC_DEBUG || LIFECYCLE_DEBUG
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
#endif

		cfil_db_delete_entry(db, hash_entry);
		cfil_info_free(cfil_info);
		OSIncrementAtomic(&cfil_stats.cfs_sock_detached);

		if (so->so_flags & SOF_CONTENT_FILTER) {
			if (db->cfdb_count == 0) {
				so->so_flags &= ~SOF_CONTENT_FILTER;
			}
			VERIFY(so->so_usecount > 0);
			so->so_usecount--;
		}

unlock:
		socket_unlock(so, 1);
	}

	CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
	expired_count = 0;

go_sleep:

	// Sleep forever (until waken up) if no more UDP flow to clean
	cfil_rw_lock_shared(&cfil_lck_rw);
	cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
	cfil_rw_unlock_shared(&cfil_lck_rw);
	thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
	/* NOTREACHED */
}
static void
cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
{
	struct m_tag *tag = NULL;
	struct cfil_tag *ctag = NULL;
	struct cfil_hash_entry *hash_entry = NULL;

	if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
	    cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
		return;
	}

	/* Allocate a tag */
	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
	    sizeof(struct cfil_tag), M_DONTWAIT, m);

	if (tag) {
		ctag = (struct cfil_tag *)(tag + 1);
		ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
		ctag->cfil_so_options = cfil_info->cfi_so->so_options;

		hash_entry = cfil_info->cfi_hash_entry;
		if (hash_entry->cfentry_family == AF_INET6) {
			fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
			    &hash_entry->cfentry_faddr.addr6,
			    hash_entry->cfentry_fport);
		} else if (hash_entry->cfentry_family == AF_INET) {
			fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
			    hash_entry->cfentry_faddr.addr46.ia46_addr4,
			    hash_entry->cfentry_fport);
		}
		m_tag_prepend(m, tag);
	}
	return;
}
struct m_tag *
cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
    struct sockaddr **faddr)
{
	struct m_tag *tag = NULL;
	struct cfil_tag *ctag = NULL;

	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
	if (tag) {
		ctag = (struct cfil_tag *)(tag + 1);
		if (state_change_cnt) {
			*state_change_cnt = ctag->cfil_so_state_change_cnt;
		}
		if (options) {
			*options = ctag->cfil_so_options;
		}
		if (faddr) {
			*faddr = (struct sockaddr *) &ctag->cfil_faddr;
		}

		/*
		 * Unlink tag and hand it over to caller.
		 * Note that caller will be responsible to free it.
		 */
		m_tag_unlink(m, tag);
		return tag;
	}
	return NULL;
}